From f0a85cb9c7149bce49b8fa01bbcfa42181ebdae9 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 26 Oct 2023 11:59:45 +0200 Subject: [PATCH 001/171] ADD: distrib launcher mockup --- experimental/distrib_launcher.py | 274 +++++++++++++++++++++++++++++++ experimental/elastic_agent.py | 52 ++++++ 2 files changed, 326 insertions(+) create mode 100644 experimental/distrib_launcher.py create mode 100644 experimental/elastic_agent.py diff --git a/experimental/distrib_launcher.py b/experimental/distrib_launcher.py new file mode 100644 index 00000000..95186955 --- /dev/null +++ b/experimental/distrib_launcher.py @@ -0,0 +1,274 @@ +import os +import abc +import datetime +from typing import Any + +import torch +from torch import nn +from torch.nn.parallel import DistributedDataParallel +from torch import optim +from torch.utils.data import DataLoader, Dataset, DistributedSampler +from torch.distributed.elastic.agent.server.local_elastic_agent import ( + LocalElasticAgent +) +from torch.distributed.elastic.agent.server import WorkerSpec +from torch.distributed.elastic.rendezvous.dynamic_rendezvous import ( + DynamicRendezvousHandler +) +from torch.distributed.elastic.rendezvous.c10d_rendezvous_backend import ( + C10dRendezvousBackend +) +from torch.distributed import TCPStore, init_process_group +from torch.distributed.elastic.multiprocessing import Std + +from lightning.pytorch.plugins.environments import ( + ClusterEnvironment, SLURMEnvironment, + TorchElasticEnvironment, LightningEnvironment +) + + +class LocalEnvironment(LightningEnvironment): + ... + + +class Strategy(abc.ABC): + cluster: ClusterEnvironment + + @property + @abc.abstractmethod + def device(self) -> int: + """Device used by this worker""" + + @abc.abstractmethod + def setup(self) -> None: + """Setup the strategy once in a distributed environment.""" + + @abc.abstractmethod + def teardown(self) -> None: + """Frees the distributed strategy resources.""" + + @abc.abstractmethod + def is_main_worker(self) -> bool: + """Returns True if called from the main process of the pool.""" + + @abc.abstractmethod + def _is_env_setup(self) -> bool: + """Checks whether the distributed environment is correctly setup.""" + + @abc.abstractmethod + def distribute_model(self, model: Any) -> Any: + """Distributes a neural network.""" + + @abc.abstractmethod + def distribute_optimizer(self, optimizer: Any) -> Any: + """Distributes an optimizer.""" + + @abc.abstractmethod + def distribute_dataloader(self, dataloader: Any) -> Any: + """Distributes a dataloader.""" + + +class DDPStrategy(Strategy): + def __init__( + self, + cluster: ClusterEnvironment, + backend: str = 'nccl' + ) -> None: + super().__init__() + self.cluster = cluster + self.backend = backend + + @property + def device(self) -> int: + """Returns the local rank. Assumes one worker per GPU.""" + return self.cluster.local_rank() + + def setup(self) -> None: + """Setup the strategy in a distributed context.""" + if not self._is_env_setup(): + raise RuntimeError( + "Distributed environment not setup correctly. 
Use a launcher.") + + # Initializes the default distributed process group + # and the distributed package + init_process_group(backend=self.backend) + + def teardown(self) -> None: + torch.distributed.barrier() + torch.distributed.destroy_process_group() + + def _is_env_setup(self) -> bool: + if (os.environ.get('RANK') is not None): + # and torch.distributed.is_available()): + return True + return False + + def is_main_worker(self) -> bool: + return self.cluster.global_rank() == 0 + + def distribute_model(self, model: nn.Module) -> nn.Module: + model = model.to(f"cuda:{self.device}") + return DistributedDataParallel( + model, + device_ids=[self.device], + output_device=self.device + ) + + def distribute_optimizer( + self, + optimizer: optim.Optimizer + ) -> optim.Optimizer: + return optimizer + + def distribute_dataloader( + self, + dataloader: DataLoader, + shuffle: bool = True + ) -> DataLoader: + """Makes a torch DataLoader distributed by substituting its sampler.""" + sampler = DistributedSampler( + dataloader.dataset, + num_replicas=self.cluster.world_size(), + rank=self.cluster.global_rank(), + shuffle=shuffle + ) + # Recreate dataloader, with updated sampler + return DataLoader( + dataloader.dataset, + batch_size=dataloader.batch_size, + sampler=sampler, + num_workers=dataloader.num_workers, + collate_fn=dataloader.collate_fn, + pin_memory=dataloader.pin_memory, + drop_last=dataloader.drop_last, + timeout=dataloader.timeout, + worker_init_fn=dataloader.worker_init_fn, + multiprocessing_context=dataloader.multiprocessing_context, + generator=dataloader.generator, + prefetch_factor=dataloader.prefetch_factor, + persistent_workers=dataloader.persistent_workers, + pin_memory_device=dataloader.pin_memory_device + ) + + +# ################## USER CODE ################## # + + +class UniformRndDataset(Dataset): + def __init__(self, x_size: int, y_size: int, len: int = 100): + super().__init__() + self.x_size = x_size + self.y_size = y_size + self.len = len + + def __len__(self): + return self.len + + def __getitem__(self, index): + return torch.rand(self.x_size), torch.rand(self.y_size) + + +def trainer_entrypoint_fn(a, strategy: Strategy): + strategy.setup() + print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} {os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") + + # Local model + model = nn.Linear(3, 4) + optim = torch.optim.Adam(model.parameters(), lr=1e-3) + loss_fn = nn.MSELoss() + # Distributed model + model: nn.Module = strategy.distribute_model(model) + optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) + + # Data + train_set = UniformRndDataset(x_size=3, y_size=4) + train_loader = DataLoader(train_set, batch_size=10, num_workers=1) + # Distributed dataloader + train_loader: DataLoader = strategy.distribute_dataloader(train_loader) + + for epoch in range(2): + for (x, y) in train_loader: + x = x.to(strategy.device) + y = y.to(strategy.device) + + optim.zero_grad() + y_pred = model(x) + loss = loss_fn(y_pred, y) + loss.backward() + optim.step() + + if strategy.is_main_worker(): + print(f"Loss [epoch={epoch}]: {loss.item()}") + + strategy.teardown() + return 123 + + +STRATEGY = 'ddp' + +RUN_ID = "my_run_id" +MIN_NODES = 1 +MAX_NODES = 1 +NPROC_PRE_NODE = 4 +MAX_RESTARTS = 2 + +if __name__ == "__main__": + # STRATEGY BUILDER + # Instantiate ClusterEnv + if SLURMEnvironment.detect(): + cluster = SLURMEnvironment() + elif TorchElasticEnvironment.detect(): + cluster = TorchElasticEnvironment() + elif LocalEnvironment.detect(): + cluster = 
LocalEnvironment() + else: + raise NotImplementedError("Unrecognized cluster env") + + print(cluster) + + # Instantiate Launcher + + # Torch Elastic launcher + store = TCPStore(host_name="localhost", port=29400, + world_size=NPROC_PRE_NODE, is_master=True, + timeout=datetime.timedelta(seconds=3)) + backend = C10dRendezvousBackend(store, RUN_ID) + rdzv_handler = DynamicRendezvousHandler.from_backend( + run_id=RUN_ID, + store=store, + backend=backend, + min_nodes=MIN_NODES, + max_nodes=MAX_NODES + ) + + # Instantiate Strategy + if STRATEGY == 'ddp' and torch.cuda.is_available() and torch.cuda.device_count() > 1: + strategy = DDPStrategy(cluster=cluster, backend='nccl') + else: + raise NotImplementedError + + # CLIENT CODE + # Launch training from launcher + spec = WorkerSpec( + role="trainer", + local_world_size=NPROC_PRE_NODE, + entrypoint=trainer_entrypoint_fn, + args=("foobar", strategy), + rdzv_handler=rdzv_handler, + max_restarts=MAX_RESTARTS, + # monitor_interval=args.monitor_interval, + # # redirects={0: Std.ALL} # do no print, but save to file. linked to Agent's log_dir + redirects=Std.ALL, # suppress all printing to console + # tee={0: Std.ALL} reactivates print to console + save to log file for RANK 0 + tee={0: Std.ALL} + ) + + agent = LocalElasticAgent(spec, start_method="spawn", log_dir='logs') + # try: + run_result = agent.run() + if run_result.is_failed(): + print(f"worker 0 failed with: {run_result.failures[0]}") + else: + print(f"worker 0 return value is: {run_result.return_values[0]}") + # except Exception ex: + # # handle exception diff --git a/experimental/elastic_agent.py b/experimental/elastic_agent.py new file mode 100644 index 00000000..4498ce1e --- /dev/null +++ b/experimental/elastic_agent.py @@ -0,0 +1,52 @@ +import os +import datetime +from torch.distributed.elastic.agent.server.local_elastic_agent import LocalElasticAgent +from torch.distributed.elastic.agent.server import WorkerSpec +from torch.distributed.elastic.rendezvous.dynamic_rendezvous import DynamicRendezvousHandler +from torch.distributed.elastic.rendezvous.c10d_rendezvous_backend import C10dRendezvousBackend +from torch.distributed import TCPStore +from torch.distributed.elastic.multiprocessing import Std + +nproc_per_node = 4 +max_restarts = 2 + + +def trainer_entrypoint_fn(a): + print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} {os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") + return 123 + + +if __name__ == "__main__": + store = TCPStore(host_name="localhost", port=29400, + world_size=nproc_per_node, is_master=True, timeout=datetime.timedelta(seconds=3)) + backend = C10dRendezvousBackend(store, "my_run_id") + rdzv_handler = DynamicRendezvousHandler.from_backend( + run_id="my_run_id", + store=store, + backend=backend, + min_nodes=1, + max_nodes=1 + ) + spec = WorkerSpec( + role="trainer", + local_world_size=nproc_per_node, + entrypoint=trainer_entrypoint_fn, + args=("foobar",), + rdzv_handler=rdzv_handler, + max_restarts=max_restarts, + # monitor_interval=args.monitor_interval, + # # redirects={0: Std.ALL} # do no print, but save to file. 
linked to Agent's log_dir + redirects=Std.ALL, # suppress all printing to console + # tee={0: Std.ALL} reactivates print to console + save to log file for RANK 0 + tee={0: Std.ALL} + ) + + agent = LocalElasticAgent(spec, start_method="spawn", log_dir='logs') + # try: + run_result = agent.run() + if run_result.is_failed(): + print(f"worker 0 failed with: {run_result.failures[0]}") + else: + print(f"worker 0 return value is: {run_result.return_values[0]}") + # except Exception ex: + # # handle exception From dadf0d3700517bb04b342725898aa5397cbaad59 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 26 Oct 2023 14:39:44 +0200 Subject: [PATCH 002/171] REFACTOR: cluster env, strategy and launcher --- experimental/cluster.py | 37 ++++++ experimental/distrib_launcher.py | 200 +++---------------------------- experimental/elastic_agent.py | 52 -------- experimental/launcher.py | 141 ++++++++++++++++++++++ experimental/strategy.py | 145 ++++++++++++++++++++++ 5 files changed, 340 insertions(+), 235 deletions(-) create mode 100644 experimental/cluster.py delete mode 100644 experimental/elastic_agent.py create mode 100644 experimental/launcher.py create mode 100644 experimental/strategy.py diff --git a/experimental/cluster.py b/experimental/cluster.py new file mode 100644 index 00000000..608bcf0b --- /dev/null +++ b/experimental/cluster.py @@ -0,0 +1,37 @@ +import abc +import os + +from lightning.pytorch.plugins.environments import ( + ClusterEnvironment as LightningClusterEnvironment, + SLURMEnvironment as LightningSLURMEnvironment, + TorchElasticEnvironment as LightningTorchElasticEnvironment, + LightningEnvironment +) + + +class ClusterEnvironment(LightningClusterEnvironment): + @abc.abstractmethod + def num_nodes(self) -> int: + """Returns the number of nodes allocated for the current job.""" + + +class SLURMEnvironment(LightningSLURMEnvironment): + def num_nodes(self) -> int: + """Returns the number of nodes allocated for the current job.""" + if os.environ.get('SLURM_JOB_NUM_NODES'): + return int(os.environ['SLURM_JOB_NUM_NODES']) + return int(os.environ['SLURM_NNODES']) + + +class TorchElasticEnvironment(LightningTorchElasticEnvironment): + def num_nodes(self) -> int: + """Returns the number of nodes allocated for the current job.""" + gwsize = int(os.environ['WORLD_SIZE']) + lwsize = int(os.environ['LOCAL_WORLD_SIZE']) + return gwsize//lwsize + + +class LocalEnvironment(LightningEnvironment): + def num_nodes(self) -> int: + """Returns the number of nodes allocated for the current job.""" + return 1 diff --git a/experimental/distrib_launcher.py b/experimental/distrib_launcher.py index 95186955..60d7d9e6 100644 --- a/experimental/distrib_launcher.py +++ b/experimental/distrib_launcher.py @@ -1,159 +1,17 @@ import os -import abc -import datetime -from typing import Any import torch from torch import nn -from torch.nn.parallel import DistributedDataParallel -from torch import optim -from torch.utils.data import DataLoader, Dataset, DistributedSampler -from torch.distributed.elastic.agent.server.local_elastic_agent import ( - LocalElasticAgent -) -from torch.distributed.elastic.agent.server import WorkerSpec -from torch.distributed.elastic.rendezvous.dynamic_rendezvous import ( - DynamicRendezvousHandler -) -from torch.distributed.elastic.rendezvous.c10d_rendezvous_backend import ( - C10dRendezvousBackend -) -from torch.distributed import TCPStore, init_process_group -from torch.distributed.elastic.multiprocessing import Std +from torch.utils.data import DataLoader, Dataset -from 
lightning.pytorch.plugins.environments import ( - ClusterEnvironment, SLURMEnvironment, - TorchElasticEnvironment, LightningEnvironment +from strategy import Strategy, DDPStrategy +from launcher import DummyTorchElasticLauncher +from cluster import ( + LocalEnvironment, SLURMEnvironment, + TorchElasticEnvironment ) -class LocalEnvironment(LightningEnvironment): - ... - - -class Strategy(abc.ABC): - cluster: ClusterEnvironment - - @property - @abc.abstractmethod - def device(self) -> int: - """Device used by this worker""" - - @abc.abstractmethod - def setup(self) -> None: - """Setup the strategy once in a distributed environment.""" - - @abc.abstractmethod - def teardown(self) -> None: - """Frees the distributed strategy resources.""" - - @abc.abstractmethod - def is_main_worker(self) -> bool: - """Returns True if called from the main process of the pool.""" - - @abc.abstractmethod - def _is_env_setup(self) -> bool: - """Checks whether the distributed environment is correctly setup.""" - - @abc.abstractmethod - def distribute_model(self, model: Any) -> Any: - """Distributes a neural network.""" - - @abc.abstractmethod - def distribute_optimizer(self, optimizer: Any) -> Any: - """Distributes an optimizer.""" - - @abc.abstractmethod - def distribute_dataloader(self, dataloader: Any) -> Any: - """Distributes a dataloader.""" - - -class DDPStrategy(Strategy): - def __init__( - self, - cluster: ClusterEnvironment, - backend: str = 'nccl' - ) -> None: - super().__init__() - self.cluster = cluster - self.backend = backend - - @property - def device(self) -> int: - """Returns the local rank. Assumes one worker per GPU.""" - return self.cluster.local_rank() - - def setup(self) -> None: - """Setup the strategy in a distributed context.""" - if not self._is_env_setup(): - raise RuntimeError( - "Distributed environment not setup correctly. 
Use a launcher.") - - # Initializes the default distributed process group - # and the distributed package - init_process_group(backend=self.backend) - - def teardown(self) -> None: - torch.distributed.barrier() - torch.distributed.destroy_process_group() - - def _is_env_setup(self) -> bool: - if (os.environ.get('RANK') is not None): - # and torch.distributed.is_available()): - return True - return False - - def is_main_worker(self) -> bool: - return self.cluster.global_rank() == 0 - - def distribute_model(self, model: nn.Module) -> nn.Module: - model = model.to(f"cuda:{self.device}") - return DistributedDataParallel( - model, - device_ids=[self.device], - output_device=self.device - ) - - def distribute_optimizer( - self, - optimizer: optim.Optimizer - ) -> optim.Optimizer: - return optimizer - - def distribute_dataloader( - self, - dataloader: DataLoader, - shuffle: bool = True - ) -> DataLoader: - """Makes a torch DataLoader distributed by substituting its sampler.""" - sampler = DistributedSampler( - dataloader.dataset, - num_replicas=self.cluster.world_size(), - rank=self.cluster.global_rank(), - shuffle=shuffle - ) - # Recreate dataloader, with updated sampler - return DataLoader( - dataloader.dataset, - batch_size=dataloader.batch_size, - sampler=sampler, - num_workers=dataloader.num_workers, - collate_fn=dataloader.collate_fn, - pin_memory=dataloader.pin_memory, - drop_last=dataloader.drop_last, - timeout=dataloader.timeout, - worker_init_fn=dataloader.worker_init_fn, - multiprocessing_context=dataloader.multiprocessing_context, - generator=dataloader.generator, - prefetch_factor=dataloader.prefetch_factor, - persistent_workers=dataloader.persistent_workers, - pin_memory_device=dataloader.pin_memory_device - ) - - -# ################## USER CODE ################## # - - class UniformRndDataset(Dataset): def __init__(self, x_size: int, y_size: int, len: int = 100): super().__init__() @@ -169,8 +27,10 @@ def __getitem__(self, index): def trainer_entrypoint_fn(a, strategy: Strategy): + """Dummy training function.""" strategy.setup() - print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} {os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") + print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " + f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") # Local model model = nn.Linear(3, 4) @@ -214,6 +74,7 @@ def trainer_entrypoint_fn(a, strategy: Strategy): if __name__ == "__main__": # STRATEGY BUILDER + # Instantiate ClusterEnv if SLURMEnvironment.detect(): cluster = SLURMEnvironment() @@ -227,48 +88,21 @@ def trainer_entrypoint_fn(a, strategy: Strategy): print(cluster) # Instantiate Launcher - - # Torch Elastic launcher - store = TCPStore(host_name="localhost", port=29400, - world_size=NPROC_PRE_NODE, is_master=True, - timeout=datetime.timedelta(seconds=3)) - backend = C10dRendezvousBackend(store, RUN_ID) - rdzv_handler = DynamicRendezvousHandler.from_backend( - run_id=RUN_ID, - store=store, - backend=backend, + launcher = DummyTorchElasticLauncher( + cluster=cluster, + n_workers_per_node=NPROC_PRE_NODE, min_nodes=MIN_NODES, max_nodes=MAX_NODES ) # Instantiate Strategy - if STRATEGY == 'ddp' and torch.cuda.is_available() and torch.cuda.device_count() > 1: + if (STRATEGY == 'ddp' + and torch.cuda.is_available() + and torch.cuda.device_count() > 1): strategy = DDPStrategy(cluster=cluster, backend='nccl') else: raise NotImplementedError # CLIENT CODE # Launch training from launcher - spec = WorkerSpec( - role="trainer", - 
local_world_size=NPROC_PRE_NODE, - entrypoint=trainer_entrypoint_fn, - args=("foobar", strategy), - rdzv_handler=rdzv_handler, - max_restarts=MAX_RESTARTS, - # monitor_interval=args.monitor_interval, - # # redirects={0: Std.ALL} # do no print, but save to file. linked to Agent's log_dir - redirects=Std.ALL, # suppress all printing to console - # tee={0: Std.ALL} reactivates print to console + save to log file for RANK 0 - tee={0: Std.ALL} - ) - - agent = LocalElasticAgent(spec, start_method="spawn", log_dir='logs') - # try: - run_result = agent.run() - if run_result.is_failed(): - print(f"worker 0 failed with: {run_result.failures[0]}") - else: - print(f"worker 0 return value is: {run_result.return_values[0]}") - # except Exception ex: - # # handle exception + launcher.run(func=trainer_entrypoint_fn, args=("foobar", strategy)) diff --git a/experimental/elastic_agent.py b/experimental/elastic_agent.py deleted file mode 100644 index 4498ce1e..00000000 --- a/experimental/elastic_agent.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -import datetime -from torch.distributed.elastic.agent.server.local_elastic_agent import LocalElasticAgent -from torch.distributed.elastic.agent.server import WorkerSpec -from torch.distributed.elastic.rendezvous.dynamic_rendezvous import DynamicRendezvousHandler -from torch.distributed.elastic.rendezvous.c10d_rendezvous_backend import C10dRendezvousBackend -from torch.distributed import TCPStore -from torch.distributed.elastic.multiprocessing import Std - -nproc_per_node = 4 -max_restarts = 2 - - -def trainer_entrypoint_fn(a): - print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} {os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") - return 123 - - -if __name__ == "__main__": - store = TCPStore(host_name="localhost", port=29400, - world_size=nproc_per_node, is_master=True, timeout=datetime.timedelta(seconds=3)) - backend = C10dRendezvousBackend(store, "my_run_id") - rdzv_handler = DynamicRendezvousHandler.from_backend( - run_id="my_run_id", - store=store, - backend=backend, - min_nodes=1, - max_nodes=1 - ) - spec = WorkerSpec( - role="trainer", - local_world_size=nproc_per_node, - entrypoint=trainer_entrypoint_fn, - args=("foobar",), - rdzv_handler=rdzv_handler, - max_restarts=max_restarts, - # monitor_interval=args.monitor_interval, - # # redirects={0: Std.ALL} # do no print, but save to file. 
linked to Agent's log_dir - redirects=Std.ALL, # suppress all printing to console - # tee={0: Std.ALL} reactivates print to console + save to log file for RANK 0 - tee={0: Std.ALL} - ) - - agent = LocalElasticAgent(spec, start_method="spawn", log_dir='logs') - # try: - run_result = agent.run() - if run_result.is_failed(): - print(f"worker 0 failed with: {run_result.failures[0]}") - else: - print(f"worker 0 return value is: {run_result.return_values[0]}") - # except Exception ex: - # # handle exception diff --git a/experimental/launcher.py b/experimental/launcher.py new file mode 100644 index 00000000..1577b7d2 --- /dev/null +++ b/experimental/launcher.py @@ -0,0 +1,141 @@ +import datetime +import abc +import time +from typing import Callable, Tuple, Any, Union, List + +from torch.distributed.elastic.agent.server.local_elastic_agent import ( + LocalElasticAgent +) +from torch.distributed.elastic.agent.server import WorkerSpec +from torch.distributed.elastic.rendezvous.dynamic_rendezvous import ( + DynamicRendezvousHandler +) +from torch.distributed.elastic.rendezvous.c10d_rendezvous_backend import ( + C10dRendezvousBackend +) +from torch.distributed import TCPStore +from torch.distributed.elastic.multiprocessing import Std + +# from lightning.pytorch.plugins.environments import ( +# ClusterEnvironment, SLURMEnvironment, +# TorchElasticEnvironment, LightningEnvironment +# ) + +from cluster import ClusterEnvironment + + +class Launcher(abc.ABC): + cluster: ClusterEnvironment + + @abc.abstractmethod + def run(*args): + """Launches the distributed execution.""" + + +class DummyTorchElasticLauncher(Launcher): + """Simplified Torch Elastic launcher.""" + + def __init__( + self, + cluster: ClusterEnvironment, + n_workers_per_node: int = 1, + min_nodes: int = 1, + max_nodes: int = 1, + max_restarts: int = 1 + ) -> None: + super().__init__() + self.cluster = cluster + self.n_workers_per_node = n_workers_per_node + self.min_nodes = min_nodes + self.max_nodes = max_nodes + self.max_restarts = max_restarts + self.run_id = str(time.time()) + + if cluster.creates_processes_externally and n_workers_per_node > 1: + print("WARNING: the cluster may already spawn worker " + "processes for you... Consider setting " + "'n_workers_per_node=1'") + + g_world_size = cluster.num_nodes() * self.n_workers_per_node + + store = TCPStore( + host_name=cluster.main_address, + port=cluster.main_port, # could conflict! + world_size=g_world_size, + is_master=cluster.global_rank() == 0, + timeout=datetime.timedelta(seconds=3) + ) + backend = C10dRendezvousBackend(store, self.run_id) + self.rdzv_handler = DynamicRendezvousHandler.from_backend( + run_id=self.run_id, + store=store, + backend=backend, + min_nodes=self.min_nodes, + max_nodes=self.max_nodes + ) + + def run( + self, + func: Callable, + args: Tuple = (), + redirect: bool = False, + log_dir: str = 'launcher_logs', + tee_ranks: Union[str, int, List[int]] = None + ) -> Any: + """Launches the distributed execution with Torch Elastic.""" + # Suppress all printing to console: + # redirects={0: Std.ALL} # do no print, but save to file. 
+ # linked to Agent's log_dir + redirects = Std.ALL if redirect else Std.NONE + + # Fore back printing to console, while redirecting to file + # tee={0: Std.ALL} reactivates print to console + save to + # log file for RANK 0 + if tee_ranks == 'all': + tee = Std.ALL + elif tee_ranks is None: + tee = Std.NONE + elif isinstance(tee_ranks, int): + tee = {tee_ranks: Std.ALL} + elif isinstance(tee_ranks, list): + # tee_ranks is a list of int + tee = {rnk: Std.ALL for rnk in tee_ranks} + else: + raise ValueError(f"unrecognized 'tee_ranks={tee_ranks}'") + + spec = WorkerSpec( + role="worker", + local_world_size=self.n_workers_per_node, + entrypoint=func, + args=args, + rdzv_handler=self.rdzv_handler, + max_restarts=self.max_restarts, + # monitor_interval=monitor_interval, + redirects=redirects, + tee=tee + ) + + agent = LocalElasticAgent(spec, start_method="spawn", log_dir=log_dir) + # try: + run_result = agent.run() + if run_result.is_failed(): + print(f"worker 0 failed with: {run_result.failures[0]}") + result = None + else: + print(f"worker 0 return value is: {run_result.return_values[0]}") + result = run_result.return_values + # except Exception ex: + # # handle exception + return result + + +class TorchElasticLauncher(Launcher): + """Official Torch Elastic launcher.""" + + +class SimpleLauncher(Launcher): + """Simple launcher based on multiprocessing.""" + + +class DeepSpeedLauncher(Launcher): + """Official DeepSpeed launcher.""" diff --git a/experimental/strategy.py b/experimental/strategy.py new file mode 100644 index 00000000..60923400 --- /dev/null +++ b/experimental/strategy.py @@ -0,0 +1,145 @@ +import os +import abc +from typing import Any + +import torch +from torch import nn +from torch.nn.parallel import DistributedDataParallel +from torch import optim +from torch.utils.data import DataLoader, DistributedSampler +from torch.distributed import init_process_group + +# from lightning.pytorch.plugins.environments import ClusterEnvironment +from cluster import ClusterEnvironment + + +class Strategy(abc.ABC): + cluster: ClusterEnvironment + + @property + @abc.abstractmethod + def device(self) -> int: + """Device used by this worker""" + + @abc.abstractmethod + def setup(self) -> None: + """Setup the strategy once in a distributed environment.""" + + @abc.abstractmethod + def teardown(self) -> None: + """Frees the distributed strategy resources.""" + + @abc.abstractmethod + def is_main_worker(self) -> bool: + """Returns True if called from the main process of the pool.""" + + @abc.abstractmethod + def _is_env_setup(self) -> bool: + """Checks whether the distributed environment is correctly setup.""" + + @abc.abstractmethod + def distribute_model(self, model: Any) -> Any: + """Distributes a neural network.""" + + @abc.abstractmethod + def distribute_optimizer(self, optimizer: Any) -> Any: + """Distributes an optimizer.""" + + @abc.abstractmethod + def distribute_dataloader(self, dataloader: Any) -> Any: + """Distributes a dataloader.""" + + +class DDPStrategy(Strategy): + def __init__( + self, + cluster: ClusterEnvironment, + backend: str = 'nccl' + ) -> None: + super().__init__() + self.cluster = cluster + self.backend = backend + + @property + def device(self) -> int: + """Returns the local rank. Assumes one worker per GPU.""" + return self.cluster.local_rank() + + def setup(self) -> None: + """Setup the strategy in a distributed context.""" + if not self._is_env_setup(): + raise RuntimeError( + "Distributed environment not setup correctly. 
Use a launcher.") + + # Initializes the default distributed process group + # and the distributed package + init_process_group(backend=self.backend) + + def teardown(self) -> None: + torch.distributed.barrier() + torch.distributed.destroy_process_group() + + def _is_env_setup(self) -> bool: + if (os.environ.get('RANK') is not None): + # and torch.distributed.is_available()): + return True + return False + + def is_main_worker(self) -> bool: + return self.cluster.global_rank() == 0 + + def distribute_model(self, model: nn.Module) -> nn.Module: + model = model.to(f"cuda:{self.device}") + return DistributedDataParallel( + model, + device_ids=[self.device], + output_device=self.device + ) + + def distribute_optimizer( + self, + optimizer: optim.Optimizer + ) -> optim.Optimizer: + return optimizer + + def distribute_dataloader( + self, + dataloader: DataLoader, + shuffle: bool = True + ) -> DataLoader: + """Makes a torch DataLoader distributed by substituting its sampler.""" + sampler = DistributedSampler( + dataloader.dataset, + num_replicas=self.cluster.world_size(), + rank=self.cluster.global_rank(), + shuffle=shuffle + ) + # Recreate dataloader, with updated sampler + return DataLoader( + dataloader.dataset, + batch_size=dataloader.batch_size, + sampler=sampler, + num_workers=dataloader.num_workers, + collate_fn=dataloader.collate_fn, + pin_memory=dataloader.pin_memory, + drop_last=dataloader.drop_last, + timeout=dataloader.timeout, + worker_init_fn=dataloader.worker_init_fn, + multiprocessing_context=dataloader.multiprocessing_context, + generator=dataloader.generator, + prefetch_factor=dataloader.prefetch_factor, + persistent_workers=dataloader.persistent_workers, + pin_memory_device=dataloader.pin_memory_device + ) + + +class LocalStrategy(Strategy): + ... + + +class HorovodStrategy(Strategy): + ... + + +class DeepSpeedStrategy(Strategy): + ... 
From c98d4caa334f50acb689f697a5969eb9c738d532 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 30 Oct 2023 17:23:26 +0100 Subject: [PATCH 003/171] ADD: Torch Elastic Launcher --- experimental/cluster.py | 11 ++++ experimental/distrib_launcher.py | 16 +++--- experimental/launcher.py | 95 +++++++++++++++++++++++++++++++- 3 files changed, 112 insertions(+), 10 deletions(-) diff --git a/experimental/cluster.py b/experimental/cluster.py index 608bcf0b..981c8de2 100644 --- a/experimental/cluster.py +++ b/experimental/cluster.py @@ -32,6 +32,17 @@ def num_nodes(self) -> int: class LocalEnvironment(LightningEnvironment): + + def world_size(self) -> int: + if os.environ.get('WORLD_SIZE'): + return int(os.environ.get('WORLD_SIZE')) + return self._world_size + + def global_rank(self) -> int: + if os.environ.get('RANK'): + return int(os.environ.get('RANK')) + return self._global_rank + def num_nodes(self) -> int: """Returns the number of nodes allocated for the current job.""" return 1 diff --git a/experimental/distrib_launcher.py b/experimental/distrib_launcher.py index 60d7d9e6..16f3a249 100644 --- a/experimental/distrib_launcher.py +++ b/experimental/distrib_launcher.py @@ -5,7 +5,7 @@ from torch.utils.data import DataLoader, Dataset from strategy import Strategy, DDPStrategy -from launcher import DummyTorchElasticLauncher +from launcher import DummyTorchElasticLauncher, TorchElasticLauncher from cluster import ( LocalEnvironment, SLURMEnvironment, TorchElasticEnvironment @@ -48,6 +48,7 @@ def trainer_entrypoint_fn(a, strategy: Strategy): for epoch in range(2): for (x, y) in train_loader: + # print(f"tensor to cuda:{strategy.device}") x = x.to(strategy.device) y = y.to(strategy.device) @@ -88,12 +89,13 @@ def trainer_entrypoint_fn(a, strategy: Strategy): print(cluster) # Instantiate Launcher - launcher = DummyTorchElasticLauncher( - cluster=cluster, - n_workers_per_node=NPROC_PRE_NODE, - min_nodes=MIN_NODES, - max_nodes=MAX_NODES - ) + # launcher = DummyTorchElasticLauncher( + # cluster=cluster, + # n_workers_per_node=NPROC_PRE_NODE, + # min_nodes=MIN_NODES, + # max_nodes=MAX_NODES + # ) + launcher = TorchElasticLauncher(nproc_per_node=NPROC_PRE_NODE) # Instantiate Strategy if (STRATEGY == 'ddp' diff --git a/experimental/launcher.py b/experimental/launcher.py index 1577b7d2..c2d35837 100644 --- a/experimental/launcher.py +++ b/experimental/launcher.py @@ -1,7 +1,8 @@ import datetime import abc import time -from typing import Callable, Tuple, Any, Union, List +import uuid +from typing import Callable, Tuple, Any, Union, List, Optional from torch.distributed.elastic.agent.server.local_elastic_agent import ( LocalElasticAgent @@ -16,11 +17,14 @@ from torch.distributed import TCPStore from torch.distributed.elastic.multiprocessing import Std +from torch.distributed.launcher.api import LaunchConfig, elastic_launch +from torch.distributed.run import config_from_args + # from lightning.pytorch.plugins.environments import ( # ClusterEnvironment, SLURMEnvironment, # TorchElasticEnvironment, LightningEnvironment # ) - +# from torch.distributed.argparse_util import check_env, env from cluster import ClusterEnvironment @@ -130,7 +134,92 @@ def run( class TorchElasticLauncher(Launcher): - """Official Torch Elastic launcher.""" + """ + Official Torch Elastic launcher. 
+ Adapted from: + https://github.com/pytorch/pytorch/blob/main/torch/distributed/run.py + """ + + def __init__( + self, + nnodes: str = '1:1', + nproc_per_node: str = '1', + rdzv_backend: str = 'static', + rdzv_endpoint: str = '', + rdzv_id: str = 'none', + rdzv_conf: str = '', + standalone: bool = False, + max_restarts: int = 0, + monitor_interval: float = 5, + start_method: str = 'spawn', + role: str = 'default', + module: bool = False, + no_python: bool = False, + run_path: bool = False, + log_dir: Optional[str] = None, + redirects: str = '0', + tee: str = '0', + node_rank: int = 0, + master_addr: str = "127.0.0.1", + master_port: int = 29500, + local_addr: Optional[str] = None + ) -> None: + super().__init__() + # emulate CLI args + # TODO: include logic for 'action=check_env' or 'action=env' + self.nnodes = nnodes + self.nproc_per_node = nproc_per_node + self.rdzv_backend = rdzv_backend + self.rdzv_endpoint = rdzv_endpoint + self.rdzv_id = rdzv_id + self.rdzv_conf = rdzv_conf + self.standalone = standalone + self.max_restarts = max_restarts + self.monitor_interval = monitor_interval + self.start_method = start_method + self.role = role + self.module = module + self.no_python = no_python + self.run_path = run_path + self.log_dir = log_dir + self.redirects = redirects + self.tee = tee + self.node_rank = node_rank + self.master_addr = master_addr + self.master_port = master_port + self.local_addr = local_addr + # Placeholders + self.training_script = "placeholder.py" + self.training_script_args = [] + + def config_from_args( + self + ) -> Tuple[LaunchConfig, Union[Callable, str], List[str]]: + return config_from_args(self) + + def run( + self, + func: Callable, + args: Tuple = () + ): + if self.standalone: + self.rdzv_backend = "c10d" + self.rdzv_endpoint = "localhost:29400" + self.rdzv_id = str(uuid.uuid4()) + # log.info( + # f"\n**************************************\n" + # f"Rendezvous info:\n" + # f"--rdzv_backend={self.rdzv_backend} " + # f"--rdzv_endpoint={self.rdzv_endpoint} " + # f"--rdzv_id={self.rdzv_id}\n" + # f"**************************************\n" + # ) + + config, cmd, cmd_args = self.config_from_args() + elastic_launch( + config=config, + entrypoint=func, + )(*args) class SimpleLauncher(Launcher): From d4d3decebfbfa45bfdcbad4621c58b498674c621 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 30 Oct 2023 17:31:04 +0100 Subject: [PATCH 004/171] ADD: info on env vars --- experimental/launcher.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/experimental/launcher.py b/experimental/launcher.py index c2d35837..4670d27e 100644 --- a/experimental/launcher.py +++ b/experimental/launcher.py @@ -136,6 +136,8 @@ def run( class TorchElasticLauncher(Launcher): """ Official Torch Elastic launcher. + Does NOT support passing values as environment variables. 
+ Adapted from: https://github.com/pytorch/pytorch/blob/main/torch/distributed/run.py """ @@ -215,7 +217,7 @@ def run( # f"**************************************\n" # ) - config, cmd, cmd_args = self.config_from_args() + config, _, _ = self.config_from_args() elastic_launch( config=config, entrypoint=func, From be7d2727161f7ec34e7bbf741892d1f24a4307dc Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 31 Oct 2023 16:40:06 +0100 Subject: [PATCH 005/171] ADD: distributed tooling and examples --- experimental/cluster.py | 59 ++++++++++-- experimental/distrib_launcher.py | 71 ++++++++------- experimental/distributed_tools.py | 68 ++++++++++++++ experimental/example_0.py | 85 ++++++++++++++++++ experimental/example_1.py | 106 ++++++++++++++++++++++ experimental/example_2.py | 107 ++++++++++++++++++++++ experimental/example_3.py | 77 ++++++++++++++++ experimental/launcher.py | 89 +++++++++++++++--- experimental/launcher_factory.py | 144 ++++++++++++++++++++++++++++++ experimental/strategy.py | 13 ++- 10 files changed, 765 insertions(+), 54 deletions(-) create mode 100644 experimental/distributed_tools.py create mode 100644 experimental/example_0.py create mode 100644 experimental/example_1.py create mode 100644 experimental/example_2.py create mode 100644 experimental/example_3.py create mode 100644 experimental/launcher_factory.py diff --git a/experimental/cluster.py b/experimental/cluster.py index 981c8de2..78ae8ead 100644 --- a/experimental/cluster.py +++ b/experimental/cluster.py @@ -1,5 +1,6 @@ import abc import os +import time from lightning.pytorch.plugins.environments import ( ClusterEnvironment as LightningClusterEnvironment, @@ -14,13 +15,23 @@ class ClusterEnvironment(LightningClusterEnvironment): def num_nodes(self) -> int: """Returns the number of nodes allocated for the current job.""" + @abc.abstractmethod + def job_id(self) -> str: + """Returns the current job ID inferred from the cluster.""" + class SLURMEnvironment(LightningSLURMEnvironment): def num_nodes(self) -> int: """Returns the number of nodes allocated for the current job.""" if os.environ.get('SLURM_JOB_NUM_NODES'): return int(os.environ['SLURM_JOB_NUM_NODES']) - return int(os.environ['SLURM_NNODES']) + if os.environ.get('SLURM_NNODES'): + return int(os.environ['SLURM_NNODES']) + raise RuntimeError('Number of nodes not found in SLURM env variables') + + def job_id(self) -> str: + """Returns the current job ID inferred from the cluster.""" + return os.environ['SLURM_JOB_ID'] class TorchElasticEnvironment(LightningTorchElasticEnvironment): @@ -30,19 +41,57 @@ def num_nodes(self) -> int: lwsize = int(os.environ['LOCAL_WORLD_SIZE']) return gwsize//lwsize + def job_id(self) -> str: + """Returns the current job ID inferred from the cluster.""" + return os.environ['TORCHELASTIC_RUN_ID'] + class LocalEnvironment(LightningEnvironment): + _job_id: str = None + def world_size(self) -> int: - if os.environ.get('WORLD_SIZE'): - return int(os.environ.get('WORLD_SIZE')) + # if os.environ.get('WORLD_SIZE'): + # return int(os.environ.get('WORLD_SIZE')) + print( + "WARNING: world_size() method in 'LocalEnvironment' returns " + f"a fixed-value placeholder world_size={self._world_size}. " + "Use it carefully!" 
+ ) return self._world_size def global_rank(self) -> int: - if os.environ.get('RANK'): - return int(os.environ.get('RANK')) + # if os.environ.get('RANK'): + # return int(os.environ.get('RANK')) + print( + "WARNING: global_rank() method in 'LocalEnvironment' returns " + f"a fixed-value placeholder global_rank={self._global_rank}. " + "Use it carefully!" + ) return self._global_rank def num_nodes(self) -> int: """Returns the number of nodes allocated for the current job.""" return 1 + + def job_id(self) -> str: + """Returns the current job ID inferred from the cluster.""" + if self._job_id is None: + self._job_id = str(time.time()) + return self._job_id + + +def detect_cluster() -> ClusterEnvironment: + """Defines a protocol to select the ClusterEnvironment + depending on availability and priority. + """ + + if SLURMEnvironment.detect(): + cluster = SLURMEnvironment() + elif TorchElasticEnvironment.detect(): + cluster = TorchElasticEnvironment() + elif LocalEnvironment.detect(): + cluster = LocalEnvironment() + else: + raise NotImplementedError("Unrecognized cluster env") + return cluster diff --git a/experimental/distrib_launcher.py b/experimental/distrib_launcher.py index 16f3a249..d8f4e881 100644 --- a/experimental/distrib_launcher.py +++ b/experimental/distrib_launcher.py @@ -6,10 +6,12 @@ from strategy import Strategy, DDPStrategy from launcher import DummyTorchElasticLauncher, TorchElasticLauncher -from cluster import ( - LocalEnvironment, SLURMEnvironment, - TorchElasticEnvironment +from launcher_factory import ( + LauncherFactory, + SimpleLauncherFactory, + TorchElasticLauncherFactory ) +from distributed_tools import DistributedTooling class UniformRndDataset(Dataset): @@ -65,6 +67,7 @@ def trainer_entrypoint_fn(a, strategy: Strategy): return 123 +LAUNCHER = 'torch-elastic-no' STRATEGY = 'ddp' RUN_ID = "my_run_id" @@ -74,36 +77,40 @@ def trainer_entrypoint_fn(a, strategy: Strategy): MAX_RESTARTS = 2 if __name__ == "__main__": - # STRATEGY BUILDER - - # Instantiate ClusterEnv - if SLURMEnvironment.detect(): - cluster = SLURMEnvironment() - elif TorchElasticEnvironment.detect(): - cluster = TorchElasticEnvironment() - elif LocalEnvironment.detect(): - cluster = LocalEnvironment() - else: - raise NotImplementedError("Unrecognized cluster env") - - print(cluster) - - # Instantiate Launcher - # launcher = DummyTorchElasticLauncher( - # cluster=cluster, - # n_workers_per_node=NPROC_PRE_NODE, - # min_nodes=MIN_NODES, - # max_nodes=MAX_NODES + # # STRATEGY BUILDER + + # # Instantiate Launcher Factory + # # launcher = DummyTorchElasticLauncher( + # # n_workers_per_node=NPROC_PRE_NODE, + # # min_nodes=MIN_NODES, + # # max_nodes=MAX_NODES + # # ) + # # launcher = TorchElasticLauncher( + # # rdzv_id=RUN_ID, + # # nproc_per_node=NPROC_PRE_NODE, + # # nnodes=f"{MIN_NODES}:{MAX_NODES}", + # # max_restarts=MAX_RESTARTS + # # ) + # if LAUNCHER == 'torch-elastic': + # launcher_builder: LauncherFactory = TorchElasticLauncherFactory() + # else: + # launcher_builder: LauncherFactory = SimpleLauncherFactory() + + # # Instantiate launcher + # launcher = launcher_builder.createLauncher( + # n_workers_per_node=NPROC_PRE_NODE # ) - launcher = TorchElasticLauncher(nproc_per_node=NPROC_PRE_NODE) - - # Instantiate Strategy - if (STRATEGY == 'ddp' - and torch.cuda.is_available() - and torch.cuda.device_count() > 1): - strategy = DDPStrategy(cluster=cluster, backend='nccl') - else: - raise NotImplementedError + + # # Instantiate Strategy + # if (STRATEGY == 'ddp' + # and torch.cuda.is_available() + # and 
torch.cuda.device_count() > 1): + # strategy = DDPStrategy(cluster=None, backend='nccl') + # else: + # raise NotImplementedError + + dist_tools = DistributedTooling(n_workers_per_node=NPROC_PRE_NODE) + launcher, strategy = dist_tools.getTools('ddp') # CLIENT CODE # Launch training from launcher diff --git a/experimental/distributed_tools.py b/experimental/distributed_tools.py new file mode 100644 index 00000000..83bf241f --- /dev/null +++ b/experimental/distributed_tools.py @@ -0,0 +1,68 @@ +from typing import Tuple +import abc + +from launcher import Launcher +from strategy import Strategy, DDPStrategy +from launcher_factory import TorchElasticLauncherFactory + + +class Assembler(abc.ABC): + """Abstract Assembler class.""" + + +class DistributedTooling(Assembler): + """ + Assembles a set of objects used to enable distributed ML. + Suggests working presets of Launcher and Strategy, providing + an easy entry point for the end user. + """ + + def __init__(self, n_workers_per_node: int = 1) -> None: + super().__init__() + self.n_workers_per_node = n_workers_per_node + + def getTools(self, strategy: str) -> Tuple[Launcher, Strategy]: + if strategy == 'ddp': + return self.getTorchDDPTools() + if strategy == 'deepspeed': + return self.getDeepSpeedTools() + if strategy == 'horovod': + return self.getHorovodTools() + raise ValueError(f"Unrecognized strategy={strategy}") + + def getTorchDDPTools(self) -> Tuple[Launcher, Strategy]: + """ + Returns a suggested preset of Launcher + Strategy + for torch distributed data parallel. + """ + import torch + if not torch.cuda.is_available(): + raise RuntimeError( + "Torch DDP cannot be used. GPUs not available." + ) + if not torch.cuda.device_count() > 1: + raise RuntimeError( + "Torch DDP cannot be used. Only one GPU is available." + ) + launcher_builder = TorchElasticLauncherFactory() + elastic_launcher = launcher_builder.createLauncher( + n_workers_per_node=self.n_workers_per_node + ) + strategy = DDPStrategy(backend='nccl') + return elastic_launcher, strategy + + def getDeepSpeedTools(self) -> Tuple[Launcher, Strategy]: + """ + Returns a suggested preset of Launcher + Strategy + for DeepSpeed distributed ML. + """ + # TODO: complete + raise NotImplementedError + + def getHorovodTools(self) -> Tuple[Launcher, Strategy]: + """ + Returns a suggested preset of Launcher + Strategy + for Horovod distributed ML. 
+ """ + # TODO: complete + raise NotImplementedError diff --git a/experimental/example_0.py b/experimental/example_0.py new file mode 100644 index 00000000..d18a40db --- /dev/null +++ b/experimental/example_0.py @@ -0,0 +1,85 @@ +""" +Run this with torchrun +""" + +import os + +import torch +from torch import nn +from torch.utils.data import DataLoader, Dataset + +from strategy import Strategy, DDPStrategy, HorovodStrategy + + +class UniformRndDataset(Dataset): + def __init__(self, x_size: int, y_size: int, len: int = 100): + super().__init__() + self.x_size = x_size + self.y_size = y_size + self.len = len + + def __len__(self): + return self.len + + def __getitem__(self, index): + return torch.rand(self.x_size), torch.rand(self.y_size) + + +def trainer_entrypoint_fn(a, strategy: Strategy): + """Dummy training function.""" + strategy.setup() + print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " + f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") + + # Local model + model = nn.Linear(3, 4) + optim = torch.optim.Adam(model.parameters(), lr=1e-3) + loss_fn = nn.MSELoss() + # Distributed model + model: nn.Module = strategy.distribute_model(model) + optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) + + # Data + train_set = UniformRndDataset(x_size=3, y_size=4) + train_loader = DataLoader(train_set, batch_size=10, num_workers=1) + # Distributed dataloader + train_loader: DataLoader = strategy.distribute_dataloader(train_loader) + + for epoch in range(2): + for (x, y) in train_loader: + # print(f"tensor to cuda:{strategy.device}") + x = x.to(strategy.device) + y = y.to(strategy.device) + + optim.zero_grad() + y_pred = model(x) + loss = loss_fn(y_pred, y) + loss.backward() + optim.step() + + if strategy.is_main_worker(): + print(f"Loss [epoch={epoch}]: {loss.item()}") + + strategy.teardown() + return 123 + + +STRATEGY = 'ddp' + + +if __name__ == "__main__": + + # Instantiate Strategy + if STRATEGY == 'ddp': + if (not torch.cuda.is_available() + or not torch.cuda.device_count() > 1): + raise RuntimeError('Resources unavailable') + + strategy = DDPStrategy(cluster=None, backend='nccl') + elif STRATEGY == 'horovod': + strategy = HorovodStrategy() + else: + raise NotImplementedError + + # Launch distributed training + trainer_entrypoint_fn("foobar", strategy) diff --git a/experimental/example_1.py b/experimental/example_1.py new file mode 100644 index 00000000..3cc2e452 --- /dev/null +++ b/experimental/example_1.py @@ -0,0 +1,106 @@ +""" +Introduction of launcher. Torchrun is not needed anymore. 
+""" +import os + +import torch +from torch import nn +from torch.utils.data import DataLoader, Dataset + +from strategy import Strategy, DDPStrategy, HorovodStrategy +from launcher import TorchElasticLauncher, SimpleLauncher + + +class UniformRndDataset(Dataset): + def __init__(self, x_size: int, y_size: int, len: int = 100): + super().__init__() + self.x_size = x_size + self.y_size = y_size + self.len = len + + def __len__(self): + return self.len + + def __getitem__(self, index): + return torch.rand(self.x_size), torch.rand(self.y_size) + + +def trainer_entrypoint_fn(a, strategy: Strategy): + """Dummy training function.""" + strategy.setup() + print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " + f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") + + # Local model + model = nn.Linear(3, 4) + optim = torch.optim.Adam(model.parameters(), lr=1e-3) + loss_fn = nn.MSELoss() + # Distributed model + model: nn.Module = strategy.distribute_model(model) + optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) + + # Data + train_set = UniformRndDataset(x_size=3, y_size=4) + train_loader = DataLoader(train_set, batch_size=10, num_workers=1) + # Distributed dataloader + train_loader: DataLoader = strategy.distribute_dataloader(train_loader) + + for epoch in range(2): + for (x, y) in train_loader: + # print(f"tensor to cuda:{strategy.device}") + x = x.to(strategy.device) + y = y.to(strategy.device) + + optim.zero_grad() + y_pred = model(x) + loss = loss_fn(y_pred, y) + loss.backward() + optim.step() + + if strategy.is_main_worker(): + print(f"Loss [epoch={epoch}]: {loss.item()}") + + strategy.teardown() + return 123 + + +LAUNCHER = 'torch-elastic' +STRATEGY = 'ddp' +RUN_ID = "my_run_id" +MIN_NODES = 1 +MAX_NODES = 1 +NPROC_PRE_NODE = 4 +MAX_RESTARTS = 2 + +if __name__ == "__main__": + + # Instantiate Launcher Factory + if LAUNCHER == 'torch-elastic': + launcher = TorchElasticLauncher( + rdzv_id=RUN_ID, + nproc_per_node=NPROC_PRE_NODE, + nnodes=f"{MIN_NODES}:{MAX_NODES}", + max_restarts=MAX_RESTARTS + ) + elif LAUNCHER == 'simple-launcher': + launcher = SimpleLauncher( + nproc_per_node=NPROC_PRE_NODE + ) + else: + raise NotImplementedError + + # Instantiate Strategy + if STRATEGY == 'ddp': + if (not torch.cuda.is_available() + or not torch.cuda.device_count() > 1): + raise RuntimeError('Resources unavailable') + + strategy = DDPStrategy(cluster=None, backend='nccl') + elif STRATEGY == 'horovod': + strategy = HorovodStrategy() + else: + raise NotImplementedError + + # CLIENT CODE + # Launch training from launcher + launcher.run(func=trainer_entrypoint_fn, args=("foobar", strategy)) diff --git a/experimental/example_2.py b/experimental/example_2.py new file mode 100644 index 00000000..14685753 --- /dev/null +++ b/experimental/example_2.py @@ -0,0 +1,107 @@ +""" +Unified interface for launchers. +Most of the complexity is hidden inside "factory" classes. 
+""" + +import os + +import torch +from torch import nn +from torch.utils.data import DataLoader, Dataset + +from strategy import Strategy, DDPStrategy, HorovodStrategy +from launcher_factory import ( + LauncherFactory, + SimpleLauncherFactory, + TorchElasticLauncherFactory +) + + +class UniformRndDataset(Dataset): + def __init__(self, x_size: int, y_size: int, len: int = 100): + super().__init__() + self.x_size = x_size + self.y_size = y_size + self.len = len + + def __len__(self): + return self.len + + def __getitem__(self, index): + return torch.rand(self.x_size), torch.rand(self.y_size) + + +def trainer_entrypoint_fn(a, strategy: Strategy): + """Dummy training function.""" + strategy.setup() + print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " + f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") + + # Local model + model = nn.Linear(3, 4) + optim = torch.optim.Adam(model.parameters(), lr=1e-3) + loss_fn = nn.MSELoss() + # Distributed model + model: nn.Module = strategy.distribute_model(model) + optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) + + # Data + train_set = UniformRndDataset(x_size=3, y_size=4) + train_loader = DataLoader(train_set, batch_size=10, num_workers=1) + # Distributed dataloader + train_loader: DataLoader = strategy.distribute_dataloader(train_loader) + + for epoch in range(2): + for (x, y) in train_loader: + # print(f"tensor to cuda:{strategy.device}") + x = x.to(strategy.device) + y = y.to(strategy.device) + + optim.zero_grad() + y_pred = model(x) + loss = loss_fn(y_pred, y) + loss.backward() + optim.step() + + if strategy.is_main_worker(): + print(f"Loss [epoch={epoch}]: {loss.item()}") + + strategy.teardown() + return 123 + + +LAUNCHER = 'torch-elastic' +STRATEGY = 'ddp' +NPROC_PRE_NODE = 4 + +if __name__ == "__main__": + # STRATEGY BUILDER + + # Instantiate Launcher Factory + if LAUNCHER == 'torch-elastic': + launcher_builder: LauncherFactory = TorchElasticLauncherFactory() + elif LAUNCHER == 'simple-launcher': + launcher_builder: LauncherFactory = SimpleLauncherFactory() + else: + raise NotImplementedError + + # Instantiate launcher + launcher = launcher_builder.createLauncher( + n_workers_per_node=NPROC_PRE_NODE + ) + + # Instantiate Strategy + if STRATEGY == 'ddp': + if (not torch.cuda.is_available() + or not torch.cuda.device_count() > 1): + raise RuntimeError('Resources unavailable') + + strategy = DDPStrategy(cluster=None, backend='nccl') + elif STRATEGY == 'horovod': + strategy = HorovodStrategy() + else: + raise NotImplementedError + + # CLIENT CODE + # Launch training from launcher + launcher.run(func=trainer_entrypoint_fn, args=("foobar", strategy)) diff --git a/experimental/example_3.py b/experimental/example_3.py new file mode 100644 index 00000000..d38dd78c --- /dev/null +++ b/experimental/example_3.py @@ -0,0 +1,77 @@ +""" +Hide the selection of launcher and strategy inside a class. 
+""" +import os + +import torch +from torch import nn +from torch.utils.data import DataLoader, Dataset + +from strategy import Strategy +from distributed_tools import DistributedTooling + + +class UniformRndDataset(Dataset): + def __init__(self, x_size: int, y_size: int, len: int = 100): + super().__init__() + self.x_size = x_size + self.y_size = y_size + self.len = len + + def __len__(self): + return self.len + + def __getitem__(self, index): + return torch.rand(self.x_size), torch.rand(self.y_size) + + +def trainer_entrypoint_fn(a, strategy: Strategy): + """Dummy training function.""" + strategy.setup() + print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " + f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") + + # Local model + model = nn.Linear(3, 4) + optim = torch.optim.Adam(model.parameters(), lr=1e-3) + loss_fn = nn.MSELoss() + # Distributed model + model: nn.Module = strategy.distribute_model(model) + optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) + + # Data + train_set = UniformRndDataset(x_size=3, y_size=4) + train_loader = DataLoader(train_set, batch_size=10, num_workers=1) + # Distributed dataloader + train_loader: DataLoader = strategy.distribute_dataloader(train_loader) + + for epoch in range(2): + for (x, y) in train_loader: + # print(f"tensor to cuda:{strategy.device}") + x = x.to(strategy.device) + y = y.to(strategy.device) + + optim.zero_grad() + y_pred = model(x) + loss = loss_fn(y_pred, y) + loss.backward() + optim.step() + + if strategy.is_main_worker(): + print(f"Loss [epoch={epoch}]: {loss.item()}") + + strategy.teardown() + return 123 + + +STRATEGY = 'ddp' +NPROC_PRE_NODE = 4 + + +if __name__ == "__main__": + dist_tools = DistributedTooling(n_workers_per_node=NPROC_PRE_NODE) + launcher, strategy = dist_tools.getTools('ddp') + + # CLIENT CODE + # Launch training from launcher + launcher.run(func=trainer_entrypoint_fn, args=("foobar", strategy)) diff --git a/experimental/launcher.py b/experimental/launcher.py index 4670d27e..d9733b8f 100644 --- a/experimental/launcher.py +++ b/experimental/launcher.py @@ -1,4 +1,6 @@ import datetime +import os +import shutil import abc import time import uuid @@ -15,24 +17,19 @@ C10dRendezvousBackend ) from torch.distributed import TCPStore -from torch.distributed.elastic.multiprocessing import Std +from torch.distributed.elastic.multiprocessing import Std, start_processes from torch.distributed.launcher.api import LaunchConfig, elastic_launch from torch.distributed.run import config_from_args -# from lightning.pytorch.plugins.environments import ( -# ClusterEnvironment, SLURMEnvironment, -# TorchElasticEnvironment, LightningEnvironment -# ) -# from torch.distributed.argparse_util import check_env, env -from cluster import ClusterEnvironment +from cluster import ClusterEnvironment, detect_cluster class Launcher(abc.ABC): cluster: ClusterEnvironment @abc.abstractmethod - def run(*args): + def run(self, *args) -> Any: """Launches the distributed execution.""" @@ -41,14 +38,16 @@ class DummyTorchElasticLauncher(Launcher): def __init__( self, - cluster: ClusterEnvironment, + cluster: Optional[ClusterEnvironment] = None, n_workers_per_node: int = 1, min_nodes: int = 1, max_nodes: int = 1, max_restarts: int = 1 ) -> None: super().__init__() - self.cluster = cluster + # detect_cluster() is preferred + self.cluster = cluster if cluster is not None else detect_cluster() + print(f"DummyTorchElasticLauncher with cluster '{self.cluster}'") self.n_workers_per_node = n_workers_per_node 
self.min_nodes = min_nodes self.max_nodes = max_nodes @@ -85,7 +84,7 @@ def run( redirect: bool = False, log_dir: str = 'launcher_logs', tee_ranks: Union[str, int, List[int]] = None - ) -> Any: + ) -> List[Any]: """Launches the distributed execution with Torch Elastic.""" # Suppress all printing to console: # redirects={0: Std.ALL} # do no print, but save to file. @@ -203,7 +202,7 @@ def run( self, func: Callable, args: Tuple = () - ): + ) -> Any: if self.standalone: self.rdzv_backend = "c10d" self.rdzv_endpoint = "localhost:29400" @@ -225,8 +224,72 @@ def run( class SimpleLauncher(Launcher): - """Simple launcher based on multiprocessing.""" + """Simple launcher based on multiprocessing. + Use ONLY for single node applications. + """ + + def __init__( + self, + nproc_per_node: int, + run_id: Optional[str] = None, + master_addr: str = "127.0.0.1", + master_port: int = 29500 + ) -> None: + super().__init__() + self.nproc_per_node = nproc_per_node + self.run_id = run_id if run_id is not None else f"RunID:{time.time()}" + self.master_addr = master_addr + self.master_port = master_port + self.log_dir = f'{self.__class__.__name__}_logs' + if os.path.exists(self.log_dir): + shutil.rmtree(self.log_dir) + os.makedirs(self.log_dir) + + def run( + self, + func: Callable, + args: Tuple = () + ) -> Any: + # Adapted from: + # https://pytorch.org/docs/stable/elastic/multiprocessing.html + w_args = {i: args for i in range(self.nproc_per_node)} + # Emulates the env variables set by torch Elastic + w_envs = { + i: dict( + RANK=str(i), + LOCAL_RANK=str(i), + GROUP_RANK=str(0), + ROLE_RANK=str(i), + WORLD_SIZE=str(self.nproc_per_node), + LOCAL_WORLD_SIZE=str(self.nproc_per_node), + ROLE_WORLD_SIZE=str(self.nproc_per_node), + TORCHELASTIC_RUN_ID=str(self.run_id), + MASTER_ADDR=str(self.master_addr), + MASTER_PORT=str(self.master_port) + ) + for i in range(self.nproc_per_node) + } + ctx = start_processes( + name=self.__class__.__name__, + entrypoint=func, + args=w_args, + envs=w_envs, + log_dir=self.log_dir + ) + ctx.wait() + return ctx.return_values class DeepSpeedLauncher(Launcher): """Official DeepSpeed launcher.""" + + def __init__(self) -> None: + super().__init__() + + def run( + self, + func: Callable, + args: Tuple = () + ) -> Any: + # TODO: complete + raise NotImplementedError diff --git a/experimental/launcher_factory.py b/experimental/launcher_factory.py new file mode 100644 index 00000000..fce12a0c --- /dev/null +++ b/experimental/launcher_factory.py @@ -0,0 +1,144 @@ +""" +Factories to instantiate Launcher classes. +They introduce a level of indirection to provide a unified interface +for all the launchers. The common interface is provided by the +`createLauncher` factory method. +""" + +from typing import Optional, Dict, Any +import abc + +from launcher import ( + Launcher, + TorchElasticLauncher, + SimpleLauncher, + DeepSpeedLauncher +) +from cluster import detect_cluster + + +class LauncherFactory(abc.ABC): + """ + Factory class to instantiate a Launcher classes. + It introduces a level of indirection to provide a unified interface + for all the launchers. The common interface is provided by the + `createLauncher` factory method. + """ + + def createLauncher( + self, + n_workers_per_node: int, + run_id: Optional[str] = None, + master_addr: Optional[str] = None, + master_port: Optional[int] = None, + **kwargs + ) -> Launcher: + """ + Simplifies the instantiation of a Launcher. + Advanced configuration is pre-computed in the body + of this method, leaving few parameters to the end user. 
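+
+        For example, with the TorchElastic factory defined below:
+            launcher = TorchElasticLauncherFactory().createLauncher(
+                n_workers_per_node=4
+            )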
+ """ + + +class TorchElasticLauncherFactory(LauncherFactory): + """Factory class to instantiate a TorchElasticLauncher class.""" + + def createLauncher( + self, + n_workers_per_node: int, + run_id: Optional[str] = None, + master_addr: Optional[str] = None, + master_port: Optional[int] = None, + **kwargs + ) -> Launcher: + """ + Simplifies the instantiation of a TorchElasticLauncher. + Advanced configuration is pre-computed in the body + of this method, leaving few parameters to the end user. + """ + cluster = detect_cluster() + + kwargs['nproc_per_node'] = n_workers_per_node + # If given, propagate the args + if run_id: + kwargs['rdzv_id'] = run_id + if master_addr: + kwargs['master_addr'] = master_addr + if master_port: + kwargs['master_port'] = master_port + + # Compute and add TorchElastic specific args, if not + # provided as **kwargs + n_nodes = cluster.num_nodes() + safe_add(kwargs, 'nnodes', f"{n_nodes}:{n_nodes}") + safe_add(kwargs, 'rdzv_id', cluster.job_id()) + is_host_flag = '1' if cluster.node_rank() == 0 else '0' + safe_add(kwargs, 'rdzv_conf', f'is_host={is_host_flag}') + safe_add(kwargs, 'rdzv_backend', 'c10d') + safe_add( + kwargs, + 'rdzv_endpoint', + f'{cluster.main_address}:{cluster.main_port}' + ) + safe_add(kwargs, 'max_restarts', 3) + + return TorchElasticLauncher(**kwargs) + + +class SimpleLauncherFactory(LauncherFactory): + """Factory class to instantiate a SimpleLauncherFactory class.""" + + def createLauncher( + self, + n_workers_per_node: int, + run_id: Optional[str] = None, + master_addr: Optional[str] = None, + master_port: Optional[int] = None, + **kwargs + ) -> Launcher: + """ + Simplifies the instantiation of a SimpleLauncher. + Advanced configuration is pre-computed in the body + of this method, leaving few parameters to the end user. + """ + + kwargs['nproc_per_node'] = n_workers_per_node + # If given, propagate the args + if run_id: + kwargs['run_id'] = run_id + if master_addr: + kwargs['master_addr'] = master_addr + if master_port: + kwargs['master_port'] = master_port + + return SimpleLauncher(**kwargs) + + +class DeepSpeedLauncherFactory(LauncherFactory): + """Factory class to instantiate a DeepSpeedLauncher class.""" + + def createLauncher( + self, + n_workers_per_node: int, + run_id: Optional[str] = None, + master_addr: Optional[str] = None, + master_port: Optional[int] = None, + **kwargs + ) -> Launcher: + """ + Simplifies the instantiation of a DeepSpeedLauncher. + Advanced configuration is pre-computed in the body + of this method, leaving few parameters to the end user. + """ + # TODO: complete + raise NotImplementedError + return DeepSpeedLauncher(...) + + +def safe_add(map: Dict, key: str, value: Any) -> None: + """ + Add a key-value pair to a dict if the key + is not already present. 
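+    For example, safe_add(kwargs, 'max_restarts', 3) sets a default of 3
+    only when the caller has not already provided a 'max_restarts' value.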
+ """ + if map.get(key) is None: + map[key] = value diff --git a/experimental/strategy.py b/experimental/strategy.py index 60923400..8fad3429 100644 --- a/experimental/strategy.py +++ b/experimental/strategy.py @@ -1,6 +1,6 @@ import os import abc -from typing import Any +from typing import Any, Optional import torch from torch import nn @@ -10,7 +10,7 @@ from torch.distributed import init_process_group # from lightning.pytorch.plugins.environments import ClusterEnvironment -from cluster import ClusterEnvironment +from cluster import ClusterEnvironment, detect_cluster class Strategy(abc.ABC): @@ -53,8 +53,8 @@ def distribute_dataloader(self, dataloader: Any) -> Any: class DDPStrategy(Strategy): def __init__( self, - cluster: ClusterEnvironment, - backend: str = 'nccl' + backend: str = 'nccl', + cluster: Optional[ClusterEnvironment] = None ) -> None: super().__init__() self.cluster = cluster @@ -71,6 +71,11 @@ def setup(self) -> None: raise RuntimeError( "Distributed environment not setup correctly. Use a launcher.") + # detect_cluster() is preferred + if self.cluster is None: + self.cluster = detect_cluster() + print(f"DDPStrategy executed on '{self.cluster}' cluster") + # Initializes the default distributed process group # and the distributed package init_process_group(backend=self.backend) From 56e0724e1d3f5ae241cf2bf38652317312039a44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20R=C3=BCttgers?= Date: Fri, 3 Nov 2023 09:07:58 +0100 Subject: [PATCH 006/171] new folder --- experimental/trainer/general_startscript | 136 ++++++ experimental/trainer/general_trainer.py | 548 +++++++++++++++++++++++ 2 files changed, 684 insertions(+) create mode 100755 experimental/trainer/general_startscript create mode 100755 experimental/trainer/general_trainer.py diff --git a/experimental/trainer/general_startscript b/experimental/trainer/general_startscript new file mode 100755 index 00000000..a39d034f --- /dev/null +++ b/experimental/trainer/general_startscript @@ -0,0 +1,136 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=TorchTest +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:15:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=4 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gpus-per-node=4 +#SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# parallelization strategy (DDP, HVD, DS) +strategy='DS' + +# parameters +debug=false # do debug +bs=32 # batch-size +epochs=4 # epochs +lr=0.01 # learning rate + +# AT +dataDir="/p/scratch/raise-ctp2/data_MNIST/" + +# set modules +ml --force purge + +ml Stages/2022 NVHPC/22.1 ParaStationMPI/5.5.0-1-mt NCCL/2.11.4-CUDA-11.5 cuDNN/8.3.1.22-CUDA-11.5 +ml Python/3.9.6 CMake HDF5 PnetCDF libaio/0.3.112 mpi-settings/CUDA + +# set env +source /p/project/intertwin/rakesh/T6.5-AI-and-ML/dist_trainer/envAI_hdfml/bin/activate + +# sleep a sec +sleep 1 + +# job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: 
CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +COMMAND="general_trainer.py" + +#launch +if [[ $strategy == *"HVD"* ]]; +then + EXEC="$COMMAND \ + --strat $strategy \ + --batch-size $bs \ + --epochs $epochs \ + --lr $lr \ + --data-dir $dataDir" + + srun --cpu-bind=none python3 -u $EXEC + +elif [[ $strategy == *"DDP"* ]]; +then + EXEC="$COMMAND \ + --strategy $strategy \ + --batch-size $bs \ + --epochs $epochs \ + --lr $lr \ + --nworker $SLURM_CPUS_PER_TASK \ + --data-dir $dataDir" + + srun --cpu-bind=none bash -c "torchrun \ + --log_dir='logs' \ + --nnodes=$SLURM_NNODES \ + --nproc_per_node=$SLURM_GPUS_PER_NODE \ + --rdzv_id=$SLURM_JOB_ID \ + --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ + --rdzv_backend=c10d \ + --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ + $EXEC" + +else + EXEC="$COMMAND \ + --strategy $strategy \ + --batch-size $bs \ + --epochs $epochs \ + --lr $lr \ + --nworker $SLURM_CPUS_PER_TASK \ + --data-dir $dataDir" + + #### do not change this part + # create node-list + sysN=$(eval "scontrol show hostnames") + for i in $sysN; do + x+=\"$i\":[$CUDA_VISIBLE_DEVICES], + done + WID=`echo {${x::-1}} | base64 -w 0` + + # modify config file with parameters + sed -i "2s|.*| \"train_micro_batch_size_per_gpu\": ${bs},|" DS_config.json + sed -i "7s|.*| \"lr\": ${lr}|" DS_config.json + #### + + # launch + srun python -m deepspeed.launcher.launch \ + --node_rank $SLURM_PROCID \ + --master_addr ${SLURMD_NODENAME}i \ + --master_port 29500 \ + --world_info $WID \ + $EXEC --deepspeed_mpi --deepspeed_config DS_config.json + +fi + diff --git a/experimental/trainer/general_trainer.py b/experimental/trainer/general_trainer.py new file mode 100755 index 00000000..e7e51003 --- /dev/null +++ b/experimental/trainer/general_trainer.py @@ -0,0 +1,548 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# author: RS, adapted from https://gitlab.jsc.fz-juelich.de/CoE-RAISE/FZJ/ai4hpc +# version: 211029a + +# std libs +import argparse, sys, os, time, numpy as np, random + +# ml libs +import deepspeed +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torchvision import datasets, transforms + +# parsed settings +def pars_ini(): + global args + parser = argparse.ArgumentParser(description='PyTorch MNIST Example') + + # IO parsers + parser.add_argument('--data-dir', default='./', + help='location of the training dataset in the local filesystem') + parser.add_argument('--restart-int', type=int, default=10, + help='restart interval per epoch (default: 10)') + + # model parsers + parser.add_argument('--strategy', type=str, default='DDP', + help='strategy for parallelization (DDP, HVD, DS)') + parser.add_argument('--batch-size', type=int, default=64, + help='input batch size for training (default: 64)') + parser.add_argument('--epochs', type=int, default=10, + help='number of epochs to train (default: 10)') + parser.add_argument('--lr', type=float, default=0.01, + help='learning rate (default: 0.01)') + parser.add_argument('--concM', type=int, default=100, + help='conc MNIST to this factor (default: 100)') + parser.add_argument('--momentum', type=float, default=0.5, + help='momentum in SGD optimizer (default: 0.5)') + 
parser.add_argument('--shuff', action='store_true', default=False, + help='shuffle dataset (default: False)') + + # debug parsers + parser.add_argument('--testrun', action='store_true', default=False, + help='do a test run with seed (default: False)') + parser.add_argument('--nseed', type=int, default=0, + help='seed integer for reproducibility (default: 0)') + parser.add_argument('--log-int', type=int, default=10, + help='log interval per training') + + # parallel parsers + parser.add_argument('--backend', type=str, default='nccl', + help='backend for parrallelisation (default: nccl)') + parser.add_argument('--nworker', type=int, default=0, + help='number of workers in DataLoader (default: 0 - only main)') + parser.add_argument('--prefetch', type=int, default=2, + help='prefetch data in DataLoader (default: 2)') + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables GPGPUs') + parser.add_argument('--local_rank', type=int, default=-1, + help='local rank passed from distributed launcher') + + try: + parser = deepspeed.add_config_arguments(parser) + except: + pass + + args = parser.parse_args() + + +class ddpDistributedTrainer: + #def __init__(self, model): + # self.model=model + + def distributedModel(self,model,device): + if torch.cuda.is_available(): + dist_model = nn.parallel.DistributedDataParallel(model,\ + device_ids=[device], output_device=device) + else: + dist_model = model + + return dist_model + + def initBackend(self): + if torch.cuda.is_available(): + dist.init_process_group(backend=args.backend) + + def cleanUp(self): + if torch.cuda.is_available(): + dist.barrier() + dist.destroy_process_group() + +class dsDistributedTrainer: + #def __init__(self, model): + # self.model=model + + def distributedModel(self,model,train_dataset): + # 1) Distributed model + # 2) DeepSpeed optimizer + # 3) Distributed data loader + distrib_model, __, train_loader, __ = deepspeed.initialize( + args=args, model=model, model_parameters=model.parameters(), training_data=train_dataset) + + def initBackend(self): + deepspeed.init_distributed(dist_backend=args.backend) + + def cleanUp(self): + deepspeed.sys.exit() + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 10, kernel_size=5) + self.conv2 = nn.Conv2d(10, 20, kernel_size=5) + self.conv2_drop = nn.Dropout2d() + self.fc1 = nn.Linear(320, 50) + self.fc2 = nn.Linear(50, 10) + + def forward(self, x): + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = x.view(-1, 320) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + return F.log_softmax(x) + +# train loop +def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): + model.train() + t_list = [] + loss_acc=0 + if grank==0: + print("\n") + for batch_idx, (data, target) in enumerate(train_loader): + t = time.perf_counter() + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_int == 0 and grank==0: + print( + f'Train epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' + f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\tLoss: {loss.item():.6f}') + t_list.append(time.perf_counter() - t) + loss_acc+= loss.item() + if grank==0: + print('TIMER: train time', sum(t_list) / len(t_list),'s') + return loss_acc + +# test loop +def test(model, 
device, test_loader, grank, gwsize, args): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + test_loss += F.nll_loss(output, target, reduction="sum").item() # sum up batch loss + pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability + correct += pred.eq(target.view_as(pred)).sum().item() + test_loss /= len(test_loader.dataset) + if grank==0: + print( + f'Test set: average loss: {test_loss:.4f}\t' + f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') + acc_test = 100.0 * correct * gwsize / len(test_loader.dataset) + return acc_test + + +# save state of the training +def save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,is_best): + rt = time.time() + # find if is_best happened in any worker + if torch.cuda.is_available(): + is_best_m = par_allgather_obj(is_best,gwsize) + + + if torch.cuda.is_available(): + if any(is_best_m): + # find which rank is_best happened - select first rank if multiple + is_best_rank = np.where(np.array(is_best_m)==True)[0][0] + + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer' : optimizer.state_dict()} + + # write on worker with is_best + if grank == is_best_rank: + torch.save(state,'./'+res_name) + print(f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') + else: + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer' : optimizer.state_dict()} + + torch.save(state,'./'+res_name) + print(f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') + + +# deterministic dataloader +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +# PARALLEL HELPERS +# sum of field over GPGPUs +def par_sum(field): + res = torch.tensor(field).float() + res = res.cuda() if args.cuda else res.cpu() + dist.all_reduce(res,op=dist.ReduceOp.SUM,group=None,async_op=True).wait() + return res + +# mean of field over GPGPUs +def par_mean(field,gwsize): + res = torch.tensor(field).float() + res = res.cuda() if args.cuda else res.cpu() + dist.all_reduce(res,op=dist.ReduceOp.SUM,group=None,async_op=True).wait() + res/=gwsize + return res + +# max(field) over GPGPUs +def par_max(field): + res = torch.tensor(field).float() + res = res.cuda() if args.cuda else res.cpu() + dist.all_reduce(res,op=dist.ReduceOp.MAX,group=None,async_op=True).wait() + return res + +# min(field) over GPGPUs +def par_min(field): + res = torch.tensor(field).float() + res = res.cuda() if args.cuda else res.cpu() + dist.all_reduce(res,op=dist.ReduceOp.MIN,group=None,async_op=True).wait() + return res + +# reduce field to destination with an operation +def par_reduce(field,dest,oper): + ''' + dest=0 will send the result to GPU on rank 0 (any rank is possible) + op=oper has to be in form "dist.ReduceOp.", where is + SUM + PRODUCT + MIN + MAX + BAND + BOR + BXOR + ''' + res = torch.Tensor([field]) + res = res.cuda() if args.cuda else res.cpu() + dist.reduce(res,dst=dest,op=oper,group=None,async_op=False) + return res + +# gathers tensors from the whole group in a list (to all workers) +def par_allgather(field,gwsize): + if args.cuda: + sen = torch.Tensor([field]).cuda() + res = [torch.Tensor([field]).cuda() for i in range(gwsize)] + else: + sen = 
torch.Tensor([field]) + res = [torch.Tensor([field]) for i in range(gwsize)] + dist.all_gather(res,sen,group=None) + return res + +# gathers any object from the whole group in a list (to all workers) +def par_allgather_obj(obj,gwsize): + res = [None]*gwsize + dist.all_gather_object(res,obj,group=None) + return res +# +# +# MAIN +# +# +def main(): + # get parse args + print("check_0", flush=True) + pars_ini() + + print("check_1", flush=True) + + # check CUDA availibility + args.cuda = not args.no_cuda and torch.cuda.is_available() + + #Strategy for distributed training + if args.strategy=='DDP': + + my_trainer = ddpDistributedTrainer() + + elif args.strategy=='DS': + + my_trainer = dsDistributedTrainer() + + # limit # of CPU threads to be used per worker + torch.set_num_threads(1) + + # get directory + program_dir = os.getcwd() + + # start the time.time for profiling + st = time.time() + + + # initializes the distributed backend which will take care of sychronizing nodes/GPUs + my_trainer.initBackend() + +# deterministic testrun + if args.testrun: + torch.manual_seed(args.nseed) + g = torch.Generator() + g.manual_seed(args.nseed) + + # get job rank info - rank==0 master gpu + if torch.cuda.is_available(): + lwsize = torch.cuda.device_count() if args.cuda else 0 # local world size - per node + gwsize = dist.get_world_size() # global world size - per run + grank = dist.get_rank() # global rank - assign per run + lrank = dist.get_rank()%lwsize # local rank - assign per node + else: + gwsize = 1 + grank = 0 + + # some debug + if grank==0: + print('TIMER: initialise:', time.time()-st, 's') + print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) + print('DEBUG: sys.version:',sys.version,'\n') + + print('DEBUG: IO parsers:') + print('DEBUG: args.data_dir:',args.data_dir) + print('DEBUG: args.restart_int:',args.restart_int,'\n') + + print('DEBUG: model parsers:') + print('DEBUG: args.batch_size:',args.batch_size) + print('DEBUG: args.epochs:',args.epochs) + print('DEBUG: args.lr:',args.lr) + print('DEBUG: args.concM:',args.concM) + print('DEBUG: args.momentum:',args.momentum) + print('DEBUG: args.shuff:',args.shuff,'\n') + + print('DEBUG: debug parsers:') + print('DEBUG: args.testrun:',args.testrun) + print('DEBUG: args.nseed:',args.nseed) + print('DEBUG: args.log_int:',args.log_int,'\n') + + print('DEBUG: parallel parsers:') + print('DEBUG: args.backend:',args.backend) + print('DEBUG: args.nworker:',args.nworker) + print('DEBUG: args.prefetch:',args.prefetch) + print('DEBUG: args.cuda:',args.cuda,'\n') + + # encapsulate the model on the GPU assigned to the current process + device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu',lrank) + if args.cuda: + torch.cuda.set_device(lrank) + # deterministic testrun + if args.testrun: + torch.cuda.manual_seed(args.nseed) + +# read data + data_dir = args.data_dir + mnist_scale = args.concM + largeData = [] + for i in range(mnist_scale): + largeData.append( + datasets.MNIST(data_dir, train=True, download=False, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + ) + + # concat data + train_dataset = torch.utils.data.ConcatDataset(largeData) + + mnist_scale = args.concM + largeData = [] + for i in range(mnist_scale): + largeData.append( + datasets.MNIST(data_dir, train=False, download=False, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + ) + + # concat data + test_dataset = 
torch.utils.data.ConcatDataset(largeData) + + # restricts data loading to a subset of the dataset exclusive to the current process + args.shuff = args.shuff and not args.testrun + if torch.cuda.is_available(): + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset, num_replicas=gwsize, rank=grank, shuffle = args.shuff) + test_sampler = torch.utils.data.distributed.DistributedSampler( + test_dataset, num_replicas=gwsize, rank=grank, shuffle = args.shuff) + +# distribute dataset to workers + # persistent workers is not possible for nworker=0 + pers_w = True if args.nworker>1 else False + + # deterministic testrun - the same dataset each run + kwargs = {'worker_init_fn': seed_worker, 'generator': g} if args.testrun else {} + + if torch.cuda.is_available(): + train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs ) + test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, + sampler=test_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs ) + else: + train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size) + test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size) + + if grank==0: + print('TIMER: read and concat data:', time.time()-st, 's') + + # create CNN model + model = Net().to(device) + + # distribute model to workers + distrib_model = my_trainer.distributedModel(model, device) + + # optimizer + optimizer = torch.optim.SGD(distrib_model.parameters(), lr=args.lr, momentum=args.momentum) + + +# resume state + start_epoch = 1 + best_acc = np.Inf + res_name='checkpoint.pth.tar' + if os.path.isfile(res_name): + try: + if torch.cuda.is_available(): + dist.barrier() + # Map model to be loaded to specified single gpu. 
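+                # (storages saved on device 0 are remapped to this worker's local-rank device)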
+ loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else {'cpu:%d' % 0: 'cpu:%d' % lrank} + checkpoint = torch.load(program_dir+'/'+res_name, map_location=loc) + else: + checkpoint = torch.load(program_dir+'/'+res_name) + start_epoch = checkpoint['epoch'] + best_acc = checkpoint['best_acc'] + distrib_model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if torch.cuda.is_available(): + if grank==0: + print(f'WARNING: restarting from {start_epoch} epoch') + else: + print(f'WARNING: restarting from {start_epoch} epoch') + except: + if torch.cuda.is_available(): + if grank==0: + print(f'WARNING: restart file cannot be loaded, restarting!') + else: + print(f'WARNING: restart file cannot be loaded, restarting!') + + if start_epoch>=args.epochs: + if torch.cuda.is_available(): + if grank==0: + print(f'WARNING: given epochs are less than the one in the restart file!\n' + f'WARNING: SYS.EXIT is issued') + + my_trainer.cleanUp() + sys.exit() + else: + print(f'WARNING: given epochs are less than the one in the restart file!\n' + f'WARNING: SYS.EXIT is issued') + sys.exit() + +# start trainin/testing loop + if grank==0: + print('TIMER: broadcast:', time.time()-st, 's') + print(f'\nDEBUG: start training') + print(f'--------------------------------------------------------') + + et = time.time() + for epoch in range(start_epoch, args.epochs + 1): + lt = time.time() + # training + loss_acc = train(distrib_model, device, train_loader, optimizer, epoch, grank, gwsize, args) + + # testing + acc_test = test(distrib_model, device, test_loader, grank, gwsize, args) + + # save first epoch timer + if epoch == start_epoch: + first_ep_t = time.time()-lt + + # final epoch + if epoch + 1 == args.epochs: + train_loader.last_epoch = True + test_loader.last_epoch = True + + if grank==0: + print('TIMER: epoch time:', time.time()-lt, 's') + print('DEBUG: accuracy:', acc_test, '%') + + # save state if found a better state + is_best = loss_acc < best_acc + if epoch % args.restart_int == 0: + save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,is_best) + # reset best_acc + best_acc = min(loss_acc, best_acc) + +# finalise + # save final state + save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,True) + if torch.cuda.is_available(): + dist.barrier() + + # some debug + if grank==0: + print(f'\n--------------------------------------------------------') + print('DEBUG: training results:\n') + print('TIMER: first epoch time:', first_ep_t, ' s') + print('TIMER: last epoch time:', time.time()-lt, ' s') + print('TIMER: average epoch time:', (time.time()-et)/args.epochs, ' s') + print('TIMER: total epoch time:', time.time()-et, ' s') + if epoch > 1: + print('TIMER: total epoch-1 time:', time.time()-et-first_ep_t, ' s') + print('TIMER: average epoch-1 time:', (time.time()-et-first_ep_t)/(args.epochs-1), ' s') + print('DEBUG: last accuracy:', acc_test, '%') + print('DEBUG: memory req:',int(torch.cuda.memory_reserved(lrank)/1024/1024),'MB') \ + if args.cuda else 'DEBUG: memory req: - MB' + print('DEBUG: memory summary:\n\n',torch.cuda.memory_summary(0)) if args.cuda else '' + + if grank==0: + print(f'TIMER: final time: {time.time()-st} s\n') + + my_trainer.cleanUp() + +if __name__ == "__main__": + main() + sys.exit() + +#eof + From 112b9849f1177481c440e7c5c7f46f588edbdd52 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 6 Nov 2023 14:45:51 +0100 Subject: [PATCH 007/171] UPDATE: distributed strategy setup --- experimental/example_0.py | 40 
+++ experimental/trainer/general_trainer.py | 326 ++++++++++++++---------- 2 files changed, 235 insertions(+), 131 deletions(-) diff --git a/experimental/example_0.py b/experimental/example_0.py index d18a40db..5a67cfd8 100644 --- a/experimental/example_0.py +++ b/experimental/example_0.py @@ -64,6 +64,46 @@ def trainer_entrypoint_fn(a, strategy: Strategy): return 123 +def trainer_entrypoint_fn_mario(a, strategy: Strategy): + """Dummy training function.""" + + print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " + f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") + + # Local model + model = nn.Linear(3, 4) + optim = torch.optim.Adam(model.parameters(), lr=1e-3) + loss_fn = nn.MSELoss() + # Data + train_set = UniformRndDataset(x_size=3, y_size=4) + train_loader = DataLoader(train_set, batch_size=10, num_workers=1) + + strategy.setup(model, train_set, optim) + # Distributed model + model: nn.Module = strategy.distribute_model(model) + optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) + # Distributed dataloader + train_loader: DataLoader = strategy.distribute_dataloader(train_loader) + + for epoch in range(2): + for (x, y) in train_loader: + # print(f"tensor to cuda:{strategy.device}") + x = x.to(strategy.device) + y = y.to(strategy.device) + + optim.zero_grad() + y_pred = model(x) + loss = loss_fn(y_pred, y) + loss.backward() + optim.step() + + if strategy.is_main_worker(): + print(f"Loss [epoch={epoch}]: {loss.item()}") + + strategy.teardown() + return 123 + + STRATEGY = 'ddp' diff --git a/experimental/trainer/general_trainer.py b/experimental/trainer/general_trainer.py index e7e51003..c7187811 100755 --- a/experimental/trainer/general_trainer.py +++ b/experimental/trainer/general_trainer.py @@ -4,7 +4,12 @@ # version: 211029a # std libs -import argparse, sys, os, time, numpy as np, random +import argparse +import sys +import os +import time +import numpy as np +import random # ml libs import deepspeed @@ -16,6 +21,8 @@ from torchvision import datasets, transforms # parsed settings + + def pars_ini(): global args parser = argparse.ArgumentParser(description='PyTorch MNIST Example') @@ -71,13 +78,16 @@ def pars_ini(): class ddpDistributedTrainer: - #def __init__(self, model): + # def __init__(self, model): # self.model=model - def distributedModel(self,model,device): + def setup(self, *args, **kwargs): + self.initBackend() + + def distributedModel(self, model, device): if torch.cuda.is_available(): - dist_model = nn.parallel.DistributedDataParallel(model,\ - device_ids=[device], output_device=device) + dist_model = nn.parallel.DistributedDataParallel(model, + device_ids=[device], output_device=device) else: dist_model = model @@ -85,23 +95,36 @@ def distributedModel(self,model,device): def initBackend(self): if torch.cuda.is_available(): - dist.init_process_group(backend=args.backend) + dist.init_process_group(backend=args.backend) def cleanUp(self): if torch.cuda.is_available(): - dist.barrier() - dist.destroy_process_group() + dist.barrier() + dist.destroy_process_group() + class dsDistributedTrainer: - #def __init__(self, model): + # def __init__(self, model): # self.model=model - def distributedModel(self,model,train_dataset): + def setup(self, model, training_dataset, optim): + self.initBackend() + distrib_model, __, train_loader, __ = deepspeed.initialize( + args=args, model=model, model_parameters=model.parameters(), training_data=train_dataset) + + self.distrib_model = distrib_model + self.train_loader = train_loader + + def 
distributedModel(self, model): # 1) Distributed model # 2) DeepSpeed optimizer # 3) Distributed data loader - distrib_model, __, train_loader, __ = deepspeed.initialize( - args=args, model=model, model_parameters=model.parameters(), training_data=train_dataset) + # distrib_model, __, train_loader, __ = deepspeed.initialize( + # args=args, model=model, model_parameters=model.parameters(), training_data=train_dataset) + return self.distrib_model + + def distributeDataloader(self, dataloader): + return self.train_loader def initBackend(self): deepspeed.init_distributed(dist_backend=args.backend) @@ -109,6 +132,7 @@ def initBackend(self): def cleanUp(self): deepspeed.sys.exit() + class Net(nn.Module): def __init__(self): super(Net, self).__init__() @@ -128,11 +152,13 @@ def forward(self, x): return F.log_softmax(x) # train loop + + def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): model.train() t_list = [] - loss_acc=0 - if grank==0: + loss_acc = 0 + if grank == 0: print("\n") for batch_idx, (data, target) in enumerate(train_loader): t = time.perf_counter() @@ -142,17 +168,19 @@ def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): loss = F.nll_loss(output, target) loss.backward() optimizer.step() - if batch_idx % args.log_int == 0 and grank==0: + if batch_idx % args.log_int == 0 and grank == 0: print( f'Train epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\tLoss: {loss.item():.6f}') t_list.append(time.perf_counter() - t) - loss_acc+= loss.item() - if grank==0: - print('TIMER: train time', sum(t_list) / len(t_list),'s') + loss_acc += loss.item() + if grank == 0: + print('TIMER: train time', sum(t_list) / len(t_list), 's') return loss_acc # test loop + + def test(model, device, test_loader, grank, gwsize, args): model.eval() test_loss = 0 @@ -161,11 +189,13 @@ def test(model, device, test_loader, grank, gwsize, args): for data, target in test_loader: data, target = data.to(device), target.to(device) output = model(data) - test_loss += F.nll_loss(output, target, reduction="sum").item() # sum up batch loss - pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability + # sum up batch loss + test_loss += F.nll_loss(output, target, reduction="sum").item() + # get the index of the max log-probability + pred = output.argmax(dim=1, keepdim=True) correct += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) - if grank==0: + if grank == 0: print( f'Test set: average loss: {test_loss:.4f}\t' f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') @@ -174,37 +204,38 @@ def test(model, device, test_loader, grank, gwsize, args): # save state of the training -def save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,is_best): +def save_state(epoch, distrib_model, loss_acc, optimizer, res_name, grank, gwsize, is_best): rt = time.time() # find if is_best happened in any worker if torch.cuda.is_available(): - is_best_m = par_allgather_obj(is_best,gwsize) - + is_best_m = par_allgather_obj(is_best, gwsize) if torch.cuda.is_available(): if any(is_best_m): # find which rank is_best happened - select first rank if multiple - is_best_rank = np.where(np.array(is_best_m)==True)[0][0] + is_best_rank = np.where(np.array(is_best_m) == True)[0][0] # collect state state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer' : optimizer.state_dict()} + 
'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} # write on worker with is_best - if grank == is_best_rank: - torch.save(state,'./'+res_name) - print(f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') + if grank == is_best_rank: + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') else: # collect state state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer' : optimizer.state_dict()} - - torch.save(state,'./'+res_name) - print(f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} + + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') # deterministic dataloader @@ -215,36 +246,50 @@ def seed_worker(worker_id): # PARALLEL HELPERS # sum of field over GPGPUs + + def par_sum(field): res = torch.tensor(field).float() res = res.cuda() if args.cuda else res.cpu() - dist.all_reduce(res,op=dist.ReduceOp.SUM,group=None,async_op=True).wait() + dist.all_reduce(res, op=dist.ReduceOp.SUM, + group=None, async_op=True).wait() return res # mean of field over GPGPUs -def par_mean(field,gwsize): + + +def par_mean(field, gwsize): res = torch.tensor(field).float() res = res.cuda() if args.cuda else res.cpu() - dist.all_reduce(res,op=dist.ReduceOp.SUM,group=None,async_op=True).wait() - res/=gwsize + dist.all_reduce(res, op=dist.ReduceOp.SUM, + group=None, async_op=True).wait() + res /= gwsize return res # max(field) over GPGPUs + + def par_max(field): res = torch.tensor(field).float() res = res.cuda() if args.cuda else res.cpu() - dist.all_reduce(res,op=dist.ReduceOp.MAX,group=None,async_op=True).wait() + dist.all_reduce(res, op=dist.ReduceOp.MAX, + group=None, async_op=True).wait() return res # min(field) over GPGPUs + + def par_min(field): res = torch.tensor(field).float() res = res.cuda() if args.cuda else res.cpu() - dist.all_reduce(res,op=dist.ReduceOp.MIN,group=None,async_op=True).wait() + dist.all_reduce(res, op=dist.ReduceOp.MIN, + group=None, async_op=True).wait() return res # reduce field to destination with an operation -def par_reduce(field,dest,oper): + + +def par_reduce(field, dest, oper): ''' dest=0 will send the result to GPU on rank 0 (any rank is possible) op=oper has to be in form "dist.ReduceOp.", where is @@ -258,48 +303,54 @@ def par_reduce(field,dest,oper): ''' res = torch.Tensor([field]) res = res.cuda() if args.cuda else res.cpu() - dist.reduce(res,dst=dest,op=oper,group=None,async_op=False) + dist.reduce(res, dst=dest, op=oper, group=None, async_op=False) return res # gathers tensors from the whole group in a list (to all workers) -def par_allgather(field,gwsize): + + +def par_allgather(field, gwsize): if args.cuda: sen = torch.Tensor([field]).cuda() res = [torch.Tensor([field]).cuda() for i in range(gwsize)] else: sen = torch.Tensor([field]) res = [torch.Tensor([field]) for i in range(gwsize)] - dist.all_gather(res,sen,group=None) + dist.all_gather(res, sen, group=None) return res # gathers any object from the whole group in a list (to all workers) -def par_allgather_obj(obj,gwsize): + + +def par_allgather_obj(obj, gwsize): res = [None]*gwsize - dist.all_gather_object(res,obj,group=None) + dist.all_gather_object(res, obj, group=None) return res # # # MAIN # # + + def main(): # get parse args 
print("check_0", flush=True) pars_ini() - + print("check_1", flush=True) # check CUDA availibility args.cuda = not args.no_cuda and torch.cuda.is_available() - #Strategy for distributed training - if args.strategy=='DDP': + # Strategy for distributed training + if args.strategy == 'DDP': + + my_trainer = ddpDistributedTrainer() - my_trainer = ddpDistributedTrainer() - - elif args.strategy=='DS': + elif args.strategy == 'DS': - my_trainer = dsDistributedTrainer() + my_trainer = dsDistributedTrainer() # limit # of CPU threads to be used per worker torch.set_num_threads(1) @@ -310,7 +361,6 @@ def main(): # start the time.time for profiling st = time.time() - # initializes the distributed backend which will take care of sychronizing nodes/GPUs my_trainer.initBackend() @@ -322,63 +372,64 @@ def main(): # get job rank info - rank==0 master gpu if torch.cuda.is_available(): - lwsize = torch.cuda.device_count() if args.cuda else 0 # local world size - per node + lwsize = torch.cuda.device_count() if args.cuda else 0 # local world size - per node gwsize = dist.get_world_size() # global world size - per run grank = dist.get_rank() # global rank - assign per run - lrank = dist.get_rank()%lwsize # local rank - assign per node + lrank = dist.get_rank() % lwsize # local rank - assign per node else: gwsize = 1 grank = 0 # some debug - if grank==0: + if grank == 0: print('TIMER: initialise:', time.time()-st, 's') print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) - print('DEBUG: sys.version:',sys.version,'\n') + print('DEBUG: sys.version:', sys.version, '\n') print('DEBUG: IO parsers:') - print('DEBUG: args.data_dir:',args.data_dir) - print('DEBUG: args.restart_int:',args.restart_int,'\n') + print('DEBUG: args.data_dir:', args.data_dir) + print('DEBUG: args.restart_int:', args.restart_int, '\n') print('DEBUG: model parsers:') - print('DEBUG: args.batch_size:',args.batch_size) - print('DEBUG: args.epochs:',args.epochs) - print('DEBUG: args.lr:',args.lr) - print('DEBUG: args.concM:',args.concM) - print('DEBUG: args.momentum:',args.momentum) - print('DEBUG: args.shuff:',args.shuff,'\n') + print('DEBUG: args.batch_size:', args.batch_size) + print('DEBUG: args.epochs:', args.epochs) + print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.concM:', args.concM) + print('DEBUG: args.momentum:', args.momentum) + print('DEBUG: args.shuff:', args.shuff, '\n') print('DEBUG: debug parsers:') - print('DEBUG: args.testrun:',args.testrun) - print('DEBUG: args.nseed:',args.nseed) - print('DEBUG: args.log_int:',args.log_int,'\n') + print('DEBUG: args.testrun:', args.testrun) + print('DEBUG: args.nseed:', args.nseed) + print('DEBUG: args.log_int:', args.log_int, '\n') print('DEBUG: parallel parsers:') - print('DEBUG: args.backend:',args.backend) - print('DEBUG: args.nworker:',args.nworker) - print('DEBUG: args.prefetch:',args.prefetch) - print('DEBUG: args.cuda:',args.cuda,'\n') + print('DEBUG: args.backend:', args.backend) + print('DEBUG: args.nworker:', args.nworker) + print('DEBUG: args.prefetch:', args.prefetch) + print('DEBUG: args.cuda:', args.cuda, '\n') # encapsulate the model on the GPU assigned to the current process - device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu',lrank) + device = torch.device( + 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) if args.cuda: torch.cuda.set_device(lrank) # deterministic testrun if args.testrun: torch.cuda.manual_seed(args.nseed) -# read data +# read data data_dir = args.data_dir mnist_scale = args.concM 
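+    # MNIST is loaded 'concM' times and concatenated below to emulate a larger dataset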
largeData = [] for i in range(mnist_scale): largeData.append( datasets.MNIST(data_dir, train=True, download=False, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - ) + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + ) # concat data train_dataset = torch.utils.data.ConcatDataset(largeData) @@ -388,11 +439,11 @@ def main(): for i in range(mnist_scale): largeData.append( datasets.MNIST(data_dir, train=False, download=False, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - ) + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + ) # concat data test_dataset = torch.utils.data.ConcatDataset(largeData) @@ -401,30 +452,33 @@ def main(): args.shuff = args.shuff and not args.testrun if torch.cuda.is_available(): train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, shuffle = args.shuff) + train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) test_sampler = torch.utils.data.distributed.DistributedSampler( - test_dataset, num_replicas=gwsize, rank=grank, shuffle = args.shuff) + test_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) # distribute dataset to workers # persistent workers is not possible for nworker=0 - pers_w = True if args.nworker>1 else False + pers_w = True if args.nworker > 1 else False # deterministic testrun - the same dataset each run - kwargs = {'worker_init_fn': seed_worker, 'generator': g} if args.testrun else {} + kwargs = {'worker_init_fn': seed_worker, + 'generator': g} if args.testrun else {} if torch.cuda.is_available(): train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs ) + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, - sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs ) + sampler=test_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) else: - train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size) - test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size) + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size) + test_loader = torch.utils.data.DataLoader( + test_dataset, batch_size=args.batch_size) - if grank==0: - print('TIMER: read and concat data:', time.time()-st, 's') + if grank == 0: + print('TIMER: read and concat data:', time.time()-st, 's') # create CNN model model = Net().to(device) @@ -433,20 +487,23 @@ def main(): distrib_model = my_trainer.distributedModel(model, device) # optimizer - optimizer = torch.optim.SGD(distrib_model.parameters(), lr=args.lr, momentum=args.momentum) - + optimizer = torch.optim.SGD( + distrib_model.parameters(), lr=args.lr, momentum=args.momentum) -# resume state + +# resume state start_epoch = 1 best_acc = np.Inf - res_name='checkpoint.pth.tar' + res_name = 'checkpoint.pth.tar' if os.path.isfile(res_name): try: if 
torch.cuda.is_available(): dist.barrier() # Map model to be loaded to specified single gpu. - loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else {'cpu:%d' % 0: 'cpu:%d' % lrank} - checkpoint = torch.load(program_dir+'/'+res_name, map_location=loc) + loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { + 'cpu:%d' % 0: 'cpu:%d' % lrank} + checkpoint = torch.load( + program_dir+'/'+res_name, map_location=loc) else: checkpoint = torch.load(program_dir+'/'+res_name) start_epoch = checkpoint['epoch'] @@ -454,44 +511,46 @@ def main(): distrib_model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) if torch.cuda.is_available(): - if grank==0: + if grank == 0: print(f'WARNING: restarting from {start_epoch} epoch') else: print(f'WARNING: restarting from {start_epoch} epoch') except: if torch.cuda.is_available(): - if grank==0: + if grank == 0: print(f'WARNING: restart file cannot be loaded, restarting!') else: print(f'WARNING: restart file cannot be loaded, restarting!') - if start_epoch>=args.epochs: + if start_epoch >= args.epochs: if torch.cuda.is_available(): - if grank==0: - print(f'WARNING: given epochs are less than the one in the restart file!\n' - f'WARNING: SYS.EXIT is issued') - + if grank == 0: + print(f'WARNING: given epochs are less than the one in the restart file!\n' + f'WARNING: SYS.EXIT is issued') + my_trainer.cleanUp() sys.exit() else: print(f'WARNING: given epochs are less than the one in the restart file!\n' - f'WARNING: SYS.EXIT is issued') + f'WARNING: SYS.EXIT is issued') sys.exit() # start trainin/testing loop - if grank==0: + if grank == 0: print('TIMER: broadcast:', time.time()-st, 's') print(f'\nDEBUG: start training') - print(f'--------------------------------------------------------') + print(f'--------------------------------------------------------') et = time.time() for epoch in range(start_epoch, args.epochs + 1): lt = time.time() # training - loss_acc = train(distrib_model, device, train_loader, optimizer, epoch, grank, gwsize, args) + loss_acc = train(distrib_model, device, train_loader, + optimizer, epoch, grank, gwsize, args) # testing - acc_test = test(distrib_model, device, test_loader, grank, gwsize, args) + acc_test = test(distrib_model, device, + test_loader, grank, gwsize, args) # save first epoch timer if epoch == start_epoch: @@ -502,25 +561,27 @@ def main(): train_loader.last_epoch = True test_loader.last_epoch = True - if grank==0: + if grank == 0: print('TIMER: epoch time:', time.time()-lt, 's') print('DEBUG: accuracy:', acc_test, '%') # save state if found a better state is_best = loss_acc < best_acc if epoch % args.restart_int == 0: - save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,is_best) + save_state(epoch, distrib_model, loss_acc, optimizer, + res_name, grank, gwsize, is_best) # reset best_acc best_acc = min(loss_acc, best_acc) # finalise # save final state - save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,True) + save_state(epoch, distrib_model, loss_acc, + optimizer, res_name, grank, gwsize, True) if torch.cuda.is_available(): dist.barrier() # some debug - if grank==0: + if grank == 0: print(f'\n--------------------------------------------------------') print('DEBUG: training results:\n') print('TIMER: first epoch time:', first_ep_t, ' s') @@ -528,21 +589,24 @@ def main(): print('TIMER: average epoch time:', (time.time()-et)/args.epochs, ' s') print('TIMER: total epoch time:', time.time()-et, ' s') if epoch > 1: - print('TIMER: total 
epoch-1 time:', time.time()-et-first_ep_t, ' s') - print('TIMER: average epoch-1 time:', (time.time()-et-first_ep_t)/(args.epochs-1), ' s') + print('TIMER: total epoch-1 time:', + time.time()-et-first_ep_t, ' s') + print('TIMER: average epoch-1 time:', + (time.time()-et-first_ep_t)/(args.epochs-1), ' s') print('DEBUG: last accuracy:', acc_test, '%') - print('DEBUG: memory req:',int(torch.cuda.memory_reserved(lrank)/1024/1024),'MB') \ - if args.cuda else 'DEBUG: memory req: - MB' - print('DEBUG: memory summary:\n\n',torch.cuda.memory_summary(0)) if args.cuda else '' + print('DEBUG: memory req:', int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') \ + if args.cuda else 'DEBUG: memory req: - MB' + print('DEBUG: memory summary:\n\n', + torch.cuda.memory_summary(0)) if args.cuda else '' - if grank==0: + if grank == 0: print(f'TIMER: final time: {time.time()-st} s\n') my_trainer.cleanUp() -if __name__ == "__main__": + +if __name__ == "__main__": main() sys.exit() -#eof - +# eof From 8df1a9d871ffb53137843e536230eb1581f1fa67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20R=C3=BCttgers?= Date: Tue, 7 Nov 2023 12:08:50 +0100 Subject: [PATCH 008/171] generalized for DDP and DS --- experimental/trainer/general_trainer.py | 140 +++++++++++++++--------- 1 file changed, 90 insertions(+), 50 deletions(-) diff --git a/experimental/trainer/general_trainer.py b/experimental/trainer/general_trainer.py index c7187811..674c51a1 100755 --- a/experimental/trainer/general_trainer.py +++ b/experimental/trainer/general_trainer.py @@ -81,17 +81,54 @@ class ddpDistributedTrainer: # def __init__(self, model): # self.model=model - def setup(self, *args, **kwargs): - self.initBackend() + def setup(self, train_dataset, test_dataset, gwsize, grank, **kwargs): + model=kwargs.get("model") + device=kwargs.get("device") + shuff=kwargs.get("shuff") + - def distributedModel(self, model, device): if torch.cuda.is_available(): - dist_model = nn.parallel.DistributedDataParallel(model, - device_ids=[device], output_device=device) + dist_model = nn.parallel.DistributedDataParallel(model,device_ids=[device], output_device=device) else: dist_model = model + self.dist_model = dist_model - return dist_model + # restricts data loading to a subset of the dataset exclusive to the current process + if torch.cuda.is_available(): + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset, num_replicas=gwsize, rank=grank, shuffle=shuff) + test_sampler = torch.utils.data.distributed.DistributedSampler( + test_dataset, num_replicas=gwsize, rank=grank, shuffle=shuff) + + # distribute dataset to workers + # persistent workers is not possible for nworker=0 + pers_w = True if args.nworker > 1 else False + + # deterministic testrun - the same dataset each run + kwargs = {'worker_init_fn': seed_worker, + 'generator': g} if args.testrun else {} + + if torch.cuda.is_available(): + train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) + test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, + sampler=test_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) + else: + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size) + test_loader = torch.utils.data.DataLoader( + test_dataset, batch_size=args.batch_size) + + 
self.train_loader=train_loader + self.test_loader=test_loader + + def distributedModel(self): + return self.dist_model + + def distributedDataloader(self): + return self.train_loader, self.test_loader def initBackend(self): if torch.cuda.is_available(): @@ -107,24 +144,26 @@ class dsDistributedTrainer: # def __init__(self, model): # self.model=model - def setup(self, model, training_dataset, optim): - self.initBackend() - distrib_model, __, train_loader, __ = deepspeed.initialize( + def setup(self, train_dataset, test_dataset, gwsize, grank, **kwargs): + model=kwargs.get("model") + + test_sampler = torch.utils.data.distributed.DistributedSampler( + test_dataset, num_replicas=gwsize, rank=grank) + test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, + sampler=test_sampler, num_workers=0, pin_memory=True, shuffle=False) + + dist_model, __, train_loader, __ = deepspeed.initialize( args=args, model=model, model_parameters=model.parameters(), training_data=train_dataset) - self.distrib_model = distrib_model + self.dist_model = dist_model self.train_loader = train_loader + self.test_loader = test_loader - def distributedModel(self, model): - # 1) Distributed model - # 2) DeepSpeed optimizer - # 3) Distributed data loader - # distrib_model, __, train_loader, __ = deepspeed.initialize( - # args=args, model=model, model_parameters=model.parameters(), training_data=train_dataset) - return self.distrib_model + def distributedModel(self): + return self.dist_model - def distributeDataloader(self, dataloader): - return self.train_loader + def distributedDataloader(self): + return self.train_loader, self.test_loader def initBackend(self): deepspeed.init_distributed(dist_backend=args.backend) @@ -335,11 +374,8 @@ def par_allgather_obj(obj, gwsize): def main(): # get parse args - print("check_0", flush=True) pars_ini() - print("check_1", flush=True) - # check CUDA availibility args.cuda = not args.no_cuda and torch.cuda.is_available() @@ -447,49 +483,53 @@ def main(): # concat data test_dataset = torch.utils.data.ConcatDataset(largeData) + + # create CNN model + model = Net().to(device) + + my_trainer.setup(train_dataset, test_dataset, gwsize, grank, device=device,model=model,shuff=args.shuff) # restricts data loading to a subset of the dataset exclusive to the current process - args.shuff = args.shuff and not args.testrun - if torch.cuda.is_available(): - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) - test_sampler = torch.utils.data.distributed.DistributedSampler( - test_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) + #args.shuff = args.shuff and not args.testrun + #if torch.cuda.is_available(): + # train_sampler = torch.utils.data.distributed.DistributedSampler( + # train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) + # test_sampler = torch.utils.data.distributed.DistributedSampler( + # test_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) # distribute dataset to workers # persistent workers is not possible for nworker=0 - pers_w = True if args.nworker > 1 else False + #pers_w = True if args.nworker > 1 else False # deterministic testrun - the same dataset each run - kwargs = {'worker_init_fn': seed_worker, - 'generator': g} if args.testrun else {} - - if torch.cuda.is_available(): - train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.nworker, pin_memory=True, - 
persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) - test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, - sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) - else: - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size) - test_loader = torch.utils.data.DataLoader( - test_dataset, batch_size=args.batch_size) + #kwargs = {'worker_init_fn': seed_worker, + #'generator': g} if args.testrun else {} + + #if torch.cuda.is_available(): + # train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, + #sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + #persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) + # test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, + #sampler=test_sampler, num_workers=args.nworker, pin_memory=True, + #persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) + #else: + # train_loader = torch.utils.data.DataLoader( + # train_dataset, batch_size=args.batch_size) + # test_loader = torch.utils.data.DataLoader( + # test_dataset, batch_size=args.batch_size) + + #if grank == 0: + # print('TIMER: read and concat data:', time.time()-st, 's') - if grank == 0: - print('TIMER: read and concat data:', time.time()-st, 's') - - # create CNN model - model = Net().to(device) # distribute model to workers - distrib_model = my_trainer.distributedModel(model, device) + distrib_model = my_trainer.distributedModel() # optimizer optimizer = torch.optim.SGD( distrib_model.parameters(), lr=args.lr, momentum=args.momentum) + train_loader, test_loader = my_trainer.distributedDataloader() # resume state start_epoch = 1 From 792160706dd8fa6991c309d9a365ed1443b543fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20R=C3=BCttgers?= Date: Tue, 7 Nov 2023 12:09:08 +0100 Subject: [PATCH 009/171] add config file --- experimental/trainer/DS_config.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 experimental/trainer/DS_config.json diff --git a/experimental/trainer/DS_config.json b/experimental/trainer/DS_config.json new file mode 100644 index 00000000..544cab17 --- /dev/null +++ b/experimental/trainer/DS_config.json @@ -0,0 +1,15 @@ +{ + "train_micro_batch_size_per_gpu": 32, + "gradient_accumulation_steps": 1, + "optimizer": { + "type": "Adam", + "params": { + "lr": 0.01 + } + }, + "fp16": { + "enabled": false + }, + "zero_optimization": false +} + From 0ac9452f9b87b8951ba1a677ccd81115e2c5f7da Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 5 Dec 2023 10:32:47 +0100 Subject: [PATCH 010/171] UPDATE: kwargs --- experimental/strategy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/strategy.py b/experimental/strategy.py index 8fad3429..59dd7a4f 100644 --- a/experimental/strategy.py +++ b/experimental/strategy.py @@ -65,7 +65,7 @@ def device(self) -> int: """Returns the local rank. 
Assumes one worker per GPU.""" return self.cluster.local_rank() - def setup(self) -> None: + def setup(self, **kwargs) -> None: """Setup the strategy in a distributed context.""" if not self._is_env_setup(): raise RuntimeError( From 91418c58a552a24a215d0eeb89685ea1a1b54677 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Fri, 8 Dec 2023 20:50:50 +0530 Subject: [PATCH 011/171] Update general_trainer.py --- experimental/trainer/general_trainer.py | 529 +++++++++++------------- 1 file changed, 230 insertions(+), 299 deletions(-) diff --git a/experimental/trainer/general_trainer.py b/experimental/trainer/general_trainer.py index 674c51a1..b72e27dd 100755 --- a/experimental/trainer/general_trainer.py +++ b/experimental/trainer/general_trainer.py @@ -4,25 +4,19 @@ # version: 211029a # std libs -import argparse -import sys -import os -import time -import numpy as np -import random +import argparse, sys, os, time, numpy as np, random # ml libs import deepspeed import torch import torch.distributed as dist +import horovod.torch as hvd import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torchvision import datasets, transforms # parsed settings - - def pars_ini(): global args parser = argparse.ArgumentParser(description='PyTorch MNIST Example') @@ -78,99 +72,139 @@ def pars_ini(): class ddpDistributedTrainer: - # def __init__(self, model): + #def __init__(self, model): # self.model=model - def setup(self, train_dataset, test_dataset, gwsize, grank, **kwargs): - model=kwargs.get("model") - device=kwargs.get("device") - shuff=kwargs.get("shuff") - - + def distributedModel(self,model,device): if torch.cuda.is_available(): - dist_model = nn.parallel.DistributedDataParallel(model,device_ids=[device], output_device=device) + dist_model = torch.nn.parallel.DistributedDataParallel(model,\ + device_ids=[device], output_device=device) else: dist_model = model - self.dist_model = dist_model - # restricts data loading to a subset of the dataset exclusive to the current process - if torch.cuda.is_available(): - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, shuffle=shuff) - test_sampler = torch.utils.data.distributed.DistributedSampler( - test_dataset, num_replicas=gwsize, rank=grank, shuffle=shuff) + return dist_model - # distribute dataset to workers - # persistent workers is not possible for nworker=0 - pers_w = True if args.nworker > 1 else False + def broadcastParams(self, distrib_model, optimizer): + pass - # deterministic testrun - the same dataset each run - kwargs = {'worker_init_fn': seed_worker, - 'generator': g} if args.testrun else {} + def distributed_Optimizer(self, optimizer, distrib_model): + return optimizer + def initBackend(self): if torch.cuda.is_available(): - train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) - test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, - sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) - else: - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size) - test_loader = torch.utils.data.DataLoader( - test_dataset, batch_size=args.batch_size) + dist.init_process_group(backend=args.backend) - 
self.train_loader=train_loader - self.test_loader=test_loader + def dist_gwsize(self): + return dist.get_world_size() - def distributedModel(self): - return self.dist_model - - def distributedDataloader(self): - return self.train_loader, self.test_loader + def dist_lwsize(self): + return torch.cuda.device_count() - def initBackend(self): - if torch.cuda.is_available(): - dist.init_process_group(backend=args.backend) + def dist_grank(self): + return dist.get_rank() + + def dist_lrank(self): + return dist.get_rank()%torch.cuda.device_count() def cleanUp(self): if torch.cuda.is_available(): - dist.barrier() - dist.destroy_process_group() + dist.barrier() + dist.destroy_process_group() + # gathers any object from the whole group in a list (to all workers) + def par_allgather_obj(self,obj,gwsize): + res = [None]*gwsize + dist.all_gather_object(res,obj) + return res class dsDistributedTrainer: - # def __init__(self, model): + #def __init__(self, model): # self.model=model - def setup(self, train_dataset, test_dataset, gwsize, grank, **kwargs): - model=kwargs.get("model") + def distributedModel(self, model, device): + # 1) Distributed model + # 2) DeepSpeed optimizer + # 3) Distributed data loader + distrib_model, __, __, __ = deepspeed.initialize( + args=args, model=model, model_parameters=model.parameters(), dist_init_required=True) + return distrib_model - test_sampler = torch.utils.data.distributed.DistributedSampler( - test_dataset, num_replicas=gwsize, rank=grank) - test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, - sampler=test_sampler, num_workers=0, pin_memory=True, shuffle=False) - - dist_model, __, train_loader, __ = deepspeed.initialize( - args=args, model=model, model_parameters=model.parameters(), training_data=train_dataset) - - self.dist_model = dist_model - self.train_loader = train_loader - self.test_loader = test_loader - - def distributedModel(self): - return self.dist_model + def broadcastParams(self, distrib_model, optimizer): + pass - def distributedDataloader(self): - return self.train_loader, self.test_loader + def distributed_Optimizer(self, optimizer, distrib_model): + return optimizer def initBackend(self): deepspeed.init_distributed(dist_backend=args.backend) + def dist_gwsize(self): + return dist.get_world_size() + + def dist_lwsize(self): + return torch.cuda.device_count() + + def dist_grank(self): + return dist.get_rank() + + def dist_lrank(self): + return dist.get_rank()%torch.cuda.device_count() + def cleanUp(self): deepspeed.sys.exit() + # gathers any object from the whole group in a list (to all workers) + def par_allgather_obj(self,obj,gwsize): + res = [None]*gwsize + dist.all_gather_object(res,obj) + return res + +class hvdDistributedTrainer: + #def __init__(self, model): + # self.model=model + + def distributedModel(self, model, device): + distrib_model = model + return distrib_model + + def broadcastParams(self, distrib_model, optimizer): + hvd.broadcast_parameters(distrib_model.state_dict(), root_rank=0) + hvd.broadcast_optimizer_state(optimizer, root_rank=-0) + + def distributed_Optimizer(self, optimizer, distrib_model): + distOptimizer = hvd.DistributedOptimizer(optimizer, \ + named_parameters=distrib_model.named_parameters(), \ + op = hvd.Average) + return distOptimizer + + def initBackend(self): + hvd.init() + + def dist_gwsize(self): + return hvd.size() + + def dist_lwsize(self): + return hvd.local_size() + + def dist_grank(self): + return hvd.rank() + + def dist_lrank(self): + return hvd.local_rank() + + def 
cleanUp(self): + hvd.shutdown() + + def par_allgather_obj(self,obj,gwsize): + """! function that gathers scalar objects across all workers to an array with size(\#worker) + uses horovod communicator + @param obj object in a worker + @param gwsize global world size + + @return gathered array with size(#worker) + """ + return hvd.allgather_object(obj) + class Net(nn.Module): def __init__(self): @@ -191,13 +225,11 @@ def forward(self, x): return F.log_softmax(x) # train loop - - def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): model.train() t_list = [] - loss_acc = 0 - if grank == 0: + loss_acc=0 + if grank==0: print("\n") for batch_idx, (data, target) in enumerate(train_loader): t = time.perf_counter() @@ -207,19 +239,17 @@ def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): loss = F.nll_loss(output, target) loss.backward() optimizer.step() - if batch_idx % args.log_int == 0 and grank == 0: + if batch_idx % args.log_int == 0 and grank==0: print( f'Train epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\tLoss: {loss.item():.6f}') t_list.append(time.perf_counter() - t) - loss_acc += loss.item() - if grank == 0: - print('TIMER: train time', sum(t_list) / len(t_list), 's') + loss_acc+= loss.item() + if grank==0: + print('TIMER: train time', sum(t_list) / len(t_list),'s') return loss_acc # test loop - - def test(model, device, test_loader, grank, gwsize, args): model.eval() test_loss = 0 @@ -228,13 +258,11 @@ def test(model, device, test_loader, grank, gwsize, args): for data, target in test_loader: data, target = data.to(device), target.to(device) output = model(data) - # sum up batch loss - test_loss += F.nll_loss(output, target, reduction="sum").item() - # get the index of the max log-probability - pred = output.argmax(dim=1, keepdim=True) + test_loss += F.nll_loss(output, target, reduction="sum").item() # sum up batch loss + pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability correct += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) - if grank == 0: + if grank==0: print( f'Test set: average loss: {test_loss:.4f}\t' f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') @@ -243,38 +271,37 @@ def test(model, device, test_loader, grank, gwsize, args): # save state of the training -def save_state(epoch, distrib_model, loss_acc, optimizer, res_name, grank, gwsize, is_best): +def save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,is_best,my_trainer): rt = time.time() # find if is_best happened in any worker if torch.cuda.is_available(): - is_best_m = par_allgather_obj(is_best, gwsize) + is_best_m = my_trainer.par_allgather_obj(is_best,gwsize) + if torch.cuda.is_available(): if any(is_best_m): # find which rank is_best happened - select first rank if multiple - is_best_rank = np.where(np.array(is_best_m) == True)[0][0] + is_best_rank = np.where(np.array(is_best_m)==True)[0][0] # collect state state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer' : optimizer.state_dict()} # write on worker with is_best - if grank == is_best_rank: - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') + if grank == is_best_rank: + 
torch.save(state,'./'+res_name) + print(f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') else: # collect state state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer' : optimizer.state_dict()} + + torch.save(state,'./'+res_name) + print(f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') # deterministic dataloader @@ -283,95 +310,13 @@ def seed_worker(worker_id): np.random.seed(worker_seed) random.seed(worker_seed) -# PARALLEL HELPERS -# sum of field over GPGPUs - - -def par_sum(field): - res = torch.tensor(field).float() - res = res.cuda() if args.cuda else res.cpu() - dist.all_reduce(res, op=dist.ReduceOp.SUM, - group=None, async_op=True).wait() - return res - -# mean of field over GPGPUs - - -def par_mean(field, gwsize): - res = torch.tensor(field).float() - res = res.cuda() if args.cuda else res.cpu() - dist.all_reduce(res, op=dist.ReduceOp.SUM, - group=None, async_op=True).wait() - res /= gwsize - return res - -# max(field) over GPGPUs - - -def par_max(field): - res = torch.tensor(field).float() - res = res.cuda() if args.cuda else res.cpu() - dist.all_reduce(res, op=dist.ReduceOp.MAX, - group=None, async_op=True).wait() - return res - -# min(field) over GPGPUs - - -def par_min(field): - res = torch.tensor(field).float() - res = res.cuda() if args.cuda else res.cpu() - dist.all_reduce(res, op=dist.ReduceOp.MIN, - group=None, async_op=True).wait() - return res -# reduce field to destination with an operation - -def par_reduce(field, dest, oper): - ''' - dest=0 will send the result to GPU on rank 0 (any rank is possible) - op=oper has to be in form "dist.ReduceOp.", where is - SUM - PRODUCT - MIN - MAX - BAND - BOR - BXOR - ''' - res = torch.Tensor([field]) - res = res.cuda() if args.cuda else res.cpu() - dist.reduce(res, dst=dest, op=oper, group=None, async_op=False) - return res - -# gathers tensors from the whole group in a list (to all workers) - - -def par_allgather(field, gwsize): - if args.cuda: - sen = torch.Tensor([field]).cuda() - res = [torch.Tensor([field]).cuda() for i in range(gwsize)] - else: - sen = torch.Tensor([field]) - res = [torch.Tensor([field]) for i in range(gwsize)] - dist.all_gather(res, sen, group=None) - return res - -# gathers any object from the whole group in a list (to all workers) - - -def par_allgather_obj(obj, gwsize): - res = [None]*gwsize - dist.all_gather_object(res, obj, group=None) - return res # # # MAIN # # - - def main(): # get parse args pars_ini() @@ -379,14 +324,15 @@ def main(): # check CUDA availibility args.cuda = not args.no_cuda and torch.cuda.is_available() - # Strategy for distributed training - if args.strategy == 'DDP': - - my_trainer = ddpDistributedTrainer() - - elif args.strategy == 'DS': + #Strategy for distributed training + if args.strategy=='DDP': + my_trainer = ddpDistributedTrainer() + + elif args.strategy=='DS': + my_trainer = dsDistributedTrainer() - my_trainer = dsDistributedTrainer() + elif args.strategy=='HVD': + my_trainer = hvdDistributedTrainer() # limit # of CPU threads to be used per worker torch.set_num_threads(1) @@ -397,6 +343,7 @@ def main(): # start the time.time for profiling st = time.time() + # initializes the distributed backend which will take care of sychronizing 
nodes/GPUs my_trainer.initBackend() @@ -408,64 +355,63 @@ def main(): # get job rank info - rank==0 master gpu if torch.cuda.is_available(): - lwsize = torch.cuda.device_count() if args.cuda else 0 # local world size - per node - gwsize = dist.get_world_size() # global world size - per run - grank = dist.get_rank() # global rank - assign per run - lrank = dist.get_rank() % lwsize # local rank - assign per node + lwsize = my_trainer.dist_lwsize() if args.cuda else 0 # local world size - per node + gwsize = my_trainer.dist_gwsize() # global world size - per run + grank = my_trainer.dist_grank() # global rank - assign per run + lrank = my_trainer.dist_lrank() # local rank - assign per node else: gwsize = 1 grank = 0 # some debug - if grank == 0: + if grank==0: print('TIMER: initialise:', time.time()-st, 's') print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) - print('DEBUG: sys.version:', sys.version, '\n') + print('DEBUG: sys.version:',sys.version,'\n') print('DEBUG: IO parsers:') - print('DEBUG: args.data_dir:', args.data_dir) - print('DEBUG: args.restart_int:', args.restart_int, '\n') + print('DEBUG: args.data_dir:',args.data_dir) + print('DEBUG: args.restart_int:',args.restart_int,'\n') print('DEBUG: model parsers:') - print('DEBUG: args.batch_size:', args.batch_size) - print('DEBUG: args.epochs:', args.epochs) - print('DEBUG: args.lr:', args.lr) - print('DEBUG: args.concM:', args.concM) - print('DEBUG: args.momentum:', args.momentum) - print('DEBUG: args.shuff:', args.shuff, '\n') + print('DEBUG: args.batch_size:',args.batch_size) + print('DEBUG: args.epochs:',args.epochs) + print('DEBUG: args.lr:',args.lr) + print('DEBUG: args.concM:',args.concM) + print('DEBUG: args.momentum:',args.momentum) + print('DEBUG: args.shuff:',args.shuff,'\n') print('DEBUG: debug parsers:') - print('DEBUG: args.testrun:', args.testrun) - print('DEBUG: args.nseed:', args.nseed) - print('DEBUG: args.log_int:', args.log_int, '\n') + print('DEBUG: args.testrun:',args.testrun) + print('DEBUG: args.nseed:',args.nseed) + print('DEBUG: args.log_int:',args.log_int,'\n') print('DEBUG: parallel parsers:') - print('DEBUG: args.backend:', args.backend) - print('DEBUG: args.nworker:', args.nworker) - print('DEBUG: args.prefetch:', args.prefetch) - print('DEBUG: args.cuda:', args.cuda, '\n') + print('DEBUG: args.backend:',args.backend) + print('DEBUG: args.nworker:',args.nworker) + print('DEBUG: args.prefetch:',args.prefetch) + print('DEBUG: args.cuda:',args.cuda,'\n') # encapsulate the model on the GPU assigned to the current process - device = torch.device( - 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) + device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu',lrank) if args.cuda: torch.cuda.set_device(lrank) # deterministic testrun if args.testrun: torch.cuda.manual_seed(args.nseed) -# read data +# read data data_dir = args.data_dir mnist_scale = args.concM largeData = [] for i in range(mnist_scale): largeData.append( datasets.MNIST(data_dir, train=True, download=False, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - ) + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + ) # concat data train_dataset = torch.utils.data.ConcatDataset(largeData) @@ -475,75 +421,68 @@ def main(): for i in range(mnist_scale): largeData.append( datasets.MNIST(data_dir, train=False, download=False, - transform=transforms.Compose([ - transforms.ToTensor(), - 
transforms.Normalize((0.1307,), (0.3081,)) - ])) - ) + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + ) # concat data test_dataset = torch.utils.data.ConcatDataset(largeData) - - # create CNN model - model = Net().to(device) - - my_trainer.setup(train_dataset, test_dataset, gwsize, grank, device=device,model=model,shuff=args.shuff) # restricts data loading to a subset of the dataset exclusive to the current process - #args.shuff = args.shuff and not args.testrun - #if torch.cuda.is_available(): - # train_sampler = torch.utils.data.distributed.DistributedSampler( - # train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) - # test_sampler = torch.utils.data.distributed.DistributedSampler( - # test_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) + args.shuff = args.shuff and not args.testrun + if torch.cuda.is_available(): + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset, num_replicas=gwsize, rank=grank, shuffle = args.shuff) + test_sampler = torch.utils.data.distributed.DistributedSampler( + test_dataset, num_replicas=gwsize, rank=grank, shuffle = args.shuff) # distribute dataset to workers # persistent workers is not possible for nworker=0 - #pers_w = True if args.nworker > 1 else False + pers_w = True if args.nworker>1 else False # deterministic testrun - the same dataset each run - #kwargs = {'worker_init_fn': seed_worker, - #'generator': g} if args.testrun else {} + kwargs = {'worker_init_fn': seed_worker, 'generator': g} if args.testrun else {} - #if torch.cuda.is_available(): - # train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, - #sampler=train_sampler, num_workers=args.nworker, pin_memory=True, - #persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) - # test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, - #sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - #persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) - #else: - # train_loader = torch.utils.data.DataLoader( - # train_dataset, batch_size=args.batch_size) - # test_loader = torch.utils.data.DataLoader( - # test_dataset, batch_size=args.batch_size) - - #if grank == 0: - # print('TIMER: read and concat data:', time.time()-st, 's') + if torch.cuda.is_available(): + train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs ) + test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, + sampler=test_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs ) + else: + train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size) + test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size) + + if grank==0: + print('TIMER: read and concat data:', time.time()-st, 's') + # create CNN model + model = Net().to(device) # distribute model to workers - distrib_model = my_trainer.distributedModel() + distrib_model = my_trainer.distributedModel(model, device) # optimizer - optimizer = torch.optim.SGD( - distrib_model.parameters(), lr=args.lr, momentum=args.momentum) + optimizer = torch.optim.SGD(distrib_model.parameters(), lr=args.lr, momentum=args.momentum) + + my_trainer.broadcastParams(distrib_model, optimizer) 
- train_loader, test_loader = my_trainer.distributedDataloader() + optimizer = my_trainer.distributed_Optimizer(optimizer, distrib_model) -# resume state +# resume state start_epoch = 1 best_acc = np.Inf - res_name = 'checkpoint.pth.tar' + res_name='checkpoint.pth.tar' if os.path.isfile(res_name): try: if torch.cuda.is_available(): dist.barrier() # Map model to be loaded to specified single gpu. - loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { - 'cpu:%d' % 0: 'cpu:%d' % lrank} - checkpoint = torch.load( - program_dir+'/'+res_name, map_location=loc) + loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else {'cpu:%d' % 0: 'cpu:%d' % lrank} + checkpoint = torch.load(program_dir+'/'+res_name, map_location=loc) else: checkpoint = torch.load(program_dir+'/'+res_name) start_epoch = checkpoint['epoch'] @@ -551,46 +490,44 @@ def main(): distrib_model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) if torch.cuda.is_available(): - if grank == 0: + if grank==0: print(f'WARNING: restarting from {start_epoch} epoch') else: print(f'WARNING: restarting from {start_epoch} epoch') except: if torch.cuda.is_available(): - if grank == 0: + if grank==0: print(f'WARNING: restart file cannot be loaded, restarting!') else: print(f'WARNING: restart file cannot be loaded, restarting!') - if start_epoch >= args.epochs: + if start_epoch>args.epochs: if torch.cuda.is_available(): - if grank == 0: - print(f'WARNING: given epochs are less than the one in the restart file!\n' - f'WARNING: SYS.EXIT is issued') - + if grank==0: + print(f'WARNING: given epochs are less than the one in the restart file!\n' + f'WARNING: SYS.EXIT is issued') + my_trainer.cleanUp() sys.exit() else: print(f'WARNING: given epochs are less than the one in the restart file!\n' - f'WARNING: SYS.EXIT is issued') + f'WARNING: SYS.EXIT is issued') sys.exit() # start trainin/testing loop - if grank == 0: + if grank==0: print('TIMER: broadcast:', time.time()-st, 's') print(f'\nDEBUG: start training') - print(f'--------------------------------------------------------') + print(f'--------------------------------------------------------') et = time.time() for epoch in range(start_epoch, args.epochs + 1): lt = time.time() # training - loss_acc = train(distrib_model, device, train_loader, - optimizer, epoch, grank, gwsize, args) + loss_acc = train(distrib_model, device, train_loader, optimizer, epoch, grank, gwsize, args) # testing - acc_test = test(distrib_model, device, - test_loader, grank, gwsize, args) + acc_test = test(distrib_model, device, test_loader, grank, gwsize, args) # save first epoch timer if epoch == start_epoch: @@ -601,27 +538,25 @@ def main(): train_loader.last_epoch = True test_loader.last_epoch = True - if grank == 0: + if grank==0: print('TIMER: epoch time:', time.time()-lt, 's') print('DEBUG: accuracy:', acc_test, '%') # save state if found a better state is_best = loss_acc < best_acc if epoch % args.restart_int == 0: - save_state(epoch, distrib_model, loss_acc, optimizer, - res_name, grank, gwsize, is_best) + save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,is_best,my_trainer) # reset best_acc best_acc = min(loss_acc, best_acc) # finalise # save final state - save_state(epoch, distrib_model, loss_acc, - optimizer, res_name, grank, gwsize, True) - if torch.cuda.is_available(): - dist.barrier() + save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,True,my_trainer) + #if torch.cuda.is_available(): + # dist.barrier() # some debug - if 
grank == 0: + if grank==0: print(f'\n--------------------------------------------------------') print('DEBUG: training results:\n') print('TIMER: first epoch time:', first_ep_t, ' s') @@ -629,24 +564,20 @@ def main(): print('TIMER: average epoch time:', (time.time()-et)/args.epochs, ' s') print('TIMER: total epoch time:', time.time()-et, ' s') if epoch > 1: - print('TIMER: total epoch-1 time:', - time.time()-et-first_ep_t, ' s') - print('TIMER: average epoch-1 time:', - (time.time()-et-first_ep_t)/(args.epochs-1), ' s') + print('TIMER: total epoch-1 time:', time.time()-et-first_ep_t, ' s') + print('TIMER: average epoch-1 time:', (time.time()-et-first_ep_t)/(args.epochs-1), ' s') print('DEBUG: last accuracy:', acc_test, '%') - print('DEBUG: memory req:', int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') \ - if args.cuda else 'DEBUG: memory req: - MB' - print('DEBUG: memory summary:\n\n', - torch.cuda.memory_summary(0)) if args.cuda else '' + print('DEBUG: memory req:',int(torch.cuda.memory_reserved(lrank)/1024/1024),'MB') \ + if args.cuda else 'DEBUG: memory req: - MB' + print('DEBUG: memory summary:\n\n',torch.cuda.memory_summary(0)) if args.cuda else '' - if grank == 0: + if grank==0: print(f'TIMER: final time: {time.time()-st} s\n') my_trainer.cleanUp() - -if __name__ == "__main__": +if __name__ == "__main__": main() sys.exit() -# eof +#eof From 0a56335a793a298d36ce22c04a22cca524baa7cc Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Fri, 8 Dec 2023 20:52:36 +0530 Subject: [PATCH 012/171] Update general_startscript --- experimental/trainer/general_startscript | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/experimental/trainer/general_startscript b/experimental/trainer/general_startscript index a39d034f..455466b4 100755 --- a/experimental/trainer/general_startscript +++ b/experimental/trainer/general_startscript @@ -26,7 +26,7 @@ strategy='DS' # parameters debug=false # do debug bs=32 # batch-size -epochs=4 # epochs +epochs=1 # epochs lr=0.01 # learning rate # AT @@ -68,13 +68,13 @@ if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK fi -COMMAND="general_trainer.py" +COMMAND="general_trainer_v2.py" #launch if [[ $strategy == *"HVD"* ]]; then EXEC="$COMMAND \ - --strat $strategy \ + --strategy $strategy \ --batch-size $bs \ --epochs $epochs \ --lr $lr \ @@ -133,4 +133,3 @@ else $EXEC --deepspeed_mpi --deepspeed_config DS_config.json fi - From 391f20a50ba4fe21b1543d5044b17140ce402c4d Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Thu, 21 Dec 2023 18:08:56 +0530 Subject: [PATCH 013/171] Update general_trainer.py --- experimental/trainer/general_trainer.py | 232 +++++++++++++++++------- 1 file changed, 170 insertions(+), 62 deletions(-) diff --git a/experimental/trainer/general_trainer.py b/experimental/trainer/general_trainer.py index b72e27dd..1e2542af 100755 --- a/experimental/trainer/general_trainer.py +++ b/experimental/trainer/general_trainer.py @@ -71,11 +71,80 @@ def pars_ini(): args = parser.parse_args() -class ddpDistributedTrainer: - #def __init__(self, model): - # self.model=model +class DistributedStrategy(abc.ABC): + """Abstract class to define the distributed backend methods""" + @abc.abstractmethod + def init_backend(self) -> None: + """Initializes the chosen distributed backend""" + + @abc.abstractmethod + def distribute_model(self,model,device) -> nn.Module: + """ + DDP and DS: achieves data parallelism by synchronising the 
gradients across + each model replica located in each available computing device. + HVD: returns the same model, parameters updates through broadcast_params method + """ + + @abc.abstractmethod + def broadcast_params(self, distrib_model, optimizer) -> None: + """ + HVD: broadcasts variables from root rank to all other processes + DDP and DS: pass + """ + + @abc.abstractmethod + def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: + """ + HVD: construct a new DistributedOptimizer, which uses another optimizer + under the hood for computing single-process gradient values and + applying gradient updates after the gradient values have been + combined across all the Horovod ranks. + DDP and DS: returns the same optimizer passed in argument + """ + + @abc.abstractmethod + def dist_gwsize(self) -> int: + """Returns the number of processes""" + + @abc.abstractmethod + def dist_lwsize(self) -> int: + """Returns the number of GPUs available""" + + @abc.abstractmethod + def dist_grank(self) -> int: + """ + Returns the rank of the current process. + Rank ranges from 0 to world_size + """ + + @abc.abstractmethod + def dist_lrank(self) -> int: + """Returns the local rank of the current process.""" + + @abc.abstractmethod + def clean_up(self) -> None: + """Destroys the current process group.""" + + @abc.abstractmethod + def par_allgather_obj(self,obj,gwsize) -> list: + """ + Gathers any object from the whole group + in a list (to all workers) + """ - def distributedModel(self,model,device): + +class DDPDistributedTrainer(DistributedStrategy): + """PyTorch DDP distributed training class""" + def init_backend(self) -> None: + """Initializes the distributed process group and the distributed package""" + if torch.cuda.is_available(): + dist.init_process_group(backend=args.backend) + + def distribute_model(self,model,device) -> nn.Module: + """ + Achieves data parallelism by synchronising the gradients across + each model replica located in each available computing device. + """ if torch.cuda.is_available(): dist_model = torch.nn.parallel.DistributedDataParallel(model,\ device_ids=[device], output_device=device) @@ -84,124 +153,163 @@ def distributedModel(self,model,device): return dist_model - def broadcastParams(self, distrib_model, optimizer): + def broadcast_params(self, distrib_model, optimizer) -> None: + """Only applicable for Horovod, else pass""" pass - def distributed_Optimizer(self, optimizer, distrib_model): + def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: + """Only applicable for Horovod, else returns the optimizer from the argument""" return optimizer - def initBackend(self): - if torch.cuda.is_available(): - dist.init_process_group(backend=args.backend) - - def dist_gwsize(self): + def dist_gwsize(self) -> int: + """Returns the number of processes""" return dist.get_world_size() - def dist_lwsize(self): + def dist_lwsize(self) -> int: + """Returns the number of GPUs available""" return torch.cuda.device_count() - def dist_grank(self): + def dist_grank(self) -> int: + """ + Returns the rank of the current process. 
+ Rank ranges from 0 to world_size + """ return dist.get_rank() - def dist_lrank(self): + def dist_lrank(self) -> int: + """Returns the local rank of the current process.""" return dist.get_rank()%torch.cuda.device_count() - def cleanUp(self): + def clean_up(self) -> None: + """Destroys the current process group.""" if torch.cuda.is_available(): dist.barrier() dist.destroy_process_group() - # gathers any object from the whole group in a list (to all workers) - def par_allgather_obj(self,obj,gwsize): + def par_allgather_obj(self,obj,gwsize) -> list: + """ + Gathers any object from the whole group + in a list (to all workers) + """ res = [None]*gwsize dist.all_gather_object(res,obj) return res -class dsDistributedTrainer: - #def __init__(self, model): - # self.model=model +class DSDistributedTrainer(DistributedStrategy): + """DeepSpeed distributed training class""" + def init_backend(self) -> None: + """Initializes the distributed process group and the distributed package""" + deepspeed.init_distributed(dist_backend=args.backend) - def distributedModel(self, model, device): - # 1) Distributed model - # 2) DeepSpeed optimizer - # 3) Distributed data loader + def distribute_model(self, model, device) -> nn.Module: + """ + Achieves data parallelism by synchronising the gradients across + each model replica located in each available computing device. + """ distrib_model, __, __, __ = deepspeed.initialize( args=args, model=model, model_parameters=model.parameters(), dist_init_required=True) return distrib_model - def broadcastParams(self, distrib_model, optimizer): + def broadcast_params(self, distrib_model, optimizer) -> None: + """Only applicable for Horovod, else pass""" pass - def distributed_Optimizer(self, optimizer, distrib_model): + def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: + """Only applicable for Horovod, else returns the optimizer from the argument""" return optimizer - def initBackend(self): - deepspeed.init_distributed(dist_backend=args.backend) - - def dist_gwsize(self): + def dist_gwsize(self) -> int: + """Returns the number of processes""" return dist.get_world_size() - def dist_lwsize(self): + def dist_lwsize(self) -> int: + """Returns the number of GPUs available""" return torch.cuda.device_count() - def dist_grank(self): + def dist_grank(self) -> int: + """ + Returns the rank of the current process. 
+ Rank ranges from 0 to world_size + """ return dist.get_rank() - def dist_lrank(self): + def dist_lrank(self) -> int: + """Returns the local rank of the current process.""" return dist.get_rank()%torch.cuda.device_count() - def cleanUp(self): + def clean_up(self) -> None: + """Destroys the current process group.""" deepspeed.sys.exit() - # gathers any object from the whole group in a list (to all workers) - def par_allgather_obj(self,obj,gwsize): + def par_allgather_obj(self,obj,gwsize) -> list: + """ + Gathers any object from the whole group + in a list (to all workers) + """ res = [None]*gwsize dist.all_gather_object(res,obj) return res -class hvdDistributedTrainer: - #def __init__(self, model): - # self.model=model +class HVDDistributedTrainer(DistributedStrategy): + """Horovod distributed training class""" + def init_backend(self) -> None: + """Initializes the Horovod distributed backend""" + hvd.init() - def distributedModel(self, model, device): + def distribute_model(self, model, device) -> nn.Module: + """For Horovod, returns the same model passed as argument""" distrib_model = model return distrib_model - def broadcastParams(self, distrib_model, optimizer): + def broadcast_params(self, distrib_model, optimizer) -> None: + """Broadcasts variables from root rank to all other processes""" hvd.broadcast_parameters(distrib_model.state_dict(), root_rank=0) hvd.broadcast_optimizer_state(optimizer, root_rank=-0) - def distributed_Optimizer(self, optimizer, distrib_model): + def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: + """ + Construct a new DistributedOptimizer, which uses another optimizer + under the hood for computing single-process gradient values and + applying gradient updates after the gradient values have been + combined across all the Horovod ranks. + """ distOptimizer = hvd.DistributedOptimizer(optimizer, \ named_parameters=distrib_model.named_parameters(), \ op = hvd.Average) return distOptimizer - def initBackend(self): - hvd.init() - - def dist_gwsize(self): + def dist_gwsize(self) -> int: + """Returns the number of processes""" return hvd.size() - def dist_lwsize(self): + def dist_lwsize(self) -> int: + """Returns the number of GPUs available""" return hvd.local_size() - def dist_grank(self): + def dist_grank(self) -> int: + """ + Returns the rank of the current process. + Rank ranges from 0 to world_size + """ return hvd.rank() - def dist_lrank(self): + def dist_lrank(self) -> int: + """Returns the local rank of the current process.""" return hvd.local_rank() - def cleanUp(self): + def clean_up(self) -> None: + """Shuts Horovod down.""" hvd.shutdown() - def par_allgather_obj(self,obj,gwsize): - """! 
function that gathers scalar objects across all workers to an array with size(\#worker) + def par_allgather_obj(self,obj,gwsize) -> list: + """ + Gathers scalar objects across + all workers to a list with size(\#worker) uses horovod communicator @param obj object in a worker @param gwsize global world size - @return gathered array with size(#worker) + @return gathered list with size(#worker) """ return hvd.allgather_object(obj) @@ -326,13 +434,13 @@ def main(): #Strategy for distributed training if args.strategy=='DDP': - my_trainer = ddpDistributedTrainer() + my_trainer = DDPDistributedTrainer() elif args.strategy=='DS': - my_trainer = dsDistributedTrainer() + my_trainer = DSDistributedTrainer() elif args.strategy=='HVD': - my_trainer = hvdDistributedTrainer() + my_trainer = HVDDistributedTrainer() # limit # of CPU threads to be used per worker torch.set_num_threads(1) @@ -345,7 +453,7 @@ def main(): # initializes the distributed backend which will take care of sychronizing nodes/GPUs - my_trainer.initBackend() + my_trainer.init_backend() # deterministic testrun if args.testrun: @@ -463,14 +571,14 @@ def main(): model = Net().to(device) # distribute model to workers - distrib_model = my_trainer.distributedModel(model, device) + distrib_model = my_trainer.distribute_model(model, device) # optimizer optimizer = torch.optim.SGD(distrib_model.parameters(), lr=args.lr, momentum=args.momentum) - my_trainer.broadcastParams(distrib_model, optimizer) + my_trainer.broadcast_params(distrib_model, optimizer) - optimizer = my_trainer.distributed_Optimizer(optimizer, distrib_model) + optimizer = my_trainer.distribute_optimizer(optimizer, distrib_model) # resume state start_epoch = 1 @@ -507,7 +615,7 @@ def main(): print(f'WARNING: given epochs are less than the one in the restart file!\n' f'WARNING: SYS.EXIT is issued') - my_trainer.cleanUp() + my_trainer.clean_up() sys.exit() else: print(f'WARNING: given epochs are less than the one in the restart file!\n' @@ -574,7 +682,7 @@ def main(): if grank==0: print(f'TIMER: final time: {time.time()-st} s\n') - my_trainer.cleanUp() + my_trainer.clean_up() if __name__ == "__main__": main() From 88c9dfea7c3691884475a466c4128228f2965dda Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 21 Dec 2023 13:50:46 +0100 Subject: [PATCH 014/171] UPDATE .gitignore --- .gitignore | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 2f0ad142..022d77ab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,30 +1,37 @@ *_logs -exp_data/ TODO /data nohup* -lightning_logs -mlruns tmp* .tmp* checkpoints/ mamba* -MNIST -mllogs -*.out -*.err -.logs/ pl-training.yml *-predictions/ *-data/ -*.pth *.tar.gz +*.pth +*.csv + +# Use cases files +MNIST +3dgan-generated-data/ +mnist-sample-data/ +exp_data/ + # Custom envs .venv* # Logs logs/ +ml_logs/ +mllogs/ +*.out +*.err +.logs/ +lightning_logs/ +mlruns/ # Byte-compiled / optimized / DLL files __pycache__/ From 55780789fa25a6c5496421226c1f880e7be62f2d Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 21 Dec 2023 14:07:26 +0100 Subject: [PATCH 015/171] Update distrib strategy --- experimental/trainer/general_trainer.py | 321 ++++++++++++++---------- 1 file changed, 184 insertions(+), 137 deletions(-) diff --git a/experimental/trainer/general_trainer.py b/experimental/trainer/general_trainer.py index 1e2542af..86877bcc 100755 --- a/experimental/trainer/general_trainer.py +++ b/experimental/trainer/general_trainer.py @@ -4,7 +4,14 @@ # version: 211029a # std libs 
-import argparse, sys, os, time, numpy as np, random +from typing import Any, Union +import argparse +import sys +import os +import time +import numpy as np +import random +import abc # ml libs import deepspeed @@ -16,7 +23,11 @@ import torch.optim as optim from torchvision import datasets, transforms +# from itwinai.types import MLModel, MLDevice + # parsed settings + + def pars_ini(): global args parser = argparse.ArgumentParser(description='PyTorch MNIST Example') @@ -78,11 +89,15 @@ def init_backend(self) -> None: """Initializes the chosen distributed backend""" @abc.abstractmethod - def distribute_model(self,model,device) -> nn.Module: - """ - DDP and DS: achieves data parallelism by synchronising the gradients across - each model replica located in each available computing device. - HVD: returns the same model, parameters updates through broadcast_params method + def distribute_model(self, model: Any, device: Union[int, str]) -> Any: + """Distributes a machine learning model. + + Args: + model (Any): a generic ML model to be distributed. + device (Union[int, str]): device on which the model is run. + + Returns: + Any: distributed model instance. """ @abc.abstractmethod @@ -93,11 +108,11 @@ def broadcast_params(self, distrib_model, optimizer) -> None: """ @abc.abstractmethod - def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: + def distribute_optimizer(self, optimizer, distrib_model) -> Any: """ - HVD: construct a new DistributedOptimizer, which uses another optimizer - under the hood for computing single-process gradient values and - applying gradient updates after the gradient values have been + HVD: construct a new DistributedOptimizer, which uses another optimizer + under the hood for computing single-process gradient values and + applying gradient updates after the gradient values have been combined across all the Horovod ranks. DDP and DS: returns the same optimizer passed in argument """ @@ -126,7 +141,7 @@ def clean_up(self) -> None: """Destroys the current process group.""" @abc.abstractmethod - def par_allgather_obj(self,obj,gwsize) -> list: + def par_allgather_obj(self, obj, gwsize) -> list: """ Gathers any object from the whole group in a list (to all workers) @@ -135,19 +150,23 @@ def par_allgather_obj(self,obj,gwsize) -> list: class DDPDistributedTrainer(DistributedStrategy): """PyTorch DDP distributed training class""" + def init_backend(self) -> None: """Initializes the distributed process group and the distributed package""" if torch.cuda.is_available(): - dist.init_process_group(backend=args.backend) + dist.init_process_group(backend=args.backend) - def distribute_model(self,model,device) -> nn.Module: + def distribute_model(self, model, device) -> nn.Module: """ Achieves data parallelism by synchronising the gradients across each model replica located in each available computing device. 
""" if torch.cuda.is_available(): - dist_model = torch.nn.parallel.DistributedDataParallel(model,\ - device_ids=[device], output_device=device) + dist_model = torch.nn.parallel.DistributedDataParallel( + model, + device_ids=[device], + output_device=device + ) else: dist_model = model @@ -157,7 +176,11 @@ def broadcast_params(self, distrib_model, optimizer) -> None: """Only applicable for Horovod, else pass""" pass - def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: + def distribute_optimizer( + self, + optimizer, + distrib_model + ) -> optim.Optimizer: """Only applicable for Horovod, else returns the optimizer from the argument""" return optimizer @@ -178,25 +201,27 @@ def dist_grank(self) -> int: def dist_lrank(self) -> int: """Returns the local rank of the current process.""" - return dist.get_rank()%torch.cuda.device_count() + return dist.get_rank() % torch.cuda.device_count() def clean_up(self) -> None: """Destroys the current process group.""" if torch.cuda.is_available(): - dist.barrier() - dist.destroy_process_group() + dist.barrier() + dist.destroy_process_group() - def par_allgather_obj(self,obj,gwsize) -> list: + def par_allgather_obj(self, obj, gwsize) -> list: """ Gathers any object from the whole group in a list (to all workers) """ res = [None]*gwsize - dist.all_gather_object(res,obj) + dist.all_gather_object(res, obj) return res + class DSDistributedTrainer(DistributedStrategy): """DeepSpeed distributed training class""" + def init_backend(self) -> None: """Initializes the distributed process group and the distributed package""" deepspeed.init_distributed(dist_backend=args.backend) @@ -235,30 +260,32 @@ def dist_grank(self) -> int: def dist_lrank(self) -> int: """Returns the local rank of the current process.""" - return dist.get_rank()%torch.cuda.device_count() + return dist.get_rank() % torch.cuda.device_count() def clean_up(self) -> None: """Destroys the current process group.""" deepspeed.sys.exit() - def par_allgather_obj(self,obj,gwsize) -> list: + def par_allgather_obj(self, obj, gwsize) -> list: """ Gathers any object from the whole group in a list (to all workers) """ res = [None]*gwsize - dist.all_gather_object(res,obj) + dist.all_gather_object(res, obj) return res + class HVDDistributedTrainer(DistributedStrategy): """Horovod distributed training class""" + def init_backend(self) -> None: """Initializes the Horovod distributed backend""" hvd.init() def distribute_model(self, model, device) -> nn.Module: """For Horovod, returns the same model passed as argument""" - distrib_model = model + distrib_model = model return distrib_model def broadcast_params(self, distrib_model, optimizer) -> None: @@ -273,9 +300,9 @@ def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: applying gradient updates after the gradient values have been combined across all the Horovod ranks. 
""" - distOptimizer = hvd.DistributedOptimizer(optimizer, \ - named_parameters=distrib_model.named_parameters(), \ - op = hvd.Average) + distOptimizer = hvd.DistributedOptimizer(optimizer, + named_parameters=distrib_model.named_parameters(), + op=hvd.Average) return distOptimizer def dist_gwsize(self) -> int: @@ -301,7 +328,7 @@ def clean_up(self) -> None: """Shuts Horovod down.""" hvd.shutdown() - def par_allgather_obj(self,obj,gwsize) -> list: + def par_allgather_obj(self, obj, gwsize) -> list: """ Gathers scalar objects across all workers to a list with size(\#worker) @@ -333,11 +360,13 @@ def forward(self, x): return F.log_softmax(x) # train loop + + def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): model.train() t_list = [] - loss_acc=0 - if grank==0: + loss_acc = 0 + if grank == 0: print("\n") for batch_idx, (data, target) in enumerate(train_loader): t = time.perf_counter() @@ -347,17 +376,19 @@ def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): loss = F.nll_loss(output, target) loss.backward() optimizer.step() - if batch_idx % args.log_int == 0 and grank==0: + if batch_idx % args.log_int == 0 and grank == 0: print( f'Train epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\tLoss: {loss.item():.6f}') t_list.append(time.perf_counter() - t) - loss_acc+= loss.item() - if grank==0: - print('TIMER: train time', sum(t_list) / len(t_list),'s') + loss_acc += loss.item() + if grank == 0: + print('TIMER: train time', sum(t_list) / len(t_list), 's') return loss_acc # test loop + + def test(model, device, test_loader, grank, gwsize, args): model.eval() test_loss = 0 @@ -366,11 +397,13 @@ def test(model, device, test_loader, grank, gwsize, args): for data, target in test_loader: data, target = data.to(device), target.to(device) output = model(data) - test_loss += F.nll_loss(output, target, reduction="sum").item() # sum up batch loss - pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability + # sum up batch loss + test_loss += F.nll_loss(output, target, reduction="sum").item() + # get the index of the max log-probability + pred = output.argmax(dim=1, keepdim=True) correct += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) - if grank==0: + if grank == 0: print( f'Test set: average loss: {test_loss:.4f}\t' f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') @@ -379,37 +412,38 @@ def test(model, device, test_loader, grank, gwsize, args): # save state of the training -def save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,is_best,my_trainer): +def save_state(epoch, distrib_model, loss_acc, optimizer, res_name, grank, gwsize, is_best, my_trainer): rt = time.time() # find if is_best happened in any worker if torch.cuda.is_available(): - is_best_m = my_trainer.par_allgather_obj(is_best,gwsize) - + is_best_m = my_trainer.par_allgather_obj(is_best, gwsize) if torch.cuda.is_available(): if any(is_best_m): # find which rank is_best happened - select first rank if multiple - is_best_rank = np.where(np.array(is_best_m)==True)[0][0] + is_best_rank = np.where(np.array(is_best_m) == True)[0][0] # collect state state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer' : optimizer.state_dict()} + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} # write on worker with is_best - if 
grank == is_best_rank: - torch.save(state,'./'+res_name) - print(f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') + if grank == is_best_rank: + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') else: # collect state state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer' : optimizer.state_dict()} - - torch.save(state,'./'+res_name) - print(f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} + + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') # deterministic dataloader @@ -419,7 +453,6 @@ def seed_worker(worker_id): random.seed(worker_seed) - # # # MAIN @@ -432,15 +465,15 @@ def main(): # check CUDA availibility args.cuda = not args.no_cuda and torch.cuda.is_available() - #Strategy for distributed training - if args.strategy=='DDP': - my_trainer = DDPDistributedTrainer() - - elif args.strategy=='DS': - my_trainer = DSDistributedTrainer() + # Strategy for distributed training + if args.strategy == 'DDP': + my_trainer = DDPDistributedTrainer() - elif args.strategy=='HVD': - my_trainer = HVDDistributedTrainer() + elif args.strategy == 'DS': + my_trainer = DSDistributedTrainer() + + elif args.strategy == 'HVD': + my_trainer = HVDDistributedTrainer() # limit # of CPU threads to be used per worker torch.set_num_threads(1) @@ -451,7 +484,6 @@ def main(): # start the time.time for profiling st = time.time() - # initializes the distributed backend which will take care of sychronizing nodes/GPUs my_trainer.init_backend() @@ -463,7 +495,7 @@ def main(): # get job rank info - rank==0 master gpu if torch.cuda.is_available(): - lwsize = my_trainer.dist_lwsize() if args.cuda else 0 # local world size - per node + lwsize = my_trainer.dist_lwsize() if args.cuda else 0 # local world size - per node gwsize = my_trainer.dist_gwsize() # global world size - per run grank = my_trainer.dist_grank() # global rank - assign per run lrank = my_trainer.dist_lrank() # local rank - assign per node @@ -472,54 +504,55 @@ def main(): grank = 0 # some debug - if grank==0: + if grank == 0: print('TIMER: initialise:', time.time()-st, 's') print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) - print('DEBUG: sys.version:',sys.version,'\n') + print('DEBUG: sys.version:', sys.version, '\n') print('DEBUG: IO parsers:') - print('DEBUG: args.data_dir:',args.data_dir) - print('DEBUG: args.restart_int:',args.restart_int,'\n') + print('DEBUG: args.data_dir:', args.data_dir) + print('DEBUG: args.restart_int:', args.restart_int, '\n') print('DEBUG: model parsers:') - print('DEBUG: args.batch_size:',args.batch_size) - print('DEBUG: args.epochs:',args.epochs) - print('DEBUG: args.lr:',args.lr) - print('DEBUG: args.concM:',args.concM) - print('DEBUG: args.momentum:',args.momentum) - print('DEBUG: args.shuff:',args.shuff,'\n') + print('DEBUG: args.batch_size:', args.batch_size) + print('DEBUG: args.epochs:', args.epochs) + print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.concM:', args.concM) + print('DEBUG: args.momentum:', args.momentum) + print('DEBUG: args.shuff:', args.shuff, '\n') print('DEBUG: debug parsers:') - print('DEBUG: args.testrun:',args.testrun) - print('DEBUG: args.nseed:',args.nseed) - print('DEBUG: args.log_int:',args.log_int,'\n') + print('DEBUG: 
args.testrun:', args.testrun) + print('DEBUG: args.nseed:', args.nseed) + print('DEBUG: args.log_int:', args.log_int, '\n') print('DEBUG: parallel parsers:') - print('DEBUG: args.backend:',args.backend) - print('DEBUG: args.nworker:',args.nworker) - print('DEBUG: args.prefetch:',args.prefetch) - print('DEBUG: args.cuda:',args.cuda,'\n') + print('DEBUG: args.backend:', args.backend) + print('DEBUG: args.nworker:', args.nworker) + print('DEBUG: args.prefetch:', args.prefetch) + print('DEBUG: args.cuda:', args.cuda, '\n') # encapsulate the model on the GPU assigned to the current process - device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu',lrank) + device = torch.device( + 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) if args.cuda: torch.cuda.set_device(lrank) # deterministic testrun if args.testrun: torch.cuda.manual_seed(args.nseed) -# read data +# read data data_dir = args.data_dir mnist_scale = args.concM largeData = [] for i in range(mnist_scale): largeData.append( datasets.MNIST(data_dir, train=True, download=False, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - ) + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + ) # concat data train_dataset = torch.utils.data.ConcatDataset(largeData) @@ -529,11 +562,11 @@ def main(): for i in range(mnist_scale): largeData.append( datasets.MNIST(data_dir, train=False, download=False, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - ) + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + ) # concat data test_dataset = torch.utils.data.ConcatDataset(largeData) @@ -542,30 +575,33 @@ def main(): args.shuff = args.shuff and not args.testrun if torch.cuda.is_available(): train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, shuffle = args.shuff) + train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) test_sampler = torch.utils.data.distributed.DistributedSampler( - test_dataset, num_replicas=gwsize, rank=grank, shuffle = args.shuff) + test_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) # distribute dataset to workers # persistent workers is not possible for nworker=0 - pers_w = True if args.nworker>1 else False + pers_w = True if args.nworker > 1 else False # deterministic testrun - the same dataset each run - kwargs = {'worker_init_fn': seed_worker, 'generator': g} if args.testrun else {} + kwargs = {'worker_init_fn': seed_worker, + 'generator': g} if args.testrun else {} if torch.cuda.is_available(): train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs ) + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, - sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs ) + sampler=test_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) else: - train_loader = torch.utils.data.DataLoader(train_dataset, 
batch_size=args.batch_size) - test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size) + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size) + test_loader = torch.utils.data.DataLoader( + test_dataset, batch_size=args.batch_size) - if grank==0: - print('TIMER: read and concat data:', time.time()-st, 's') + if grank == 0: + print('TIMER: read and concat data:', time.time()-st, 's') # create CNN model model = Net().to(device) @@ -574,23 +610,26 @@ def main(): distrib_model = my_trainer.distribute_model(model, device) # optimizer - optimizer = torch.optim.SGD(distrib_model.parameters(), lr=args.lr, momentum=args.momentum) - + optimizer = torch.optim.SGD( + distrib_model.parameters(), lr=args.lr, momentum=args.momentum) + my_trainer.broadcast_params(distrib_model, optimizer) optimizer = my_trainer.distribute_optimizer(optimizer, distrib_model) -# resume state +# resume state start_epoch = 1 best_acc = np.Inf - res_name='checkpoint.pth.tar' + res_name = 'checkpoint.pth.tar' if os.path.isfile(res_name): try: if torch.cuda.is_available(): dist.barrier() # Map model to be loaded to specified single gpu. - loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else {'cpu:%d' % 0: 'cpu:%d' % lrank} - checkpoint = torch.load(program_dir+'/'+res_name, map_location=loc) + loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { + 'cpu:%d' % 0: 'cpu:%d' % lrank} + checkpoint = torch.load( + program_dir+'/'+res_name, map_location=loc) else: checkpoint = torch.load(program_dir+'/'+res_name) start_epoch = checkpoint['epoch'] @@ -598,44 +637,46 @@ def main(): distrib_model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) if torch.cuda.is_available(): - if grank==0: + if grank == 0: print(f'WARNING: restarting from {start_epoch} epoch') else: print(f'WARNING: restarting from {start_epoch} epoch') except: if torch.cuda.is_available(): - if grank==0: + if grank == 0: print(f'WARNING: restart file cannot be loaded, restarting!') else: print(f'WARNING: restart file cannot be loaded, restarting!') - if start_epoch>args.epochs: + if start_epoch > args.epochs: if torch.cuda.is_available(): - if grank==0: - print(f'WARNING: given epochs are less than the one in the restart file!\n' - f'WARNING: SYS.EXIT is issued') - + if grank == 0: + print(f'WARNING: given epochs are less than the one in the restart file!\n' + f'WARNING: SYS.EXIT is issued') + my_trainer.clean_up() sys.exit() else: print(f'WARNING: given epochs are less than the one in the restart file!\n' - f'WARNING: SYS.EXIT is issued') + f'WARNING: SYS.EXIT is issued') sys.exit() # start trainin/testing loop - if grank==0: + if grank == 0: print('TIMER: broadcast:', time.time()-st, 's') print(f'\nDEBUG: start training') - print(f'--------------------------------------------------------') + print(f'--------------------------------------------------------') et = time.time() for epoch in range(start_epoch, args.epochs + 1): lt = time.time() # training - loss_acc = train(distrib_model, device, train_loader, optimizer, epoch, grank, gwsize, args) + loss_acc = train(distrib_model, device, train_loader, + optimizer, epoch, grank, gwsize, args) # testing - acc_test = test(distrib_model, device, test_loader, grank, gwsize, args) + acc_test = test(distrib_model, device, + test_loader, grank, gwsize, args) # save first epoch timer if epoch == start_epoch: @@ -646,25 +687,27 @@ def main(): train_loader.last_epoch = True test_loader.last_epoch = True - if 
grank==0: + if grank == 0: print('TIMER: epoch time:', time.time()-lt, 's') print('DEBUG: accuracy:', acc_test, '%') # save state if found a better state is_best = loss_acc < best_acc if epoch % args.restart_int == 0: - save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,is_best,my_trainer) + save_state(epoch, distrib_model, loss_acc, optimizer, + res_name, grank, gwsize, is_best, my_trainer) # reset best_acc best_acc = min(loss_acc, best_acc) # finalise # save final state - save_state(epoch,distrib_model,loss_acc,optimizer,res_name,grank,gwsize,True,my_trainer) - #if torch.cuda.is_available(): + save_state(epoch, distrib_model, loss_acc, optimizer, + res_name, grank, gwsize, True, my_trainer) + # if torch.cuda.is_available(): # dist.barrier() # some debug - if grank==0: + if grank == 0: print(f'\n--------------------------------------------------------') print('DEBUG: training results:\n') print('TIMER: first epoch time:', first_ep_t, ' s') @@ -672,20 +715,24 @@ def main(): print('TIMER: average epoch time:', (time.time()-et)/args.epochs, ' s') print('TIMER: total epoch time:', time.time()-et, ' s') if epoch > 1: - print('TIMER: total epoch-1 time:', time.time()-et-first_ep_t, ' s') - print('TIMER: average epoch-1 time:', (time.time()-et-first_ep_t)/(args.epochs-1), ' s') + print('TIMER: total epoch-1 time:', + time.time()-et-first_ep_t, ' s') + print('TIMER: average epoch-1 time:', + (time.time()-et-first_ep_t)/(args.epochs-1), ' s') print('DEBUG: last accuracy:', acc_test, '%') - print('DEBUG: memory req:',int(torch.cuda.memory_reserved(lrank)/1024/1024),'MB') \ - if args.cuda else 'DEBUG: memory req: - MB' - print('DEBUG: memory summary:\n\n',torch.cuda.memory_summary(0)) if args.cuda else '' + print('DEBUG: memory req:', int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') \ + if args.cuda else 'DEBUG: memory req: - MB' + print('DEBUG: memory summary:\n\n', + torch.cuda.memory_summary(0)) if args.cuda else '' - if grank==0: + if grank == 0: print(f'TIMER: final time: {time.time()-st} s\n') my_trainer.clean_up() -if __name__ == "__main__": + +if __name__ == "__main__": main() sys.exit() -#eof +# eof From 7ddb48c684dde7852801756cfae918757dfdac31 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 21 Dec 2023 15:08:13 +0100 Subject: [PATCH 016/171] UPDATE torch distributed strategy classes --- experimental/trainer/general_trainer.py | 275 +--------------------- src/itwinai/distributed.py | 5 + src/itwinai/tensorflow/distributed.py | 7 + src/itwinai/torch/distributed.py | 299 ++++++++++++++++++++++++ 4 files changed, 320 insertions(+), 266 deletions(-) create mode 100644 src/itwinai/distributed.py create mode 100644 src/itwinai/tensorflow/distributed.py create mode 100644 src/itwinai/torch/distributed.py diff --git a/experimental/trainer/general_trainer.py b/experimental/trainer/general_trainer.py index 86877bcc..119f10dd 100755 --- a/experimental/trainer/general_trainer.py +++ b/experimental/trainer/general_trainer.py @@ -17,13 +17,15 @@ import deepspeed import torch import torch.distributed as dist -import horovod.torch as hvd import torch.nn as nn import torch.nn.functional as F -import torch.optim as optim from torchvision import datasets, transforms -# from itwinai.types import MLModel, MLDevice +from itwinai.torch.distributed import ( + DDPDistributedStrategy, + DSDistributedStrategy, + HVDDistributedStrategy +) # parsed settings @@ -82,265 +84,6 @@ def pars_ini(): args = parser.parse_args() -class DistributedStrategy(abc.ABC): - """Abstract class to 
define the distributed backend methods""" - @abc.abstractmethod - def init_backend(self) -> None: - """Initializes the chosen distributed backend""" - - @abc.abstractmethod - def distribute_model(self, model: Any, device: Union[int, str]) -> Any: - """Distributes a machine learning model. - - Args: - model (Any): a generic ML model to be distributed. - device (Union[int, str]): device on which the model is run. - - Returns: - Any: distributed model instance. - """ - - @abc.abstractmethod - def broadcast_params(self, distrib_model, optimizer) -> None: - """ - HVD: broadcasts variables from root rank to all other processes - DDP and DS: pass - """ - - @abc.abstractmethod - def distribute_optimizer(self, optimizer, distrib_model) -> Any: - """ - HVD: construct a new DistributedOptimizer, which uses another optimizer - under the hood for computing single-process gradient values and - applying gradient updates after the gradient values have been - combined across all the Horovod ranks. - DDP and DS: returns the same optimizer passed in argument - """ - - @abc.abstractmethod - def dist_gwsize(self) -> int: - """Returns the number of processes""" - - @abc.abstractmethod - def dist_lwsize(self) -> int: - """Returns the number of GPUs available""" - - @abc.abstractmethod - def dist_grank(self) -> int: - """ - Returns the rank of the current process. - Rank ranges from 0 to world_size - """ - - @abc.abstractmethod - def dist_lrank(self) -> int: - """Returns the local rank of the current process.""" - - @abc.abstractmethod - def clean_up(self) -> None: - """Destroys the current process group.""" - - @abc.abstractmethod - def par_allgather_obj(self, obj, gwsize) -> list: - """ - Gathers any object from the whole group - in a list (to all workers) - """ - - -class DDPDistributedTrainer(DistributedStrategy): - """PyTorch DDP distributed training class""" - - def init_backend(self) -> None: - """Initializes the distributed process group and the distributed package""" - if torch.cuda.is_available(): - dist.init_process_group(backend=args.backend) - - def distribute_model(self, model, device) -> nn.Module: - """ - Achieves data parallelism by synchronising the gradients across - each model replica located in each available computing device. - """ - if torch.cuda.is_available(): - dist_model = torch.nn.parallel.DistributedDataParallel( - model, - device_ids=[device], - output_device=device - ) - else: - dist_model = model - - return dist_model - - def broadcast_params(self, distrib_model, optimizer) -> None: - """Only applicable for Horovod, else pass""" - pass - - def distribute_optimizer( - self, - optimizer, - distrib_model - ) -> optim.Optimizer: - """Only applicable for Horovod, else returns the optimizer from the argument""" - return optimizer - - def dist_gwsize(self) -> int: - """Returns the number of processes""" - return dist.get_world_size() - - def dist_lwsize(self) -> int: - """Returns the number of GPUs available""" - return torch.cuda.device_count() - - def dist_grank(self) -> int: - """ - Returns the rank of the current process. 
- Rank ranges from 0 to world_size - """ - return dist.get_rank() - - def dist_lrank(self) -> int: - """Returns the local rank of the current process.""" - return dist.get_rank() % torch.cuda.device_count() - - def clean_up(self) -> None: - """Destroys the current process group.""" - if torch.cuda.is_available(): - dist.barrier() - dist.destroy_process_group() - - def par_allgather_obj(self, obj, gwsize) -> list: - """ - Gathers any object from the whole group - in a list (to all workers) - """ - res = [None]*gwsize - dist.all_gather_object(res, obj) - return res - - -class DSDistributedTrainer(DistributedStrategy): - """DeepSpeed distributed training class""" - - def init_backend(self) -> None: - """Initializes the distributed process group and the distributed package""" - deepspeed.init_distributed(dist_backend=args.backend) - - def distribute_model(self, model, device) -> nn.Module: - """ - Achieves data parallelism by synchronising the gradients across - each model replica located in each available computing device. - """ - distrib_model, __, __, __ = deepspeed.initialize( - args=args, model=model, model_parameters=model.parameters(), dist_init_required=True) - return distrib_model - - def broadcast_params(self, distrib_model, optimizer) -> None: - """Only applicable for Horovod, else pass""" - pass - - def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: - """Only applicable for Horovod, else returns the optimizer from the argument""" - return optimizer - - def dist_gwsize(self) -> int: - """Returns the number of processes""" - return dist.get_world_size() - - def dist_lwsize(self) -> int: - """Returns the number of GPUs available""" - return torch.cuda.device_count() - - def dist_grank(self) -> int: - """ - Returns the rank of the current process. - Rank ranges from 0 to world_size - """ - return dist.get_rank() - - def dist_lrank(self) -> int: - """Returns the local rank of the current process.""" - return dist.get_rank() % torch.cuda.device_count() - - def clean_up(self) -> None: - """Destroys the current process group.""" - deepspeed.sys.exit() - - def par_allgather_obj(self, obj, gwsize) -> list: - """ - Gathers any object from the whole group - in a list (to all workers) - """ - res = [None]*gwsize - dist.all_gather_object(res, obj) - return res - - -class HVDDistributedTrainer(DistributedStrategy): - """Horovod distributed training class""" - - def init_backend(self) -> None: - """Initializes the Horovod distributed backend""" - hvd.init() - - def distribute_model(self, model, device) -> nn.Module: - """For Horovod, returns the same model passed as argument""" - distrib_model = model - return distrib_model - - def broadcast_params(self, distrib_model, optimizer) -> None: - """Broadcasts variables from root rank to all other processes""" - hvd.broadcast_parameters(distrib_model.state_dict(), root_rank=0) - hvd.broadcast_optimizer_state(optimizer, root_rank=-0) - - def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: - """ - Construct a new DistributedOptimizer, which uses another optimizer - under the hood for computing single-process gradient values and - applying gradient updates after the gradient values have been - combined across all the Horovod ranks. 
- """ - distOptimizer = hvd.DistributedOptimizer(optimizer, - named_parameters=distrib_model.named_parameters(), - op=hvd.Average) - return distOptimizer - - def dist_gwsize(self) -> int: - """Returns the number of processes""" - return hvd.size() - - def dist_lwsize(self) -> int: - """Returns the number of GPUs available""" - return hvd.local_size() - - def dist_grank(self) -> int: - """ - Returns the rank of the current process. - Rank ranges from 0 to world_size - """ - return hvd.rank() - - def dist_lrank(self) -> int: - """Returns the local rank of the current process.""" - return hvd.local_rank() - - def clean_up(self) -> None: - """Shuts Horovod down.""" - hvd.shutdown() - - def par_allgather_obj(self, obj, gwsize) -> list: - """ - Gathers scalar objects across - all workers to a list with size(\#worker) - uses horovod communicator - @param obj object in a worker - @param gwsize global world size - - @return gathered list with size(#worker) - """ - return hvd.allgather_object(obj) - - class Net(nn.Module): def __init__(self): super(Net, self).__init__() @@ -467,13 +210,13 @@ def main(): # Strategy for distributed training if args.strategy == 'DDP': - my_trainer = DDPDistributedTrainer() + my_trainer = DDPDistributedStrategy() elif args.strategy == 'DS': - my_trainer = DSDistributedTrainer() + my_trainer = DSDistributedStrategy() elif args.strategy == 'HVD': - my_trainer = HVDDistributedTrainer() + my_trainer = HVDDistributedStrategy() # limit # of CPU threads to be used per worker torch.set_num_threads(1) @@ -485,7 +228,7 @@ def main(): st = time.time() # initializes the distributed backend which will take care of sychronizing nodes/GPUs - my_trainer.init_backend() + my_trainer.init_backend(backend=args.backend) # deterministic testrun if args.testrun: diff --git a/src/itwinai/distributed.py b/src/itwinai/distributed.py new file mode 100644 index 00000000..868f993a --- /dev/null +++ b/src/itwinai/distributed.py @@ -0,0 +1,5 @@ +import abc + + +class DistributedStrategy(abc.ABC): + """Abstract class to define the distributed backend methods.""" diff --git a/src/itwinai/tensorflow/distributed.py b/src/itwinai/tensorflow/distributed.py new file mode 100644 index 00000000..36d29c7b --- /dev/null +++ b/src/itwinai/tensorflow/distributed.py @@ -0,0 +1,7 @@ +from ..distributed import DistributedStrategy + + +class TFDistributedStrategy(DistributedStrategy): + """Abstract class to define the distributed backend methods for + TensorFlow models. + """ diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py new file mode 100644 index 00000000..b2dad87e --- /dev/null +++ b/src/itwinai/torch/distributed.py @@ -0,0 +1,299 @@ +import abc +from typing import Any, Union, List + +import deepspeed +import torch +import torch.distributed as dist +import horovod.torch as hvd +import torch.nn as nn +import torch.optim as optim + +from ..distributed import DistributedStrategy + + +class TorchDistributedStrategy(DistributedStrategy): + """Abstract class to define the distributed backend methods for + PyTorch models. + """ + @abc.abstractmethod + def init_backend(self, *args, **kwargs) -> None: + """Initializes the chosen distributed backend""" + + @abc.abstractmethod + def distribute_model(self, model: Any, device: Union[int, str]) -> Any: + """Distributes a machine learning model. + + Args: + model (Any): a generic ML model to be distributed. + device (Union[int, str]): device on which the model is run. + + Returns: + Any: distributed model instance. 
+ """ + + @abc.abstractmethod + def broadcast_params(self, distrib_model: Any, optimizer: Any) -> None: + """Broadcasts variables from root rank to all other processes/ + + Args: + distrib_model (Any): distributed model. + optimizer (Any): optimizer. + """ + + @abc.abstractmethod + def distribute_optimizer(self, optimizer: Any, distrib_model: Any) -> Any: + """Distribute optimizer. + + Args: + optimizer (Any): optimizer. + distrib_model (Any): distributed model. + + Returns: + Any: distributed optimizer. + """ + + @abc.abstractmethod + def dist_gwsize(self) -> int: + """Returns the total number of processes (global word size). + + Returns: + int: global word size. + """ + + @abc.abstractmethod + def dist_lwsize(self) -> int: + """Returns the number of local workers available on a node + (local word size). Usually it is equal to the number of available GPUs. + + Returns: + int: local word size. + """ + + @abc.abstractmethod + def dist_grank(self) -> int: + """Returns the global rank of the current process. + Rank ranges from 0 to world_size. + + Returns: + int: global rank. + """ + + @abc.abstractmethod + def dist_lrank(self) -> int: + """Returns the local rank of the current process. + + Returns: + int: local rank. + """ + + @abc.abstractmethod + def clean_up(self) -> None: + """Cleans up resources allocated by distributed strategy.""" + + @abc.abstractmethod + def par_allgather_obj(self, obj: Any) -> List[Any]: + """Gathers any object from the whole group in a list (to all workers). + + Args: + obj (Any): object to gather from all workers. + + Returns: + List[Any]: list of objects gathered from all workers. + """ + + +class DDPDistributedStrategy(TorchDistributedStrategy): + """PyTorch DDP distributed training class""" + + def init_backend(self, backend: str, *args, **kwargs) -> None: + """Initializes the distributed process group and the distributed + package. + """ + if torch.cuda.is_available(): + dist.init_process_group(backend=backend) + + def distribute_model(self, model, device) -> nn.Module: + """ + Achieves data parallelism by synchronising the gradients across + each model replica located in each available computing device. + """ + if torch.cuda.is_available(): + dist_model = torch.nn.parallel.DistributedDataParallel( + model, + device_ids=[device], + output_device=device + ) + else: + dist_model = model + + return dist_model + + def broadcast_params(self, distrib_model, optimizer) -> None: + """Only applicable for Horovod, else pass""" + pass + + def distribute_optimizer( + self, + optimizer, + distrib_model + ) -> optim.Optimizer: + """Only applicable for Horovod, else returns the optimizer from theargument""" + return optimizer + + def dist_gwsize(self) -> int: + """Returns the number of processes""" + return dist.get_world_size() + + def dist_lwsize(self) -> int: + """Returns the number of GPUs available""" + return torch.cuda.device_count() + + def dist_grank(self) -> int: + """ + Returns the rank of the current process. 
+ Rank ranges from 0 to world_size + """ + return dist.get_rank() + + def dist_lrank(self) -> int: + """Returns the local rank of the current process.""" + return dist.get_rank() % torch.cuda.device_count() + + def clean_up(self) -> None: + """Destroys the current process group.""" + if torch.cuda.is_available(): + dist.barrier() + dist.destroy_process_group() + + def par_allgather_obj(self, obj) -> List[Any]: + """ + Gathers any object from the whole group + in a list (to all workers) + """ + res = [None] * self.dist_gwsize() + dist.all_gather_object(res, obj) + return res + + +class DSDistributedStrategy(TorchDistributedStrategy): + """DeepSpeed distributed training class""" + + def init_backend(self, backend: str, *args, **kwargs) -> None: + """Initializes the distributed process group and the distributed + package. + """ + deepspeed.init_distributed(dist_backend=backend) + + def distribute_model(self, model, device) -> nn.Module: + """ + Achieves data parallelism by synchronising the gradients across + each model replica located in each available computing device. + """ + distrib_model, __, __, __ = deepspeed.initialize( + args=args, model=model, model_parameters=model.parameters(), dist_init_required=True) + return distrib_model + + def broadcast_params(self, distrib_model, optimizer) -> None: + """Only applicable for Horovod, else pass""" + pass + + def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: + """Only applicable for Horovod, else returns the optimizer from the argument""" + return optimizer + + def dist_gwsize(self) -> int: + """Returns the number of processes""" + return dist.get_world_size() + + def dist_lwsize(self) -> int: + """Returns the number of GPUs available""" + return torch.cuda.device_count() + + def dist_grank(self) -> int: + """ + Returns the rank of the current process. + Rank ranges from 0 to world_size + """ + return dist.get_rank() + + def dist_lrank(self) -> int: + """Returns the local rank of the current process.""" + return dist.get_rank() % torch.cuda.device_count() + + def clean_up(self) -> None: + """Destroys the current process group.""" + deepspeed.sys.exit() + + def par_allgather_obj(self, obj) -> list: + """ + Gathers any object from the whole group + in a list (to all workers) + """ + res = [None] * self.dist_gwsize() + dist.all_gather_object(res, obj) + return res + + +class HVDDistributedStrategy(TorchDistributedStrategy): + """Horovod distributed training class""" + + def init_backend(self, *args, **kwargs) -> None: + """Initializes the Horovod distributed backend""" + hvd.init() + + def distribute_model(self, model, device) -> nn.Module: + """For Horovod, returns the same model passed as argument""" + distrib_model = model + return distrib_model + + def broadcast_params(self, distrib_model, optimizer) -> None: + """Broadcasts variables from root rank to all other processes""" + hvd.broadcast_parameters(distrib_model.state_dict(), root_rank=0) + hvd.broadcast_optimizer_state(optimizer, root_rank=-0) + + def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: + """ + Construct a new DistributedOptimizer, which uses another optimizer + under the hood for computing single-process gradient values and + applying gradient updates after the gradient values have been + combined across all the Horovod ranks. 
+ """ + distOptimizer = hvd.DistributedOptimizer(optimizer, + named_parameters=distrib_model.named_parameters(), + op=hvd.Average) + return distOptimizer + + def dist_gwsize(self) -> int: + """Returns the number of processes""" + return hvd.size() + + def dist_lwsize(self) -> int: + """Returns the number of GPUs available""" + return hvd.local_size() + + def dist_grank(self) -> int: + """ + Returns the rank of the current process. + Rank ranges from 0 to world_size + """ + return hvd.rank() + + def dist_lrank(self) -> int: + """Returns the local rank of the current process.""" + return hvd.local_rank() + + def clean_up(self) -> None: + """Shuts Horovod down.""" + hvd.shutdown() + + def par_allgather_obj(self, obj, gwsize) -> list: + """ + Gathers scalar objects across + all workers to a list with size(\#worker) + uses horovod communicator + @param obj object in a worker + @param gwsize global world size + + @return gathered list with size(#worker) + """ + return hvd.allgather_object(obj) From c81f6943b0c07eead5075e7dd9cb3eae1e482e07 Mon Sep 17 00:00:00 2001 From: r-sarma Date: Fri, 22 Dec 2023 10:21:52 +0100 Subject: [PATCH 017/171] Updated docstrings --- src/itwinai/torch/distributed.py | 291 ++++++++++++++++++++++++------- 1 file changed, 226 insertions(+), 65 deletions(-) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index b2dad87e..bcc6214b 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -32,21 +32,21 @@ def distribute_model(self, model: Any, device: Union[int, str]) -> Any: """ @abc.abstractmethod - def broadcast_params(self, distrib_model: Any, optimizer: Any) -> None: + def broadcast_params(self, model: Any, optimizer: Any) -> None: """Broadcasts variables from root rank to all other processes/ Args: - distrib_model (Any): distributed model. + model (Any): distributed model. optimizer (Any): optimizer. """ @abc.abstractmethod - def distribute_optimizer(self, optimizer: Any, distrib_model: Any) -> Any: + def distribute_optimizer(self, optimizer: Any, model: Any) -> Any: """Distribute optimizer. Args: optimizer (Any): optimizer. - distrib_model (Any): distributed model. + model (Any): distributed model. Returns: Any: distributed optimizer. @@ -103,19 +103,34 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: class DDPDistributedStrategy(TorchDistributedStrategy): - """PyTorch DDP distributed training class""" + """PyTorch DDP distributed training class + + Args: + TorchDistributedStrategy (Any): Parent class + """ def init_backend(self, backend: str, *args, **kwargs) -> None: """Initializes the distributed process group and the distributed package. + + Args: + backend (str): Name of the communication backend to employ """ if torch.cuda.is_available(): dist.init_process_group(backend=backend) def distribute_model(self, model, device) -> nn.Module: - """ - Achieves data parallelism by synchronising the gradients across - each model replica located in each available computing device. + """Achieves data parallelism by synchronizing the gradients + across each model replica located in each available + computing device. 
+ + Args: + model (nn.Module): ML model to be distributed + device (Union[int, str]): Compute device to be used + + Returns: + nn.Module: Distributed model replicas across all devices + that are to be synchronized """ if torch.cuda.is_available(): dist_model = torch.nn.parallel.DistributedDataParallel( @@ -128,47 +143,86 @@ def distribute_model(self, model, device) -> nn.Module: return dist_model - def broadcast_params(self, distrib_model, optimizer) -> None: - """Only applicable for Horovod, else pass""" + def broadcast_params( + self, + model, + optimizer + ) -> None: + """Only applicable for Horovod, else pass + + Args: + model (Any): ML model + optimizer (Any): Optimizer + """ pass def distribute_optimizer( self, optimizer, - distrib_model + model ) -> optim.Optimizer: - """Only applicable for Horovod, else returns the optimizer from theargument""" + """Only applicable for Horovod, else returns the + optimizer from argument + + Args: + optimizer (Any): Optimizer + model (Any): ML model + + Returns: + optim.Optimizer: Distributed optimizer + """ return optimizer def dist_gwsize(self) -> int: - """Returns the number of processes""" + """Returns the total number of processes (global world size) + + Returns: + int: global world size + """ return dist.get_world_size() def dist_lwsize(self) -> int: - """Returns the number of GPUs available""" + """Returns the local number of workers available per node, + which is usually the number of GPUs available + + Returns: + int: local world size + """ return torch.cuda.device_count() def dist_grank(self) -> int: - """ - Returns the rank of the current process. - Rank ranges from 0 to world_size + """Returns the global rank of the current process, where + rank ranges from 0 to world_size + + Returns: + int: global rank """ return dist.get_rank() def dist_lrank(self) -> int: - """Returns the local rank of the current process.""" + """Returns the local rank of the current process + + Returns: + int: local rank + """ return dist.get_rank() % torch.cuda.device_count() def clean_up(self) -> None: - """Destroys the current process group.""" + """Destroys the current process group + """ if torch.cuda.is_available(): dist.barrier() dist.destroy_process_group() def par_allgather_obj(self, obj) -> List[Any]: - """ - Gathers any object from the whole group + """Gathers any object from the whole group in a list (to all workers) + + Args: + obj (Any): Object to gather from all workers + + Returns: + List[Any]: List of gathered objects """ res = [None] * self.dist_gwsize() dist.all_gather_object(res, obj) @@ -176,58 +230,116 @@ def par_allgather_obj(self, obj) -> List[Any]: class DSDistributedStrategy(TorchDistributedStrategy): - """DeepSpeed distributed training class""" + """DeepSpeed distributed training class + + Args: + TorchDistributedStrategy (Any): Parent class + """ def init_backend(self, backend: str, *args, **kwargs) -> None: """Initializes the distributed process group and the distributed package. + + Args: + backend (str): Name of the communication backend to employ """ deepspeed.init_distributed(dist_backend=backend) def distribute_model(self, model, device) -> nn.Module: - """ - Achieves data parallelism by synchronising the gradients across - each model replica located in each available computing device. + """Achieves data parallelism by synchronizing the gradients + across each model replica located in each available + computing device. 
+ + Args: + model (nn.Module): ML model to be distributed + device (Union[int, str]): Compute device to be used + + Returns: + nn.Module: Distributed model replicas across all devices + that are to be synchronized """ distrib_model, __, __, __ = deepspeed.initialize( - args=args, model=model, model_parameters=model.parameters(), dist_init_required=True) + args=args, + model=model, + model_parameters=model.parameters(), + dist_init_required=True + ) return distrib_model def broadcast_params(self, distrib_model, optimizer) -> None: - """Only applicable for Horovod, else pass""" + """Only applicable for Horovod, else pass + + Args: + model (Any): ML model + optimizer (Any): Optimizer + """ pass - def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: - """Only applicable for Horovod, else returns the optimizer from the argument""" + def distribute_optimizer( + self, + optimizer, + distrib_model + ) -> optim.Optimizer: + """Only applicable for Horovod, else returns the + optimizer from argument + + Args: + optimizer (Any): Optimizer + model (Any): ML model + + Returns: + optim.Optimizer: Distributed optimizer + """ return optimizer def dist_gwsize(self) -> int: - """Returns the number of processes""" + """Returns the total number of processes (global world size) + + Returns: + int: global world size + """ return dist.get_world_size() def dist_lwsize(self) -> int: - """Returns the number of GPUs available""" + """Returns the local number of workers available per node, + which is usually the number of GPUs available + + Returns: + int: local world size + """ return torch.cuda.device_count() def dist_grank(self) -> int: - """ - Returns the rank of the current process. - Rank ranges from 0 to world_size + """Returns the global rank of the current process, where + rank ranges from 0 to world_size + + Returns: + int: global rank """ return dist.get_rank() def dist_lrank(self) -> int: - """Returns the local rank of the current process.""" + """Returns the local rank of the current process + + Returns: + int: local rank + """ return dist.get_rank() % torch.cuda.device_count() def clean_up(self) -> None: - """Destroys the current process group.""" + """Destroys the current process group + """ deepspeed.sys.exit() def par_allgather_obj(self, obj) -> list: - """ - Gathers any object from the whole group + """Gathers any object from the whole group in a list (to all workers) + + Args: + obj (Any): Object to gather from all workers + + Returns: + List[Any]: List of gathered objects """ res = [None] * self.dist_gwsize() dist.all_gather_object(res, obj) @@ -235,65 +347,114 @@ def par_allgather_obj(self, obj) -> list: class HVDDistributedStrategy(TorchDistributedStrategy): - """Horovod distributed training class""" + """Horovod distributed training class + + Args: + TorchDistributedStrategy (Any): Parent class + """ def init_backend(self, *args, **kwargs) -> None: - """Initializes the Horovod distributed backend""" + """Initializes the Horovod distributed backend + """ hvd.init() def distribute_model(self, model, device) -> nn.Module: - """For Horovod, returns the same model passed as argument""" + """Only applicable for DDP and DeepSpeed. 
+ For Horovod, returns the same model passed as argument + + Args: + model (nn.Module): ML model to be distributed + device (Union[int, str]): Compute device to be used + + Returns: + nn.Module: ML model passed in the argument + """ distrib_model = model return distrib_model def broadcast_params(self, distrib_model, optimizer) -> None: - """Broadcasts variables from root rank to all other processes""" + """Broadcasts variables from root rank to all other processes + + Args: + distrib_model (nn.Module): ML model that is to be broadcasted + across processes + optimizer (optim.Optimizer): Optimizer that is to be broadcasted + across processes + """ hvd.broadcast_parameters(distrib_model.state_dict(), root_rank=0) hvd.broadcast_optimizer_state(optimizer, root_rank=-0) - def distribute_optimizer(self, optimizer, distrib_model) -> optim.Optimizer: - """ - Construct a new DistributedOptimizer, which uses another optimizer - under the hood for computing single-process gradient values and - applying gradient updates after the gradient values have been - combined across all the Horovod ranks. + def distribute_optimizer( + self, + optimizer, + distrib_model + ) -> optim.Optimizer: + """Constructs a DistributedOptimizer, for computing single-process + gradient values and applying gradient updates after the gradient values + have been combined across all the Horovod ranks. + + Args: + optimizer (optim.Optimizer): Optimizer to be distributed + distrib_model (nn.Module): ML model to be trained + + Returns: + optim.Optimizer: Distributed optimizer across all ranks """ - distOptimizer = hvd.DistributedOptimizer(optimizer, - named_parameters=distrib_model.named_parameters(), - op=hvd.Average) + distOptimizer = hvd.DistributedOptimizer( + optimizer, + named_parameters=distrib_model.named_parameters(), + op=hvd.Average + ) return distOptimizer def dist_gwsize(self) -> int: - """Returns the number of processes""" + """Returns the total number of processes (global world size) + + Returns: + int: global world size + """ return hvd.size() def dist_lwsize(self) -> int: - """Returns the number of GPUs available""" + """Returns the local number of workers available per node, + which is usually the number of GPUs available + + Returns: + int: local world size + """ return hvd.local_size() def dist_grank(self) -> int: - """ - Returns the rank of the current process. 
- Rank ranges from 0 to world_size + """Returns the global rank of the current process, where + rank ranges from 0 to world_size + + Returns: + int: global rank """ return hvd.rank() def dist_lrank(self) -> int: - """Returns the local rank of the current process.""" + """Returns the local rank of the current process + + Returns: + int: local rank + """ return hvd.local_rank() def clean_up(self) -> None: - """Shuts Horovod down.""" + """Shuts Horovod down + """ hvd.shutdown() def par_allgather_obj(self, obj, gwsize) -> list: - """ - Gathers scalar objects across - all workers to a list with size(\#worker) - uses horovod communicator - @param obj object in a worker - @param gwsize global world size + """Gathers scalar objects across all workers to a + list with size(#worker), uses horovod communicator - @return gathered list with size(#worker) + Args: + obj (Any): object in a worker + gwsize (int): global world size + + Returns: + list: gathered list with size(#worker) """ return hvd.allgather_object(obj) From 21bba18ab19a55ef0fde35fe515e4dcd8aa19851 Mon Sep 17 00:00:00 2001 From: r-sarma Date: Fri, 22 Dec 2023 10:45:11 +0100 Subject: [PATCH 018/171] Small fixes --- src/itwinai/torch/distributed.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index bcc6214b..90ecc9fb 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -120,8 +120,8 @@ def init_backend(self, backend: str, *args, **kwargs) -> None: dist.init_process_group(backend=backend) def distribute_model(self, model, device) -> nn.Module: - """Achieves data parallelism by synchronizing the gradients - across each model replica located in each available + """Achieves data parallelism by synchronizing the gradients + across each model replica located in each available computing device. Args: @@ -129,7 +129,7 @@ def distribute_model(self, model, device) -> nn.Module: device (Union[int, str]): Compute device to be used Returns: - nn.Module: Distributed model replicas across all devices + nn.Module: Distributed model replicas across all devices that are to be synchronized """ if torch.cuda.is_available(): @@ -215,7 +215,7 @@ def clean_up(self) -> None: dist.destroy_process_group() def par_allgather_obj(self, obj) -> List[Any]: - """Gathers any object from the whole group + """Gathers any object from the whole group in a list (to all workers) Args: @@ -246,8 +246,8 @@ def init_backend(self, backend: str, *args, **kwargs) -> None: deepspeed.init_distributed(dist_backend=backend) def distribute_model(self, model, device) -> nn.Module: - """Achieves data parallelism by synchronizing the gradients - across each model replica located in each available + """Achieves data parallelism by synchronizing the gradients + across each model replica located in each available computing device. 
Args: @@ -255,11 +255,11 @@ def distribute_model(self, model, device) -> nn.Module: device (Union[int, str]): Compute device to be used Returns: - nn.Module: Distributed model replicas across all devices + nn.Module: Distributed model replicas across all devices that are to be synchronized """ distrib_model, __, __, __ = deepspeed.initialize( - args=args, + args=None, model=model, model_parameters=model.parameters(), dist_init_required=True @@ -332,7 +332,7 @@ def clean_up(self) -> None: deepspeed.sys.exit() def par_allgather_obj(self, obj) -> list: - """Gathers any object from the whole group + """Gathers any object from the whole group in a list (to all workers) Args: @@ -376,10 +376,10 @@ def broadcast_params(self, distrib_model, optimizer) -> None: """Broadcasts variables from root rank to all other processes Args: - distrib_model (nn.Module): ML model that is to be broadcasted + distrib_model (nn.Module): ML model that is to be broadcasted + across processes + optimizer (optim.Optimizer): Optimizer that is to be broadcasted across processes - optimizer (optim.Optimizer): Optimizer that is to be broadcasted - across processes """ hvd.broadcast_parameters(distrib_model.state_dict(), root_rank=0) hvd.broadcast_optimizer_state(optimizer, root_rank=-0) @@ -398,7 +398,7 @@ def distribute_optimizer( distrib_model (nn.Module): ML model to be trained Returns: - optim.Optimizer: Distributed optimizer across all ranks + optim.Optimizer: Distributed optimizer across all ranks """ distOptimizer = hvd.DistributedOptimizer( optimizer, @@ -447,7 +447,7 @@ def clean_up(self) -> None: hvd.shutdown() def par_allgather_obj(self, obj, gwsize) -> list: - """Gathers scalar objects across all workers to a + """Gathers scalar objects across all workers to a list with size(#worker), uses horovod communicator Args: From d20f7034e7f6c0f97b9f4ce8b39bbdf51e4eacad Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 15 Jan 2024 17:33:03 +0100 Subject: [PATCH 019/171] UPDATE docstrings --- src/itwinai/torch/distributed.py | 214 +++++++++++++++---------------- 1 file changed, 102 insertions(+), 112 deletions(-) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index 90ecc9fb..11e608cb 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -103,34 +103,32 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: class DDPDistributedStrategy(TorchDistributedStrategy): - """PyTorch DDP distributed training class - - Args: - TorchDistributedStrategy (Any): Parent class - """ + """PyTorch DDP distributed strategy class.""" def init_backend(self, backend: str, *args, **kwargs) -> None: """Initializes the distributed process group and the distributed package. Args: - backend (str): Name of the communication backend to employ + backend (str): Name of the communication backend to employ. """ if torch.cuda.is_available(): dist.init_process_group(backend=backend) - def distribute_model(self, model, device) -> nn.Module: + def distribute_model( + self, model: nn.Module, device: Union[int, str] + ) -> nn.Module: """Achieves data parallelism by synchronizing the gradients across each model replica located in each available computing device. Args: - model (nn.Module): ML model to be distributed - device (Union[int, str]): Compute device to be used + model (nn.Module): ML model to be distributed. + device (Union[int, str]): Compute device to be used. 
Returns: - nn.Module: Distributed model replicas across all devices - that are to be synchronized + nn.Module: Distributed model replicas across all devices. + that are to be synchronized. """ if torch.cuda.is_available(): dist_model = torch.nn.parallel.DistributedDataParallel( @@ -145,84 +143,82 @@ def distribute_model(self, model, device) -> nn.Module: def broadcast_params( self, - model, - optimizer + model: nn.Module, + optimizer: optim.Optimizer ) -> None: - """Only applicable for Horovod, else pass + """Do nothing. Only applicable for Horovod. Args: - model (Any): ML model - optimizer (Any): Optimizer + model (nn.Module): ML model + optimizer (optim.Optimizer): Optimizer """ pass def distribute_optimizer( self, - optimizer, - model + optimizer: optim.Optimizer, + model: nn.Module ) -> optim.Optimizer: - """Only applicable for Horovod, else returns the - optimizer from argument + """Returns the optimizer from argument. Args: - optimizer (Any): Optimizer - model (Any): ML model + optimizer (optim.Optimizer): optimizer. + model (nn.Module): ML model. Unused here. Returns: - optim.Optimizer: Distributed optimizer + optim.Optimizer: Distributed optimizer. """ return optimizer def dist_gwsize(self) -> int: - """Returns the total number of processes (global world size) + """Returns the total number of processes (global world size). Returns: - int: global world size + int: global world size. """ return dist.get_world_size() def dist_lwsize(self) -> int: """Returns the local number of workers available per node, - which is usually the number of GPUs available + which is usually the number of GPUs available. Returns: - int: local world size + int: local world size. """ return torch.cuda.device_count() def dist_grank(self) -> int: """Returns the global rank of the current process, where - rank ranges from 0 to world_size + rank ranges from 0 to world_size. Returns: - int: global rank + int: global rank. """ return dist.get_rank() def dist_lrank(self) -> int: - """Returns the local rank of the current process + """Returns the local rank of the current process. Returns: - int: local rank + int: local rank. """ return dist.get_rank() % torch.cuda.device_count() def clean_up(self) -> None: - """Destroys the current process group - """ + """Destroys the current process group.""" if torch.cuda.is_available(): dist.barrier() dist.destroy_process_group() - def par_allgather_obj(self, obj) -> List[Any]: + def par_allgather_obj(self, obj: Any) -> List[Any]: """Gathers any object from the whole group - in a list (to all workers) + in a list (to all workers). Args: - obj (Any): Object to gather from all workers + obj (Any): Object to gather from all workers. Returns: - List[Any]: List of gathered objects + List[Any]: List of gathered objects. """ res = [None] * self.dist_gwsize() dist.all_gather_object(res, obj) @@ -230,33 +226,31 @@ def par_allgather_obj(self, obj) -> List[Any]: class DSDistributedStrategy(TorchDistributedStrategy): - """DeepSpeed distributed training class - - Args: - TorchDistributedStrategy (Any): Parent class - """ + """DeepSpeed distributed strategy class.""" def init_backend(self, backend: str, *args, **kwargs) -> None: """Initializes the distributed process group and the distributed package. Args: - backend (str): Name of the communication backend to employ + backend (str): Name of the communication backend to employ. 
""" deepspeed.init_distributed(dist_backend=backend) - def distribute_model(self, model, device) -> nn.Module: + def distribute_model( + self, model: nn.Module, device: Union[int, str] + ) -> nn.Module: """Achieves data parallelism by synchronizing the gradients across each model replica located in each available computing device. Args: - model (nn.Module): ML model to be distributed - device (Union[int, str]): Compute device to be used + model (nn.Module): ML model to be distributed. + device (Union[int, str]): Compute device to be used. Returns: nn.Module: Distributed model replicas across all devices - that are to be synchronized + that are to be synchronized. """ distrib_model, __, __, __ = deepspeed.initialize( args=None, @@ -266,80 +260,80 @@ def distribute_model(self, model, device) -> nn.Module: ) return distrib_model - def broadcast_params(self, distrib_model, optimizer) -> None: - """Only applicable for Horovod, else pass + def broadcast_params( + self, model: nn.Module, optimizer: optim.Optimizer + ) -> None: + """Only applicable for Horovod. Does nothing. Args: - model (Any): ML model - optimizer (Any): Optimizer + model (nn.Module): ML model. + optimizer (optim.Optimizer): optimizer. """ pass def distribute_optimizer( self, - optimizer, - distrib_model + optimizer: optim.Optimizer, + model: nn.Module ) -> optim.Optimizer: - """Only applicable for Horovod, else returns the - optimizer from argument + """Returns the optimizer from argument. Args: - optimizer (Any): Optimizer - model (Any): ML model + optimizer (optim.Optimizer): torch optimizer. + model (nn.Module): torch neural network. Returns: - optim.Optimizer: Distributed optimizer + optim.Optimizer: distributed optimizer. """ return optimizer def dist_gwsize(self) -> int: - """Returns the total number of processes (global world size) + """Returns the total number of processes (global world size). Returns: - int: global world size + int: global world size. """ return dist.get_world_size() def dist_lwsize(self) -> int: """Returns the local number of workers available per node, - which is usually the number of GPUs available + which is usually the number of GPUs available. Returns: - int: local world size + int: local world size. """ return torch.cuda.device_count() def dist_grank(self) -> int: """Returns the global rank of the current process, where - rank ranges from 0 to world_size + rank ranges from 0 to world_size. Returns: - int: global rank + int: global rank. """ return dist.get_rank() def dist_lrank(self) -> int: - """Returns the local rank of the current process + """Returns the local rank of the current process. Returns: - int: local rank + int: local rank. """ return dist.get_rank() % torch.cuda.device_count() def clean_up(self) -> None: - """Destroys the current process group - """ + """Destroys the current process group.""" deepspeed.sys.exit() - def par_allgather_obj(self, obj) -> list: + def par_allgather_obj(self, obj: Any) -> list[Any]: """Gathers any object from the whole group - in a list (to all workers) + in a list (to all workers). Args: - obj (Any): Object to gather from all workers + obj (Any): Object to gather from all workers. Returns: - List[Any]: List of gathered objects + List[Any]: List of gathered objects. 
""" res = [None] * self.dist_gwsize() dist.all_gather_object(res, obj) @@ -347,114 +341,110 @@ def par_allgather_obj(self, obj) -> list: class HVDDistributedStrategy(TorchDistributedStrategy): - """Horovod distributed training class - - Args: - TorchDistributedStrategy (Any): Parent class - """ + """Horovod distributed strategy class.""" def init_backend(self, *args, **kwargs) -> None: - """Initializes the Horovod distributed backend - """ + """Initializes the Horovod distributed backend.""" hvd.init() - def distribute_model(self, model, device) -> nn.Module: + def distribute_model( + self, model: nn.Module, device: Union[int, str] + ) -> nn.Module: """Only applicable for DDP and DeepSpeed. - For Horovod, returns the same model passed as argument + For Horovod, returns the same model passed as argument. Args: - model (nn.Module): ML model to be distributed - device (Union[int, str]): Compute device to be used + model (nn.Module): ML model to be distributed. + device (Union[int, str]): Compute device to be used. Returns: - nn.Module: ML model passed in the argument + nn.Module: ML model passed in the argument. """ - distrib_model = model - return distrib_model + return model - def broadcast_params(self, distrib_model, optimizer) -> None: - """Broadcasts variables from root rank to all other processes + def broadcast_params( + self, model: nn.Module, optimizer: optim.Optimizer + ) -> None: + """Broadcasts variables from root rank to all other processes. Args: - distrib_model (nn.Module): ML model that is to be broadcasted - across processes + model (nn.Module): ML model that is to be broadcasted + across processes. optimizer (optim.Optimizer): Optimizer that is to be broadcasted - across processes + across processes. """ - hvd.broadcast_parameters(distrib_model.state_dict(), root_rank=0) + hvd.broadcast_parameters(model.state_dict(), root_rank=0) hvd.broadcast_optimizer_state(optimizer, root_rank=-0) def distribute_optimizer( self, - optimizer, - distrib_model + optimizer: optim.Optimizer, + model: nn.Module ) -> optim.Optimizer: """Constructs a DistributedOptimizer, for computing single-process gradient values and applying gradient updates after the gradient values have been combined across all the Horovod ranks. Args: - optimizer (optim.Optimizer): Optimizer to be distributed - distrib_model (nn.Module): ML model to be trained + optimizer (optim.Optimizer): Optimizer to be distributed. + model (nn.Module): ML model to be trained. Returns: - optim.Optimizer: Distributed optimizer across all ranks + optim.Optimizer: Distributed optimizer across all ranks. """ distOptimizer = hvd.DistributedOptimizer( optimizer, - named_parameters=distrib_model.named_parameters(), + named_parameters=model.named_parameters(), op=hvd.Average ) return distOptimizer def dist_gwsize(self) -> int: - """Returns the total number of processes (global world size) + """Returns the total number of processes (global world size). Returns: - int: global world size + int: global world size. """ return hvd.size() def dist_lwsize(self) -> int: """Returns the local number of workers available per node, - which is usually the number of GPUs available + which is usually the number of GPUs available. Returns: - int: local world size + int: local world size. """ return hvd.local_size() def dist_grank(self) -> int: """Returns the global rank of the current process, where - rank ranges from 0 to world_size + rank ranges from 0 to world_size. Returns: - int: global rank + int: global rank. 
""" return hvd.rank() def dist_lrank(self) -> int: - """Returns the local rank of the current process + """Returns the local rank of the current process. Returns: - int: local rank + int: local rank. """ return hvd.local_rank() def clean_up(self) -> None: - """Shuts Horovod down - """ + """Shuts Horovod down.""" hvd.shutdown() - def par_allgather_obj(self, obj, gwsize) -> list: + def par_allgather_obj(self, obj: Any) -> list[Any]: """Gathers scalar objects across all workers to a list with size(#worker), uses horovod communicator Args: - obj (Any): object in a worker - gwsize (int): global world size + obj (Any): object in a worker. Returns: - list: gathered list with size(#worker) + list: gathered list with size(#worker). """ return hvd.allgather_object(obj) From 3187f6271a6f56ac415face9d5557769e746a64a Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 1 Feb 2024 16:22:58 +0100 Subject: [PATCH 020/171] ADD deepespeed config loader --- .vscode/settings.json | 3 --- src/itwinai/torch/distributed.py | 31 +++++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 38dc1230..896f98c2 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,4 @@ { - "python.linting.flake8Enabled": true, - "python.linting.enabled": true, "editor.formatOnSave": true, "editor.defaultFormatter": null, "cSpell.ignoreWords": [ @@ -55,7 +53,6 @@ "[python]": { "editor.defaultFormatter": "ms-python.autopep8" }, - "python.formatting.provider": "none", "[markdown]": { "editor.formatOnSave": false }, diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index 11e608cb..8158fb79 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -1,5 +1,7 @@ import abc -from typing import Any, Union, List +from typing import Any, Union, List, Dict +from pathlib import Path +import json import deepspeed import torch @@ -228,15 +230,35 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: class DSDistributedStrategy(TorchDistributedStrategy): """DeepSpeed distributed strategy class.""" - def init_backend(self, backend: str, *args, **kwargs) -> None: + config: Dict = None + + def init_backend( + self, + backend: str, + ds_config: Union[Dict, Path, str], + *args, **kwargs + ) -> None: """Initializes the distributed process group and the distributed package. Args: backend (str): Name of the communication backend to employ. + ds_config (Union[dict, Path, str]): DeepSpeed config. Either a + dictionary or a path to a JSON file. """ + # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization + self._load_config(ds_config) deepspeed.init_distributed(dist_backend=backend) + def _load_config(self, ds_config): + if isinstance(ds_config, (str, Path)): + with open(ds_config) as fp: + self.config = json.load(fp) + elif isinstance(ds_config, dict): + self.config = ds_config + else: + raise ValueError("ds_config is not a dictionary not a path.") + def distribute_model( self, model: nn.Module, device: Union[int, str] ) -> nn.Module: @@ -252,11 +274,12 @@ def distribute_model( nn.Module: Distributed model replicas across all devices that are to be synchronized. 
""" + # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization distrib_model, __, __, __ = deepspeed.initialize( - args=None, model=model, model_parameters=model.parameters(), - dist_init_required=True + dist_init_required=True, + config=self.config ) return distrib_model From 1ba8b8b33e75feee6061d544b5e805067d49dc57 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 8 Feb 2024 19:47:00 +0100 Subject: [PATCH 021/171] ADD first deepspeed tutorial draft --- env-files/torch/pytorch-env-gpu.yml | 1 + experimental/trainer/general_trainer.py | 5 +- pyproject.toml | 8 +- src/itwinai/torch/distributed.py | 585 ++++++++++++++++-- .../ex0.1_multiple_torch_strategies.py | 149 +++++ .../ex0_multiple_torch_strategies.py | 149 +++++ .../ex0_multiple_torch_strategies.sh | 137 ++++ 7 files changed, 987 insertions(+), 47 deletions(-) create mode 100644 tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py create mode 100644 tutorials/distributed-ml/ex0_multiple_torch_strategies.py create mode 100644 tutorials/distributed-ml/ex0_multiple_torch_strategies.sh diff --git a/env-files/torch/pytorch-env-gpu.yml b/env-files/torch/pytorch-env-gpu.yml index 1c82cc30..6352cd0b 100644 --- a/env-files/torch/pytorch-env-gpu.yml +++ b/env-files/torch/pytorch-env-gpu.yml @@ -16,3 +16,4 @@ dependencies: - cudatoolkit=10.1 - lightning=2.0.0 - torchmetrics + - cuda-compiler diff --git a/experimental/trainer/general_trainer.py b/experimental/trainer/general_trainer.py index 119f10dd..ae9d7743 100755 --- a/experimental/trainer/general_trainer.py +++ b/experimental/trainer/general_trainer.py @@ -238,8 +238,9 @@ def main(): # get job rank info - rank==0 master gpu if torch.cuda.is_available(): - lwsize = my_trainer.dist_lwsize() if args.cuda else 0 # local world size - per node - gwsize = my_trainer.dist_gwsize() # global world size - per run + # local world size - per node + lwsize = my_trainer.local_world_size() if args.cuda else 0 + gwsize = my_trainer.global_world_size() # global world size - per run grank = my_trainer.dist_grank() # global rank - assign per run lrank = my_trainer.dist_lrank() # local rank - assign per node else: diff --git a/pyproject.toml b/pyproject.toml index 5e93f3ec..c50e1e86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,11 +34,9 @@ dependencies = [ "submitit>=1.4.6", "typing-extensions==4.5.0", "typing_extensions==4.5.0", - "rich>=13.5.3", - "typer>=0.9.0", - "urllib3>=1.26.18", - "lightning>=2.0.0", - "torchmetrics>=1.2.0", + "urllib3>=2.0.5", + "deepspeed>=0.13.1", + "horovod[tensorflow,keras,pytorch]>=0.28.1", ] # dynamic = ["version", "description"] diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index 8158fb79..b80f8e10 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -18,16 +18,15 @@ class TorchDistributedStrategy(DistributedStrategy): PyTorch models. """ @abc.abstractmethod - def init_backend(self, *args, **kwargs) -> None: + def init_backend(self) -> None: """Initializes the chosen distributed backend""" @abc.abstractmethod - def distribute_model(self, model: Any, device: Union[int, str]) -> Any: + def distribute_model(self, model: Any) -> Any: """Distributes a machine learning model. Args: model (Any): a generic ML model to be distributed. - device (Union[int, str]): device on which the model is run. Returns: Any: distributed model instance. 
@@ -56,19 +55,20 @@ def distribute_optimizer(self, optimizer: Any, model: Any) -> Any: @abc.abstractmethod def dist_gwsize(self) -> int: - """Returns the total number of processes (global word size). + """Returns the total number of processes (global world size). Returns: - int: global word size. + int: global world size. """ @abc.abstractmethod def dist_lwsize(self) -> int: """Returns the number of local workers available on a node - (local word size). Usually it is equal to the number of available GPUs. + (local world size). + Usually it is equal to the number of available GPUs. Returns: - int: local word size. + int: local world size. """ @abc.abstractmethod @@ -88,6 +88,22 @@ def dist_lrank(self) -> int: int: local rank. """ + def is_main_worker(self) -> bool: + """Checks if local worker has global rank equal to zero. + + Returns: + bool: True if main worker. + """ + return self.dist_grank() == 0 + + def dist_device(self) -> str: + """Device used by local worker. + + Returns: + str: torch device in the form 'cuda:N'. + """ + return f"cuda:{self.dist_lrank()}" + @abc.abstractmethod def clean_up(self) -> None: """Cleans up resources allocated by distributed strategy.""" @@ -105,38 +121,44 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: class DDPDistributedStrategy(TorchDistributedStrategy): - """PyTorch DDP distributed strategy class.""" + """PyTorch DDP distributed strategy class. + + Args: + backend (str): Name of the communication backend to employ. + """ + + backend: str - def init_backend(self, backend: str, *args, **kwargs) -> None: + def __init__(self, backend: str) -> None: + super().__init__() + self.backend = backend + + def init_backend(self) -> None: """Initializes the distributed process group and the distributed package. - - Args: - backend (str): Name of the communication backend to employ. """ if torch.cuda.is_available(): - dist.init_process_group(backend=backend) + dist.init_process_group(backend=self.backend) - def distribute_model( - self, model: nn.Module, device: Union[int, str] - ) -> nn.Module: + def distribute_model(self, model: nn.Module) -> nn.Module: """Achieves data parallelism by synchronizing the gradients across each model replica located in each available computing device. Args: model (nn.Module): ML model to be distributed. - device (Union[int, str]): Compute device to be used. Returns: nn.Module: Distributed model replicas across all devices. that are to be synchronized. """ if torch.cuda.is_available(): + # device = self.dist_lrank() + model = model.to(self.dist_device()) dist_model = torch.nn.parallel.DistributedDataParallel( model, - device_ids=[device], - output_device=device + device_ids=[self.dist_device()], + output_device=self.dist_device() ) else: dist_model = model @@ -159,7 +181,7 @@ def broadcast_params( def distribute_optimizer( self, optimizer: optim.Optimizer, - model: nn.Module + model: nn.Module = None ) -> optim.Optimizer: """Returns the optimizer from argument. @@ -228,27 +250,509 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: class DSDistributedStrategy(TorchDistributedStrategy): - """DeepSpeed distributed strategy class.""" + """DeepSpeed distributed strategy class. + + Args: + backend (str): Name of the communication backend to employ. + config (Union[dict, Path, str]): DeepSpeed config. Either a + dictionary or a path to a JSON file. 
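# A usage sketch for the reworked DDP strategy above, assuming the script is
# launched with `torchrun` so that RANK/LOCAL_RANK/MASTER_* are already set
# (the model and optimizer below are placeholders):
import torch
from torch import nn
from itwinai.torch.distributed import DDPDistributedStrategy

strategy = DDPDistributedStrategy(backend="nccl")
strategy.init_backend()

model = nn.Linear(3, 4)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

model = strategy.distribute_model(model)         # moved to cuda:<local rank>, wrapped in DDP
optimizer = strategy.distribute_optimizer(optimizer)

if strategy.is_main_worker():
    print(f"Training on {strategy.dist_gwsize()} workers")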
+ """ config: Dict = None + backend: str - def init_backend( + def __init__( self, backend: str, - ds_config: Union[Dict, Path, str], - *args, **kwargs + config: Union[Dict, Path, str] ) -> None: + super().__init__() + self.backend = backend + self._load_config(config) + + def _load_config(self, ds_config): + if isinstance(ds_config, (str, Path)): + with open(ds_config) as fp: + self.config = json.load(fp) + elif isinstance(ds_config, dict): + self.config = ds_config + else: + raise ValueError("ds_config is not a dictionary not a path.") + + def init_backend(self) -> None: """Initializes the distributed process group and the distributed package. + """ + # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization + deepspeed.init_distributed(dist_backend=self.backend) + + def distribute_model(self, model: nn.Module) -> nn.Module: + """Achieves data parallelism by synchronizing the gradients + across each model replica located in each available + computing device. Args: - backend (str): Name of the communication backend to employ. - ds_config (Union[dict, Path, str]): DeepSpeed config. Either a - dictionary or a path to a JSON file. + model (nn.Module): ML model to be distributed. + + Returns: + nn.Module: Distributed model replicas across all devices + that are to be synchronized. """ # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization - self._load_config(ds_config) - deepspeed.init_distributed(dist_backend=backend) + distrib_model, __, __, __ = deepspeed.initialize( + model=model, + model_parameters=model.parameters(), + dist_init_required=True, + config=self.config + ) + return distrib_model + + def broadcast_params( + self, model: nn.Module, optimizer: optim.Optimizer + ) -> None: + """Only applicable for Horovod. Does nothing. + + Args: + model (nn.Module): ML model. + optimizer (optim.Optimizer): optimizer. + """ + pass + + def distribute_optimizer( + self, + optimizer: optim.Optimizer, + model: nn.Module = None + ) -> optim.Optimizer: + """Returns the optimizer from argument. + + Args: + optimizer (optim.Optimizer): torch optimizer. + model (nn.Module): torch neural network. + + Returns: + optim.Optimizer: distributed optimizer. + """ + return optimizer + + def dist_gwsize(self) -> int: + """Returns the total number of processes (global world size). + + Returns: + int: global world size. + """ + return dist.get_world_size() + + def dist_lwsize(self) -> int: + """Returns the local number of workers available per node, + which is usually the number of GPUs available. + + Returns: + int: local world size. + """ + return torch.cuda.device_count() + + def dist_grank(self) -> int: + """Returns the global rank of the current process, where + rank ranges from 0 to world_size. + + Returns: + int: global rank. + """ + return dist.get_rank() + + def dist_lrank(self) -> int: + """Returns the local rank of the current process. + + Returns: + int: local rank. + """ + return dist.get_rank() % torch.cuda.device_count() + + def clean_up(self) -> None: + """Destroys the current process group.""" + deepspeed.sys.exit() + + def par_allgather_obj(self, obj: Any) -> list[Any]: + """Gathers any object from the whole group + in a list (to all workers). + + Args: + obj (Any): Object to gather from all workers. + + Returns: + List[Any]: List of gathered objects. 
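# A small hypothetical helper showing what `par_allgather_obj` is handy for:
# aggregating plain Python values (e.g. a per-rank loss) across all workers.
# `strategy` can be any concrete TorchDistributedStrategy instance.
def global_mean(strategy, value: float) -> float:
    gathered = strategy.par_allgather_obj(value)
    return sum(gathered) / len(gathered)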
+ """ + res = [None] * self.dist_gwsize() + dist.all_gather_object(res, obj) + return res + + +class HVDDistributedStrategy(TorchDistributedStrategy): + """Horovod distributed strategy class.""" + + def init_backend(self) -> None: + """Initializes the Horovod distributed backend.""" + hvd.init() + + def distribute_model(self, model: nn.Module) -> nn.Module: + """Only applicable for DDP and DeepSpeed. + For Horovod, returns the same model passed as argument. + + Args: + model (nn.Module): ML model to be distributed. + + Returns: + nn.Module: ML model passed in the argument. + """ + return model + + def broadcast_params( + self, model: nn.Module, optimizer: optim.Optimizer + ) -> None: + """Broadcasts variables from root rank to all other processes. + + Args: + model (nn.Module): ML model that is to be broadcasted + across processes. + optimizer (optim.Optimizer): Optimizer that is to be broadcasted + across processes. + """ + hvd.broadcast_parameters(model.state_dict(), root_rank=0) + hvd.broadcast_optimizer_state(optimizer, root_rank=-0) + + def distribute_optimizer( + self, + optimizer: optim.Optimizer, + model: nn.Module + ) -> optim.Optimizer: + """Constructs a DistributedOptimizer, for computing single-process + gradient values and applying gradient updates after the gradient values + have been combined across all the Horovod ranks. + + Args: + optimizer (optim.Optimizer): Optimizer to be distributed. + model (nn.Module): ML model to be trained. + + Returns: + optim.Optimizer: Distributed optimizer across all ranks. + """ + distOptimizer = hvd.DistributedOptimizer( + optimizer, + named_parameters=model.named_parameters(), + op=hvd.Average + ) + return distOptimizer + + def dist_gwsize(self) -> int: + """Returns the total number of processes (global world size). + + Returns: + int: global world size. + """ + return hvd.size() + + def dist_lwsize(self) -> int: + """Returns the local number of workers available per node, + which is usually the number of GPUs available. + + Returns: + int: local world size. + """ + return hvd.local_size() + + def dist_grank(self) -> int: + """Returns the global rank of the current process, where + rank ranges from 0 to world_size. + + Returns: + int: global rank. + """ + return hvd.rank() + + def dist_lrank(self) -> int: + """Returns the local rank of the current process. + + Returns: + int: local rank. + """ + return hvd.local_rank() + + def clean_up(self) -> None: + """Shuts Horovod down.""" + hvd.shutdown() + + def par_allgather_obj(self, obj: Any) -> list[Any]: + """Gathers scalar objects across all workers to a + list with size(#worker), uses horovod communicator + + Args: + obj (Any): object in a worker. + + Returns: + list: gathered list with size(#worker). + """ + return hvd.allgather_object(obj) + + +################################################################ + +class TorchDistributedStrategy2(DistributedStrategy): + """Abstract class to define the distributed backend methods for + PyTorch models. + """ + @abc.abstractmethod + def init(self) -> None: + """Initializes the chosen distributed backend""" + + @abc.abstractmethod + def distribute(self, model: Any, optimizer: Any) -> Any: + """Distributes a machine learning model and its optimizer. + + Args: + model (Any): a generic ML model to be distributed. + device (Union[int, str]): device on which the model is run. + + Returns: + Any: distributed model instance. 
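# Order of operations for the Horovod strategy above, as a sketch (assumes one
# GPU per process and a launch via `horovodrun`; model and optimizer are dummies):
import torch
from torch import nn
from itwinai.torch.distributed import HVDDistributedStrategy

strategy = HVDDistributedStrategy()
strategy.init_backend()                                    # hvd.init()

model = nn.Linear(3, 4).to(strategy.dist_device())
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

model = strategy.distribute_model(model)                   # no-op for Horovod
strategy.broadcast_params(model, optimizer)                # sync initial state from rank 0
optimizer = strategy.distribute_optimizer(optimizer, model)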
+ """ + + @abc.abstractmethod + def broadcast_params(self, model: Any, optimizer: Any) -> None: + """Broadcasts variables from root rank to all other processes/ + + Args: + model (Any): distributed model. + optimizer (Any): optimizer. + """ + + @abc.abstractmethod + def distribute_optimizer(self, optimizer: Any, model: Any) -> Any: + """Distribute optimizer. + + Args: + optimizer (Any): optimizer. + model (Any): distributed model. + + Returns: + Any: distributed optimizer. + """ + + @abc.abstractmethod + def dist_gwsize(self) -> int: + """Returns the total number of processes (global world size). + + Returns: + int: global world size. + """ + + @abc.abstractmethod + def dist_lwsize(self) -> int: + """Returns the number of local workers available on a node + (local world size). + Usually it is equal to the number of available GPUs. + + Returns: + int: local world size. + """ + + @abc.abstractmethod + def dist_grank(self) -> int: + """Returns the global rank of the current process. + Rank ranges from 0 to world_size. + + Returns: + int: global rank. + """ + + @abc.abstractmethod + def dist_lrank(self) -> int: + """Returns the local rank of the current process. + + Returns: + int: local rank. + """ + + def is_main_worker(self) -> bool: + """Checks if local worker has global rank equal to zero. + + Returns: + bool: True if main worker. + """ + return self.dist_grank() == 0 + + def dist_device(self) -> str: + """Device used by local worker. + + Returns: + str: torch device in the form 'cuda:N'. + """ + return f"cuda:{self.dist_lrank()}" + + @abc.abstractmethod + def clean_up(self) -> None: + """Cleans up resources allocated by distributed strategy.""" + + @abc.abstractmethod + def par_allgather_obj(self, obj: Any) -> List[Any]: + """Gathers any object from the whole group in a list (to all workers). + + Args: + obj (Any): object to gather from all workers. + + Returns: + List[Any]: list of objects gathered from all workers. + """ + + +class DDPDistributedStrategy2(TorchDistributedStrategy2): + """PyTorch DDP distributed strategy class. + + Args: + backend (str): Name of the communication backend to employ. + """ + + backend: str + + def __init__(self, backend: str) -> None: + super().__init__() + self.backend = backend + + def init(self) -> None: + """Initializes the distributed process group and the distributed + package. + """ + if torch.cuda.is_available(): + dist.init_process_group(backend=self.backend) + + def distribute(self, model: nn.Module) -> nn.Module: + """Achieves data parallelism by synchronizing the gradients + across each model replica located in each available + computing device. + + Args: + model (nn.Module): ML model to be distributed. + + Returns: + nn.Module: Distributed model replicas across all devices. + that are to be synchronized. + """ + if torch.cuda.is_available(): + # device = self.dist_lrank() + model = model.to(self.dist_device()) + dist_model = torch.nn.parallel.DistributedDataParallel( + model, + device_ids=[self.dist_device()], + output_device=self.dist_device() + ) + else: + dist_model = model + + return dist_model + + def broadcast_params( + self, + model: nn.Module, + optimizer: optim.Optimizer + ) -> None: + """Do nothing. Only applicable for Horovod. + + Args: + model (nn.Module): ML model + optimizer (optim.Optimizer): Optimizer + """ + pass + + def distribute_optimizer( + self, + optimizer: optim.Optimizer, + model: nn.Module = None + ) -> optim.Optimizer: + """Returns the optimizer from argument. + + Args: + optimizer (optim.Optimizer): optimizer. 
+ model (nn.Module): ML model. Unused here. + + Returns: + optim.Optimizer: Distributed optimizer. + """ + return optimizer + + def dist_gwsize(self) -> int: + """Returns the total number of processes (global world size). + + Returns: + int: global world size. + """ + return dist.get_world_size() + + def dist_lwsize(self) -> int: + """Returns the local number of workers available per node, + which is usually the number of GPUs available. + + Returns: + int: local world size. + """ + return torch.cuda.device_count() + + def dist_grank(self) -> int: + """Returns the global rank of the current process, where + rank ranges from 0 to world_size. + + Returns: + int: global rank. + """ + return dist.get_rank() + + def dist_lrank(self) -> int: + """Returns the local rank of the current process. + + Returns: + int: local rank. + """ + return dist.get_rank() % torch.cuda.device_count() + + def clean_up(self) -> None: + """Destroys the current process group.""" + if torch.cuda.is_available(): + dist.barrier() + dist.destroy_process_group() + + def par_allgather_obj(self, obj: Any) -> List[Any]: + """Gathers any object from the whole group + in a list (to all workers). + + Args: + obj (Any): Object to gather from all workers. + + Returns: + List[Any]: List of gathered objects. + """ + res = [None] * self.dist_gwsize() + dist.all_gather_object(res, obj) + return res + + +class DSDistributedStrategy2(TorchDistributedStrategy2): + """DeepSpeed distributed strategy class. + + Args: + backend (str): Name of the communication backend to employ. + config (Union[dict, Path, str]): DeepSpeed config. Either a + dictionary or a path to a JSON file. + """ + + config: Dict = None + backend: str + + def __init__( + self, + backend: str, + config: Union[Dict, Path, str] + ) -> None: + super().__init__() + self.backend = backend + self._load_config(config) def _load_config(self, ds_config): if isinstance(ds_config, (str, Path)): @@ -259,16 +763,20 @@ def _load_config(self, ds_config): else: raise ValueError("ds_config is not a dictionary not a path.") - def distribute_model( - self, model: nn.Module, device: Union[int, str] - ) -> nn.Module: + def init(self) -> None: + """Initializes the distributed process group and the distributed + package. + """ + # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization + deepspeed.init_distributed(dist_backend=self.backend) + + def distribute(self, model: nn.Module) -> nn.Module: """Achieves data parallelism by synchronizing the gradients across each model replica located in each available computing device. Args: model (nn.Module): ML model to be distributed. - device (Union[int, str]): Compute device to be used. Returns: nn.Module: Distributed model replicas across all devices @@ -297,7 +805,7 @@ def broadcast_params( def distribute_optimizer( self, optimizer: optim.Optimizer, - model: nn.Module + model: nn.Module = None ) -> optim.Optimizer: """Returns the optimizer from argument. @@ -363,22 +871,19 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: return res -class HVDDistributedStrategy(TorchDistributedStrategy): +class HVDDistributedStrategy2(TorchDistributedStrategy2): """Horovod distributed strategy class.""" - def init_backend(self, *args, **kwargs) -> None: + def init(self) -> None: """Initializes the Horovod distributed backend.""" hvd.init() - def distribute_model( - self, model: nn.Module, device: Union[int, str] - ) -> nn.Module: + def distribute(self, model: nn.Module) -> nn.Module: """Only applicable for DDP and DeepSpeed. 
For Horovod, returns the same model passed as argument. Args: model (nn.Module): ML model to be distributed. - device (Union[int, str]): Compute device to be used. Returns: nn.Module: ML model passed in the argument. diff --git a/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py b/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py new file mode 100644 index 00000000..35c7a4ab --- /dev/null +++ b/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py @@ -0,0 +1,149 @@ +""" +Show how to use DDP, Horovod and DeepSpeed strategies interchangeably. +Depending on the strategy you choose, you need to run this script with +different ad-hoc commands: + +Torch DistributedDataParallel (DDP). Launch with torchrun:: +>>> micromamba run -p ../../.venv-pytorch/ torchrun \ + --rdzv_backend=c10d \ + --rdzv_endpoint=localhost:0 \ + --nnodes=1 \ + --nproc_per_node=4 \ + ex0_multiple_torch_strategies.py -s ddp + + +Using a SLURM jobscript: + +1. Torch DistributedDataParallel (DDP): +set STRATEGY="ddp" in ``torchrun ex0_multiple_torch_strategies.sh`` +2. Horovod: +set STRATEGY="horovod" in ``torchrun ex0_multiple_torch_strategies.sh`` +3. DeepSpeed: +set STRATEGY="deepspeed" in ``torchrun ex0_multiple_torch_strategies.sh`` + +Execute ``torchrun ex0_multiple_torch_strategies.sh`` in a slurm environment: + +>>> sbatch ex0_multiple_torch_strategies.sh + + +""" +from typing import Any +import os +import argparse + +import torch +from torch import nn +from torch.utils.data import DataLoader, Dataset, DistributedSampler + +from itwinai.torch.distributed import ( + TorchDistributedStrategy, + DDPDistributedStrategy, + HVDDistributedStrategy, + DSDistributedStrategy +) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument( + "--strategy", "-s", type=str, + choices=['ddp', 'horovod', 'deepspeed'], + default='ddp' + ) + parser.add_argument( + "--shuffle_dataloader", + action=argparse.BooleanOptionalAction + ) + return parser.parse_args() + + +class UniformRndDataset(Dataset): + """Dummy torch dataset.""" + + def __init__(self, x_size: int, y_size: int, len: int = 100): + super().__init__() + self.x_size = x_size + self.y_size = y_size + self.len = len + + def __len__(self): + return self.len + + def __getitem__(self, index): + return torch.rand(self.x_size), torch.rand(self.y_size) + + +def trainer_entrypoint_fn( + foo: Any, args: argparse.Namespace, strategy: TorchDistributedStrategy +) -> int: + """Dummy training function. This emulates custom code developed + by some use case. 
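# The training function below builds a DistributedSampler directly. One optional
# refinement, not shown in this tutorial, is to reseed the sampler at every epoch,
# since a shuffling sampler otherwise replays the same ordering each epoch. Sketch:
from torch.utils.data import DataLoader, DistributedSampler

def run_epochs(train_loader: DataLoader, n_epochs: int) -> None:
    for epoch in range(n_epochs):
        if isinstance(train_loader.sampler, DistributedSampler):
            train_loader.sampler.set_epoch(epoch)   # torch's per-epoch reshuffle hook
        for _batch in train_loader:
            pass  # forward/backward/step would go here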
+ """ + strategy.init_backend() + print(f"{foo}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " + f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") + + # Local model + model = nn.Linear(3, 4) + optim = torch.optim.Adam(model.parameters(), lr=1e-3) + loss_fn = nn.MSELoss() + # Distributed model + model: nn.Module = strategy.distribute_model(model) + optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim, model) + + # Data + train_set = UniformRndDataset(x_size=3, y_size=4) + # Distributed dataloader + train_loader = DataLoader( + train_set, batch_size=10, num_workers=1, + sampler=DistributedSampler( + train_set, + num_replicas=strategy.dist_gwsize(), + rank=strategy.dist_grank(), + shuffle=args.shuffle_dataloader + ) + ) + + # Device allocated for this worker + device = strategy.dist_device() + + for epoch in range(2): + for (x, y) in train_loader: + # print(f"tensor to cuda:{device}") + x = x.to(device) + y = y.to(device) + + optim.zero_grad() + y_pred = model(x) + loss = loss_fn(y_pred, y) + loss.backward() + optim.step() + + if strategy.is_main_worker(): + print(f"Loss [epoch={epoch}]: {loss.item()}") + + strategy.clean_up() + return 123 + + +if __name__ == "__main__": + + args = parse_args() + + # Instantiate Strategy + if args.strategy == 'ddp': + if (not torch.cuda.is_available() + or not torch.cuda.device_count() > 1): + raise RuntimeError('Resources unavailable') + + strategy = DDPDistributedStrategy(backend='nccl') + elif args.strategy == 'horovod': + strategy = HVDDistributedStrategy() + elif args.strategy == 'deepspeed': + strategy = DSDistributedStrategy(...) + else: + raise NotImplementedError( + f"Strategy {args.strategy} is not recognized/implemented.") + + # Launch distributed training + trainer_entrypoint_fn("foobar", args, strategy) diff --git a/tutorials/distributed-ml/ex0_multiple_torch_strategies.py b/tutorials/distributed-ml/ex0_multiple_torch_strategies.py new file mode 100644 index 00000000..35c7a4ab --- /dev/null +++ b/tutorials/distributed-ml/ex0_multiple_torch_strategies.py @@ -0,0 +1,149 @@ +""" +Show how to use DDP, Horovod and DeepSpeed strategies interchangeably. +Depending on the strategy you choose, you need to run this script with +different ad-hoc commands: + +Torch DistributedDataParallel (DDP). Launch with torchrun:: +>>> micromamba run -p ../../.venv-pytorch/ torchrun \ + --rdzv_backend=c10d \ + --rdzv_endpoint=localhost:0 \ + --nnodes=1 \ + --nproc_per_node=4 \ + ex0_multiple_torch_strategies.py -s ddp + + +Using a SLURM jobscript: + +1. Torch DistributedDataParallel (DDP): +set STRATEGY="ddp" in ``torchrun ex0_multiple_torch_strategies.sh`` +2. Horovod: +set STRATEGY="horovod" in ``torchrun ex0_multiple_torch_strategies.sh`` +3. 
DeepSpeed: +set STRATEGY="deepspeed" in ``torchrun ex0_multiple_torch_strategies.sh`` + +Execute ``torchrun ex0_multiple_torch_strategies.sh`` in a slurm environment: + +>>> sbatch ex0_multiple_torch_strategies.sh + + +""" +from typing import Any +import os +import argparse + +import torch +from torch import nn +from torch.utils.data import DataLoader, Dataset, DistributedSampler + +from itwinai.torch.distributed import ( + TorchDistributedStrategy, + DDPDistributedStrategy, + HVDDistributedStrategy, + DSDistributedStrategy +) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument( + "--strategy", "-s", type=str, + choices=['ddp', 'horovod', 'deepspeed'], + default='ddp' + ) + parser.add_argument( + "--shuffle_dataloader", + action=argparse.BooleanOptionalAction + ) + return parser.parse_args() + + +class UniformRndDataset(Dataset): + """Dummy torch dataset.""" + + def __init__(self, x_size: int, y_size: int, len: int = 100): + super().__init__() + self.x_size = x_size + self.y_size = y_size + self.len = len + + def __len__(self): + return self.len + + def __getitem__(self, index): + return torch.rand(self.x_size), torch.rand(self.y_size) + + +def trainer_entrypoint_fn( + foo: Any, args: argparse.Namespace, strategy: TorchDistributedStrategy +) -> int: + """Dummy training function. This emulates custom code developed + by some use case. + """ + strategy.init_backend() + print(f"{foo}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " + f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") + + # Local model + model = nn.Linear(3, 4) + optim = torch.optim.Adam(model.parameters(), lr=1e-3) + loss_fn = nn.MSELoss() + # Distributed model + model: nn.Module = strategy.distribute_model(model) + optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim, model) + + # Data + train_set = UniformRndDataset(x_size=3, y_size=4) + # Distributed dataloader + train_loader = DataLoader( + train_set, batch_size=10, num_workers=1, + sampler=DistributedSampler( + train_set, + num_replicas=strategy.dist_gwsize(), + rank=strategy.dist_grank(), + shuffle=args.shuffle_dataloader + ) + ) + + # Device allocated for this worker + device = strategy.dist_device() + + for epoch in range(2): + for (x, y) in train_loader: + # print(f"tensor to cuda:{device}") + x = x.to(device) + y = y.to(device) + + optim.zero_grad() + y_pred = model(x) + loss = loss_fn(y_pred, y) + loss.backward() + optim.step() + + if strategy.is_main_worker(): + print(f"Loss [epoch={epoch}]: {loss.item()}") + + strategy.clean_up() + return 123 + + +if __name__ == "__main__": + + args = parse_args() + + # Instantiate Strategy + if args.strategy == 'ddp': + if (not torch.cuda.is_available() + or not torch.cuda.device_count() > 1): + raise RuntimeError('Resources unavailable') + + strategy = DDPDistributedStrategy(backend='nccl') + elif args.strategy == 'horovod': + strategy = HVDDistributedStrategy() + elif args.strategy == 'deepspeed': + strategy = DSDistributedStrategy(...) 
+ else: + raise NotImplementedError( + f"Strategy {args.strategy} is not recognized/implemented.") + + # Launch distributed training + trainer_entrypoint_fn("foobar", args, strategy) diff --git a/tutorials/distributed-ml/ex0_multiple_torch_strategies.sh b/tutorials/distributed-ml/ex0_multiple_torch_strategies.sh new file mode 100644 index 00000000..445ee3d6 --- /dev/null +++ b/tutorials/distributed-ml/ex0_multiple_torch_strategies.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=TorchTest +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:15:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=4 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gpus-per-node=4 +#SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# parallelization STRATEGY (ddp, horovod, deepspeed) +STRATEGY='ddp' + +# parameters +debug=false # do debug +bs=32 # batch-size +epochs=10 # epochs +lr=0.01 # learning rate + + +# set modules +ml --force purge + +ml Stages/2022 NVHPC/22.1 ParaStationMPI/5.5.0-1-mt NCCL/2.11.4-CUDA-11.5 cuDNN/8.3.1.22-CUDA-11.5 +ml Python/3.9.6 CMake HDF5 PnetCDF libaio/0.3.112 mpi-settings/CUDA + +# set env +source /p/project/intertwin/rakesh/T6.5-AI-and-ML/dist_trainer/envAI_hdfml/bin/activate + +# sleep a sec +sleep 1 + +# job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +#launch +# srun python train.py --STRATEGY hvd --n_workers_per_node $SLURM_GPUS_PER_NODE + +if [[ $STRATEGY == *"horovod"* ]]; +then + echo "NOT IMPLEMENTED" + # COMMAND="horovod_trainer.py" + + # EXEC="$COMMAND \ + # --batch-size $bs \ + # --epochs $epochs \ + # --lr $lr \ + # --data-dir $dataDir" + + # # MB: how local worker processes are spawned? 
+ # srun --cpu-bind=none python3 -u $EXEC + +elif [[ $STRATEGY == *"ddp"* ]]; +then + COMMAND="ex0_multiple_torch_strategies.py --strategy ddp" + + EXEC="$COMMAND" + # --batch-size $bs \ + # --epochs $epochs \ + # --lr $lr \ + # --nworker $SLURM_CPUS_PER_TASK \ + # --data-dir $dataDir" + + srun --cpu-bind=none bash -c "torchrun \ + --log_dir='logs' \ + --nnodes=$SLURM_NNODES \ + --nproc_per_node=$SLURM_GPUS_PER_NODE \ + --rdzv_id=$SLURM_JOB_ID \ + --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ + --rdzv_backend=c10d \ + --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ + $EXEC" + +else + echo "NOT IMPLEMENTED" + # COMMAND="DS_trainer.py" + + # EXEC=$COMMAND" --batch-size $bs + # --epochs $epochs + # --nworker $SLURM_CPUS_PER_TASK + # --data-dir $dataDir" + + # #### do not change this part + # # create node-list + # sysN=$(eval "scontrol show hostnames") + # for i in $sysN; do + # x+=\"$i\":[$CUDA_VISIBLE_DEVICES], + # done + # WID=`echo {${x::-1}} | base64 -w 0` + + # # modify config file with parameters + # sed -i "2s|.*| \"train_micro_batch_size_per_gpu\": ${bs},|" DS_config.json + # sed -i "7s|.*| \"lr\": ${lr}|" DS_config.json + # #### + + # # launch + # srun python -m deepspeed.launcher.launch \ + # --node_rank $SLURM_PROCID \ + # --master_addr ${SLURMD_NODENAME}i \ + # --master_port 29500 \ + # --world_info $WID \ + # $EXEC --deepspeed_mpi --deepspeed_config DS_config.json + +fi From 4137666e24feeccff6360e7d677864a4926f022c Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 9 Feb 2024 15:34:00 +0100 Subject: [PATCH 022/171] UPDATE DDP Dp distrib strategy --- experimental/trainer/general_trainer.py | 12 +- src/itwinai/torch/distributed.py | 493 ++++++++++++------ .../ex0.1_multiple_torch_strategies.py | 55 +- .../ex0_multiple_torch_strategies.py | 19 +- 4 files changed, 389 insertions(+), 190 deletions(-) diff --git a/experimental/trainer/general_trainer.py b/experimental/trainer/general_trainer.py index ae9d7743..e22c871d 100755 --- a/experimental/trainer/general_trainer.py +++ b/experimental/trainer/general_trainer.py @@ -22,9 +22,9 @@ from torchvision import datasets, transforms from itwinai.torch.distributed import ( - DDPDistributedStrategy, - DSDistributedStrategy, - HVDDistributedStrategy + DDPDistributedStrategy_old, + DSDistributedStrategy_old, + HVDDistributedStrategy_old ) # parsed settings @@ -210,13 +210,13 @@ def main(): # Strategy for distributed training if args.strategy == 'DDP': - my_trainer = DDPDistributedStrategy() + my_trainer = DDPDistributedStrategy_old() elif args.strategy == 'DS': - my_trainer = DSDistributedStrategy() + my_trainer = DSDistributedStrategy_old() elif args.strategy == 'HVD': - my_trainer = HVDDistributedStrategy() + my_trainer = HVDDistributedStrategy_old() # limit # of CPU threads to be used per worker torch.set_num_threads(1) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index b80f8e10..d82105ee 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -1,19 +1,26 @@ import abc -from typing import Any, Union, List, Dict +from typing import Any, Union, List, Dict, Optional, Callable, Tuple from pathlib import Path import json +from pydantic import BaseModel + import deepspeed import torch import torch.distributed as dist import horovod.torch as hvd import torch.nn as nn +# from torch.nn.modules import Module import torch.optim as optim +from torch.optim.lr_scheduler import _LRScheduler as LRScheduler +from 
torch.optim.optimizer import Optimizer +from torch.cuda import amp +from torch import autocast from ..distributed import DistributedStrategy -class TorchDistributedStrategy(DistributedStrategy): +class TorchDistributedStrategy_old(DistributedStrategy): """Abstract class to define the distributed backend methods for PyTorch models. """ @@ -120,7 +127,7 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: """ -class DDPDistributedStrategy(TorchDistributedStrategy): +class DDPDistributedStrategy_old(TorchDistributedStrategy_old): """PyTorch DDP distributed strategy class. Args: @@ -249,7 +256,7 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: return res -class DSDistributedStrategy(TorchDistributedStrategy): +class DSDistributedStrategy_old(TorchDistributedStrategy_old): """DeepSpeed distributed strategy class. Args: @@ -387,7 +394,7 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: return res -class HVDDistributedStrategy(TorchDistributedStrategy): +class HVDDistributedStrategy_old(TorchDistributedStrategy_old): """Horovod distributed strategy class.""" def init_backend(self) -> None: @@ -496,46 +503,286 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: ################################################################ -class TorchDistributedStrategy2(DistributedStrategy): - """Abstract class to define the distributed backend methods for - PyTorch models. - """ +class OptimizerConfig: + def __init__(self, optim_class, **kwargs) -> None: + self.optim_class = optim_class + self.kwargs = kwargs + + def to_optim(self, parameters) -> optim.Optimizer: + return self.optim_class(parameters, **self.kwargs) + + +class LRSchedulerConfig: + def __init__(self, scheduler_class, **kwargs) -> None: + self.scheduler_class = scheduler_class + self.kwargs = kwargs + + def to_scheduler(self, optim) -> LRScheduler: + return self.scheduler_class(optim, **self.kwargs) + + +class ModelEngineConfig(BaseModel): + mixed_precision: bool = False + + +class ModelEngine(abc.ABC): + """Wrapper around distributed model""" + + model: nn.Module + _model_parameters: Any + optimizer: optim.Optimizer + lr_scheduler: LRScheduler + # config: ModelEngineConfig + mixed_precision: bool = False + grad_scaler: amp.GradScaler = None + + def __init__( + self, + model: nn.Module, + # model_parameters: Any, + optimizer: Union[optim.Optimizer, OptimizerConfig], + lr_scheduler: Optional[Union[LRScheduler, LRSchedulerConfig]] = None, + mixed_precision: bool = False + # config: Optional[ModelEngineConfig] = None + ) -> None: + super().__init__() + self.model = model + self.optimizer = optimizer + self.lr_scheduler = lr_scheduler + # self._model_parameters = model_parameters + # if isinstance(optimizer, OptimizerConfig): + # self.optimizer = optimizer.to_optim(model_parameters) + # else: + # self.optimizer = optimizer + + # if isinstance(lr_scheduler, LRSchedulerConfig): + # self.lr_scheduler = lr_scheduler.to_scheduler(self.optimizer) + # else: + # self.lr_scheduler = lr_scheduler + + # if not config: + # self.config = ModelEngineConfig() + self.mixed_precision = mixed_precision + if mixed_precision: + self.grad_scaler = amp.GradScaler() + + def __call__(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + # Wrapper of self.forward() + return self.forward(*args, **kwds) + + def forward(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + return self.model(*args, **kwds) + + def train(self, mode: bool = True) -> nn.Module: + """Set model in training mode.""" + 
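# Sketch of the deferred construction these config helpers enable: the optimizer
# and scheduler classes plus their kwargs are stored first, and the concrete
# objects are built only once the (possibly distributed) parameters exist.
# SGD/StepLR and their arguments are illustrative choices, not module defaults.
import torch
from torch import nn
from itwinai.torch.distributed import OptimizerConfig, LRSchedulerConfig

model = nn.Linear(3, 4)
opt_cfg = OptimizerConfig(torch.optim.SGD, lr=1e-3, momentum=0.9)
sched_cfg = LRSchedulerConfig(torch.optim.lr_scheduler.StepLR, step_size=10)

optimizer = opt_cfg.to_optim(model.parameters())
lr_scheduler = sched_cfg.to_scheduler(optimizer)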
self.model.train(mode=mode) + return self.model + + def eval(self) -> nn.Module: + """Set model in inference mode.""" + self.model.eval() + return self.model + + def to(self, device) -> nn.Module: + """Move model to specified device.""" + self.model.to(device) + return self.model + @abc.abstractmethod - def init(self) -> None: - """Initializes the chosen distributed backend""" + def zero_grad(): + """Set gradients to zero for the optimizer.""" @abc.abstractmethod - def distribute(self, model: Any, optimizer: Any) -> Any: - """Distributes a machine learning model and its optimizer. + def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: + """Perform backward pass and return the loss. Args: - model (Any): a generic ML model to be distributed. - device (Union[int, str]): device on which the model is run. + loss_fn (Callable): computes the loss. + *loss_args: are the arguments to be passed to ``loss_fn``. Returns: - Any: distributed model instance. + torch.Tensor: computed loss. """ @abc.abstractmethod - def broadcast_params(self, model: Any, optimizer: Any) -> None: - """Broadcasts variables from root rank to all other processes/ + def optimizer_step(self): + """Perform optimizer step.""" - Args: - model (Any): distributed model. - optimizer (Any): optimizer. - """ + @abc.abstractmethod + def lr_scheduler_step(self): + """Perform lr scheduler step, if present.""" + # This should be incorporated in the optim step: + # https://deepspeed.readthedocs.io/en/latest/schedulers.html + # scheduler is updated automatically at each training step @abc.abstractmethod - def distribute_optimizer(self, optimizer: Any, model: Any) -> Any: - """Distribute optimizer. + def save_checkpoint(self): + """Save checkpoint to persistent storage.""" + + +class DDPModelEngine(ModelEngine): + """Model engine for torch DDP distributed strategy.""" + + def forward(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + return self.model(*args, **kwds) + else: + return self.model(*args, **kwds) + + def zero_grad(self): + """Set gradients to zero for the optimizer.""" + self.optimizer.zero_grad() + + def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: + """Perform backward pass and return the loss. Args: - optimizer (Any): optimizer. - model (Any): distributed model. + loss_fn (Callable): computes the loss. + *loss_args: are the arguments to be passed to ``loss_fn``. Returns: - Any: distributed optimizer. - """ + torch.Tensor: computed loss. + """ + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + loss = loss_fn(*loss_args) + + # Scales loss. Calls backward() on scaled loss to create scaled + # gradients. + # Backward passes under autocast are not recommended. + # Backward ops run in the same dtype autocast chose for + # corresponding forward ops. + loss = self.grad_scaler.scale(loss) + else: + loss = loss_fn(*loss_args) + loss.backward() + return loss + + def optimizer_step(self): + """Perform optimizer step.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training + # scaler.step() first unscales the gradients of the optimizer's + # assigned params. 
+ # If these gradients do not contain infs or NaNs, optimizer.step() + # is then called, + # otherwise, optimizer.step() is skipped. + self.grad_scaler.step(self.optimizer) + + # Updates the scale for next iteration. + self.grad_scaler.update() + else: + self.optimizer.step() + + def lr_scheduler_step(self): + """Perform lr scheduler step, if present.""" + if self.lr_scheduler: + self.lr_scheduler.step() + + def save_checkpoint(self): + """Save checkpoint to persistent storage.""" + raise NotImplementedError + + +class DSModelEngine(ModelEngine): + """Model engine for DeeSpeed distributed strategy.""" + + def forward(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + return self.model(*args, **kwds) + else: + return self.model(*args, **kwds) + + def zero_grad(self): + """Set gradients to zero for the optimizer.""" + self.optimizer.zero_grad() + + def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: + """Perform backward pass and return the loss. + + Args: + loss_fn (Callable): computes the loss. + *loss_args: are the arguments to be passed to ``loss_fn``. + + Returns: + torch.Tensor: computed loss. + """ + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + loss = loss_fn(*loss_args) + + # Scales loss. Calls backward() on scaled loss to create scaled + # gradients. + # Backward passes under autocast are not recommended. + # Backward ops run in the same dtype autocast chose for + # corresponding forward ops. + loss = self.grad_scaler.scale(loss) + else: + loss = loss_fn(*loss_args) + loss.backward() + return loss + + def optimizer_step(self): + """Perform optimizer step.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training + # scaler.step() first unscales the gradients of the optimizer's + # assigned params. + # If these gradients do not contain infs or NaNs, optimizer.step() + # is then called, + # otherwise, optimizer.step() is skipped. + self.grad_scaler.step(self.optimizer) + + # Updates the scale for next iteration. + self.grad_scaler.update() + else: + self.optimizer.step() + + def lr_scheduler_step(self): + """Perform lr scheduler step, if present.""" + if self.lr_scheduler: + self.lr_scheduler.step() + + def save_checkpoint(self): + """Save checkpoint to persistent storage.""" + raise NotImplementedError + + +class TorchDistributedStrategy(DistributedStrategy): + """Abstract class to define the distributed backend methods for + PyTorch models. 
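# The per-batch contract that both engines above implement, as a sketch (`engine`
# stands for any concrete ModelEngine, e.g. a DDPModelEngine; mixed precision,
# gradient scaling and the optional scheduler are handled inside the engine):
import torch
from torch import nn

def train_step(
    engine, loss_fn: nn.Module, x: torch.Tensor, y: torch.Tensor
) -> torch.Tensor:
    engine.zero_grad()
    y_pred = engine(x)                       # forward, autocast when mixed_precision=True
    loss = engine.backward(loss_fn, y_pred, y)
    engine.optimizer_step()
    engine.lr_scheduler_step()
    return loss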
+ """ + @abc.abstractmethod + def init(self) -> None: + """Initializes the chosen distributed backend""" + + # @abc.abstractmethod + # def distributed_engine( + # self, model: nn.Module, optimizer: Optimizer, + # lr_scheduler: Optional[LRScheduler] = None + # ) -> ModelEngine: + # """Build a distributed model engine.""" + + @abc.abstractmethod + def distributed( + self, model: nn.Module, optimizer: Optimizer, + lr_scheduler: Optional[LRScheduler] = None + ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: + """Setup model, optimizer and scheduler for distributed.""" @abc.abstractmethod def dist_gwsize(self) -> int: @@ -604,7 +851,7 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: """ -class DDPDistributedStrategy2(TorchDistributedStrategy2): +class DDPDistributedStrategy(TorchDistributedStrategy): """PyTorch DDP distributed strategy class. Args: @@ -612,6 +859,7 @@ class DDPDistributedStrategy2(TorchDistributedStrategy2): """ backend: str + model: DDPModelEngine def __init__(self, backend: str) -> None: super().__init__() @@ -621,21 +869,42 @@ def init(self) -> None: """Initializes the distributed process group and the distributed package. """ - if torch.cuda.is_available(): + if torch.cuda.is_available() and torch.cuda.device_count() > 1: dist.init_process_group(backend=self.backend) - - def distribute(self, model: nn.Module) -> nn.Module: - """Achieves data parallelism by synchronizing the gradients - across each model replica located in each available - computing device. - - Args: - model (nn.Module): ML model to be distributed. - - Returns: - nn.Module: Distributed model replicas across all devices. - that are to be synchronized. - """ + else: + print("WARNING: trying to run distributed on insufficient" + " resources. Skipping distributed process group setup.") + + # def distributed_engine( + # self, model: nn.Module, optimizer: Optimizer, + # lr_scheduler: Optional[LRScheduler] = None, + # mixed_precision: bool = False + # ) -> ModelEngine: + # """Build a distributed model engine.""" + # if torch.cuda.is_available(): + # # device = self.dist_lrank() + # model = model.to(self.dist_device()) + # dist_model = torch.nn.parallel.DistributedDataParallel( + # model, + # device_ids=[self.dist_device()], + # output_device=self.dist_device() + # ) + # else: + # dist_model = model + + # model_engine = DDPModelEngine( + # dist_model, optimizer, lr_scheduler, + # mixed_precision=mixed_precision + # ) + + # return model_engine + + def distributed( + self, model: nn.Module, optimizer: Optimizer, + lr_scheduler: Optional[LRScheduler] = None, + **kwargs + ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: + """Setup model, optimizer and scheduler for distributed.""" if torch.cuda.is_available(): # device = self.dist_lrank() model = model.to(self.dist_device()) @@ -647,36 +916,7 @@ def distribute(self, model: nn.Module) -> nn.Module: else: dist_model = model - return dist_model - - def broadcast_params( - self, - model: nn.Module, - optimizer: optim.Optimizer - ) -> None: - """Do nothing. Only applicable for Horovod. - - Args: - model (nn.Module): ML model - optimizer (optim.Optimizer): Optimizer - """ - pass - - def distribute_optimizer( - self, - optimizer: optim.Optimizer, - model: nn.Module = None - ) -> optim.Optimizer: - """Returns the optimizer from argument. - - Args: - optimizer (optim.Optimizer): optimizer. - model (nn.Module): ML model. Unused here. - - Returns: - optim.Optimizer: Distributed optimizer. 
- """ - return optimizer + return dist_model, optimizer, lr_scheduler def dist_gwsize(self) -> int: """Returns the total number of processes (global world size). @@ -733,7 +973,7 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: return res -class DSDistributedStrategy2(TorchDistributedStrategy2): +class DSDistributedStrategy(TorchDistributedStrategy): """DeepSpeed distributed strategy class. Args: @@ -770,53 +1010,23 @@ def init(self) -> None: # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization deepspeed.init_distributed(dist_backend=self.backend) - def distribute(self, model: nn.Module) -> nn.Module: - """Achieves data parallelism by synchronizing the gradients - across each model replica located in each available - computing device. - - Args: - model (nn.Module): ML model to be distributed. - - Returns: - nn.Module: Distributed model replicas across all devices - that are to be synchronized. - """ + def distributed( + self, model: nn.Module, optimizer: Optional[Optimizer] = None, + lr_scheduler: Optional[LRScheduler] = None, + model_parameters: Optional[Any] = None, **kwargs + ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: + """Setup model, optimizer and scheduler for distributed.""" # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization - distrib_model, __, __, __ = deepspeed.initialize( + # To prioritize optim in the config, you need to pass optim=None + distrib_model, optimizer, _, lr_scheduler = deepspeed.initialize( model=model, - model_parameters=model.parameters(), + model_parameters=model_parameters, + optimizer=optimizer, + lr_scheduler=lr_scheduler, dist_init_required=True, config=self.config ) - return distrib_model - - def broadcast_params( - self, model: nn.Module, optimizer: optim.Optimizer - ) -> None: - """Only applicable for Horovod. Does nothing. - - Args: - model (nn.Module): ML model. - optimizer (optim.Optimizer): optimizer. - """ - pass - - def distribute_optimizer( - self, - optimizer: optim.Optimizer, - model: nn.Module = None - ) -> optim.Optimizer: - """Returns the optimizer from argument. - - Args: - optimizer (optim.Optimizer): torch optimizer. - model (nn.Module): torch neural network. - - Returns: - optim.Optimizer: distributed optimizer. - """ - return optimizer + return distrib_model, optimizer, lr_scheduler def dist_gwsize(self) -> int: """Returns the total number of processes (global world size). @@ -871,26 +1081,28 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: return res -class HVDDistributedStrategy2(TorchDistributedStrategy2): +class HVDDistributedStrategy(TorchDistributedStrategy): """Horovod distributed strategy class.""" def init(self) -> None: """Initializes the Horovod distributed backend.""" hvd.init() - def distribute(self, model: nn.Module) -> nn.Module: - """Only applicable for DDP and DeepSpeed. - For Horovod, returns the same model passed as argument. - - Args: - model (nn.Module): ML model to be distributed. - - Returns: - nn.Module: ML model passed in the argument. 
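# With DeepSpeed, the optimizer can also be defined inside the config; in that
# case pass optimizer=None (as noted above) and let deepspeed.initialize build it.
# A sketch meant to run under the deepspeed launcher (config values illustrative):
import torch
from torch import nn
from itwinai.torch.distributed import DSDistributedStrategy

ds_config = {
    "train_batch_size": 16,
    "optimizer": {"type": "Adam", "params": {"lr": 1e-3}},
}
strategy = DSDistributedStrategy(backend="nccl", config=ds_config)
strategy.init()

model = nn.Linear(3, 4)
model, optimizer, lr_scheduler = strategy.distributed(
    model, optimizer=None, model_parameters=model.parameters()
)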
- """ - return model + def distributed( + self, model: nn.Module, optimizer: Optional[Optimizer] = None, + lr_scheduler: Optional[LRScheduler] = None, + **kwargs + ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: + """Setup model, optimizer and scheduler for distributed.""" + self._broadcast_params(model, optimizer) + distOptimizer = hvd.DistributedOptimizer( + optimizer, + named_parameters=model.named_parameters(), + op=hvd.Average + ) + return model, distOptimizer, lr_scheduler - def broadcast_params( + def _broadcast_params( self, model: nn.Module, optimizer: optim.Optimizer ) -> None: """Broadcasts variables from root rank to all other processes. @@ -904,29 +1116,6 @@ def broadcast_params( hvd.broadcast_parameters(model.state_dict(), root_rank=0) hvd.broadcast_optimizer_state(optimizer, root_rank=-0) - def distribute_optimizer( - self, - optimizer: optim.Optimizer, - model: nn.Module - ) -> optim.Optimizer: - """Constructs a DistributedOptimizer, for computing single-process - gradient values and applying gradient updates after the gradient values - have been combined across all the Horovod ranks. - - Args: - optimizer (optim.Optimizer): Optimizer to be distributed. - model (nn.Module): ML model to be trained. - - Returns: - optim.Optimizer: Distributed optimizer across all ranks. - """ - distOptimizer = hvd.DistributedOptimizer( - optimizer, - named_parameters=model.named_parameters(), - op=hvd.Average - ) - return distOptimizer - def dist_gwsize(self) -> int: """Returns the total number of processes (global world size). diff --git a/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py b/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py index 35c7a4ab..38f2e6b9 100644 --- a/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py +++ b/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py @@ -3,29 +3,17 @@ Depending on the strategy you choose, you need to run this script with different ad-hoc commands: -Torch DistributedDataParallel (DDP). Launch with torchrun:: +Torch DistributedDataParallel (DDP). Launch with torchrun: >>> micromamba run -p ../../.venv-pytorch/ torchrun \ --rdzv_backend=c10d \ --rdzv_endpoint=localhost:0 \ --nnodes=1 \ --nproc_per_node=4 \ - ex0_multiple_torch_strategies.py -s ddp - - -Using a SLURM jobscript: - -1. Torch DistributedDataParallel (DDP): -set STRATEGY="ddp" in ``torchrun ex0_multiple_torch_strategies.sh`` -2. Horovod: -set STRATEGY="horovod" in ``torchrun ex0_multiple_torch_strategies.sh`` -3. DeepSpeed: -set STRATEGY="deepspeed" in ``torchrun ex0_multiple_torch_strategies.sh`` - -Execute ``torchrun ex0_multiple_torch_strategies.sh`` in a slurm environment: - ->>> sbatch ex0_multiple_torch_strategies.sh - + ex0.1_multiple_torch_strategies.py -s ddp +DeepSpeed. 
Launch with deepspeed: +>>> micromamba run -p ../../.venv-pytorch/ deepspeed \ + ex0.1_multiple_torch_strategies.py -s deepspeed --deepspeed """ from typing import Any import os @@ -39,7 +27,8 @@ TorchDistributedStrategy, DDPDistributedStrategy, HVDDistributedStrategy, - DSDistributedStrategy + DSDistributedStrategy, + # ModelEngine ) @@ -54,7 +43,16 @@ def parse_args() -> argparse.Namespace: "--shuffle_dataloader", action=argparse.BooleanOptionalAction ) - return parser.parse_args() + + # DeepSpeed + import deepspeed + parser.add_argument('--local_rank', type=int, default=-1, + help='local rank passed from distributed launcher') + parser = deepspeed.add_config_arguments(parser) + args = parser.parse_args() + # os.environ['LOCAL_RANK'] = str(args.local_rank) # may not be needed + + return args class UniformRndDataset(Dataset): @@ -79,7 +77,7 @@ def trainer_entrypoint_fn( """Dummy training function. This emulates custom code developed by some use case. """ - strategy.init_backend() + strategy.init() print(f"{foo}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") @@ -88,8 +86,10 @@ def trainer_entrypoint_fn( optim = torch.optim.Adam(model.parameters(), lr=1e-3) loss_fn = nn.MSELoss() # Distributed model - model: nn.Module = strategy.distribute_model(model) - optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim, model) + # model_engine: ModelEngine = strategy.distributed(model, optim) + model, optim, lr_sched = strategy.distributed( + model, optim, lr_scheduler=None + ) # Data train_set = UniformRndDataset(x_size=3, y_size=4) @@ -114,14 +114,21 @@ def trainer_entrypoint_fn( y = y.to(device) optim.zero_grad() + y_pred = model(x) + loss = loss_fn(y_pred, y) loss.backward() + optim.step() if strategy.is_main_worker(): print(f"Loss [epoch={epoch}]: {loss.item()}") + # Update scheduler + if lr_sched: + lr_sched.step() + strategy.clean_up() return 123 @@ -140,7 +147,9 @@ def trainer_entrypoint_fn( elif args.strategy == 'horovod': strategy = HVDDistributedStrategy() elif args.strategy == 'deepspeed': - strategy = DSDistributedStrategy(...) + strategy = DSDistributedStrategy( + backend='nccl', config=dict(train_batch_size=4) + ) else: raise NotImplementedError( f"Strategy {args.strategy} is not recognized/implemented.") diff --git a/tutorials/distributed-ml/ex0_multiple_torch_strategies.py b/tutorials/distributed-ml/ex0_multiple_torch_strategies.py index 35c7a4ab..0f7f1e17 100644 --- a/tutorials/distributed-ml/ex0_multiple_torch_strategies.py +++ b/tutorials/distributed-ml/ex0_multiple_torch_strategies.py @@ -3,7 +3,7 @@ Depending on the strategy you choose, you need to run this script with different ad-hoc commands: -Torch DistributedDataParallel (DDP). Launch with torchrun:: +Torch DistributedDataParallel (DDP). 
Launch with torchrun: >>> micromamba run -p ../../.venv-pytorch/ torchrun \ --rdzv_backend=c10d \ --rdzv_endpoint=localhost:0 \ @@ -36,10 +36,10 @@ from torch.utils.data import DataLoader, Dataset, DistributedSampler from itwinai.torch.distributed import ( - TorchDistributedStrategy, - DDPDistributedStrategy, - HVDDistributedStrategy, - DSDistributedStrategy + TorchDistributedStrategy_old, + DDPDistributedStrategy_old, + HVDDistributedStrategy_old, + DSDistributedStrategy_old ) @@ -74,7 +74,8 @@ def __getitem__(self, index): def trainer_entrypoint_fn( - foo: Any, args: argparse.Namespace, strategy: TorchDistributedStrategy + foo: Any, args: argparse.Namespace, + strategy: TorchDistributedStrategy_old ) -> int: """Dummy training function. This emulates custom code developed by some use case. @@ -136,11 +137,11 @@ def trainer_entrypoint_fn( or not torch.cuda.device_count() > 1): raise RuntimeError('Resources unavailable') - strategy = DDPDistributedStrategy(backend='nccl') + strategy = DDPDistributedStrategy_old(backend='nccl') elif args.strategy == 'horovod': - strategy = HVDDistributedStrategy() + strategy = HVDDistributedStrategy_old() elif args.strategy == 'deepspeed': - strategy = DSDistributedStrategy(...) + strategy = DSDistributedStrategy_old(...) else: raise NotImplementedError( f"Strategy {args.strategy} is not recognized/implemented.") From 2a3d6ec4aeac4e60b2945723e91f5ed034bf5c9d Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 9 Feb 2024 16:29:04 +0100 Subject: [PATCH 023/171] UPDATE horovod strategy --- src/itwinai/torch/distributed.py | 6 ++++++ .../distributed-ml/ex0.1_multiple_torch_strategies.py | 8 ++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index d82105ee..1a963b38 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -1087,6 +1087,7 @@ class HVDDistributedStrategy(TorchDistributedStrategy): def init(self) -> None: """Initializes the Horovod distributed backend.""" hvd.init() + torch.cuda.set_device(hvd.local_rank()) def distributed( self, model: nn.Module, optimizer: Optional[Optimizer] = None, @@ -1094,7 +1095,12 @@ def distributed( **kwargs ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: """Setup model, optimizer and scheduler for distributed.""" + + model.to(self.dist_device()) self._broadcast_params(model, optimizer) + + # TODO: here you may need to scale the lr + distOptimizer = hvd.DistributedOptimizer( optimizer, named_parameters=model.named_parameters(), diff --git a/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py b/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py index 38f2e6b9..2d841ea7 100644 --- a/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py +++ b/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py @@ -13,7 +13,11 @@ DeepSpeed. Launch with deepspeed: >>> micromamba run -p ../../.venv-pytorch/ deepspeed \ - ex0.1_multiple_torch_strategies.py -s deepspeed --deepspeed + ex0.1_multiple_torch_strategies.py -s deepspeed + +Horovod. 
Launch with horovodrun: +>>> micromamba run -p ../../.venv-pytorch/ horovodrun -np 4 \ + python ex0.1_multiple_torch_strategies.py -s horovod """ from typing import Any import os @@ -44,7 +48,7 @@ def parse_args() -> argparse.Namespace: action=argparse.BooleanOptionalAction ) - # DeepSpeed + # DeepSpeed: needs to be removed import deepspeed parser.add_argument('--local_rank', type=int, default=-1, help='local rank passed from distributed launcher') From f798c111d1b752cfcb778a17b5cd52e8c1a9fcd2 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 10 Feb 2024 18:18:22 +0100 Subject: [PATCH 024/171] UPDATE tutorial on torch distributed strategies --- .gitignore | 2 + Makefile | 6 +- env-files/torch/createEnvJSC.sh | 196 ++++++++++++++++++ pyproject.toml | 27 ++- .../ex0_multiple_torch_strategies.py | 150 -------------- .../ex0_multiple_torch_strategies.sh | 137 ------------ .../torch-ddp-deepspeed-horovod/0 | 0 .../torch-ddp-deepspeed-horovod/ddp_slurm.sh | 65 ++++++ .../deepspeed_slurm.sh | 57 +++++ .../torch-ddp-deepspeed-horovod/hvd_slurm.sh | 53 +++++ .../train.py} | 19 +- 11 files changed, 407 insertions(+), 305 deletions(-) create mode 100644 env-files/torch/createEnvJSC.sh delete mode 100644 tutorials/distributed-ml/ex0_multiple_torch_strategies.py delete mode 100644 tutorials/distributed-ml/ex0_multiple_torch_strategies.sh create mode 100644 tutorials/distributed-ml/torch-ddp-deepspeed-horovod/0 create mode 100644 tutorials/distributed-ml/torch-ddp-deepspeed-horovod/ddp_slurm.sh create mode 100644 tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh create mode 100644 tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh rename tutorials/distributed-ml/{ex0.1_multiple_torch_strategies.py => torch-ddp-deepspeed-horovod/train.py} (91%) diff --git a/.gitignore b/.gitignore index 022d77ab..0d55c2b1 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ pl-training.yml *.tar.gz *.pth *.csv +*tar.gz # Use cases files MNIST @@ -22,6 +23,7 @@ exp_data/ # Custom envs .venv* +envAI_hdfml/ # Logs logs/ diff --git a/Makefile b/Makefile index 9e225b19..019fb452 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,11 @@ # Install PyTorch env (GPU support) torch-gpu: env-files/torch/pytorch-env-gpu.yml micromamba env create -p ./.venv-pytorch --file env-files/torch/pytorch-env-gpu.yml -y - micromamba run -p ./.venv-pytorch python -m pip install -e . 
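+# the optional 'distributed' extra adds deepspeed and horovod (see pyproject.toml)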
+ micromamba run -p ./.venv-pytorch python -m pip install -e .[distributed] + +# Install PyTorch env (GPU support) on Juelich Super Computer (tested on HDFML system) +torch-gpu-jsc: env-files/torch/createEnvJSC.sh + sh env-files/torch/createEnvJSC.sh # Install PyTorch env (CPU only) torch-cpu: env-files/torch/pytorch-env-cpu.yml diff --git a/env-files/torch/createEnvJSC.sh b/env-files/torch/createEnvJSC.sh new file mode 100644 index 00000000..f414f6e0 --- /dev/null +++ b/env-files/torch/createEnvJSC.sh @@ -0,0 +1,196 @@ +#!/bin/bash +# -*- coding: utf-8 -*- +# author: EI, RS, Matteo Bunino + +# set dir +cDir=$PWD + +# environmental variables +mkdir -p tmp +export TMPDIR=${cDir}/tmp # set tmp dir env var + +# get sys info +sysN="$(uname -n | cut -f2- -d.)" +sysN="${sysN%%[0-9]*}" + +# load modules +ml Stages/2024 GCC OpenMPI CUDA/12 cuDNN MPI-settings/CUDA +ml Python CMake HDF5 PnetCDF libaio mpi4py +# echo "these modules are loaded:" +# ml + +# get python version +pver="$(python --version 2>&1 | awk {'print $2'} | cut -f1-2 -d.)" + +# use pyenv if exist +if [ -d "$HOME/.pyenv" ];then + export PYENV_ROOT="$HOME/.pyenv" + export PATH="$PYENV_ROOT/bin:$PATH" +fi + +# create environment +if [ -d "${cDir}/envAI_${sysN}" ];then + echo 'env already exist' + + source envAI_${sysN}/bin/activate +else + python3 -m venv envAI_${sysN} + + # activate env + source envAI_${sysN}/bin/activate + + echo "envAI_${sysN} environment is created in ${cDir}" +fi + +# get wheel -- setuptools extension +pip3 install --no-cache-dir wheel + +# install Torch +if [ -f "${cDir}/envAI_${sysN}/bin/torchrun" ]; then + echo 'Torch already installed' +else + pip3 install --no-cache-dir \ + torch==2.1.0+cu121 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 +fi + +# HPO - RayTune +if [ -f "${cDir}/envAI_${sysN}/bin/ray" ]; then + echo 'Ray already installed' +else + pip3 install --no-cache-dir ray ray[tune] +fi + +# install deepspeed +if [ -f "${cDir}/envAI_${sysN}/bin/deepspeed" ]; then + echo 'DeepSpeed already installed' +else + export DS_BUILD_CCL_COMM=1 + export DS_BUILD_UTILS=1 + export DS_BUILD_AIO=1 + export DS_BUILD_FUSED_ADAM=1 + export DS_BUILD_FUSED_LAMB=1 + export DS_BUILD_TRANSFORMER=1 + export DS_BUILD_STOCHASTIC_TRANSFORMER=1 + export DS_BUILD_TRANSFORMER_INFERENCE=1 + + # this will pass + pip3 install --no-cache-dir DeepSpeed + + # fix .triton/autotune/Fp16Matmul_2d_kernel.pickle bug + line=$(cat -n envAI_${sysN}/lib/python${pver}/site-packages/deepspeed/ops/transformer/inference/triton/matmul_ext.py | grep os.rename | awk '{print $1}' | head -n 1) + sed -i "${line}s|^|#|" envAI_${sysN}/lib/python${pver}/site-packages/deepspeed/ops/transformer/inference/triton/matmul_ext.py +fi + +# # install heat +# if [ -d "${cDir}/envAI_${sysN}/lib/python${pver}/site-packages/heat" ]; then +# echo 'HeAT already installed' +# else +# # need to modify setup.py to accep torch>2.1 for heat +# git clone --recurse-submodules https://github.com/helmholtz-analytics/heat.git +# line=$(cat -n heat/setup.py | grep torch | awk '{print $1}' | head -n 1) +# var=' "torch>=2.1.0",' +# sed -i "${line}s|.*|$var|" heat/setup.py + +# # create tar! 
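+#   # (same tarball approach as the horovod install below, so pip builds the
+#   # locally patched sources)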
+# rm -rf heat.tar.gz +# tar czf heat.tar.gz heat + +# # install +# pip3 install --no-cache-dir 'heat.tar.gz[hdf5,netcdf]' +# fi + +# install horovod +if [ -f "${cDir}/envAI_${sysN}/bin/horovodrun" ]; then + echo 'Horovod already installed' +else + # compiler vars + export LDSHARED="$CC -shared" && + export CMAKE_CXX_STANDARD=17 + + # CPU vars + export HOROVOD_MPI_THREADS_DISABLE=1 + export HOROVOD_CPU_OPERATIONS=MPI + + # GPU vars + export HOROVOD_GPU_ALLREDUCE=NCCL + export HOROVOD_NCCL_LINK=SHARED + export HOROVOD_NCCL_HOME=$EBROOTNCCL + + # Host language vars + export HOROVOD_WITH_PYTORCH=1 + export HOROVOD_WITHOUT_TENSORFLOW=1 + export HOROVOD_WITHOUT_MXNET=1 + + # need to modify for torch 2.1.0 + git clone --recurse-submodules https://github.com/horovod/horovod.git + line=$(cat -n horovod/CMakeLists.txt | grep CMAKE_CXX_STANDARD | awk '{print $1}' | head -n 1) + var='set(CMAKE_CXX_STANDARD 17)' + sed -i "${line}s|.*|$var|" horovod/CMakeLists.txt + line=$(cat -n horovod/horovod/torch/CMakeLists.txt | grep CMAKE_CXX_STANDARD | awk '{print $1}' | head -n 1) + var=' set(CMAKE_CXX_STANDARD 17)' + sed -i "${line}s|.*|$var|" horovod/horovod/torch/CMakeLists.txt + + # create tar! + rm -rf horovod.tar.gz + tar czf horovod.tar.gz horovod + + # install + pip3 install --no-cache-dir horovod.tar.gz +fi + +# get required libraries in reqs.txt +if [ -f "${cDir}/envAI_${sysN}/lib/python${pver}/site-packages/torchnlp/_third_party/weighted_random_sampler.py" ]; then + echo 'required libs already exist' +else + pip3 install -r Scripts/reqs.txt --no-cache-dir + + # fix int bug: modify l.4 of /torchnlp/_third_party/weighted_random_sampler.py + var='int_classes = int' + sed -i "4s|.*|$var|" \ + ${cDir}/envAI_${sysN}/lib/python${pver}/site-packages/torchnlp/_third_party/weighted_random_sampler.py +fi + +# fix IB IP config - FZJ specific +if [ -f "${cDir}/envAI_${sysN}/bin/torchrun" ]; then + sed -i -e '5,100s/^/#/' ${cDir}/envAI_${sysN}/bin/torchrun + echo """ +import re +import sys +from torch.distributed.run import main +from torch.distributed.elastic.agent.server import api as sapi + +def new_get_fq_hostname(): + return _orig_get_fq_hostname().replace('.', 'i.', 1) + +if __name__ == '__main__': + _orig_get_fq_hostname = sapi._get_fq_hostname + sapi._get_fq_hostname = new_get_fq_hostname + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) +""" >> ${cDir}/envAI_${sysN}/bin/torchrun +fi + +# JUBE benchmarking environment +if [ -f "${cDir}/envAI_${sysN}/bin/jube" ]; then + echo 'JUBE already installed' +else + pip3 install --no-cache-dir http://apps.fz-juelich.de/jsc/jube/jube2/download.php?version=latest +fi + +# unit tests +echo +echo 'AI4HPC is installed!' +echo + +# some tests +echo "unit tests:" +for item in 'torch' 'deepspeed' 'horovod';do + python3 -c "import $item; print('$item version:',$item.__version__)" +done + +# Install itwinai +pip install --upgrade pip +pip install -e . 
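+
+# optional sanity check: confirm itwinai resolves from inside the venv
+python3 -c "import itwinai; print('itwinai imported from:', itwinai.__file__)"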
+ +# cleanup +rm -rf horovod *.tar.gz \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index c50e1e86..15637745 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,17 +26,20 @@ maintainers = [ classifiers = ["Development Status :: Beta", "Programming Language :: Python"] dependencies = [ - "wandb>=0.15.11", - "mlflow>=2.7", - "jsonargparse[signatures]>=4.17.0", - "pyyaml>=6.0.1", - "omegaconf>=2.3.0", - "submitit>=1.4.6", - "typing-extensions==4.5.0", - "typing_extensions==4.5.0", - "urllib3>=2.0.5", - "deepspeed>=0.13.1", - "horovod[tensorflow,keras,pytorch]>=0.28.1", + "wandb", + "mlflow", + "jsonargparse[signatures]", + "pyyaml", + "omegaconf", + # "wandb>=0.15.11", + # "mlflow>=2.7", + # "jsonargparse[signatures]>=4.17.0", + # "pyyaml>=6.0.1", + # "omegaconf>=2.3.0", + # "submitit>=1.4.6", + # "typing-extensions==4.5.0", + # "typing_extensions==4.5.0", + # "urllib3>=2.0.5", ] # dynamic = ["version", "description"] @@ -45,6 +48,8 @@ dependencies = [ # TODO: add torch and tensorflow # torch = [] # tf = [] +distributed = ["deepspeed>=0.13.1", "horovod[tensorflow,keras,pytorch]>=0.28.1"] +cli = ["rich>=13.5.3", "typer>=0.9.0"] dev = [ "pytest>=7.4.2", "pytest-mock>=3.11.1", diff --git a/tutorials/distributed-ml/ex0_multiple_torch_strategies.py b/tutorials/distributed-ml/ex0_multiple_torch_strategies.py deleted file mode 100644 index 0f7f1e17..00000000 --- a/tutorials/distributed-ml/ex0_multiple_torch_strategies.py +++ /dev/null @@ -1,150 +0,0 @@ -""" -Show how to use DDP, Horovod and DeepSpeed strategies interchangeably. -Depending on the strategy you choose, you need to run this script with -different ad-hoc commands: - -Torch DistributedDataParallel (DDP). Launch with torchrun: ->>> micromamba run -p ../../.venv-pytorch/ torchrun \ - --rdzv_backend=c10d \ - --rdzv_endpoint=localhost:0 \ - --nnodes=1 \ - --nproc_per_node=4 \ - ex0_multiple_torch_strategies.py -s ddp - - -Using a SLURM jobscript: - -1. Torch DistributedDataParallel (DDP): -set STRATEGY="ddp" in ``torchrun ex0_multiple_torch_strategies.sh`` -2. Horovod: -set STRATEGY="horovod" in ``torchrun ex0_multiple_torch_strategies.sh`` -3. DeepSpeed: -set STRATEGY="deepspeed" in ``torchrun ex0_multiple_torch_strategies.sh`` - -Execute ``torchrun ex0_multiple_torch_strategies.sh`` in a slurm environment: - ->>> sbatch ex0_multiple_torch_strategies.sh - - -""" -from typing import Any -import os -import argparse - -import torch -from torch import nn -from torch.utils.data import DataLoader, Dataset, DistributedSampler - -from itwinai.torch.distributed import ( - TorchDistributedStrategy_old, - DDPDistributedStrategy_old, - HVDDistributedStrategy_old, - DSDistributedStrategy_old -) - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser() - parser.add_argument( - "--strategy", "-s", type=str, - choices=['ddp', 'horovod', 'deepspeed'], - default='ddp' - ) - parser.add_argument( - "--shuffle_dataloader", - action=argparse.BooleanOptionalAction - ) - return parser.parse_args() - - -class UniformRndDataset(Dataset): - """Dummy torch dataset.""" - - def __init__(self, x_size: int, y_size: int, len: int = 100): - super().__init__() - self.x_size = x_size - self.y_size = y_size - self.len = len - - def __len__(self): - return self.len - - def __getitem__(self, index): - return torch.rand(self.x_size), torch.rand(self.y_size) - - -def trainer_entrypoint_fn( - foo: Any, args: argparse.Namespace, - strategy: TorchDistributedStrategy_old -) -> int: - """Dummy training function. 
This emulates custom code developed - by some use case. - """ - strategy.init_backend() - print(f"{foo}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " - f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") - - # Local model - model = nn.Linear(3, 4) - optim = torch.optim.Adam(model.parameters(), lr=1e-3) - loss_fn = nn.MSELoss() - # Distributed model - model: nn.Module = strategy.distribute_model(model) - optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim, model) - - # Data - train_set = UniformRndDataset(x_size=3, y_size=4) - # Distributed dataloader - train_loader = DataLoader( - train_set, batch_size=10, num_workers=1, - sampler=DistributedSampler( - train_set, - num_replicas=strategy.dist_gwsize(), - rank=strategy.dist_grank(), - shuffle=args.shuffle_dataloader - ) - ) - - # Device allocated for this worker - device = strategy.dist_device() - - for epoch in range(2): - for (x, y) in train_loader: - # print(f"tensor to cuda:{device}") - x = x.to(device) - y = y.to(device) - - optim.zero_grad() - y_pred = model(x) - loss = loss_fn(y_pred, y) - loss.backward() - optim.step() - - if strategy.is_main_worker(): - print(f"Loss [epoch={epoch}]: {loss.item()}") - - strategy.clean_up() - return 123 - - -if __name__ == "__main__": - - args = parse_args() - - # Instantiate Strategy - if args.strategy == 'ddp': - if (not torch.cuda.is_available() - or not torch.cuda.device_count() > 1): - raise RuntimeError('Resources unavailable') - - strategy = DDPDistributedStrategy_old(backend='nccl') - elif args.strategy == 'horovod': - strategy = HVDDistributedStrategy_old() - elif args.strategy == 'deepspeed': - strategy = DSDistributedStrategy_old(...) - else: - raise NotImplementedError( - f"Strategy {args.strategy} is not recognized/implemented.") - - # Launch distributed training - trainer_entrypoint_fn("foobar", args, strategy) diff --git a/tutorials/distributed-ml/ex0_multiple_torch_strategies.sh b/tutorials/distributed-ml/ex0_multiple_torch_strategies.sh deleted file mode 100644 index 445ee3d6..00000000 --- a/tutorials/distributed-ml/ex0_multiple_torch_strategies.sh +++ /dev/null @@ -1,137 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=TorchTest -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job.out -#SBATCH --error=job.err -#SBATCH --time=00:15:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=4 -#SBATCH --ntasks-per-node=1 -#SBATCH --cpus-per-task=32 -#SBATCH --gpus-per-node=4 -#SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# parallelization STRATEGY (ddp, horovod, deepspeed) -STRATEGY='ddp' - -# parameters -debug=false # do debug -bs=32 # batch-size -epochs=10 # epochs -lr=0.01 # learning rate - - -# set modules -ml --force purge - -ml Stages/2022 NVHPC/22.1 ParaStationMPI/5.5.0-1-mt NCCL/2.11.4-CUDA-11.5 cuDNN/8.3.1.22-CUDA-11.5 -ml Python/3.9.6 CMake HDF5 PnetCDF libaio/0.3.112 mpi-settings/CUDA - -# set env -source /p/project/intertwin/rakesh/T6.5-AI-and-ML/dist_trainer/envAI_hdfml/bin/activate - -# sleep a sec -sleep 1 - -# job info -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo 
"DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set comm -export CUDA_VISIBLE_DEVICES="0,1,2,3" -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi - -#launch -# srun python train.py --STRATEGY hvd --n_workers_per_node $SLURM_GPUS_PER_NODE - -if [[ $STRATEGY == *"horovod"* ]]; -then - echo "NOT IMPLEMENTED" - # COMMAND="horovod_trainer.py" - - # EXEC="$COMMAND \ - # --batch-size $bs \ - # --epochs $epochs \ - # --lr $lr \ - # --data-dir $dataDir" - - # # MB: how local worker processes are spawned? - # srun --cpu-bind=none python3 -u $EXEC - -elif [[ $STRATEGY == *"ddp"* ]]; -then - COMMAND="ex0_multiple_torch_strategies.py --strategy ddp" - - EXEC="$COMMAND" - # --batch-size $bs \ - # --epochs $epochs \ - # --lr $lr \ - # --nworker $SLURM_CPUS_PER_TASK \ - # --data-dir $dataDir" - - srun --cpu-bind=none bash -c "torchrun \ - --log_dir='logs' \ - --nnodes=$SLURM_NNODES \ - --nproc_per_node=$SLURM_GPUS_PER_NODE \ - --rdzv_id=$SLURM_JOB_ID \ - --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ - --rdzv_backend=c10d \ - --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ - $EXEC" - -else - echo "NOT IMPLEMENTED" - # COMMAND="DS_trainer.py" - - # EXEC=$COMMAND" --batch-size $bs - # --epochs $epochs - # --nworker $SLURM_CPUS_PER_TASK - # --data-dir $dataDir" - - # #### do not change this part - # # create node-list - # sysN=$(eval "scontrol show hostnames") - # for i in $sysN; do - # x+=\"$i\":[$CUDA_VISIBLE_DEVICES], - # done - # WID=`echo {${x::-1}} | base64 -w 0` - - # # modify config file with parameters - # sed -i "2s|.*| \"train_micro_batch_size_per_gpu\": ${bs},|" DS_config.json - # sed -i "7s|.*| \"lr\": ${lr}|" DS_config.json - # #### - - # # launch - # srun python -m deepspeed.launcher.launch \ - # --node_rank $SLURM_PROCID \ - # --master_addr ${SLURMD_NODENAME}i \ - # --master_port 29500 \ - # --world_info $WID \ - # $EXEC --deepspeed_mpi --deepspeed_config DS_config.json - -fi diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/0 b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/0 new file mode 100644 index 00000000..e69de29b diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/ddp_slurm.sh b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/ddp_slurm.sh new file mode 100644 index 00000000..8caa80d7 --- /dev/null +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/ddp_slurm.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=TorchTest +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:15:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=4 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gpus-per-node=4 +# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../envAI_hdfml/bin/activate + +# job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: 
$SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +# launch training +TRAINING_CMD="train.py -s ddp" + +srun --cpu-bind=none bash -c "torchrun \ + --log_dir='logs' \ + --nnodes=$SLURM_NNODES \ + --nproc_per_node=$SLURM_GPUS_PER_NODE \ + --rdzv_id=$SLURM_JOB_ID \ + --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ + --rdzv_backend=c10d \ + --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ + $TRAINING_CMD" + diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh new file mode 100644 index 00000000..599c4d5c --- /dev/null +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=TorchTest +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:15:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=4 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gpus-per-node=4 +# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../envAI_hdfml/bin/activate + +# job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +# launch training +TRAINING_CMD="train.py -s deepspeed" + +srun --cpu-bind=none bash -c "deepspeed $TRAINING_CMD" + diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh new file mode 100644 index 00000000..55752820 --- /dev/null +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=Hor-IT +#SBATCH --account=intertwin +#SBATCH --partition=batch +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:30:00 +#SBATCH --nodes=4 +#SBATCH --ntasks-per-node=4 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=4 +# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules 
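+# (same toolchain as ddp_slurm.sh and deepspeed_slurm.sh; it must match the
+# environment built by env-files/torch/createEnvJSC.sh)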
+ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../envAI_hdfml/bin/activate + +# job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +# launch training +TRAINING_CMD="train.py -s horovod" + +srun --cpu-bind=none python3 -u "$TRAINING_CMD" + diff --git a/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py similarity index 91% rename from tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py rename to tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py index 2d841ea7..5254cab5 100644 --- a/tutorials/distributed-ml/ex0.1_multiple_torch_strategies.py +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py @@ -3,21 +3,28 @@ Depending on the strategy you choose, you need to run this script with different ad-hoc commands: -Torch DistributedDataParallel (DDP). Launch with torchrun: +Torch DistributedDataParallel (DDP). Launch from terminal with torchrun: >>> micromamba run -p ../../.venv-pytorch/ torchrun \ --rdzv_backend=c10d \ --rdzv_endpoint=localhost:0 \ --nnodes=1 \ --nproc_per_node=4 \ - ex0.1_multiple_torch_strategies.py -s ddp + train.py -s ddp +with SLURM: +>>> sbatch ddp_slurm.sh -DeepSpeed. Launch with deepspeed: +DeepSpeed. Launch from terminal with deepspeed: >>> micromamba run -p ../../.venv-pytorch/ deepspeed \ - ex0.1_multiple_torch_strategies.py -s deepspeed + train.py -s deepspeed +with SLURM: +>>> sbatch deepSpeed_slurm.sh -Horovod. Launch with horovodrun: +Horovod. Only works with SLURM: +>>> sbatch horovod_slurm.sh + +Horovod. 
Launch with horovodrun (NOT WORKING YET): >>> micromamba run -p ../../.venv-pytorch/ horovodrun -np 4 \ - python ex0.1_multiple_torch_strategies.py -s horovod + python train.py -s horovod """ from typing import Any import os From 0635065c793963a33752c7de572f15bb288b29d8 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 10 Feb 2024 18:47:15 +0100 Subject: [PATCH 025/171] UPDATE torch strategies tutorial --- .gitignore | 1 + .../torch-ddp-deepspeed-horovod/README.md | 32 +++++++++++++++++++ .../torch-ddp-deepspeed-horovod/ddp_slurm.sh | 2 +- .../deepspeed_slurm.sh | 14 +++++--- .../torch-ddp-deepspeed-horovod/hvd_slurm.sh | 10 +++--- 5 files changed, 50 insertions(+), 9 deletions(-) create mode 100644 tutorials/distributed-ml/torch-ddp-deepspeed-horovod/README.md diff --git a/.gitignore b/.gitignore index 0d55c2b1..7f714a0c 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ pl-training.yml *.pth *.csv *tar.gz +0 # Use cases files MNIST diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/README.md b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/README.md new file mode 100644 index 00000000..4df309bf --- /dev/null +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/README.md @@ -0,0 +1,32 @@ +# Tutorial: distributed strategies for PyTorch + +In this tutorial we show how to use torch `DistributedDataParallel` (DDP), Horovod and DeepSpeed from the same client code. + +First, from the root of this repo, build the environment containing +pytorch, horovod and deepspeed. You can *try* with: + +```bash +# Creates a Python venv called envAI_hdfml +make torch-gpu-jsc +``` + +Each distributed strategy has its own SLURM job script, which +should be used to run it: + +If you want to distribute the code in `train.py` with **torch DDP**, run from terminal: + +```bash +sbatch ddp_slurm.sh +``` + +If you want to distribute the code in `train.py` with **DeepSpeed**, run from terminal: + +```bash +sbatch deepspeed_slurm.sh +``` + +If you want to distribute the code in `train.py` with **Horovod**, run from terminal: + +```bash +sbatch hvd_slurm.sh +``` diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/ddp_slurm.sh b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/ddp_slurm.sh index 8caa80d7..8cf0280b 100644 --- a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/ddp_slurm.sh +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/ddp_slurm.sh @@ -1,7 +1,7 @@ #!/bin/bash # general configuration of the job -#SBATCH --job-name=TorchTest +#SBATCH --job-name=Torch_DDP_tutorial #SBATCH --account=intertwin #SBATCH --mail-user= #SBATCH --mail-type=ALL diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh index 599c4d5c..f93de05c 100644 --- a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh @@ -1,7 +1,7 @@ #!/bin/bash # general configuration of the job -#SBATCH --job-name=TorchTest +#SBATCH --job-name=Torch_DeepSpeed_tutorial #SBATCH --account=intertwin #SBATCH --mail-user= #SBATCH --mail-type=ALL @@ -11,7 +11,7 @@ # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=4 +#SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=32 #SBATCH --gpus-per-node=4 @@ -43,12 +43,18 @@ if [ "$debug" = true ] ; then fi echo -# set comm -export CUDA_VISIBLE_DEVICES="0,1,2,3" +# # set comm +# export 
CUDA_VISIBLE_DEVICES="0,1,2,3" +# export OMP_NUM_THREADS=1 +# if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then +# export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +# fi +export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} export OMP_NUM_THREADS=1 if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK fi +export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training TRAINING_CMD="train.py -s deepspeed" diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh index 55752820..3ba6c9fe 100644 --- a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh @@ -1,7 +1,7 @@ #!/bin/bash # general configuration of the job -#SBATCH --job-name=Hor-IT +#SBATCH --job-name=Torch_HVD_tutorial #SBATCH --account=intertwin #SBATCH --partition=batch #SBATCH --output=job.out @@ -39,15 +39,17 @@ if [ "$debug" = true ] ; then fi echo -# set comm -export CUDA_VISIBLE_DEVICES="0,1,2,3" +# set vars +# export NCCL_DEBUG=INFO +export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} export OMP_NUM_THREADS=1 if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK fi +export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training TRAINING_CMD="train.py -s horovod" -srun --cpu-bind=none python3 -u "$TRAINING_CMD" +srun --cpu-bind=none python3 -u $TRAINING_CMD From b40ce169f00e9577326a7eeecdd96b0af8449a58 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Mon, 12 Feb 2024 16:38:37 +0530 Subject: [PATCH 026/171] Update createEnvJSC.sh --- env-files/torch/createEnvJSC.sh | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/env-files/torch/createEnvJSC.sh b/env-files/torch/createEnvJSC.sh index f414f6e0..456022a1 100644 --- a/env-files/torch/createEnvJSC.sh +++ b/env-files/torch/createEnvJSC.sh @@ -170,18 +170,6 @@ if __name__ == '__main__': """ >> ${cDir}/envAI_${sysN}/bin/torchrun fi -# JUBE benchmarking environment -if [ -f "${cDir}/envAI_${sysN}/bin/jube" ]; then - echo 'JUBE already installed' -else - pip3 install --no-cache-dir http://apps.fz-juelich.de/jsc/jube/jube2/download.php?version=latest -fi - -# unit tests -echo -echo 'AI4HPC is installed!' -echo - # some tests echo "unit tests:" for item in 'torch' 'deepspeed' 'horovod';do @@ -193,4 +181,4 @@ pip install --upgrade pip pip install -e . 
# cleanup -rm -rf horovod *.tar.gz \ No newline at end of file +rm -rf horovod *.tar.gz From d865ee9cf013bc8569f17a2d895d796f69de8f0a Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Mon, 12 Feb 2024 16:53:20 +0530 Subject: [PATCH 027/171] Update hvd_slurm.sh --- .../distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh index 3ba6c9fe..32e8112f 100644 --- a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh @@ -51,5 +51,5 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training TRAINING_CMD="train.py -s horovod" -srun --cpu-bind=none python3 -u $TRAINING_CMD +srun --cpu-bind=none python -u $TRAINING_CMD From 737be3781508adb3d09c9f00f5c5d7134564c9b0 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Mon, 12 Feb 2024 16:55:11 +0530 Subject: [PATCH 028/171] Update README.md --- tutorials/distributed-ml/torch-ddp-deepspeed-horovod/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/README.md b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/README.md index 4df309bf..b3f121d4 100644 --- a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/README.md +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/README.md @@ -1,6 +1,6 @@ # Tutorial: distributed strategies for PyTorch -In this tutorial we show how to use torch `DistributedDataParallel` (DDP), Horovod and DeepSpeed from the same client code. +In this tutorial we show how to use torch `DistributedDataParallel` (DDP), Horovod and DeepSpeed from the same client code. Note that the environment is tested on the HDFML system at JSC. For other systems, the module versions might need change accordingly. First, from the root of this repo, build the environment containing pytorch, horovod and deepspeed. You can *try* with: From c7999e4b8b93c3dc8fe8627ac7cfc23f3ff94c2c Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 12 Feb 2024 17:02:40 +0100 Subject: [PATCH 029/171] UPDATE distributed tutorial --- src/itwinai/torch/distributed.py | 1124 ++++++++--------- .../deepspeed_slurm.sh | 12 + .../torch-ddp-deepspeed-horovod/train.py | 2 +- 3 files changed, 575 insertions(+), 563 deletions(-) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index 1a963b38..ca482909 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -20,186 +20,420 @@ from ..distributed import DistributedStrategy -class TorchDistributedStrategy_old(DistributedStrategy): - """Abstract class to define the distributed backend methods for - PyTorch models. - """ - @abc.abstractmethod - def init_backend(self) -> None: - """Initializes the chosen distributed backend""" - - @abc.abstractmethod - def distribute_model(self, model: Any) -> Any: - """Distributes a machine learning model. - - Args: - model (Any): a generic ML model to be distributed. - - Returns: - Any: distributed model instance. 
- """ +class OptimizerConfig: + def __init__(self, optim_class, **kwargs) -> None: + self.optim_class = optim_class + self.kwargs = kwargs - @abc.abstractmethod - def broadcast_params(self, model: Any, optimizer: Any) -> None: - """Broadcasts variables from root rank to all other processes/ + def to_optim(self, parameters) -> optim.Optimizer: + return self.optim_class(parameters, **self.kwargs) - Args: - model (Any): distributed model. - optimizer (Any): optimizer. - """ - @abc.abstractmethod - def distribute_optimizer(self, optimizer: Any, model: Any) -> Any: - """Distribute optimizer. +class LRSchedulerConfig: + def __init__(self, scheduler_class, **kwargs) -> None: + self.scheduler_class = scheduler_class + self.kwargs = kwargs - Args: - optimizer (Any): optimizer. - model (Any): distributed model. + def to_scheduler(self, optim) -> LRScheduler: + return self.scheduler_class(optim, **self.kwargs) - Returns: - Any: distributed optimizer. - """ - @abc.abstractmethod - def dist_gwsize(self) -> int: - """Returns the total number of processes (global world size). +class ModelEngineConfig(BaseModel): + mixed_precision: bool = False - Returns: - int: global world size. - """ - @abc.abstractmethod - def dist_lwsize(self) -> int: - """Returns the number of local workers available on a node - (local world size). - Usually it is equal to the number of available GPUs. +class ModelEngine(abc.ABC): + """Wrapper around distributed model""" - Returns: - int: local world size. - """ + model: nn.Module + _model_parameters: Any + optimizer: optim.Optimizer + lr_scheduler: LRScheduler + # config: ModelEngineConfig + mixed_precision: bool = False + grad_scaler: amp.GradScaler = None - @abc.abstractmethod - def dist_grank(self) -> int: - """Returns the global rank of the current process. - Rank ranges from 0 to world_size. + def __init__( + self, + model: nn.Module, + # model_parameters: Any, + optimizer: Union[optim.Optimizer, OptimizerConfig], + lr_scheduler: Optional[Union[LRScheduler, LRSchedulerConfig]] = None, + mixed_precision: bool = False + # config: Optional[ModelEngineConfig] = None + ) -> None: + super().__init__() + self.model = model + self.optimizer = optimizer + self.lr_scheduler = lr_scheduler + # self._model_parameters = model_parameters + # if isinstance(optimizer, OptimizerConfig): + # self.optimizer = optimizer.to_optim(model_parameters) + # else: + # self.optimizer = optimizer - Returns: - int: global rank. - """ + # if isinstance(lr_scheduler, LRSchedulerConfig): + # self.lr_scheduler = lr_scheduler.to_scheduler(self.optimizer) + # else: + # self.lr_scheduler = lr_scheduler - @abc.abstractmethod - def dist_lrank(self) -> int: - """Returns the local rank of the current process. + # if not config: + # self.config = ModelEngineConfig() + self.mixed_precision = mixed_precision + if mixed_precision: + self.grad_scaler = amp.GradScaler() - Returns: - int: local rank. - """ + def __call__(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + # Wrapper of self.forward() + return self.forward(*args, **kwds) - def is_main_worker(self) -> bool: - """Checks if local worker has global rank equal to zero. + def forward(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + return self.model(*args, **kwds) - Returns: - bool: True if main worker. 
- """ - return self.dist_grank() == 0 + def train(self, mode: bool = True) -> nn.Module: + """Set model in training mode.""" + self.model.train(mode=mode) + return self.model - def dist_device(self) -> str: - """Device used by local worker. + def eval(self) -> nn.Module: + """Set model in inference mode.""" + self.model.eval() + return self.model - Returns: - str: torch device in the form 'cuda:N'. - """ - return f"cuda:{self.dist_lrank()}" + def to(self, device) -> nn.Module: + """Move model to specified device.""" + self.model.to(device) + return self.model @abc.abstractmethod - def clean_up(self) -> None: - """Cleans up resources allocated by distributed strategy.""" + def zero_grad(): + """Set gradients to zero for the optimizer.""" @abc.abstractmethod - def par_allgather_obj(self, obj: Any) -> List[Any]: - """Gathers any object from the whole group in a list (to all workers). + def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: + """Perform backward pass and return the loss. Args: - obj (Any): object to gather from all workers. + loss_fn (Callable): computes the loss. + *loss_args: are the arguments to be passed to ``loss_fn``. Returns: - List[Any]: list of objects gathered from all workers. + torch.Tensor: computed loss. """ + @abc.abstractmethod + def optimizer_step(self): + """Perform optimizer step.""" -class DDPDistributedStrategy_old(TorchDistributedStrategy_old): - """PyTorch DDP distributed strategy class. + @abc.abstractmethod + def lr_scheduler_step(self): + """Perform lr scheduler step, if present.""" + # This should be incorporated in the optim step: + # https://deepspeed.readthedocs.io/en/latest/schedulers.html + # scheduler is updated automatically at each training step - Args: - backend (str): Name of the communication backend to employ. - """ + @abc.abstractmethod + def save_checkpoint(self): + """Save checkpoint to persistent storage.""" - backend: str - def __init__(self, backend: str) -> None: - super().__init__() - self.backend = backend +class DDPModelEngine(ModelEngine): + """Model engine for torch DDP distributed strategy.""" - def init_backend(self) -> None: - """Initializes the distributed process group and the distributed - package. - """ - if torch.cuda.is_available(): - dist.init_process_group(backend=self.backend) + def forward(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + return self.model(*args, **kwds) + else: + return self.model(*args, **kwds) - def distribute_model(self, model: nn.Module) -> nn.Module: - """Achieves data parallelism by synchronizing the gradients - across each model replica located in each available - computing device. + def zero_grad(self): + """Set gradients to zero for the optimizer.""" + self.optimizer.zero_grad() + + def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: + """Perform backward pass and return the loss. Args: - model (nn.Module): ML model to be distributed. + loss_fn (Callable): computes the loss. + *loss_args: are the arguments to be passed to ``loss_fn``. Returns: - nn.Module: Distributed model replicas across all devices. - that are to be synchronized. + torch.Tensor: computed loss. 
""" - if torch.cuda.is_available(): - # device = self.dist_lrank() - model = model.to(self.dist_device()) - dist_model = torch.nn.parallel.DistributedDataParallel( - model, - device_ids=[self.dist_device()], - output_device=self.dist_device() - ) + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + loss = loss_fn(*loss_args) + + # Scales loss. Calls backward() on scaled loss to create scaled + # gradients. + # Backward passes under autocast are not recommended. + # Backward ops run in the same dtype autocast chose for + # corresponding forward ops. + loss = self.grad_scaler.scale(loss) else: - dist_model = model + loss = loss_fn(*loss_args) + loss.backward() + return loss - return dist_model + def optimizer_step(self): + """Perform optimizer step.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training + # scaler.step() first unscales the gradients of the optimizer's + # assigned params. + # If these gradients do not contain infs or NaNs, optimizer.step() + # is then called, + # otherwise, optimizer.step() is skipped. + self.grad_scaler.step(self.optimizer) - def broadcast_params( - self, - model: nn.Module, - optimizer: optim.Optimizer - ) -> None: - """Do nothing. Only applicable for Horovod. + # Updates the scale for next iteration. + self.grad_scaler.update() + else: + self.optimizer.step() - Args: - model (nn.Module): ML model - optimizer (optim.Optimizer): Optimizer + def lr_scheduler_step(self): + """Perform lr scheduler step, if present.""" + if self.lr_scheduler: + self.lr_scheduler.step() + + def save_checkpoint(self): + """Save checkpoint to persistent storage.""" + raise NotImplementedError + + +class DSModelEngine(ModelEngine): + """Model engine for DeeSpeed distributed strategy.""" + + def forward(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + return self.model(*args, **kwds) + else: + return self.model(*args, **kwds) + + def zero_grad(self): + """Set gradients to zero for the optimizer.""" + self.optimizer.zero_grad() + + def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: + """Perform backward pass and return the loss. + + Args: + loss_fn (Callable): computes the loss. + *loss_args: are the arguments to be passed to ``loss_fn``. + + Returns: + torch.Tensor: computed loss. """ - pass + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + loss = loss_fn(*loss_args) - def distribute_optimizer( - self, - optimizer: optim.Optimizer, - model: nn.Module = None - ) -> optim.Optimizer: - """Returns the optimizer from argument. + # Scales loss. Calls backward() on scaled loss to create scaled + # gradients. + # Backward passes under autocast are not recommended. + # Backward ops run in the same dtype autocast chose for + # corresponding forward ops. 
+ loss = self.grad_scaler.scale(loss) + else: + loss = loss_fn(*loss_args) + loss.backward() + return loss + + def optimizer_step(self): + """Perform optimizer step.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training + # scaler.step() first unscales the gradients of the optimizer's + # assigned params. + # If these gradients do not contain infs or NaNs, optimizer.step() + # is then called, + # otherwise, optimizer.step() is skipped. + self.grad_scaler.step(self.optimizer) + + # Updates the scale for next iteration. + self.grad_scaler.update() + else: + self.optimizer.step() + + def lr_scheduler_step(self): + """Perform lr scheduler step, if present.""" + if self.lr_scheduler: + self.lr_scheduler.step() + + def save_checkpoint(self): + """Save checkpoint to persistent storage.""" + raise NotImplementedError + + +class TorchDistributedStrategy(DistributedStrategy): + """Abstract class to define the distributed backend methods for + PyTorch models. + """ + @abc.abstractmethod + def init(self) -> None: + """Initializes the chosen distributed backend""" + + # @abc.abstractmethod + # def distributed_engine( + # self, model: nn.Module, optimizer: Optimizer, + # lr_scheduler: Optional[LRScheduler] = None + # ) -> ModelEngine: + # """Build a distributed model engine.""" + + @abc.abstractmethod + def distributed( + self, model: nn.Module, optimizer: Optimizer, + lr_scheduler: Optional[LRScheduler] = None + ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: + """Setup model, optimizer and scheduler for distributed.""" + + @abc.abstractmethod + def dist_gwsize(self) -> int: + """Returns the total number of processes (global world size). + + Returns: + int: global world size. + """ + + @abc.abstractmethod + def dist_lwsize(self) -> int: + """Returns the number of local workers available on a node + (local world size). + Usually it is equal to the number of available GPUs. + + Returns: + int: local world size. + """ + + @abc.abstractmethod + def dist_grank(self) -> int: + """Returns the global rank of the current process. + Rank ranges from 0 to world_size. + + Returns: + int: global rank. + """ + + @abc.abstractmethod + def dist_lrank(self) -> int: + """Returns the local rank of the current process. + + Returns: + int: local rank. + """ + + def is_main_worker(self) -> bool: + """Checks if local worker has global rank equal to zero. + + Returns: + bool: True if main worker. + """ + return self.dist_grank() == 0 + + def dist_device(self) -> str: + """Device used by local worker. + + Returns: + str: torch device in the form 'cuda:N'. + """ + return f"cuda:{self.dist_lrank()}" + + @abc.abstractmethod + def clean_up(self) -> None: + """Cleans up resources allocated by distributed strategy.""" + + @abc.abstractmethod + def par_allgather_obj(self, obj: Any) -> List[Any]: + """Gathers any object from the whole group in a list (to all workers). Args: - optimizer (optim.Optimizer): optimizer. - model (nn.Module): ML model. Unused here. + obj (Any): object to gather from all workers. Returns: - optim.Optimizer: Distributed optimizer. + List[Any]: list of objects gathered from all workers. """ - return optimizer + + +class DDPDistributedStrategy(TorchDistributedStrategy): + """PyTorch DDP distributed strategy class. + + Args: + backend (str): Name of the communication backend to employ. 
+ """ + + backend: str + model: DDPModelEngine + + def __init__(self, backend: str) -> None: + super().__init__() + self.backend = backend + + def init(self) -> None: + """Initializes the distributed process group and the distributed + package. + """ + if torch.cuda.is_available() and torch.cuda.device_count() > 1: + dist.init_process_group(backend=self.backend) + else: + print("WARNING: trying to run distributed on insufficient" + " resources. Skipping distributed process group setup.") + + # def distributed_engine( + # self, model: nn.Module, optimizer: Optimizer, + # lr_scheduler: Optional[LRScheduler] = None, + # mixed_precision: bool = False + # ) -> ModelEngine: + # """Build a distributed model engine.""" + # if torch.cuda.is_available(): + # # device = self.dist_lrank() + # model = model.to(self.dist_device()) + # dist_model = torch.nn.parallel.DistributedDataParallel( + # model, + # device_ids=[self.dist_device()], + # output_device=self.dist_device() + # ) + # else: + # dist_model = model + + # model_engine = DDPModelEngine( + # dist_model, optimizer, lr_scheduler, + # mixed_precision=mixed_precision + # ) + + # return model_engine + + def distributed( + self, model: nn.Module, optimizer: Optimizer, + lr_scheduler: Optional[LRScheduler] = None, + **kwargs + ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: + """Setup model, optimizer and scheduler for distributed.""" + if torch.cuda.is_available(): + # device = self.dist_lrank() + model = model.to(self.dist_device()) + dist_model = torch.nn.parallel.DistributedDataParallel( + model, + device_ids=[self.dist_device()], + output_device=self.dist_device() + ) + else: + dist_model = model + + return dist_model, optimizer, lr_scheduler def dist_gwsize(self) -> int: """Returns the total number of processes (global world size). @@ -256,7 +490,7 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: return res -class DSDistributedStrategy_old(TorchDistributedStrategy_old): +class DSDistributedStrategy(TorchDistributedStrategy): """DeepSpeed distributed strategy class. Args: @@ -286,60 +520,30 @@ def _load_config(self, ds_config): else: raise ValueError("ds_config is not a dictionary not a path.") - def init_backend(self) -> None: + def init(self) -> None: """Initializes the distributed process group and the distributed package. """ # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization deepspeed.init_distributed(dist_backend=self.backend) - def distribute_model(self, model: nn.Module) -> nn.Module: - """Achieves data parallelism by synchronizing the gradients - across each model replica located in each available - computing device. - - Args: - model (nn.Module): ML model to be distributed. - - Returns: - nn.Module: Distributed model replicas across all devices - that are to be synchronized. 
- """ + def distributed( + self, model: nn.Module, optimizer: Optional[Optimizer] = None, + lr_scheduler: Optional[LRScheduler] = None, + model_parameters: Optional[Any] = None, **kwargs + ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: + """Setup model, optimizer and scheduler for distributed.""" # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization - distrib_model, __, __, __ = deepspeed.initialize( + # To prioritize optim in the config, you need to pass optim=None + distrib_model, optimizer, _, lr_scheduler = deepspeed.initialize( model=model, - model_parameters=model.parameters(), + model_parameters=model_parameters, + optimizer=optimizer, + lr_scheduler=lr_scheduler, dist_init_required=True, config=self.config ) - return distrib_model - - def broadcast_params( - self, model: nn.Module, optimizer: optim.Optimizer - ) -> None: - """Only applicable for Horovod. Does nothing. - - Args: - model (nn.Module): ML model. - optimizer (optim.Optimizer): optimizer. - """ - pass - - def distribute_optimizer( - self, - optimizer: optim.Optimizer, - model: nn.Module = None - ) -> optim.Optimizer: - """Returns the optimizer from argument. - - Args: - optimizer (optim.Optimizer): torch optimizer. - model (nn.Module): torch neural network. - - Returns: - optim.Optimizer: distributed optimizer. - """ - return optimizer + return distrib_model, optimizer, lr_scheduler def dist_gwsize(self) -> int: """Returns the total number of processes (global world size). @@ -394,26 +598,34 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: return res -class HVDDistributedStrategy_old(TorchDistributedStrategy_old): +class HVDDistributedStrategy(TorchDistributedStrategy): """Horovod distributed strategy class.""" - def init_backend(self) -> None: + def init(self) -> None: """Initializes the Horovod distributed backend.""" hvd.init() + torch.cuda.set_device(hvd.local_rank()) - def distribute_model(self, model: nn.Module) -> nn.Module: - """Only applicable for DDP and DeepSpeed. - For Horovod, returns the same model passed as argument. + def distributed( + self, model: nn.Module, optimizer: Optional[Optimizer] = None, + lr_scheduler: Optional[LRScheduler] = None, + **kwargs + ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: + """Setup model, optimizer and scheduler for distributed.""" - Args: - model (nn.Module): ML model to be distributed. + model.to(self.dist_device()) + self._broadcast_params(model, optimizer) - Returns: - nn.Module: ML model passed in the argument. - """ - return model + # TODO: here you may need to scale the lr - def broadcast_params( + distOptimizer = hvd.DistributedOptimizer( + optimizer, + named_parameters=model.named_parameters(), + op=hvd.Average + ) + return model, distOptimizer, lr_scheduler + + def _broadcast_params( self, model: nn.Module, optimizer: optim.Optimizer ) -> None: """Broadcasts variables from root rank to all other processes. @@ -427,29 +639,6 @@ def broadcast_params( hvd.broadcast_parameters(model.state_dict(), root_rank=0) hvd.broadcast_optimizer_state(optimizer, root_rank=-0) - def distribute_optimizer( - self, - optimizer: optim.Optimizer, - model: nn.Module - ) -> optim.Optimizer: - """Constructs a DistributedOptimizer, for computing single-process - gradient values and applying gradient updates after the gradient values - have been combined across all the Horovod ranks. - - Args: - optimizer (optim.Optimizer): Optimizer to be distributed. - model (nn.Module): ML model to be trained. 
- - Returns: - optim.Optimizer: Distributed optimizer across all ranks. - """ - distOptimizer = hvd.DistributedOptimizer( - optimizer, - named_parameters=model.named_parameters(), - op=hvd.Average - ) - return distOptimizer - def dist_gwsize(self) -> int: """Returns the total number of processes (global world size). @@ -488,301 +677,60 @@ def clean_up(self) -> None: """Shuts Horovod down.""" hvd.shutdown() - def par_allgather_obj(self, obj: Any) -> list[Any]: - """Gathers scalar objects across all workers to a - list with size(#worker), uses horovod communicator - - Args: - obj (Any): object in a worker. - - Returns: - list: gathered list with size(#worker). - """ - return hvd.allgather_object(obj) - - -################################################################ - -class OptimizerConfig: - def __init__(self, optim_class, **kwargs) -> None: - self.optim_class = optim_class - self.kwargs = kwargs - - def to_optim(self, parameters) -> optim.Optimizer: - return self.optim_class(parameters, **self.kwargs) - - -class LRSchedulerConfig: - def __init__(self, scheduler_class, **kwargs) -> None: - self.scheduler_class = scheduler_class - self.kwargs = kwargs - - def to_scheduler(self, optim) -> LRScheduler: - return self.scheduler_class(optim, **self.kwargs) - - -class ModelEngineConfig(BaseModel): - mixed_precision: bool = False - - -class ModelEngine(abc.ABC): - """Wrapper around distributed model""" - - model: nn.Module - _model_parameters: Any - optimizer: optim.Optimizer - lr_scheduler: LRScheduler - # config: ModelEngineConfig - mixed_precision: bool = False - grad_scaler: amp.GradScaler = None - - def __init__( - self, - model: nn.Module, - # model_parameters: Any, - optimizer: Union[optim.Optimizer, OptimizerConfig], - lr_scheduler: Optional[Union[LRScheduler, LRSchedulerConfig]] = None, - mixed_precision: bool = False - # config: Optional[ModelEngineConfig] = None - ) -> None: - super().__init__() - self.model = model - self.optimizer = optimizer - self.lr_scheduler = lr_scheduler - # self._model_parameters = model_parameters - # if isinstance(optimizer, OptimizerConfig): - # self.optimizer = optimizer.to_optim(model_parameters) - # else: - # self.optimizer = optimizer - - # if isinstance(lr_scheduler, LRSchedulerConfig): - # self.lr_scheduler = lr_scheduler.to_scheduler(self.optimizer) - # else: - # self.lr_scheduler = lr_scheduler - - # if not config: - # self.config = ModelEngineConfig() - self.mixed_precision = mixed_precision - if mixed_precision: - self.grad_scaler = amp.GradScaler() - - def __call__(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - # Wrapper of self.forward() - return self.forward(*args, **kwds) - - def forward(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - return self.model(*args, **kwds) - - def train(self, mode: bool = True) -> nn.Module: - """Set model in training mode.""" - self.model.train(mode=mode) - return self.model - - def eval(self) -> nn.Module: - """Set model in inference mode.""" - self.model.eval() - return self.model - - def to(self, device) -> nn.Module: - """Move model to specified device.""" - self.model.to(device) - return self.model - - @abc.abstractmethod - def zero_grad(): - """Set gradients to zero for the optimizer.""" - - @abc.abstractmethod - def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: - """Perform backward pass and return the loss. - - Args: - loss_fn (Callable): computes the loss. - *loss_args: are the arguments to be passed to ``loss_fn``. 
- - Returns: - torch.Tensor: computed loss. - """ - - @abc.abstractmethod - def optimizer_step(self): - """Perform optimizer step.""" - - @abc.abstractmethod - def lr_scheduler_step(self): - """Perform lr scheduler step, if present.""" - # This should be incorporated in the optim step: - # https://deepspeed.readthedocs.io/en/latest/schedulers.html - # scheduler is updated automatically at each training step - - @abc.abstractmethod - def save_checkpoint(self): - """Save checkpoint to persistent storage.""" - - -class DDPModelEngine(ModelEngine): - """Model engine for torch DDP distributed strategy.""" - - def forward(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. - with autocast(device_type='cuda', dtype=torch.float16): - return self.model(*args, **kwds) - else: - return self.model(*args, **kwds) - - def zero_grad(self): - """Set gradients to zero for the optimizer.""" - self.optimizer.zero_grad() - - def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: - """Perform backward pass and return the loss. - - Args: - loss_fn (Callable): computes the loss. - *loss_args: are the arguments to be passed to ``loss_fn``. - - Returns: - torch.Tensor: computed loss. - """ - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. - with autocast(device_type='cuda', dtype=torch.float16): - loss = loss_fn(*loss_args) - - # Scales loss. Calls backward() on scaled loss to create scaled - # gradients. - # Backward passes under autocast are not recommended. - # Backward ops run in the same dtype autocast chose for - # corresponding forward ops. - loss = self.grad_scaler.scale(loss) - else: - loss = loss_fn(*loss_args) - loss.backward() - return loss - - def optimizer_step(self): - """Perform optimizer step.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training - # scaler.step() first unscales the gradients of the optimizer's - # assigned params. - # If these gradients do not contain infs or NaNs, optimizer.step() - # is then called, - # otherwise, optimizer.step() is skipped. - self.grad_scaler.step(self.optimizer) - - # Updates the scale for next iteration. - self.grad_scaler.update() - else: - self.optimizer.step() - - def lr_scheduler_step(self): - """Perform lr scheduler step, if present.""" - if self.lr_scheduler: - self.lr_scheduler.step() - - def save_checkpoint(self): - """Save checkpoint to persistent storage.""" - raise NotImplementedError - - -class DSModelEngine(ModelEngine): - """Model engine for DeeSpeed distributed strategy.""" - - def forward(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. - with autocast(device_type='cuda', dtype=torch.float16): - return self.model(*args, **kwds) - else: - return self.model(*args, **kwds) - - def zero_grad(self): - """Set gradients to zero for the optimizer.""" - self.optimizer.zero_grad() - - def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: - """Perform backward pass and return the loss. - - Args: - loss_fn (Callable): computes the loss. - *loss_args: are the arguments to be passed to ``loss_fn``. - - Returns: - torch.Tensor: computed loss. 
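For reference, the mixed-precision logic these engine methods implement is the standard torch AMP recipe (autocast forward, scaled backward, guarded optimizer step). A minimal self-contained sketch, with hypothetical model and data and assuming a CUDA device is available:

```python
import torch

model = torch.nn.Linear(8, 1).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()
scaler = torch.cuda.amp.GradScaler()

x = torch.randn(16, 8, device="cuda")
y = torch.randn(16, 1, device="cuda")

optimizer.zero_grad()
with torch.autocast(device_type="cuda", dtype=torch.float16):
    loss = loss_fn(model(x), y)
scaler.scale(loss).backward()  # backward on the scaled loss
scaler.step(optimizer)         # unscales grads; skips the step on inf/NaN
scaler.update()                # adjusts the scale for the next iteration
```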
- """ - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. - with autocast(device_type='cuda', dtype=torch.float16): - loss = loss_fn(*loss_args) - - # Scales loss. Calls backward() on scaled loss to create scaled - # gradients. - # Backward passes under autocast are not recommended. - # Backward ops run in the same dtype autocast chose for - # corresponding forward ops. - loss = self.grad_scaler.scale(loss) - else: - loss = loss_fn(*loss_args) - loss.backward() - return loss - - def optimizer_step(self): - """Perform optimizer step.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training - # scaler.step() first unscales the gradients of the optimizer's - # assigned params. - # If these gradients do not contain infs or NaNs, optimizer.step() - # is then called, - # otherwise, optimizer.step() is skipped. - self.grad_scaler.step(self.optimizer) + def par_allgather_obj(self, obj: Any) -> list[Any]: + """Gathers scalar objects across all workers to a + list with size(#worker), uses horovod communicator - # Updates the scale for next iteration. - self.grad_scaler.update() - else: - self.optimizer.step() + Args: + obj (Any): object in a worker. - def lr_scheduler_step(self): - """Perform lr scheduler step, if present.""" - if self.lr_scheduler: - self.lr_scheduler.step() + Returns: + list: gathered list with size(#worker). + """ + return hvd.allgather_object(obj) - def save_checkpoint(self): - """Save checkpoint to persistent storage.""" - raise NotImplementedError +################################################################### -class TorchDistributedStrategy(DistributedStrategy): +class TorchDistributedStrategy_old(DistributedStrategy): """Abstract class to define the distributed backend methods for PyTorch models. """ @abc.abstractmethod - def init(self) -> None: + def init_backend(self) -> None: """Initializes the chosen distributed backend""" - # @abc.abstractmethod - # def distributed_engine( - # self, model: nn.Module, optimizer: Optimizer, - # lr_scheduler: Optional[LRScheduler] = None - # ) -> ModelEngine: - # """Build a distributed model engine.""" + @abc.abstractmethod + def distribute_model(self, model: Any) -> Any: + """Distributes a machine learning model. + + Args: + model (Any): a generic ML model to be distributed. + + Returns: + Any: distributed model instance. + """ @abc.abstractmethod - def distributed( - self, model: nn.Module, optimizer: Optimizer, - lr_scheduler: Optional[LRScheduler] = None - ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: - """Setup model, optimizer and scheduler for distributed.""" + def broadcast_params(self, model: Any, optimizer: Any) -> None: + """Broadcasts variables from root rank to all other processes/ + + Args: + model (Any): distributed model. + optimizer (Any): optimizer. + """ + + @abc.abstractmethod + def distribute_optimizer(self, optimizer: Any, model: Any) -> Any: + """Distribute optimizer. + + Args: + optimizer (Any): optimizer. + model (Any): distributed model. + + Returns: + Any: distributed optimizer. + """ @abc.abstractmethod def dist_gwsize(self) -> int: @@ -851,7 +799,7 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: """ -class DDPDistributedStrategy(TorchDistributedStrategy): +class DDPDistributedStrategy_old(TorchDistributedStrategy_old): """PyTorch DDP distributed strategy class. 
Args: @@ -859,52 +807,30 @@ class DDPDistributedStrategy(TorchDistributedStrategy): """ backend: str - model: DDPModelEngine def __init__(self, backend: str) -> None: super().__init__() self.backend = backend - def init(self) -> None: + def init_backend(self) -> None: """Initializes the distributed process group and the distributed package. """ - if torch.cuda.is_available() and torch.cuda.device_count() > 1: + if torch.cuda.is_available(): dist.init_process_group(backend=self.backend) - else: - print("WARNING: trying to run distributed on insufficient" - " resources. Skipping distributed process group setup.") - - # def distributed_engine( - # self, model: nn.Module, optimizer: Optimizer, - # lr_scheduler: Optional[LRScheduler] = None, - # mixed_precision: bool = False - # ) -> ModelEngine: - # """Build a distributed model engine.""" - # if torch.cuda.is_available(): - # # device = self.dist_lrank() - # model = model.to(self.dist_device()) - # dist_model = torch.nn.parallel.DistributedDataParallel( - # model, - # device_ids=[self.dist_device()], - # output_device=self.dist_device() - # ) - # else: - # dist_model = model - # model_engine = DDPModelEngine( - # dist_model, optimizer, lr_scheduler, - # mixed_precision=mixed_precision - # ) + def distribute_model(self, model: nn.Module) -> nn.Module: + """Achieves data parallelism by synchronizing the gradients + across each model replica located in each available + computing device. - # return model_engine + Args: + model (nn.Module): ML model to be distributed. - def distributed( - self, model: nn.Module, optimizer: Optimizer, - lr_scheduler: Optional[LRScheduler] = None, - **kwargs - ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: - """Setup model, optimizer and scheduler for distributed.""" + Returns: + nn.Module: Distributed model replicas across all devices. + that are to be synchronized. + """ if torch.cuda.is_available(): # device = self.dist_lrank() model = model.to(self.dist_device()) @@ -916,7 +842,36 @@ def distributed( else: dist_model = model - return dist_model, optimizer, lr_scheduler + return dist_model + + def broadcast_params( + self, + model: nn.Module, + optimizer: optim.Optimizer + ) -> None: + """Do nothing. Only applicable for Horovod. + + Args: + model (nn.Module): ML model + optimizer (optim.Optimizer): Optimizer + """ + pass + + def distribute_optimizer( + self, + optimizer: optim.Optimizer, + model: nn.Module = None + ) -> optim.Optimizer: + """Returns the optimizer from argument. + + Args: + optimizer (optim.Optimizer): optimizer. + model (nn.Module): ML model. Unused here. + + Returns: + optim.Optimizer: Distributed optimizer. + """ + return optimizer def dist_gwsize(self) -> int: """Returns the total number of processes (global world size). @@ -973,7 +928,7 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: return res -class DSDistributedStrategy(TorchDistributedStrategy): +class DSDistributedStrategy_old(TorchDistributedStrategy_old): """DeepSpeed distributed strategy class. Args: @@ -1003,30 +958,60 @@ def _load_config(self, ds_config): else: raise ValueError("ds_config is not a dictionary not a path.") - def init(self) -> None: + def init_backend(self) -> None: """Initializes the distributed process group and the distributed package. 
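Illustrative aside (not part of the original patch): without the class scaffolding, the DDP strategy in the hunks above reduces to the usual `torch.distributed` recipe. A minimal sketch, assuming the process was started by `torchrun`/`srun` so `RANK`, `LOCAL_RANK` and `MASTER_ADDR`/`MASTER_PORT` are already set:

```python
import os
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel

dist.init_process_group(backend="nccl")
local_rank = int(os.environ["LOCAL_RANK"])
torch.cuda.set_device(local_rank)

# Hypothetical model; one process per GPU.
model = torch.nn.Linear(3, 4).to(local_rank)
ddp_model = DistributedDataParallel(
    model, device_ids=[local_rank], output_device=local_rank
)
# ... training loop using ddp_model ...
dist.destroy_process_group()
```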
""" # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization deepspeed.init_distributed(dist_backend=self.backend) - def distributed( - self, model: nn.Module, optimizer: Optional[Optimizer] = None, - lr_scheduler: Optional[LRScheduler] = None, - model_parameters: Optional[Any] = None, **kwargs - ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: - """Setup model, optimizer and scheduler for distributed.""" + def distribute_model(self, model: nn.Module) -> nn.Module: + """Achieves data parallelism by synchronizing the gradients + across each model replica located in each available + computing device. + + Args: + model (nn.Module): ML model to be distributed. + + Returns: + nn.Module: Distributed model replicas across all devices + that are to be synchronized. + """ # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization - # To prioritize optim in the config, you need to pass optim=None - distrib_model, optimizer, _, lr_scheduler = deepspeed.initialize( + distrib_model, __, __, __ = deepspeed.initialize( model=model, - model_parameters=model_parameters, - optimizer=optimizer, - lr_scheduler=lr_scheduler, + model_parameters=model.parameters(), dist_init_required=True, config=self.config ) - return distrib_model, optimizer, lr_scheduler + return distrib_model + + def broadcast_params( + self, model: nn.Module, optimizer: optim.Optimizer + ) -> None: + """Only applicable for Horovod. Does nothing. + + Args: + model (nn.Module): ML model. + optimizer (optim.Optimizer): optimizer. + """ + pass + + def distribute_optimizer( + self, + optimizer: optim.Optimizer, + model: nn.Module = None + ) -> optim.Optimizer: + """Returns the optimizer from argument. + + Args: + optimizer (optim.Optimizer): torch optimizer. + model (nn.Module): torch neural network. + + Returns: + optim.Optimizer: distributed optimizer. + """ + return optimizer def dist_gwsize(self) -> int: """Returns the total number of processes (global world size). @@ -1081,34 +1066,26 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: return res -class HVDDistributedStrategy(TorchDistributedStrategy): +class HVDDistributedStrategy_old(TorchDistributedStrategy_old): """Horovod distributed strategy class.""" - def init(self) -> None: + def init_backend(self) -> None: """Initializes the Horovod distributed backend.""" hvd.init() - torch.cuda.set_device(hvd.local_rank()) - - def distributed( - self, model: nn.Module, optimizer: Optional[Optimizer] = None, - lr_scheduler: Optional[LRScheduler] = None, - **kwargs - ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: - """Setup model, optimizer and scheduler for distributed.""" - model.to(self.dist_device()) - self._broadcast_params(model, optimizer) + def distribute_model(self, model: nn.Module) -> nn.Module: + """Only applicable for DDP and DeepSpeed. + For Horovod, returns the same model passed as argument. - # TODO: here you may need to scale the lr + Args: + model (nn.Module): ML model to be distributed. - distOptimizer = hvd.DistributedOptimizer( - optimizer, - named_parameters=model.named_parameters(), - op=hvd.Average - ) - return model, distOptimizer, lr_scheduler + Returns: + nn.Module: ML model passed in the argument. + """ + return model - def _broadcast_params( + def broadcast_params( self, model: nn.Module, optimizer: optim.Optimizer ) -> None: """Broadcasts variables from root rank to all other processes. 
@@ -1122,6 +1099,29 @@ def _broadcast_params( hvd.broadcast_parameters(model.state_dict(), root_rank=0) hvd.broadcast_optimizer_state(optimizer, root_rank=-0) + def distribute_optimizer( + self, + optimizer: optim.Optimizer, + model: nn.Module + ) -> optim.Optimizer: + """Constructs a DistributedOptimizer, for computing single-process + gradient values and applying gradient updates after the gradient values + have been combined across all the Horovod ranks. + + Args: + optimizer (optim.Optimizer): Optimizer to be distributed. + model (nn.Module): ML model to be trained. + + Returns: + optim.Optimizer: Distributed optimizer across all ranks. + """ + distOptimizer = hvd.DistributedOptimizer( + optimizer, + named_parameters=model.named_parameters(), + op=hvd.Average + ) + return distOptimizer + def dist_gwsize(self) -> int: """Returns the total number of processes (global world size). diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh index f93de05c..2a484cd5 100644 --- a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh @@ -59,5 +59,17 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training TRAINING_CMD="train.py -s deepspeed" +# This command does not integrate well with torch.distributed +# because, e.g., global rank is not recognized -> all processes print to console. +# It raises the error: AssertionError: LOCAL_RANK (2) != OMPI_COMM_WORLD_LOCAL_RANK (0), not sure how to proceed as we're seeing conflicting local rank info. srun --cpu-bind=none bash -c "deepspeed $TRAINING_CMD" + +# srun python -m deepspeed.launcher.launch \ +# --node_rank $SLURM_PROCID \ +# --master_addr ${SLURMD_NODENAME}i \ +# --master_port 29500 \ +# --world_info $WID \ +# $TRAINING_CMD --deepspeed_mpi + + diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py index 5254cab5..0a08a5eb 100644 --- a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py @@ -61,7 +61,7 @@ def parse_args() -> argparse.Namespace: help='local rank passed from distributed launcher') parser = deepspeed.add_config_arguments(parser) args = parser.parse_args() - # os.environ['LOCAL_RANK'] = str(args.local_rank) # may not be needed + os.environ['LOCAL_RANK'] = str(args.local_rank) # may not be needed return args From 20a217159de788be149dab3c323adc9268c03414 Mon Sep 17 00:00:00 2001 From: Matteo Bunino <48362942+matbun@users.noreply.github.com> Date: Mon, 12 Feb 2024 17:13:26 +0100 Subject: [PATCH 030/171] Delete tutorials/distributed-ml/torch-ddp-deepspeed-horovod/0 --- tutorials/distributed-ml/torch-ddp-deepspeed-horovod/0 | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tutorials/distributed-ml/torch-ddp-deepspeed-horovod/0 diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/0 b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/0 deleted file mode 100644 index e69de29b..00000000 From e9f62a4506d03d563064f839565cb9b8030936d1 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 13 Feb 2024 19:21:44 +0530 Subject: [PATCH 031/171] Fixes to deepspeed startscript --- .../deepspeed_slurm.sh | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git 
a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh index 2a484cd5..0987da05 100644 --- a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh @@ -57,19 +57,10 @@ fi export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training -TRAINING_CMD="train.py -s deepspeed" - -# This command does not integrate well with torch.distributed -# because, e.g., global rank is not recognized -> all processes print to console. -# It raises the error: AssertionError: LOCAL_RANK (2) != OMPI_COMM_WORLD_LOCAL_RANK (0), not sure how to proceed as we're seeing conflicting local rank info. -srun --cpu-bind=none bash -c "deepspeed $TRAINING_CMD" +export MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i +export MASTER_PORT=29500 +TRAINING_CMD="train.py -s deepspeed" -# srun python -m deepspeed.launcher.launch \ -# --node_rank $SLURM_PROCID \ -# --master_addr ${SLURMD_NODENAME}i \ -# --master_port 29500 \ -# --world_info $WID \ -# $TRAINING_CMD --deepspeed_mpi - +srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed From 2caa42307a8d924a05d3eaf53ea31d604f37a473 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 20 Feb 2024 18:31:42 +0530 Subject: [PATCH 032/171] Update distributed.py --- src/itwinai/tensorflow/distributed.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/itwinai/tensorflow/distributed.py b/src/itwinai/tensorflow/distributed.py index 36d29c7b..83eb4fa9 100644 --- a/src/itwinai/tensorflow/distributed.py +++ b/src/itwinai/tensorflow/distributed.py @@ -1,7 +1,14 @@ -from ..distributed import DistributedStrategy +import tensorflow as tf +def get_strategy(): + """Strategy for distributed TensorFlow training""" + implementation = tf.distribute.experimental.CommunicationImplementation.NCCL + communication_options = tf.distribute.experimental.CommunicationOptions(implementation=implementation) -class TFDistributedStrategy(DistributedStrategy): - """Abstract class to define the distributed backend methods for - TensorFlow models. 
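Illustrative aside (not part of the original patch): the `get_strategy()` helper introduced in this hunk (its body continues just below) returns a `MultiWorkerMirroredStrategy` plus the replica count. A hypothetical usage sketch, assuming the multi-worker cluster is resolved by the launcher (e.g. via `TF_CONFIG`):

```python
import tensorflow as tf
from itwinai.tensorflow.distributed import get_strategy

strategy, n_workers = get_strategy()
global_batch_size = 64 * n_workers  # scale the batch with the number of replicas

with strategy.scope():
    # Hypothetical toy model; variables created here are mirrored across workers.
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(8,)),
        tf.keras.layers.Dense(1),
    ])
    model.compile(optimizer="adam", loss="mse")
```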
- """ + # declare distribution strategy + tf_dist_strategy = tf.distribute.MultiWorkerMirroredStrategy(communication_options=communication_options) + + # get total number of workers + print("Number of devices: {}".format(tf_dist_strategy.num_replicas_in_sync)) + + return tf_dist_strategy, tf_dist_strategy.num_replicas_in_sync From ac6f0efb5c304efb220999fd06483004aecd763f Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 20 Feb 2024 19:05:29 +0530 Subject: [PATCH 033/171] Update trainer.py --- src/itwinai/tensorflow/trainer.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/itwinai/tensorflow/trainer.py b/src/itwinai/tensorflow/trainer.py index f1a10214..35e544cb 100644 --- a/src/itwinai/tensorflow/trainer.py +++ b/src/itwinai/tensorflow/trainer.py @@ -4,8 +4,8 @@ from jsonargparse import ArgumentParser import tensorflow as tf -from ..components import Trainer, monitor_exec - +from ..components import Trainer +from itwinai.tensorflow.distributed import get_strategy def import_class(name): components = name.split('.') @@ -31,6 +31,8 @@ class TensorflowTrainer(Trainer): def __init__( self, epochs, + train_dataset, + validation_dataset, batch_size, callbacks, model_dict: Dict, @@ -54,6 +56,16 @@ def __init__( # Create distributed TF vars if self.strategy: + tf_dist_strategy, n_devices = get_strategy() + # get total number of workers + print("Number of devices: {}".format(n_devices)) + # distribute datasets among MirroredStrategy's replicas + dist_train_dataset = tf_dist_strategy.experimental_distribute_dataset( + train_dataset + ) + dist_validation_dataset = tf_dist_strategy.experimental_distribute_dataset( + validation_dataset + ) with self.strategy.scope(): # TODO: move loss, optimizer and metrics instantiation under # here @@ -61,6 +73,7 @@ def __init__( # https://www.tensorflow.org/guide/distributed_training#use_tfdistributestrategy_with_keras_modelfit # self.model: tf.keras.Model = parser.instantiate_classes( # model_dict).model + # TODO: add dataloaders and model instances self.model: tf.keras.Model = instance_from_dict(model_dict) compile_conf = self.instantiate_compile_conf(compile_conf) self.model.compile(**compile_conf) From c37136abc91c8fa1a0b8b0ce7f6d9e0f842e61e7 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 22 Feb 2024 15:10:16 +0100 Subject: [PATCH 034/171] UPDATE tutorial --- .../torch-ddp-deepspeed-horovod/deepspeed_slurm.sh | 9 ++------- .../distributed-ml/torch-ddp-deepspeed-horovod/train.py | 3 ++- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh index 0987da05..e0326c98 100644 --- a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh @@ -11,7 +11,7 @@ # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=1 +#SBATCH --nodes=4 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=32 #SBATCH --gpus-per-node=4 @@ -43,12 +43,7 @@ if [ "$debug" = true ] ; then fi echo -# # set comm -# export CUDA_VISIBLE_DEVICES="0,1,2,3" -# export OMP_NUM_THREADS=1 -# if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then -# export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -# fi +# set env vars export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} export OMP_NUM_THREADS=1 if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then diff --git 
a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py index 0a08a5eb..284b61e6 100644 --- a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py +++ b/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py @@ -61,7 +61,7 @@ def parse_args() -> argparse.Namespace: help='local rank passed from distributed launcher') parser = deepspeed.add_config_arguments(parser) args = parser.parse_args() - os.environ['LOCAL_RANK'] = str(args.local_rank) # may not be needed + # os.environ['LOCAL_RANK'] = str(args.local_rank) # may not be needed return args @@ -135,6 +135,7 @@ def trainer_entrypoint_fn( if strategy.is_main_worker(): print(f"Loss [epoch={epoch}]: {loss.item()}") + print(f"NNLoss [epoch={epoch}]: {loss.item()}") # Update scheduler if lr_sched: From 02e93f08c8976d70e1f26a43dcbc744a924a1012 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 27 Feb 2024 17:13:06 +0100 Subject: [PATCH 035/171] ADD draft MNIST tutorial --- src/itwinai/torch/distributed.py | 267 +---------------- src/itwinai/torch/engine.py | 276 ++++++++++++++++++ .../README.md | 0 .../ddp_slurm.sh | 0 .../deepspeed_slurm.sh | 0 .../hvd_slurm.sh | 0 .../train.py | 1 - .../distributed-ml/tutorial-1-mnist/README.md | 32 ++ .../tutorial-1-mnist/ddp_slurm.sh | 65 +++++ .../tutorial-1-mnist/deepspeed_slurm.sh | 61 ++++ .../tutorial-1-mnist/hvd_slurm.sh | 55 ++++ .../distributed-ml/tutorial-1-mnist/train.py | 144 +++++++++ 12 files changed, 634 insertions(+), 267 deletions(-) create mode 100644 src/itwinai/torch/engine.py rename tutorials/distributed-ml/{torch-ddp-deepspeed-horovod => tutorial-0-basics}/README.md (100%) rename tutorials/distributed-ml/{torch-ddp-deepspeed-horovod => tutorial-0-basics}/ddp_slurm.sh (100%) rename tutorials/distributed-ml/{torch-ddp-deepspeed-horovod => tutorial-0-basics}/deepspeed_slurm.sh (100%) rename tutorials/distributed-ml/{torch-ddp-deepspeed-horovod => tutorial-0-basics}/hvd_slurm.sh (100%) rename tutorials/distributed-ml/{torch-ddp-deepspeed-horovod => tutorial-0-basics}/train.py (99%) create mode 100644 tutorials/distributed-ml/tutorial-1-mnist/README.md create mode 100644 tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh create mode 100644 tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh create mode 100644 tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh create mode 100644 tutorials/distributed-ml/tutorial-1-mnist/train.py diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index ca482909..72447dd2 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -1,284 +1,20 @@ import abc -from typing import Any, Union, List, Dict, Optional, Callable, Tuple +from typing import Any, Union, List, Dict, Optional, Tuple from pathlib import Path import json -from pydantic import BaseModel - import deepspeed import torch import torch.distributed as dist import horovod.torch as hvd import torch.nn as nn -# from torch.nn.modules import Module import torch.optim as optim from torch.optim.lr_scheduler import _LRScheduler as LRScheduler from torch.optim.optimizer import Optimizer -from torch.cuda import amp -from torch import autocast from ..distributed import DistributedStrategy -class OptimizerConfig: - def __init__(self, optim_class, **kwargs) -> None: - self.optim_class = optim_class - self.kwargs = kwargs - - def to_optim(self, parameters) -> optim.Optimizer: - return self.optim_class(parameters, **self.kwargs) - - -class 
LRSchedulerConfig: - def __init__(self, scheduler_class, **kwargs) -> None: - self.scheduler_class = scheduler_class - self.kwargs = kwargs - - def to_scheduler(self, optim) -> LRScheduler: - return self.scheduler_class(optim, **self.kwargs) - - -class ModelEngineConfig(BaseModel): - mixed_precision: bool = False - - -class ModelEngine(abc.ABC): - """Wrapper around distributed model""" - - model: nn.Module - _model_parameters: Any - optimizer: optim.Optimizer - lr_scheduler: LRScheduler - # config: ModelEngineConfig - mixed_precision: bool = False - grad_scaler: amp.GradScaler = None - - def __init__( - self, - model: nn.Module, - # model_parameters: Any, - optimizer: Union[optim.Optimizer, OptimizerConfig], - lr_scheduler: Optional[Union[LRScheduler, LRSchedulerConfig]] = None, - mixed_precision: bool = False - # config: Optional[ModelEngineConfig] = None - ) -> None: - super().__init__() - self.model = model - self.optimizer = optimizer - self.lr_scheduler = lr_scheduler - # self._model_parameters = model_parameters - # if isinstance(optimizer, OptimizerConfig): - # self.optimizer = optimizer.to_optim(model_parameters) - # else: - # self.optimizer = optimizer - - # if isinstance(lr_scheduler, LRSchedulerConfig): - # self.lr_scheduler = lr_scheduler.to_scheduler(self.optimizer) - # else: - # self.lr_scheduler = lr_scheduler - - # if not config: - # self.config = ModelEngineConfig() - self.mixed_precision = mixed_precision - if mixed_precision: - self.grad_scaler = amp.GradScaler() - - def __call__(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - # Wrapper of self.forward() - return self.forward(*args, **kwds) - - def forward(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - return self.model(*args, **kwds) - - def train(self, mode: bool = True) -> nn.Module: - """Set model in training mode.""" - self.model.train(mode=mode) - return self.model - - def eval(self) -> nn.Module: - """Set model in inference mode.""" - self.model.eval() - return self.model - - def to(self, device) -> nn.Module: - """Move model to specified device.""" - self.model.to(device) - return self.model - - @abc.abstractmethod - def zero_grad(): - """Set gradients to zero for the optimizer.""" - - @abc.abstractmethod - def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: - """Perform backward pass and return the loss. - - Args: - loss_fn (Callable): computes the loss. - *loss_args: are the arguments to be passed to ``loss_fn``. - - Returns: - torch.Tensor: computed loss. - """ - - @abc.abstractmethod - def optimizer_step(self): - """Perform optimizer step.""" - - @abc.abstractmethod - def lr_scheduler_step(self): - """Perform lr scheduler step, if present.""" - # This should be incorporated in the optim step: - # https://deepspeed.readthedocs.io/en/latest/schedulers.html - # scheduler is updated automatically at each training step - - @abc.abstractmethod - def save_checkpoint(self): - """Save checkpoint to persistent storage.""" - - -class DDPModelEngine(ModelEngine): - """Model engine for torch DDP distributed strategy.""" - - def forward(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. 
- with autocast(device_type='cuda', dtype=torch.float16): - return self.model(*args, **kwds) - else: - return self.model(*args, **kwds) - - def zero_grad(self): - """Set gradients to zero for the optimizer.""" - self.optimizer.zero_grad() - - def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: - """Perform backward pass and return the loss. - - Args: - loss_fn (Callable): computes the loss. - *loss_args: are the arguments to be passed to ``loss_fn``. - - Returns: - torch.Tensor: computed loss. - """ - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. - with autocast(device_type='cuda', dtype=torch.float16): - loss = loss_fn(*loss_args) - - # Scales loss. Calls backward() on scaled loss to create scaled - # gradients. - # Backward passes under autocast are not recommended. - # Backward ops run in the same dtype autocast chose for - # corresponding forward ops. - loss = self.grad_scaler.scale(loss) - else: - loss = loss_fn(*loss_args) - loss.backward() - return loss - - def optimizer_step(self): - """Perform optimizer step.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training - # scaler.step() first unscales the gradients of the optimizer's - # assigned params. - # If these gradients do not contain infs or NaNs, optimizer.step() - # is then called, - # otherwise, optimizer.step() is skipped. - self.grad_scaler.step(self.optimizer) - - # Updates the scale for next iteration. - self.grad_scaler.update() - else: - self.optimizer.step() - - def lr_scheduler_step(self): - """Perform lr scheduler step, if present.""" - if self.lr_scheduler: - self.lr_scheduler.step() - - def save_checkpoint(self): - """Save checkpoint to persistent storage.""" - raise NotImplementedError - - -class DSModelEngine(ModelEngine): - """Model engine for DeeSpeed distributed strategy.""" - - def forward(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. - with autocast(device_type='cuda', dtype=torch.float16): - return self.model(*args, **kwds) - else: - return self.model(*args, **kwds) - - def zero_grad(self): - """Set gradients to zero for the optimizer.""" - self.optimizer.zero_grad() - - def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: - """Perform backward pass and return the loss. - - Args: - loss_fn (Callable): computes the loss. - *loss_args: are the arguments to be passed to ``loss_fn``. - - Returns: - torch.Tensor: computed loss. - """ - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. - with autocast(device_type='cuda', dtype=torch.float16): - loss = loss_fn(*loss_args) - - # Scales loss. Calls backward() on scaled loss to create scaled - # gradients. - # Backward passes under autocast are not recommended. - # Backward ops run in the same dtype autocast chose for - # corresponding forward ops. - loss = self.grad_scaler.scale(loss) - else: - loss = loss_fn(*loss_args) - loss.backward() - return loss - - def optimizer_step(self): - """Perform optimizer step.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training - # scaler.step() first unscales the gradients of the optimizer's - # assigned params. 
- # If these gradients do not contain infs or NaNs, optimizer.step() - # is then called, - # otherwise, optimizer.step() is skipped. - self.grad_scaler.step(self.optimizer) - - # Updates the scale for next iteration. - self.grad_scaler.update() - else: - self.optimizer.step() - - def lr_scheduler_step(self): - """Perform lr scheduler step, if present.""" - if self.lr_scheduler: - self.lr_scheduler.step() - - def save_checkpoint(self): - """Save checkpoint to persistent storage.""" - raise NotImplementedError - - class TorchDistributedStrategy(DistributedStrategy): """Abstract class to define the distributed backend methods for PyTorch models. @@ -376,7 +112,6 @@ class DDPDistributedStrategy(TorchDistributedStrategy): """ backend: str - model: DDPModelEngine def __init__(self, backend: str) -> None: super().__init__() diff --git a/src/itwinai/torch/engine.py b/src/itwinai/torch/engine.py new file mode 100644 index 00000000..7084d6ec --- /dev/null +++ b/src/itwinai/torch/engine.py @@ -0,0 +1,276 @@ +""" +Model engine which wraps a torch NN. Still under development. May be removed... +""" + +import abc +from typing import Any, Union, Optional, Callable + +from pydantic import BaseModel + +import torch +import torch.nn as nn +import torch.optim as optim +from torch.optim.lr_scheduler import _LRScheduler as LRScheduler +from torch.cuda import amp +from torch import autocast + + +class OptimizerConfig: + def __init__(self, optim_class, **kwargs) -> None: + self.optim_class = optim_class + self.kwargs = kwargs + + def to_optim(self, parameters) -> optim.Optimizer: + return self.optim_class(parameters, **self.kwargs) + + +class LRSchedulerConfig: + def __init__(self, scheduler_class, **kwargs) -> None: + self.scheduler_class = scheduler_class + self.kwargs = kwargs + + def to_scheduler(self, optim) -> LRScheduler: + return self.scheduler_class(optim, **self.kwargs) + + +class ModelEngineConfig(BaseModel): + mixed_precision: bool = False + + +class ModelEngine(abc.ABC): + """Wrapper around ML model, which abstracts from distributed and + mixed-precision models. 
+ """ + + model: nn.Module + _model_parameters: Any + optimizer: optim.Optimizer + lr_scheduler: LRScheduler + # config: ModelEngineConfig + mixed_precision: bool = False + grad_scaler: amp.GradScaler = None + + def __init__( + self, + model: nn.Module, + # model_parameters: Any, + optimizer: Union[optim.Optimizer, OptimizerConfig], + lr_scheduler: Optional[Union[LRScheduler, LRSchedulerConfig]] = None, + mixed_precision: bool = False + # config: Optional[ModelEngineConfig] = None + ) -> None: + super().__init__() + self.model = model + self.optimizer = optimizer + self.lr_scheduler = lr_scheduler + # self._model_parameters = model_parameters + # if isinstance(optimizer, OptimizerConfig): + # self.optimizer = optimizer.to_optim(model_parameters) + # else: + # self.optimizer = optimizer + + # if isinstance(lr_scheduler, LRSchedulerConfig): + # self.lr_scheduler = lr_scheduler.to_scheduler(self.optimizer) + # else: + # self.lr_scheduler = lr_scheduler + + # if not config: + # self.config = ModelEngineConfig() + self.mixed_precision = mixed_precision + if mixed_precision: + self.grad_scaler = amp.GradScaler() + + def __call__(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + # Wrapper of self.forward() + return self.forward(*args, **kwds) + + def forward(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + return self.model(*args, **kwds) + + def train(self, mode: bool = True) -> nn.Module: + """Set model in training mode.""" + self.model.train(mode=mode) + return self.model + + def eval(self) -> nn.Module: + """Set model in inference mode.""" + self.model.eval() + return self.model + + def to(self, device) -> nn.Module: + """Move model to specified device.""" + self.model.to(device) + return self.model + + @abc.abstractmethod + def zero_grad(): + """Set gradients to zero for the optimizer.""" + + @abc.abstractmethod + def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: + """Perform backward pass and return the loss. + + Args: + loss_fn (Callable): computes the loss. + *loss_args: are the arguments to be passed to ``loss_fn``. + + Returns: + torch.Tensor: computed loss. + """ + + @abc.abstractmethod + def optimizer_step(self): + """Perform optimizer step.""" + + @abc.abstractmethod + def lr_scheduler_step(self): + """Perform lr scheduler step, if present.""" + # This should be incorporated in the optim step: + # https://deepspeed.readthedocs.io/en/latest/schedulers.html + # scheduler is updated automatically at each training step + + @abc.abstractmethod + def save_checkpoint(self): + """Save checkpoint to persistent storage.""" + + +class DDPModelEngine(ModelEngine): + """Model engine for torch DDP distributed strategy.""" + + def forward(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + return self.model(*args, **kwds) + else: + return self.model(*args, **kwds) + + def zero_grad(self): + """Set gradients to zero for the optimizer.""" + self.optimizer.zero_grad() + + def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: + """Perform backward pass and return the loss. + + Args: + loss_fn (Callable): computes the loss. + *loss_args: are the arguments to be passed to ``loss_fn``. + + Returns: + torch.Tensor: computed loss. 
+ """ + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + loss = loss_fn(*loss_args) + + # Scales loss. Calls backward() on scaled loss to create scaled + # gradients. + # Backward passes under autocast are not recommended. + # Backward ops run in the same dtype autocast chose for + # corresponding forward ops. + loss = self.grad_scaler.scale(loss) + else: + loss = loss_fn(*loss_args) + loss.backward() + return loss + + def optimizer_step(self): + """Perform optimizer step.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training + # scaler.step() first unscales the gradients of the optimizer's + # assigned params. + # If these gradients do not contain infs or NaNs, optimizer.step() + # is then called, + # otherwise, optimizer.step() is skipped. + self.grad_scaler.step(self.optimizer) + + # Updates the scale for next iteration. + self.grad_scaler.update() + else: + self.optimizer.step() + + def lr_scheduler_step(self): + """Perform lr scheduler step, if present.""" + if self.lr_scheduler: + self.lr_scheduler.step() + + def save_checkpoint(self): + """Save checkpoint to persistent storage.""" + raise NotImplementedError + + +class DSModelEngine(ModelEngine): + """Model engine for DeeSpeed distributed strategy.""" + + def forward(self, *args: Any, **kwds: Any) -> Any: + """Performs the forward operation.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + return self.model(*args, **kwds) + else: + return self.model(*args, **kwds) + + def zero_grad(self): + """Set gradients to zero for the optimizer.""" + self.optimizer.zero_grad() + + def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: + """Perform backward pass and return the loss. + + Args: + loss_fn (Callable): computes the loss. + *loss_args: are the arguments to be passed to ``loss_fn``. + + Returns: + torch.Tensor: computed loss. + """ + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html + # Runs the forward pass with autocasting. + with autocast(device_type='cuda', dtype=torch.float16): + loss = loss_fn(*loss_args) + + # Scales loss. Calls backward() on scaled loss to create scaled + # gradients. + # Backward passes under autocast are not recommended. + # Backward ops run in the same dtype autocast chose for + # corresponding forward ops. + loss = self.grad_scaler.scale(loss) + else: + loss = loss_fn(*loss_args) + loss.backward() + return loss + + def optimizer_step(self): + """Perform optimizer step.""" + if self.mixed_precision: + # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training + # scaler.step() first unscales the gradients of the optimizer's + # assigned params. + # If these gradients do not contain infs or NaNs, optimizer.step() + # is then called, + # otherwise, optimizer.step() is skipped. + self.grad_scaler.step(self.optimizer) + + # Updates the scale for next iteration. 
+ self.grad_scaler.update() + else: + self.optimizer.step() + + def lr_scheduler_step(self): + """Perform lr scheduler step, if present.""" + if self.lr_scheduler: + self.lr_scheduler.step() + + def save_checkpoint(self): + """Save checkpoint to persistent storage.""" + raise NotImplementedError diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/README.md b/tutorials/distributed-ml/tutorial-0-basics/README.md similarity index 100% rename from tutorials/distributed-ml/torch-ddp-deepspeed-horovod/README.md rename to tutorials/distributed-ml/tutorial-0-basics/README.md diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/ddp_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh similarity index 100% rename from tutorials/distributed-ml/torch-ddp-deepspeed-horovod/ddp_slurm.sh rename to tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh similarity index 100% rename from tutorials/distributed-ml/torch-ddp-deepspeed-horovod/deepspeed_slurm.sh rename to tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh similarity index 100% rename from tutorials/distributed-ml/torch-ddp-deepspeed-horovod/hvd_slurm.sh rename to tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh diff --git a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py b/tutorials/distributed-ml/tutorial-0-basics/train.py similarity index 99% rename from tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py rename to tutorials/distributed-ml/tutorial-0-basics/train.py index 284b61e6..4fb71cea 100644 --- a/tutorials/distributed-ml/torch-ddp-deepspeed-horovod/train.py +++ b/tutorials/distributed-ml/tutorial-0-basics/train.py @@ -39,7 +39,6 @@ DDPDistributedStrategy, HVDDistributedStrategy, DSDistributedStrategy, - # ModelEngine ) diff --git a/tutorials/distributed-ml/tutorial-1-mnist/README.md b/tutorials/distributed-ml/tutorial-1-mnist/README.md new file mode 100644 index 00000000..b3f121d4 --- /dev/null +++ b/tutorials/distributed-ml/tutorial-1-mnist/README.md @@ -0,0 +1,32 @@ +# Tutorial: distributed strategies for PyTorch + +In this tutorial we show how to use torch `DistributedDataParallel` (DDP), Horovod and DeepSpeed from the same client code. Note that the environment is tested on the HDFML system at JSC. For other systems, the module versions might need change accordingly. + +First, from the root of this repo, build the environment containing +pytorch, horovod and deepspeed. 
You can *try* with: + +```bash +# Creates a Python venv called envAI_hdfml +make torch-gpu-jsc +``` + +Each distributed strategy has its own SLURM job script, which +should be used to run it: + +If you want to distribute the code in `train.py` with **torch DDP**, run from terminal: + +```bash +sbatch ddp_slurm.sh +``` + +If you want to distribute the code in `train.py` with **DeepSpeed**, run from terminal: + +```bash +sbatch deepspeed_slurm.sh +``` + +If you want to distribute the code in `train.py` with **Horovod**, run from terminal: + +```bash +sbatch hvd_slurm.sh +``` diff --git a/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh new file mode 100644 index 00000000..8cf0280b --- /dev/null +++ b/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=Torch_DDP_tutorial +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:15:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=4 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gpus-per-node=4 +# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../envAI_hdfml/bin/activate + +# job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +# launch training +TRAINING_CMD="train.py -s ddp" + +srun --cpu-bind=none bash -c "torchrun \ + --log_dir='logs' \ + --nnodes=$SLURM_NNODES \ + --nproc_per_node=$SLURM_GPUS_PER_NODE \ + --rdzv_id=$SLURM_JOB_ID \ + --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ + --rdzv_backend=c10d \ + --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ + $TRAINING_CMD" + diff --git a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh new file mode 100644 index 00000000..e0326c98 --- /dev/null +++ b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=Torch_DeepSpeed_tutorial +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:15:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=4 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gpus-per-node=4 +# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 
MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../envAI_hdfml/bin/activate + +# job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set env vars +export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +# launch training +export MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i +export MASTER_PORT=29500 + +TRAINING_CMD="train.py -s deepspeed" + +srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed + diff --git a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh new file mode 100644 index 00000000..32e8112f --- /dev/null +++ b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=Torch_HVD_tutorial +#SBATCH --account=intertwin +#SBATCH --partition=batch +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:30:00 +#SBATCH --nodes=4 +#SBATCH --ntasks-per-node=4 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=4 +# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../envAI_hdfml/bin/activate + +# job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set vars +# export NCCL_DEBUG=INFO +export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +# launch training +TRAINING_CMD="train.py -s horovod" + +srun --cpu-bind=none python -u $TRAINING_CMD + diff --git a/tutorials/distributed-ml/tutorial-1-mnist/train.py b/tutorials/distributed-ml/tutorial-1-mnist/train.py new file mode 100644 index 00000000..e66007f8 --- /dev/null +++ b/tutorials/distributed-ml/tutorial-1-mnist/train.py @@ -0,0 +1,144 @@ +""" +TODO: add description +""" +from typing import Any +import os +import argparse + +import torch +from torch import nn +from torch.utils.data import DataLoader, Dataset, DistributedSampler + +from itwinai.torch.distributed import ( + TorchDistributedStrategy, + DDPDistributedStrategy, + HVDDistributedStrategy, + DSDistributedStrategy, +) + + +def 
parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument( + "--strategy", "-s", type=str, + choices=['ddp', 'horovod', 'deepspeed'], + default='ddp' + ) + parser.add_argument( + "--shuffle_dataloader", + action=argparse.BooleanOptionalAction + ) + + # DeepSpeed: needs to be removed + import deepspeed + parser.add_argument('--local_rank', type=int, default=-1, + help='local rank passed from distributed launcher') + parser = deepspeed.add_config_arguments(parser) + args = parser.parse_args() + # os.environ['LOCAL_RANK'] = str(args.local_rank) # may not be needed + + return args + + +class UniformRndDataset(Dataset): + """Dummy torch dataset.""" + + def __init__(self, x_size: int, y_size: int, len: int = 100): + super().__init__() + self.x_size = x_size + self.y_size = y_size + self.len = len + + def __len__(self): + return self.len + + def __getitem__(self, index): + return torch.rand(self.x_size), torch.rand(self.y_size) + + +def trainer_entrypoint_fn( + foo: Any, args: argparse.Namespace, strategy: TorchDistributedStrategy +) -> int: + """Dummy training function. This emulates custom code developed + by some use case. + """ + strategy.init() + print(f"{foo}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " + f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") + + # Local model + model = nn.Linear(3, 4) + optim = torch.optim.Adam(model.parameters(), lr=1e-3) + loss_fn = nn.MSELoss() + # Distributed model + # model_engine: ModelEngine = strategy.distributed(model, optim) + model, optim, lr_sched = strategy.distributed( + model, optim, lr_scheduler=None + ) + + # Data + train_set = UniformRndDataset(x_size=3, y_size=4) + # Distributed dataloader + train_loader = DataLoader( + train_set, batch_size=10, num_workers=1, + sampler=DistributedSampler( + train_set, + num_replicas=strategy.dist_gwsize(), + rank=strategy.dist_grank(), + shuffle=args.shuffle_dataloader + ) + ) + + # Device allocated for this worker + device = strategy.dist_device() + + for epoch in range(2): + for (x, y) in train_loader: + # print(f"tensor to cuda:{device}") + x = x.to(device) + y = y.to(device) + + optim.zero_grad() + + y_pred = model(x) + + loss = loss_fn(y_pred, y) + loss.backward() + + optim.step() + + if strategy.is_main_worker(): + print(f"Loss [epoch={epoch}]: {loss.item()}") + print(f"NNLoss [epoch={epoch}]: {loss.item()}") + + # Update scheduler + if lr_sched: + lr_sched.step() + + strategy.clean_up() + return 123 + + +if __name__ == "__main__": + + args = parse_args() + + # Instantiate Strategy + if args.strategy == 'ddp': + if (not torch.cuda.is_available() + or not torch.cuda.device_count() > 1): + raise RuntimeError('Resources unavailable') + + strategy = DDPDistributedStrategy(backend='nccl') + elif args.strategy == 'horovod': + strategy = HVDDistributedStrategy() + elif args.strategy == 'deepspeed': + strategy = DSDistributedStrategy( + backend='nccl', config=dict(train_batch_size=4) + ) + else: + raise NotImplementedError( + f"Strategy {args.strategy} is not recognized/implemented.") + + # Launch distributed training + trainer_entrypoint_fn("foobar", args, strategy) From c90f5e2402e635df06235904669b529e1858976c Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 12 Mar 2024 17:01:53 +0100 Subject: [PATCH 036/171] UPDATE DDP tutorial for MNIST --- experimental/trainer/general_trainer.py | 2 +- src/itwinai/parser.py | 420 +-------------- .../tutorial-1-mnist/checkpoint.pth.tar | Bin 0 -> 180274 bytes .../tutorial-1-mnist/config.yaml | 26 + 
.../tutorial-1-mnist/ddp_slurm.sh | 8 +- .../tutorial-1-mnist/deepspeed_slurm.sh | 8 +- .../tutorial-1-mnist/hvd_slurm.sh | 8 +- .../distributed-ml/tutorial-1-mnist/runall.sh | 4 + .../distributed-ml/tutorial-1-mnist/train.py | 505 +++++++++++++++--- 9 files changed, 471 insertions(+), 510 deletions(-) create mode 100644 tutorials/distributed-ml/tutorial-1-mnist/checkpoint.pth.tar create mode 100644 tutorials/distributed-ml/tutorial-1-mnist/config.yaml create mode 100644 tutorials/distributed-ml/tutorial-1-mnist/runall.sh diff --git a/experimental/trainer/general_trainer.py b/experimental/trainer/general_trainer.py index e22c871d..33c21ced 100755 --- a/experimental/trainer/general_trainer.py +++ b/experimental/trainer/general_trainer.py @@ -230,7 +230,7 @@ def main(): # initializes the distributed backend which will take care of sychronizing nodes/GPUs my_trainer.init_backend(backend=args.backend) -# deterministic testrun + # deterministic testrun if args.testrun: torch.manual_seed(args.nseed) g = torch.Generator() diff --git a/src/itwinai/parser.py b/src/itwinai/parser.py index 8e393652..7c0e7b5a 100644 --- a/src/itwinai/parser.py +++ b/src/itwinai/parser.py @@ -5,190 +5,10 @@ import logging import os -from typing import Dict, Any, List, Type, Union, Optional +from typing import List, Type, Union, Optional from jsonargparse import ArgumentParser as JAPArgumentParser from jsonargparse import ActionConfigFile -import json from jsonargparse._formatters import DefaultHelpFormatter -from omegaconf import OmegaConf -from pathlib import Path - -from .components import BaseComponent -from .pipeline import Pipeline -from .utils import load_yaml - - -def add_replace_field( - config: Dict, - key_chain: str, - value: Any -) -> None: - """Replace or add (if not present) a field in a dictionary, following a - path of dot-separated keys. Adding is not supported for list items. - Inplace operation. - Args: - config (Dict): dictionary to be modified. - key_chain (str): path of nested (dot-separated) keys to specify the - location - of the new value (e.g., 'foo.bar.line' adds/overwrites the value - located at config['foo']['bar']['line']). - value (Any): the value to insert. - """ - sub_config = config - for idx, k in enumerate(key_chain.split('.')): - if idx >= len(key_chain.split('.')) - 1: - # Last key reached - break - - if isinstance(sub_config, (list, tuple)): - k = int(k) - next_elem = sub_config[k] - else: - next_elem = sub_config.get(k) - - if not isinstance(next_elem, (dict, list, tuple)): - sub_config[k] = dict() - - sub_config = sub_config[k] - if isinstance(sub_config, (list, tuple)): - k = int(k) - sub_config[k] = value - - -class ConfigParser: - """ - Parses a pipeline from a configuration file. - It also provides functionalities for dynamic override - of fields by means of nested key notation. - - Args: - config (Union[str, Dict]): path to YAML configuration file - or dict storing a configuration. - override_keys (Optional[Dict[str, Any]], optional): dict mapping - nested keys to the value to override. Defaults to None. 
- - Example: - - >>> # pipeline.yaml file - >>> pipeline: - >>> class_path: itwinai.pipeline.Pipeline - >>> init_args: - >>> steps: - >>> - class_path: dataloader.MNISTDataModuleTorch - >>> init_args: - >>> save_path: .tmp/ - >>> - >>> - class_path: itwinai.torch.trainer.TorchTrainerMG - >>> init_args: - >>> model: - >>> class_path: model.Net - >>> loss: - >>> class_path: torch.nn.NLLLoss - >>> init_args: - >>> reduction: mean - - >>> from itwinai.parser import ConfigParser - >>> - >>> parser = ConfigParser( - >>> config='pipeline.yaml', - >>> override_keys={ - >>> 'pipeline.init_args.steps.0.init_args.save_path': /save/path - >>> } - >>> ) - >>> pipeline = parser.parse_pipeline() - >>> print(pipeline) - >>> print(pipeline.steps) - >>> - >>> dataloader = parser.parse_step(0) - >>> print(dataloader) - >>> print(dataloader.save_path) - """ - - config: Dict - pipeline: Pipeline - - def __init__( - self, - config: Union[str, Dict], - override_keys: Optional[Dict[str, Any]] = None - ) -> None: - self.config = config - self.override_keys = override_keys - if isinstance(self.config, (str, Path)): - self.config = load_yaml(self.config) - self._dynamic_override_keys() - self._omegaconf_interpolate() - - def _dynamic_override_keys(self): - if self.override_keys is not None: - for key_chain, value in self.override_keys.items(): - add_replace_field(self.config, key_chain, value) - - def _omegaconf_interpolate(self) -> None: - """Performs variable interpolation with OmegaConf on internal - configuration file. - """ - conf = OmegaConf.create(self.config) - self.config = OmegaConf.to_container(conf, resolve=True) - - def parse_pipeline( - self, - pipeline_nested_key: str = "pipeline", - verbose: bool = False - ) -> Pipeline: - """Merges steps into pipeline and parses it. - - Args: - pipeline_nested_key (str, optional): nested key in the - configuration file identifying the pipeline object. - Defaults to "pipeline". - verbose (bool): if True, prints the assembled pipeline - to console formatted as JSON. - - Returns: - Pipeline: instantiated pipeline. 
- """ - pipe_parser = JAPArgumentParser() - pipe_parser.add_subclass_arguments(Pipeline, "pipeline") - - pipe_dict = self.config - for key in pipeline_nested_key.split('.'): - pipe_dict = pipe_dict[key] - # pipe_dict = self.config[pipeline_nested_key] - pipe_dict = {"pipeline": pipe_dict} - - if verbose: - print("Assembled pipeline:") - print(json.dumps(pipe_dict, indent=4)) - - # Parse pipeline dict once merged with steps - conf = pipe_parser.parse_object(pipe_dict) - pipe = pipe_parser.instantiate_classes(conf) - self.pipeline = pipe["pipeline"] - return self.pipeline - - def parse_step( - self, - step_idx: Union[str, int], - pipeline_nested_key: str = "pipeline", - verbose: bool = False - ) -> BaseComponent: - pipeline_dict = self.config - for key in pipeline_nested_key.split('.'): - pipeline_dict = pipeline_dict[key] - - step_dict_config = pipeline_dict['init_args']['steps'][step_idx] - - if verbose: - print(f"STEP '{step_idx}' CONFIG:") - print(json.dumps(step_dict_config, indent=4)) - - # Wrap config under "step" field and parse it - step_dict_config = {'step': step_dict_config} - step_parser = JAPArgumentParser() - step_parser.add_subclass_arguments(BaseComponent, "step") - parsed_namespace = step_parser.parse_object(step_dict_config) - return step_parser.instantiate_classes(parsed_namespace)["step"] class ArgumentParser(JAPArgumentParser): @@ -245,241 +65,3 @@ def __init__( "-c", "--config", action=ActionConfigFile, help="Path to a configuration file in json or yaml format." ) - - -# class ConfigParser2: -# """ -# Deprecated: this pipeline structure does not allow for -# nested pipelines. However, it is more readable and the linking -# from name to step data could be achieved with OmegaConf. This -# could be reused in the future: left as example. - -# Parses a configuration file, merging the steps into -# the pipeline and returning a pipeline object. -# It also provides functionalities for dynamic override -# of fields by means of nested key notation. - -# Example: - -# >>> # pipeline.yaml -# >>> pipeline: -# >>> class_path: itwinai.pipeline.Pipeline -# >>> steps: [server, client] -# >>> -# >>> server: -# >>> class_path: mycode.ServerOptions -# >>> init_args: -# >>> host: localhost -# >>> port: 80 -# >>> -# >>> client: -# >>> class_path: mycode.ClientOptions -# >>> init_args: -# >>> url: http://${server.init_args.host}:${server.init_args.port}/ - -# >>> from itwinai.parser import ConfigParser2 -# >>> -# >>> parser = ConfigParser2( -# >>> config='pipeline.yaml', -# >>> override_keys={ -# >>> 'server.init_args.port': 777 -# >>> } -# >>> ) -# >>> pipeline = parser.parse_pipeline() -# >>> print(pipeline) -# >>> print(pipeline.steps) -# >>> print(pipeline.steps['server'].port) -# >>> -# >>> server = parser.parse_step('server') -# >>> print(server) -# >>> print(server.port) -# """ - -# config: Dict -# pipeline: Pipeline - -# def __init__( -# self, -# config: Union[str, Dict], -# override_keys: Optional[Dict[str, Any]] = None -# ) -> None: -# self.config = config -# self.override_keys = override_keys -# if isinstance(self.config, str): -# self.config = load_yaml(self.config) -# self._dynamic_override_keys() -# self._omegaconf_interpolate() - -# def _dynamic_override_keys(self): -# if self.override_keys is not None: -# for key_chain, value in self.override_keys.items(): -# add_replace_field(self.config, key_chain, value) - -# def _omegaconf_interpolate(self) -> None: -# """Performs variable interpolation with OmegaConf on internal -# configuration file. 
-# """ -# conf = OmegaConf.create(self.config) -# self.config = OmegaConf.to_container(conf, resolve=True) - -# def parse_pipeline( -# self, -# pipeline_nested_key: str = "pipeline", -# verbose: bool = False -# ) -> Pipeline: -# """Merges steps into pipeline and parses it. - -# Args: -# pipeline_nested_key (str, optional): nested key in the -# configuration file identifying the pipeline object. -# Defaults to "pipeline". -# verbose (bool): if True, prints the assembled pipeline -# to console formatted as JSON. - -# Returns: -# Pipeline: instantiated pipeline. -# """ -# pipe_parser = JAPArgumentParser() -# pipe_parser.add_subclass_arguments(Pipeline, pipeline_nested_key) -# pipe_dict = self.config[pipeline_nested_key] - -# # Pop steps list from pipeline dictionary -# steps_list = pipe_dict['steps'] -# del pipe_dict['steps'] - -# # Link steps with respective dictionaries -# if not pipe_dict.get('init_args'): -# pipe_dict['init_args'] = {} -# steps_dict = pipe_dict['init_args']['steps'] = {} -# for step_name in steps_list: -# steps_dict[step_name] = self.config[step_name] -# pipe_dict = {pipeline_nested_key: pipe_dict} - -# if verbose: -# print("Assembled pipeline:") -# print(json.dumps(pipe_dict, indent=4)) - -# # Parse pipeline dict once merged with steps -# conf = pipe_parser.parse_object(pipe_dict) -# pipe = pipe_parser.instantiate_classes(conf) -# self.pipeline = pipe[pipeline_nested_key] -# return self.pipeline - -# def parse_step( -# self, -# step_name: str, -# verbose: bool = False -# ) -> BaseComponent: -# step_dict_config = self.config[step_name] - -# if verbose: -# print(f"STEP '{step_name}' CONFIG:") -# print(json.dumps(step_dict_config, indent=4)) - -# # Wrap config under "step" field and parse it -# step_dict_config = {'step': step_dict_config} -# step_parser = JAPArgumentParser() -# step_parser.add_subclass_arguments(BaseComponent, "step") -# parsed_namespace = step_parser.parse_object(step_dict_config) -# return step_parser.instantiate_classes(parsed_namespace)["step"] - - -# class ItwinaiCLI2: -# """ -# Deprecated: the dynamic override does not work with nested parameters -# and may be confusing. - -# CLI tool for executing a configuration file, with dynamic -# override of fields and variable interpolation with Omegaconf. 
- -# Example: - -# >>> # train.py -# >>> from itwinai.parser import ItwinaiCLI -# >>> cli = ItwinaiCLI() -# >>> cli.pipeline.execute() - -# >>> # pipeline.yaml -# >>> pipeline: -# >>> class_path: itwinai.pipeline.Pipeline -# >>> steps: [server, client] -# >>> -# >>> server: -# >>> class_path: mycode.ServerOptions -# >>> init_args: -# >>> host: localhost -# >>> port: 80 -# >>> -# >>> client: -# >>> class_path: mycode.ClientOptions -# >>> init_args: -# >>> url: http://${server.init_args.host}:${server.init_args.port}/ - -# From command line: - -# >>> python train.py --config itwinai-conf.yaml --help -# >>> python train.py --config itwinai-conf.yaml -# >>> python train.py --config itwinai-conf.yaml --server.port 8080 -# """ -# _parser: JAPArgumentParser -# _config: Dict -# pipeline: Pipeline - -# def __init__( -# self, -# pipeline_nested_key: str = "pipeline", -# parser_mode: str = "omegaconf" -# ) -> None: -# self.pipeline_nested_key = pipeline_nested_key -# self.parser_mode = parser_mode -# self._init_parser() -# self._parser.add_argument(f"--{self.pipeline_nested_key}", type=dict) -# self._add_steps_arguments() -# self._config = self._parser.parse_args() - -# # Merge steps into pipeline and parse it -# del self._config['config'] -# pipe_parser = ConfigParser2(config=self._config.as_dict()) -# self.pipeline = pipe_parser.parse_pipeline( -# pipeline_nested_key=self.pipeline_nested_key -# ) - -# def _init_parser(self): -# self._parser = JAPArgumentParser(parser_mode=self.parser_mode) -# self._parser.add_argument( -# "-c", "--config", action=ActionConfigFile, -# required=True, -# help="Path to a configuration file in json or yaml format." -# ) - -# def _add_steps_arguments(self): -# """Pre-parses the configuration file, dynamically adding all the -# component classes under 'steps' as arguments of the parser. -# """ -# if "--config" not in sys.argv: -# raise ValueError( -# "--config parameter has to be specified with a " -# "valid path to a configuration file." -# ) -# config_path = sys.argv.index("--config") + 1 -# config_path = sys.argv[config_path] -# config = load_yaml(config_path) - -# # Add steps to parser -# steps = filter( -# lambda itm: itm[0] != self.pipeline_nested_key, -# config.items() -# ) -# steps = { -# step_name: step_data['class_path'] -# for step_name, step_data in steps -# } - -# for st_nested_key, step_class_str in steps.items(): -# step_class = dynamically_import_class(step_class_str) -# self._add_step_arguments( -# step_class=step_class, nested_key=st_nested_key) - -# def _add_step_arguments(self, step_class, nested_key): -# self._parser.add_subclass_arguments( -# baseclass=step_class, nested_key=nested_key) diff --git a/tutorials/distributed-ml/tutorial-1-mnist/checkpoint.pth.tar b/tutorials/distributed-ml/tutorial-1-mnist/checkpoint.pth.tar new file mode 100644 index 0000000000000000000000000000000000000000..264bb34c2b236ae1229f45f4a0ef49429c9269df GIT binary patch literal 180274 zcmaHy30zIz*Z-R*l}3`%Ac+Q*QunNVZ%K%fgoH||2u-(vP&8;lA*qm3l2j5>-LrOA z2q9CFB4nP)9K!$N_dNga@Ao|a|Lt|Ub%ys^XYc)4>z=*#^>CGx5)+e?6Z;>pp<;u? 
zCjiTrgCcKh->ait>=rlyOjbv_=-UJTA3UqDReeYPN;-&GVv!^FTm z6gs?s9Y{2XSa%03j-HJTpX@?Tr+EgY>V?Wn(PHX+{02QO$RVG)wh|}*OLShJq*(4; z2+Ucvh35Y2gxJa_)cmL(oS&;nvws}oV$-(L#za37s`VdjKa)jg&iG0FOp~dE(|zja z!}r-vDv1kSFVGwM&SHU&P~3g5h4xM>qcTNN;zi`R`1l8B@&3oR>F=I@pf|Sy+&X;G zjAlhz@9P6{6Rtr4tcL0XI@H;U&jt@|pl<}hG|QFm#Y|VFGs1NtZGJcRS{zS42A?I# zQU;_n_zT6$^T?dKeW2Krz~{ODQEYyQIHgRdu?~7f^-wc?AGQ&`JrKa8tOcw(JAux= z<^gxCkFm8~CsESScTwruQk#Y9?mP!_3^T>Wf=oR6i&JSS0+|hE(6b}ZHsi#|%EDPn zbc@kC*kv-0*))0`n=hw^trV+K(eq?R+KtbZV?VsbqY+K($>bek77&t)QU#f)wk51;J|9on--H7~Ds%FB=dl+MbPU0rmFgQ)X!3kCM~qw+k^yLiTUv;<8?XHXbH2jVeTH?s8D* zyaS(arV!6J*6f}4zu5X>-o?fH2lvPwf>2fit}^_&wzw5Ox6Pn#iM3E=VM}H9%8T9d z%*EH{mDA{)1iCU`THJSNtXMGpJZ&30Kr_awiftYV#fh6Hh?{OIiB;zsh=0n2iQh!n zihZ;*DyIaSvp4m6Q3UUm{;;H6WMoxSL3?7D-xy8u>H8d#9P^lo`j#)q zTl-FQTHTbm|B{EFzuM9458-$t@9Zn^pMVpu-GH^;!?0_@9yB5A0~%ZXAEmK=^x3b; zl*x%iAN3OOaHt#pRKAnSdiOx$`^~VxCkFgV@3P)2-xAS0U-DY*0cUZ(8=o=j#QAm( z%_`T zhS7T7#dKd^LHyo3vkg;ckWI7oN!^qJ7$0^MOx_-VF(1r8c3U!tKTFV2{e$p5wFq$B zJ*Zr(OI;gBQr+(baNU+b#l;~I-MRpc|E9s>^#m@>hyYCwbq+4+)%H$>zWboejPyHltMm5+?PDOK2J-Qt_4+D} z`CAv3hBiQX{6qM5)c~a05>XQHGaLmWY*iga%cbcC;m$pUVyY_Y1 z?(J<{+dWk{Vp%)ZnD7wyG|357W!DRJzK#>lvMCw?pNHZu^jGZ24c~c z^Z0$Yq;QO`EIQp&kDb3gCCvxFXN{ODErAE)~(xsm~M$*PICHiy8WGWXmmd^hpqyhJAsqalIYN@MCx3u=N ztww|_t~f{hfzQo8cR+U=&VpubA{yd%jrRA9rDz$Iou-`Yh zLpS>9`xt~sSm$h*l4A^Pa_3C?+y|zxiNTHJ_Sp+ ztYNIoE5Y%WGa9s-iyLQ~F;-nu;Cz+`TH+Ls6g>Kv0cn5KH`y4^RbgOSl@k7ZM2er8 z$1+l{a+s{BR(zuc&_z!rY_eaKl=8Ff>T8)KWThp3<~A9omMRD}&qj#Mym>GE0c*~^ z@UTemsaUl2q#wDRHxmBtae?GHcBssy8TsY9Le_*VVwK*8diXi?j?W3G&ZB|geqB-a z*Ha?Vt@k1YyGoIJcQbct_BPQSy=2Z*#+0!(;hAW%YvAjnx5VSwI1ouKgp$3c!qR`z zLY1|(=xvWON~yMjJ%)>@($e*GF}caSa+*lfncZ}Fu`|tCqE16{<01eKF(gx1x^fy*GEMj*j6xzCfzsi~Y z7-H7%c4I?d+X>QISJ_s6Yvn$;9${A5|3I@>dZEzg<;bN}19zx|VJn5TSe5jlkY|dx zLbwdC%74z&NJMy2k}2ByU@M9>ZNeo*ckqD^n(WVohG=f03~7z5Wd9s{g^r)y!1|4t z!RmBgWSefRMS<@GbRxm3@GN?qu?IDNy2$AIgt1>n z?PHh1J8n*@zHQ>NA2t!w*V{-1JtR*54X`f#l-d2}IuYs~M^(nxS*5BTPIM=c&?)V# zw_g%A@|%Sh@b`W@CI^Rc1^9BM82ypH#m+5!j}Kh^gPZfF;hDbGtV*66Zg6;v5B$4< z7fo@)4qdYac0Z?~px+0{k_1u zcY|0wH3HWf2{d6S73n27GAZ>BNtc{88~bxF>ejPoWz1e6r;^!7(?JKHJMGE-3;d4C z-EVWsEnYZO;Ts#h%LFI?xWImGkH)zPyG3CO7#8P^=avZ9U=Ma5YtRMYad|4_4xdGz zLyn<*Frg|RL!oNoQ+B~)AzrM02sy0o1c~!w1+RBq#h)MW4rEJTWcVwLJ!$ZbsGf4g zrvD{lr@~AwvuQL~6tBSYW(|0K#wNrzj>J`}o#X+Z*}fJU!(Mz)j233*k&Ri=*mmY& zR^h@~ve;}A#B}}R^anh7zp^s&8!Uv(|MYa*9={e$-xdF&Ex@jfzZWn21*shVFgu678}Rll7I57-+?YRfrR3^h&x>TsG$m<5DCV6Pzz_8CI~)r zn<@?eGsj-fd`XyEE~8*6YpwGt0V`R&L+Q!`{P})fTX8NTgbn#{K!Bgc|lpBAw_EQFNw07=5!s&(Dnl zAE!NhXEYEVX})AH>Ye67y;fogB_rFUy*|j{-oc9aj%B#PrjkQC68O%VgQUqd7W*p3 zaQH+77f~Qhvbv>7u$vKXi8Chk(&{Al`c3Yhzdvp;)JGO$0^x6C9x3#Fi?@jyao0Sa zE!~_zE^kq?ahqB%sLOI-vTmxdUK`@Lj@>f_dNMO@B_4jg7I|d?$-j1iJvB9#`4y=T zCzs5{_q`2;>(gFh`I=JXqc+6ehq?Il%N2NL5&yjyDPlQ2JKXrg5tpBJ!7{Gfapc{_ zxP3zu&ON*r`+g6^W~MeMZq7n6WZyS}q*=S2HbdZACjW&oh-4$I<2-ZIJwsfi5pO!Or{kg}ED|O3bIYbJYtD!^ggB$bVrR z^fEkGZJj9@KSjU}3#_2Udjjl;3)!d(HNW;!Jf;Z*61sF%g%z#PIzo+CB-3kOf~j}LRdUrXnasJ| z3Rh-%(=2`#=o)6kIecqF2@495o6K$8`>Gf(I;w-)W4v*6Oc}1`v$;OW{y49870ywy zK`I;S+4Zm+)#=osCWiN2>sS-dK0kI$#dTWWcZzl&kpZ;=Iq{{x3AAXk3NjxKL~g+x zWqMoS`Pwy1jQbyUn%-m7+Acy*{06yP7ct}HH$%{O<8bm%UIQhdgk8e0IvR36I*=@ z6Xoy^taECbsOpR|DsZ1o*0jfVBn;*W|=PS$#S&Dv16N z7?H}ZL3G9V1kQT54L@1hkGfQf@sx~A+<{K8X3gz*y`TWMy6=Kh%U-e{w>`r2^DMr< z50#ww6C0L)3X;kYo!?hRT_%-KLE$?dnwd%2o-%r9Bv9!t0gW}bqoX?e;m93T=+GIa zamy9NlRZ{Zr=9@PT4@HFzWfkmo~v?q3?U= z;KbYss3t-gTiZvX{FOJ+3bpxg^szIW`mq-D3P;eEeV4f=zaoP zqNAN6K~S`n?hB6v3HBA4Y0dXr<#M3%Q4P5uIgttn!|1Xt`sn0nNA{O&Elg?mr4D8G zP}aYkodfgOE21ILo+Jq>yIjb~5EU`FX(Ge?9k%N?P7st_`M|iEo6tF-TgjRaM+la; 
zq%momsdKXoIJ#aUzf79x;N?}YG5aAgTK$i)(DOp&nM>JT{W0u_Z^zk~{vx#h!(rsw z9L}~$&p^U?F{t1~8rkZg1`^uQDBHA+J7RK?4F>@zPD(&uz4>m8V;fVkY9=x})q~P5 znt@#L2q>91hPv1K(}2UdT#&sxtSuNzj~~i`)vFb$|GF z`tpjZ3CQ7@FMjx47U#ZH6npNTh_=ZWa^pl-h-bxQXlNTpA1)jr$~2UOC!JfsaFrAH zq5M7f#H=w@{ATolf(>%jMn(%6RG_fUec3b#-1#LIeau>I#6yLcTgf}l(fj2_Cd zhIc=bzY`YG>Sdm^!S5JtQvU<*E7#KQ#!YnAqBGRtcN+b+XFg53eV>)NdyV|LEKNO) zOzDLVU*3@&$#tftk>Z5!RLX@B6Kh6%fUTxKO60{KZWPjcHWTSO>lXSUzL`oK(;(Sr zR-^5%Q8d15H`O2Lg!O|N#PLHc`oqkmvu8hq?T_QoK>bpx{dow&bPNO?B@uMRSyk|mGN2OFYkGagXmYZR_sPoKqhm&OJ z&2?(3`$4pxKrLIoVC(~WBE z<&pMsjQ#d@p|V5YxbgdXQ23EpWL#Q;Txkq9+`JbRDcolBM--vx9u+urXgXdb4#b1` zyIFbu{d#Md4+!5x(d7$e#cd;aeud9rD$>#r{{(^fUBq1R!rE=Le#dnh6ZnOu-;@y_ z9kp8Q_{K%->5@rnv}VzUujg6+&F1iGl_EVba~9e)Hy{3NGo}f9^>NbqGw7|$XBJ#l zv6reIHv4#zRez@qbz8qdR+cYBtv$hReNn>AJ)jKf#Q}8n+W046KLV*AcMI(1<=bzKinrNunTqs`AJ7T8L=7 zN~iAc%YNng196)n;&g)6P6z2YNMWnN4fZJwxieXo; zV3l62U=yV*pw#d^1euuAvh6eIcaEjMZC+BdH)85_jrY8`D)Ov*7qmi@z^+pE1TqvL z{%=t>o!>Btb`4emE|y~c-1Z{BcZx{Jjv{j0WFq&}Pai&5hf>yO7td{&L8ku~3vRQX zaQ#CekasDQI&51+g>LIm^ujHuaH|Cv&8Bo+@GzAwzDVDxyaSh$_h{aqemddoY3jT0 z3hn=PmL{JhRA*`l5~e+;mu*Ifx7D`N7_Am|$IUvylZPhkCrJWYH@c}rTe8iePm`F6 zS#4}`N&x+OAWc;KB@wWSD{YX`q{|~?IjeCcB3}AQj>n~tldDFdx+86Dt1y+&@Ns0> z3s2-fdNz9FHxtR)&O=P~7kK1w1KRuEl564%(2ynvQr+7?|D6Tx9DV_vw~tkZ44K2l z>cx;PzloKI@DNa+B35Fr61f;0MoSba?D&~Pb>^2*gGKzo8K}j!lu&4Uauk-{YJ+Ht z`;5x6RHl<>lq@LyUa8~xoT=!pX67l4GCA-uKYr5r|LrWizav{FrZUUO`vWDcF@>#60mUu1hvSk2@KC>k^`F(%R14yqCfzw#0m z_;?>b5A}xpIup|1?ZGMf<-w>O$>8Is!R|77!`*x!!CZ4FCbvH|a$bpYMC11i=2vMy zyRq;QjNK+L1twRJ3fmrsX^L#HA2TMBV?dQEH|t%Y808#F7L zLC5z8B;?y}kU3Nj(Lpca&i$#dm3OZiP7Wi|4u^>490yWlBm=L!6Up4m{JcVUB^>XT zgm4)TC|VW(KekSXD(lM1_i8st#O7Yo({hf9WSWRscM`PSvxkl>7iQD~X^=e_2TT95 zq-mTnk^FE9te;81oLECPzsM5UgmYk#slwJw`OVC|)JDQ?SBw5lt>kj@?@o4IU(O9B zyAZb#WefyQ5?p=tn&i57kb`HXSSMixqgNkFj-V1ES1C!_r>jhluaTz(yyNX8b>w$8 zZnkEWT1a!Xnmu;x<8{t2{gIoh&F>SwLj2zTlv~ zEnMtR0L_VwOuO+>Rr#-zpE5n>O09=A2=m? 
z_T7k`{agq+t@3=1CxFO|8A0-i;$(7`$!=QAc1EW&PVQAqb{z;D zb`6MfEjyU}lc!1P2@BA4nhkRMzH@t%r-G&n3%R}=&;A;1W7Z%Avlr^oVgJSSZKq1* zv9r-6@?j{Xysd=a$+e>SpVgR#yv;B1KZag1+{u*$vdBB2d8ar`nK5_W?fR(y;h868N$1We& zDcW#kE_t3SN2~9BOJ`xAZb(frn}I` z?Zc3t@*h>oQKN;CTWI3iW_nh1km{^m4_l|MqsKFk(x4?QUC(Gz|Kv&3;B5?;@UFib z&oiO>-a1ysUzzI0zJdA+(V$zf1XNGUA;UIjPJg=#88XqOR<#OLW&b4l$7mM>J-7fv zB?|CS?0$duj$q!0268*W&;k&|nRC+Yo#;6HK)NZ#lor&l$jvuF6d{M9Smp2A=> zs?QCkl_kQy{pX>Dt7WG@5W$5NhoSYmGgUo58*bI*feSN%_!PR*aX#CbsCrJ|Q!tsV zIQ1P$0s%~SD$v$NjIF%Y`KI^p}uQm%&2242^$Adw-L*a%q*^u*)= zm-p>%xz%3r`9goz=qQ_pMdJ}c7Ws)_g*twnWu0)8u#95+) zCW;-|Zxe%HfW^jk}Pf+QZ zS1W5eIFMG2VS;xbrOL8y#9vdMq=aI;{CVj3A&Ig`BWu%wpRA+)V@Jhe!^0#l^!!Lqtu(EIQYHX8ywa5kV}9*e2w zCH_to;|v3~qiE_|N2+?vk_zZgSh~C!>Ux#wz!U@8IyjnM99c_VepZ9;2kYQ@emAka zFTw9xE|BX>NYT4&Gj2Am zVLfT?c1#oErqBf=G-&IoJ5Y5{jV8v8qVD0D^vBC!`k`?abRNA59cNZki>LBbX^blg zf3cj|m*<9@mfk`c_9bjK&+U7`=wp?<)%bDVVeGu+27B#M9y)in8imWKkg`#hczxYX zbVA)31x-ds)95a@oP={aW1^5xt(fe8aRvLk$O?b)To1PjcOtt$8~QuXk#ph+5GvP7 zBKf&Yv(zNIVoD0MiXW2NkQjRGWdl8^JyqQ3Y#?5AP)v6%d_i?b-lXwG>*y*PL%WA^ z=zi-Xv}QV zx}qg?^x{;yfALfJ+T}>RlGf6N>pnxY(Pg+bv!6@eR7~}XvuR=CQF=LO4pqL90DIQR zfVovC49E--d%gP*Ft!KuZ%hZh!zQq2#S zu<9$1k?3usK&n_oa#Nq<(2Z?e-LWmOvo5&3uhM$gV@LQq)%zo{KQo#f;`dUk-X+*=Xhi0_P>p6%Go^c1Fh4gOW zZ|L|mntqgurPVt` zcW8g)UTU?*o^ExT1nas&K+E7FniAxKJLjB&#ly{H>x9kt*njJA%(gdfK6FP6V68;@CMrh~p3Psvp#4F5iVb8uKY#^(K zkNzA+eV-q)ecA@-Pvbmvac%~myLyHg_t99@y&uguvlvy5C_~b>)L?_xKN$L$$cplo zfS(y5sYn&u6*sU`{cGXG%2Vi2<#DX1_J?!O#K^{HHt!UdAyJp2xb*r_Y~YCxtm{D^ zFuklutTyjpl~EL?`=&$82N||~+-M|z7Y6RDqR^6*QRwwedGciTYS=Nt9waKfNCo{| z(L1iHqN7fRc2_?pQ~xB92#2Xi==M5zF!>zOR~_JrH~WGtpG6zkRL^}~YbZE9 zHkr=PP^51S-wDR`RB~&s2hh@vr*M}Ih9&RkkwsmO)Xm79em=X9eB5Xx*#GGW?K?e= zwvPWmxR7zg;kOaJmEQq5!#t~_?xtvC`zzQXlO;;qGauv@+mb%Xv`YPt@?=J%x|hL7sl9M_*#=l~<}Mk^P^a(pIv{F}JGgGqf)0b7G=(IB zgCHO5p9{d(^Cq1)Yd+nXu1PnII1Ep^6wr+-Ay%oWMza=oa5;a&@k`~iI6aLc@4i-( zf17&9mPAUN(Zl9 zr6GxWljSI=3k(f}k%5rMK!}cE?kES6N9Q`$?k)Bz$mMV|+68WWjkUec<=*NOp z^pRbUAYgMh*LH9pSeI$it4#{r>(^04CjT~s$BIDlw1H^0UIgr!eGMitOQFjA4kUFJ zLg=^}64NLNUA^0Bfd6Bjhq#vbWt@bI9tKF^!ZTDOg>h@(cgQ$kiTpNtLgf7=^m={) zsA}90J=e}<3cR(!(uDW4_KYS)22aUeo_jFUkAJp*T?-dp<-!fKR8W2K0{%F>h1>d* zY1{3wv@3ly6-*O}#x`DH23IGtdHaT#qE)SxJF2zV4=+C1{>#@TRth<+Uqd&$tNjIg zSzn2rBk#=f9Zj+JM}+a&Iq2uZ1t>}W4O(LDg13kZ@c%gb@@TB0_g^6yDwKpqQ-;Vq zyl3x6MM=_}Xp*E!isn+uJVr<;G8dI0D#LsBL!{C~MbS*7=12og_k4eU-ut_E-Fx4) z&L94<_S)~e&wifgGkr4|OdlsW(v5y2Xs!V-I(OWHDjYaMhVR`;YqPEB>aRZZUHN_L z>QqAgOmC16X2S5QG?xaxJ4l~KThPRXGE|{Kk;YH8q+CHI`B&LZI)3}nBC9Gg&0-B_ z=xs-P&6-J)*qa;a+9~nRoQ=c5`Nw8v#mz{X-+Z&QF_8*a0mcv>8b|d!7 z3kbxlB3!;6Np{U7GWNBCmvI4kJmr;W*#{%KYojJfpivcKLoc#Hp`T3CG$a!X8b!e; z=krd#hLL376mFkJEFU6KB$n`*Msjw`(V`d4QNO_d*nCx=P7>}p{|o21 z-2%br^lKy1(U(EqnmiE)xgM?8h}g^5Uk@O@1A?lfb_s?m zUyS%V|2VRuIh>nVyorzc(!ek25xjR3tN4@q()g4KLxnujk*w4D3IFbc2~*Z`XTgTv z>~!jIR+6aAT;fWaolCEWyJh(M0LmW z22Om!m1OPrB`aSAh`Wvsq#tq{xPTfLPCjEHH+s){@#0lRtFC>d+Zp}@|SU4hmVrS zd#4c1`)A10%4qV)`!?MmGN73mQdDy7cTPWQR+aw2pVb*hCB?&Zw}`E*R#%tzFBjXb z%@%)cNf7^KKH`aU{KWEWRK$zdnTrpeu;aQsWavhjWa17+VyC~8`QqokE;E{ z^!bv7itN*IUG^+}ef9JCgTx;DllUhy&hzHWn=9W=4ywL#GKLQu7R>+iY_#r5er+4G zaGlsAON&pMAy55y59*!1i^hH0Pt!(+(2o+nR6bRWs+ugONwR`Byxd)?6Iwu%Pc;$u z@^C8i;w`Ow1@q+??byUV869OcJ?mNJ zm1WFyr@-Gfwq$dsmGKrdhO#{Nvi19u9M=v+O^zxtq@!5G*xbj z#8bX$Y=Wrqmq&Gd@*wVjz!kPm-N*e-wX%J!Z$Y-+G-B;4nzU-BJ2lpgrxy+heh8nP z_`gcUq~!Q4x`DHx{qbG_a zY$`59eAKU&nWVuFdKpK8<7D{Iw$b7*A@zi;(5GjW4EQ;kGW@*ZcSYArzj5C+^Y~Yd zI((0VH&J_ZPzaG2!JhV+utMt0UwT%;n?9S*jMHbZ@s)=ehNUs7h!t$g$2{@tAY0aH zxR2RzgP6Z#nfTG*nJi68iM=~Ko44B>Db||(o)@nb_~Ba3{Hf#yUjOtEey5xXzhL4A 
z@z?-o-bv0~Z1(b)pzGZrCOu1el|9vB@r`(~-nbO;qV{vuZ^HE0e2-SLvaN@cJn)8B z@_o;*m-8Xx?TYw*>3m+>VjSo8zLC$eT*I{L()jP&KX4Cjf8eyGV{AhLCRqkKzZ6$r zR3II)&nuohkF|AfUPf#y+DMm!WOYuiZdKjX9#W$jMb=n4iY^_s5*eB+vMmW&Vz2xa z;*g?Y)eqGl@a})7a@9&|yvOmc)jubjaD)4EML!f`gt;@Tj5Lns+{qq(@F3(Dj;j}s zuan}Vd_IXR?`*A>kD13yfS2v4c}GMa)L3=?<5cl6dl@b&fmUx=mQ?Nd*ou#TeTCBq zvaF7XLT=T{K;9=novtW8O|CUF;XG~R&Z$mhH)Ccn;^W85-OJ@{X&Lvm{R#Q1a)PY+ z^nlc!R%ag34g4aB9)4!74L@elckZph5pLh^3)R0J>%@&!*GYW$>+0_hOF8N-%*V?z z#4GFe@R|4c>NXtkCS|eNkD=o830`8)qQRtpyBnXp`Vtp+ z*N!|IFo{R&(L{Fnn(B7_qh!&dJ=GCLFT~}p#iIPWG%mMKk_?_Oh2u7?;y>!vig(JD zi0|(|B3>_B$nBgHCjQSpPQ0)c8iif^&+~uUN6Y_yA1xj2P5+^mPZPiLpAEanw?hlr_5*AAMY2~g#rzxpt8X@UXu_uI zO#U=$+*Uy*w?*<&JEc)6?-RTEiDTpEU*s}E=5n_rl*yg%6)bn~R=z3Cko#S+gL!7H z7DxRM@}DxAxgn$Hld3)X#Cv`t7Zf^;AM2jS9sW3!xTKbFrzI}&5hW$!?Z;(ts)e9u z6LPQSHost2x1N*z8JVQqVKq1U_Ze1M_=LNIHiTS=;E$h|;gqA0U%W+)S6X(k>P$=o z=cy=9pV*vZfhLJ;=-7dz_3Q?&FF}C}bI9i1XNy=)SC6Rca1cL0H=UI|IK!_!QO9Lw zt8&@bQkeUl3!LioJg#n4Aus!WB`zV7MEXQAzwOfuu2k@aEF7B8p3NOZM(0@)$_$vN zK~uH+>Ow-EpX0ua9mDN0@8b49m`KFGe7SFLt;JK+{YY<~mgsBaMb3J55=rQtMV{wr z@V@O;%<-ruF6~U=r@KqBr+;*b+Xffbu9e1loZ3&etXW2s(TSXm^&{uXZ}6w;!-acY znyNPBgG-@k8GR?E=Mw7Jmc>Z8uiAHtR4HAx90n4=@%Q$s>XqK-DnX14){Y1S7var z@5;EBp8`0w)w{_)r@f>!VGsG@+h|)Ue~avwbS6quJ-DxBPw6Qcdv40C3XXF-BrZT> z(b!O-pS0aXhPxIBzKcUyu5AiGyT_huUVV@#`JE6C{d$k6Gz=%s?GEHhnl*{M5hJoZ zm`$4O_gJak>GETA#`Mzc|>u zTs)Vv*Nmm>8oIdC>>Tr){Dhe9o=L80>Qbj6Z<(LKuh4Hf$Cd6qNx!)C3q0*E{+Qi* z^8J=4Gjdj;{p*(!|B58eu)BqMEO4abL%wk>bIghV^%1bRljrWbH<7n*|A@DKujGP8 z^i?HE3tWJ1C)SxWhU~d04H5?zLh6~VWH583#d5~<;G4v1uhTEZoNX&N#C|wc=aSg% zYJXyCd7gYacY>Y|S|PSySxdHC#e%gRQ-!8+r9~6(5$8AWE5U0xm(q7Q;C>~Q z3lR1}}=;}moZpZlL zROwO^J8K6*AIf-Nw|G(VqAl#vpAykLxeIjU>bLwxDI5OF!fAZeWqba&{|2VBAcG7{ zf6go2I7!^5>#)0d!Axn1B*>U0adxwoaT)Jj1P?bEqP|y(%edVx%uE-NOn$ler<=Pt zXyL2s*Bf7mH)s3t6P%0qjSG^=1q&VO<~DkL;(qZ{$$RAf zr{}zXOcQnbe4Wd8@aE?A1n{k6TFJhE1>E6fX86u`1r3!xOUB-Q$c9BcB_Tdi_~C^N z-LO{?*9{WpCZT&t)XOx^C(D*9Npy>Mttw_qO`lS3-6Sqz(I~FiCy_bVCoQZtjcsXt!_RZ_6Xgd8xgpvyq~Ts7kq}RwC_-0!Zi45H@|RK1Tfg%`~oTBw3%aky10lBvaNl5(X`9IYWM*n>sVdZGA|9@Ra zbQW)6+Rn~=d0DEce%c9=5;~t;S}>Sjp{hzp>uumZj~0`r=?DL7T}@JM6ZD1J=0DH> zeqAFZM*O$yGIz9B{$H0h=c5IR+p=+it|L63ZVeB&XTbLZxommqdKf%u6@33}O24d1 zqK*&cYdYGGpkLrI++}qM)raQNOp6itpiLL=KUoDET5aL(@o#+foC?5sdm#39B3vDB zf!g}_aKh9Ks!-)ZZ8By+^`dmrVm+9J`j?}DTqOGSOduKNr7XH^HQmcu)4AzUYDPRh^0k*x|IyXR@Kn)^JehN4V>8N`a3jzZWK%|ItshJ z6+!dyBYJiJKl)5z4|O}g5cDK7V0-6bxNW`(7hKp0PA!+|>*Z-w(R>|9u33gkD8<|t zMAN`lH`*^Z0lhwkVD+zRIzcBM@m?@<@Lh~8fqT(t!B#YOGDVx01M$>{BBI~g#`m5~ zBIlo{(L$pWY;yK6p}g_u{&~Kn;Sa*;MdvY~zi|RH>e~kAZoX!Ldma&O<&|_y`((Un z`-^!d2s~TaP_Pc%1A$TS_^FpL~|R%}zvBzr)PA)Q@gGn+9o_6)E$MIsXf7%ofeCsGovx6B3BeXvxqLrTmVi#EMSm_4rE6( z!Px{3R-8)(?J08Nw>o*SK*0%O=7rLW2j-x9@LBfs@nIa$R>#~rT4?d)Sk$sV%~mVw zq20cFpuc65o$ra)kmOVer2z0#ox`?^lF*x3$kt|0#b0-pqjuIVJazdZ9sDYemh8&n z`afIXCmSnJ5N7vZYrRoI=*%7~E70kW&Qpgb4Kyo|#?HDc%v-}7d+sWO(>Z_a=$?fS zW}e32;zf8~?-%>GWga>NJi~$%8`$l30zUT*z*jRmSc%~*cyKNseq@Uv=gC++=N64g z;bpkf_a*Okw}>fq+0tW+6v-mNe?MsPSlIL@4^M6z3J3OBGxarx*r@B}XgFFDPycv? 
zGuo24%8Wy}WOf)nd=iKK6+9Ns;DnjV5T+ev1!YG+h!5Ds@gttT$J+Hf*?i+Vxa*q$ z%GtAU(lZez^jsq$N#lu|nUG<)AqTd9A$ai9M>crZ4)m#PK+Rz?us&PW&U{KM#igV1 z>3G45z$cql>-aM7$Z70|h+xj#Xy|dQWzGNM*znnN!LHZ}K2__&Y0oa$VxnN@*guU* z-Z>#?!S16}Z5%kwi>F=JUb8giKrpk?f{6i1(4<|=-k6n9x$#agVIhZC6;s*p91R#V zZ6Wx_ECfvZ`#A1es7A9kPsiBNJDBm41a^0f6Wq(w1&^gZIAUNXIJ5H*{G9Nq~ELlt*zUMIC zcRSzOG#n1)vI3o+YUmL@iHN(;LdN#SdRQ;Ce!ne9l9_AHm39J)g~L-{&o*se+Ag=QNrGc-;*Eb zUNb`}1$hoAg)*W51i;A+&J2a3uKH4~F+{ zV<2my11j&XgQbUufbyK_a8Dx)Dm6^;u`x{yw<8cYHG=!Njd=n;KV zC*dd`66VFA+($cMrqXYgv!C^XufM&|q}W%>7`G5NwhbQl>R=E_~@(NlsR$V`S^ zILZTR7O4~F%BhrrY|Y_Hx=HgM{5VQ;JO{;Qd^!gB_g z^%YRVIVLzG`VgGDVoqC+3BNbJf#_X143vZyd+ow3Ty|wGC?+Pu;$1K3Ow%~%np?m| z$jal6it}_#kt)l!)&h+OQV85|9Bv-Pt9)#x=1Xcx`0oqYwDSwEo+Jw|v-9C6od74R zE}>g=Br5owqkFgN!K_i=nAEFHLjS!A##ZbC+XpIWZf<~*Gd6>qof-JWj>Tj8?QFnu zXL3s;n{{u$L4%X-@!$3igZa9Oc6-)H(w0?>E?B-3^cED-@vp*ZMOqsBtFDKibNlG@ z7ac5b+(vL+u0RaB*E1Ez@0^M7o}8o5g`k>-XY-HJuXT#h|9c@To3b4|)fCx|`9btb zmlEClN=RQinn6ACi$U2i336a3%)Mm^R=3AMlFE3<%$0^E*;`;t_Dq9>5w_ zhuVo+;Pmh=1*1w8ayo{i=I+1P_FF= z&E?k9;`(27ibE0~WZ=f48@pl4l@WI3n`^0aQv{SfTnJ0Ur^0345JQJ$v5N`QXpCT* zl|I!RM@pB`!+VsW@zx9Kyr+c!SSA5R8b`$ndWslQrGmGgx?%IF>tw*FPs}3L9_?~c z#S`ll=%pAn{2>3GwbNJZdFDZu*JBM*6BF^nk}{m=SBXsv-LXk+JnGq{V$^$zv$~|n z%%%I;i!0;M?`}Jt{x1>le4B|uZF|t=aij44`GNF_3vf{FEUYk>6h!M@utcvI%+5r^ zoSjWHZLJPOsIJB4dRM;6RRfeFE7*}G9-QOlb?oDxBg9yu5R|;D*t{HhFuXh%6!dLC zH(&}sbwe0Q-LxInPRIb=7-vZPIRlmXN+R?31UTuHv2{i_sHKoYan{!h6Xj&Et#1Mx zY^tZ0u}4|cCKbe#<=C;>gns+^jP$1z2p-bENz!;dd_L2c>0XS7-Sdjr$bo&dQM#0S zyZSipjPwPid5Ul~R~MFq&!G!57t)B&?#yJ_cv`9dgDSjw#QCdF#i{kV5PLF*jhZtB zJ%9AGk0FG=8EH??y{rFT^lQ(0@dBhSBc>BEY>x#_nD~DBZ)UVUh7(9*ti}TCkjlZe z-xV?KVjt5OXNP`&L(zDV0*)5`SNO>c=Kc8zH5*xsIJ?s(Zczfa)T@%s-?E$e>R*BH zy(j34MW1Q)m`^OzGYX;#%D}`Vl}20qrWSi^1khwBwEOIV&7wync$k+a^WVMkLdT zk^VTmU>Ib-(u3}n`7}IiKStcl5dsD%EmDz!nDDh9ra+|aA^LdiCRn^S7zQ63221skbxl%*yVs4#-QOyhcrX{-JYUmQ*DKhp zg$wA|95ZkypTN*7gyyGCg^3NfakytHCd^xp%@h9NpigP|u6iC!c9AAqy!7zc$9XJx zkX%iNtB768w}UTtf>@5Lqd5B`4^n{RzJ(~j%Yp?lz6I_?Gqe%oCbSf~U(>j~%# zScQh$+G+5m!}PMNJMQt&!Q(;MOy^=duB$D>rn?J(-N|5cg7et3Q3-hQvrcc@v@D*P&(pYob!?oVJSn5=rp$M1Vf{Q|ebkUz4pN%j~W<2fDA+_MHRHQ`Vt8*?-ko}Ta|%hm70+xth+>}CP8EZBsdVNK-J z@iOL8Y>knNPt%|JiO_OH4GVIoFptgmK;1wLk7_(2N7)Na4lG3LbrG=aX(l`=y+Zxh zGOnb!m7GwG!J%nc)Mqb&gXjOT!eRI61%q!mW#gclnjK%bsuUlV`tKm_xiS#PYz)V7 z%XPq|-wsvhoP<)n<#;ipoX%7^L2d3Ai)FkxT2Z)x-kKH*J0-l~ZJ?OiJpRCHJUoQV zixqHVz+$pqv7fHLEJuGAo7ioe{hfXi`Y*#R7n$eFo9y!VENHe9F}YA}3_pfgS|5%! 
z6I1aSWT9q7C{DT?jC-BGqfbW}W`1)A*JBio#vfx|wh2tnV>T>VdW4+cU&eNht6~=) zp2Ad}k5naNINAQffL~M>jx&mr`9bDO(er%?TRZwXo|GD5{I6x=+Q-epMR&++7P3jpl<{x<1vj$YfeOE~8SN5~|0p zg+VdrQJiy|fAhwl53l{p3OBwdTA$XjLBHCld87e)8*TvGSyFb0ws2_XXq@{Xp1Dd= zRI9Lo{>B#Q9bX5T3x=V^#zs8%FAXiOd4grYJ*c%zW2K?ZX!%_Y)3i^)j*1uHb7d#Y z-&>5+Gmhe>lSeSsR*nScNaJ(KFdSObZ7odVv~t z?}*0ww8czbBMMHwvjm-cSu8eSE%Yq%f#L`0)G9j!SLrQ<<*w17mr(172S{BCf~q%vVUJuK z)@n<^!gWhwOs6O8+L-`v)BaKgTVsCc`)K$WHI7=R7VtMaa=>dxCS3M)rd7#J)Oq|R zvc{W&Q$QxI?+AoXr_aH$6O|y79tmsIFN1TMCGDBeF47Ior8B(_v%w*E;AYtZFczjF zL$|D@Wg|LZ!Qt1i;TDI=t5ulgkJTXc%}B_bz0R~F^kDErA6VofW#<#3ggcgA6w6$V zpnt1(!?|cxc1dp~Dku)D*>bD`zos3)r&qpUnJa~3o#J8X1YTnzwVILGuu^+vc zXkzRXEKFX9?n>9uTy+G~`7?v5EX%@u6|=DZVji;)SJB3Ze!f9Q38agf$z-KDsI;;e z?@DH|F}v^bx*q@7i$fRi_2dyaIcEgw{!(XG%G2@EvNoEvbP!J5^?{`Zgk$nd1LBx0 zhk0QZeCFgrF#Ht;Z=WU;gT_3#(y0U?kK@^jay?usYltqj86c7#2)&D2s3={~NG}Jx z-?)rl5WW|-*4bgMoHp>YB&qGjY49g-A#B}rmep=(fcB^$SUdMewQRs*-XW%yh1LzX zvykH;F=`>cTwX%al0f+3FR(8?9L&F0Vu6v6i#l1rk1`YBsHp@@sCyv3p%h8I#CNEi zelzn~ABGd+b?Avl_RK-1m9dkp^x4%QI2X$n zoP&s-KGIx-^u=o(NQkb2=(q1dqx%rC{jd3u7ck8zRm8v`b>Xkt;c7BZ9zXv z5hog&;xU~fa;vl}MtG8%x*ilS776qG5jRIPl3_+KtqnYXk zd=`EIhChp-rhEL@qp~n8&0ByHnlEADPEFe98Op{oefHs12b*y3GSEOr{OmLk&$Nxk z?0215trLpM-`3!s40lWqI7m&rgR#x!ytuSx0oyKDg?WV)cz()AoEe+XUYQ-nNP|I` z-82c;SRcl1=?k&;XD3_nAcQ|qEYw^Y18~N^si@nZ2FKPF!pZJP@OWy9W_LsBfXWlp zZCNilkaUAgjbJS3%XIiPN)j99gh26-Kv3>*!qeTQoNi|TZ7e6O(k}!qb>y(J6lH!$ z!35?TZb6?G-(YtQP2n*a2Oa~I@ngIWZ?6-J$*L8htJi!#9eV-{E&q~ag{7PKlW zLXU+nsIs3M4shI$DJ>J&+uLuk;oJd~ib$#+QTUrg-1vw)o+M*@y&*UqPyHZ^~LQGmQoB%6>NVm<7z{G*NwDG;FgP zjYBQout_T$;k9KqJarT1bz#Qzb9gAMSTMlu>0>3k9Ww*qi~S9zV@e??;vUV)c+AW_ zyx@w=Bv>tL&whj(!WZG}8m!Kyoy{$*fosJJf2DES{5|xg&PiVX*+ZN^eG`taH~|C8 zis+dgYhe0b6Z8uk0cj7%QMXs4z=bY?{Aw+1owC8ks?YpF2xVv8wBXIQN%Yr>2{5&1SuWWE&NUHNJ|m#C>oj=lw}I*6ERe}DO1lxHwi18mt*N) zTbi^smAO2##f~T=lzqCHj<&kY_db?}Tr-}!U$bE+@At7R90tQSD^QP{qwvYIRw^ky z8FQqo_`5^@u$vjL#b0~lgbZ*ghz`8LZ;98VjZ(kCOz^1jRC~xqaxLuf4V9V+(J>fj zJcaOy^5Tbgbn$S{9}>Ve;CqQ5>_%Y?d=xoSMF&rmRvrysw1z`|lrbKe^O~#)2qubxTiX}JxT zyvykd&I~gT7r=(0!y)ydADiKLmzn?Tqpjv9=wURNJ-z#ft{3XuNly>Z(dOE?EV-V& zZRur7)i0qd{K|j)kwXVf0_IHBe%UZOgMEF?KJ!7-x+(S$4R)xmSEmz`<&F zwbGm&%fWsBb6S_DK)a|qTXMw|>yB&({q;_uU6zPt5j!#C-~hO0Zwq53>Of|A2!8d9 z<`o7ivmw_jVBeoWeq>%b+x7M^cC849i;X8hb=xsirwYCD)$v_w51^5z zU2@MP)J(d^RvWak)x10WFcNzIIm!^RKOuz)-J{=yA5jGtbIV`S3t`GE&3;M z4$C+e&2l;~_P`iP9W*T%gs$_Z zQ2ksv@UTcg{c1sUPk*|SPC8-$E*TJ&(j{&9iD^YpP1eA?EiQ=@OxLKHWPf6PX zs-NSat@S4x>~DgyK4;n1k+0YY0}dr)Ptc3uGH~AT5?mR(g${R(#b~Q8UhPH_xzV3T zk4c)qefw@&xh)Ig#yiqgXQimewPDn_ESH(;RI;?LdyLHwXX6fKhzF`}1z&C+wm3$y zo+4E^m@C53DaSCxTZwDh975f79Z1MJU(tlkUzz647W(yHDs`Gw$h%Fv&PKiK;%5$? zgsqN=u=3DMi0U7P-!dbx?qm>ZwOwP*VfuC$b&iv7OQNl}_36mb9IPs0&}=dv-}fm~ z4GEq|m3pDWXe*p%7J}kPb>4KUJx2Efv`jt$I-9Iu^!+33;P379o$pvWMQ;bF?wbaE zw+CbT&@AA`Yf#nUTVY9IE0MSEVcu&u!3W(G3{0$|r^ZF%o)Okyf20r2C7lM7s~-3w z=LTD_G!B2;4XBPD?Z}TZTT3C=0|%RbXNKj3WOz@5Ty`AJyf9`-GI}V|`o?$}MH2Yt zInCcNoehr5q&mfVbpD((I6vnp?Kv-vZq2R``ce}tzdFF7g@e)mfCvYvoW=D&Cc*B! 
zF5droKRq4e0Hs%M)9B;%Snot}Y~?oMvMmYx)W*OZOGKEnS$|KA~p%Q43sT zcO09WBk)h1E^6i8M>&ZkK6K>?)^h$BJQ*N@Pw_Eq`1JdN2iFv;m@x?-h>pYh^+({2 zoD0?6^{F5X?Ni6eQIbg z@X~5lLQ_s6)J7hFeX5=0qH!b}^j#D5{_4Zp2v>M=@G5&^QG-VvCeX_@VJPdS1SW?K zxKYc`(M+Sm(CgR6?Dt*}ys$QcsPO`u9q^A8t+9mQy_K{{GYJlTw86O8Ga#McNFRN$ zV6q=Az&idm{Tc9q1>B7jaNV6~QK*HR3@hpTRb@2sY!zKMw1RbAQxOt#N=S)C3A_H~ zE*s~0hF8@{!rk!`VS$e^_xAk?FxXZKB?W_FS^s#LsPha^NeRZJo59NDXk0d&foSU{{&01aDyO__#F+GbTXm4I~@FbU0`;S6fTn*MCTP9p!z30AkpUyr2QRC=Ke{< zcMt68{-yIFb^aUJxA_9}PJ4z@op)Hbn>_Y*>}Soj!)eMBIa=uOnYH)1LFuGmTDDOd z5@%+zeY-kXZc`p=e9XhB!5YvwBM3_>Bbju12reHHft@qG!PdhLTS6zXV(|+;YUxcH z{yh-)EjUbfr^tY$fPv+R=78Fv47$?$C^W{N1(k7ekoeCKSJ)k*g1{U6W824BTzyifo+*O>fsFeeKW4q)cfLiX2#X`>?o0KAWsw z?24^>#)92SB{=mkg|Y$bq5LkAiq+<{zh@XUId6d8MJ4b}^|XK`n?t7GPz-8+0DJOM z!N)ENj%u2LUgaXl4}VH6+FZ$33o-oYkAQ?hvmx)=0T{};(FH;7@aMx~Sh@HtQCvgF zb&VN#=fDlJ6mp>KQjv_}#tYO{8mS!?wXV zaR`TH0V^Sl>4WQ<%}{(?z#3yFfM?Jfo7jvn&S;gqouc_Qm~ki)-tT$O`odN4@#=^0 zC(?ywZVDs)lNE7AcmnPf7vtzno6#ozJgysgo?R;shis1pP_Fp}#u!?o)~{30_|**h z|J76b2~+3{!FRL$LoUe2hr`rksSweA8$_FCurpCA$aa=9vHU)|TImdcSOafNZDNQ1 z@wi;Ggk6@HLw(i_WU?J=aYf=hTzsL9$?d(2b~Txh>ClVwRaW4OgN?MrV;5 zSW4+^Fvx!Z{aQIVY_Kx!kCVsm6Kvt5^a2QPe#Le-oWv5XAZU_}g^bmD#P6#qPWUkt zhMp>;DUaRg>8LyS%Vis@$UV;){QAVdFv`U>SKH`@kZ-!Wr41XO4#O){cMf7B)s@k!Nhxesm`Li z?90Jw7I~XbQfGyuds!E`pJo-r$mW4j(*z zL4RmpW|2cpL9b$vU1M4=dD2u$CB!P=7N8BYn!htX&H;*wHZz}B7ijX_vuwj_GnjDh z2wk^X4b5(Q6WPZupyo9Wo8J3?9|j}3RQ$T zrGIBC(cAoh?(iA}lMIzWeOM055B@-3$83awTW=%dx5M9-VEDb)0G4^ELvL$^&_4_L zGx2ix<<2OWwZ@lHtbF!PMG{?H!(sNTGL59jCBvVz>0b^z{IUvko+zXD|_VgMxo~+ecB3G@fqNJ&uMn_=2E6v)j+*3 z)Uv6rW8r7}RGM};9IAXb;fzW>IYPs?JUq4{4(0E> zWK-8^q50orELRJs*XP8tY&Qd8{`ZN$((#b*c_6~A>7zl_Mw>dWDnVr<3AEA;!h)OM zsPUn6YOwYQHjQk>*6`5~es&1NC`hyOE=SPalw$XVaN+Ej;d6<2s!7bT@zWnZVev6) z@OdW|emnvm*GIG89;>kCmNA=sU;+kW8SFY6M^`FIW0>7w95Z${+F0x0CfUWzqSpnF zq-x@#Gc*2EC$X>TVeI~@$xzvOj$Rp1PhX!KMxRISgnz#RAZ3dIKK`x*k4O;Jc)1rY zJ1N>V53QsPgYsDGUxk{@Zx*n-R*#`6e+oFc-N3IepJT$~+iX+LeaIT<0Xv)3YcAcH zh>E91gYSYdWc%}c=vWerTfM@l^aVp+TG0IegA|ssNT0OdUynLpdpgHR2c*({e%sOxb`#i6cNWSug?;T30r!hZpnF*s?UwP!?+({!uiy{cIqMXCW;GY*pD@QBFK^yx^*JaIykf>? 
z3K^m61wJ}B!pE;SNx+~%prvF_$IPAz3lh&lR_l9s`?n4zNabSv?E-r0QW_k&+rmdK z-37f3DfoBM0<1k50OQ`iVhxrVw4`PxCaj&r3gfcb#P|vfsJwz5S7hPNHXjIi;fl%& zWBHV0lbB*;Eft-fh~u68;YPI3-`masw=?Hh)}UoraoQcDf&{$6Zxe+~9ePb?BJ^EP zW9CC%k}J;Z+0Y_sRJ(JHwv=zBJu0to+!uVm>fMQGXlmR{JFgV(NDqG_!)Exvu7#%VSa?aPP6 zchuuq<#jFCkzt9--{aBPb0F}S<)C!x@yhmp3ur@14LkGh3@m-o!z;Yj!q{0FpnTXC zzAtWNd%Ctmk=IsQeEt(lbZuqNSQt17HF@BOINUId;pUA#bpF|m)KT7;R2HrTCleps z)P8}wl^VjvGlA%;B8U19ve<5+?prtJ9eZpg$EK(5V@F36($$r5Y+Sb|7CH%MHPHzj zI*wqEcn{t*ZWiAoKOlb13iNtVF1B54fv1;@fdZ!ib{m#Svt_%URzDPm<_&Sz(Lo^r z#|yQH&yumYohiYkqu*(m^Z=axd(;hA#t3hHt^A0EwYV)uTnXJtd@0@`R{xmXp) zY%ZcBbVj0U^;N1NdxkD-c*x$U0k$(mTzD%8m*~hrMfOm3rlFG#J9D01HWmDFp@W|H zngRUu1ZWN3f}d6-qtb0>w7hs6YKIK7b3Xrxnis0#v4K(a&`x6rDL4O64wS{_H7 zy@T&3@55_(DIjpC$Wbl|pRNkP?a~YJtkxx_W;z6xcL7ry?uw5BkD*h+NZJzRg#klP z;%>jJ&!^8A+tOjb&i=>uS zmVEw~au&31BHa2PguQ1xmw)*GE!m~4j3P-!h0-uS=W$qNG^BxVq(p;|hN2P?B3t%| zq%BffobN+JN~Iyv&{CArR#Y1Q^LOKS>-t~Ub;H9wpYw70oa1;Muh;YSVk*jtRe+nj z1ZHVFQlzOber^}u&r-*+bKRPJx}r9EUsWf`{nk`;N{pfh32YSG6);@bqqOdRLvPcj zLhXe_v};{QawF7PlQ5qqmF&i?zvQj{?DasIbX{Sm^%JF@b5`%a3_(%RFf#srf&aO) z8fBl}$BL?0e0^sfscf7Gk1mX4(Mg)Lm~#1fQ+Lw((w!8pe3pA%P|p@hS(2XeWKKap z2P8z_*|2$E;c8YYD!nqm74u?n|I!=GY-Blpey6|>D+@)B)tlgm#vATH;36F9UklyF z2J|i~j@x`RisT;uf$jIjvB_f>R$q!kQ$B|q;y)K$W?pB$iHi8WPO0|sy=+>pF`mU< zYot}qYE*KinGP-MA(=U)bm`?$T6%FD>7F`CBSQ7*%zo4H;)4*}{RoIn*9&LY-%mNY=NAAHIJNx5%fNu=6-A_&tp>hOflk z3qmN9D3}JNq-HfE@LLX_G#mP9U1V$UYN_Tj-`r6W8qwoGOhJ~1P^=8Ld%3K`1kIGXldhNRQTt|*1C?z zVHfmKFHVNF*%?`#>6?SQC-{&>@n#e+C`DuIvABp!B+Z8T^jy1$E=%u*psH2;+hQAx zl?$RP>pUR$kUMSBc1M|)JJbCG#Vc6gVDyR&PIsHd>&*Y2pHQU&oaE>Hz z5mB`vrX1ok$$=wlhTgO>K&>+t9p*YI82Sru)+Iqv;b>fmzrk}_G(OFW$79jPd|ZYTKXBcW zo_Sv4+9t=KXy}~U8|D|ls;?S)T~CAOhuL7iuZYI%MwI^>jtW~Q>g1mM2zry8Mb;*ZvC`a} zHP6tYmNuEXCqwgTu1g;!`MQ#M?|b20ubrOf$kbV=&!hZco;IC3K(k8}>k6-*CvU-X z5m>d0ykn*Ct@BCt{JslRwx`ftxj7JOe44A#5aS{}bZL={FUCyW4o_@d;Iy?eztZd| zKV0pZX#SKe)*rEteXUwXgGbGy-6sV0;}>)u>?Hgg77(>R!SJK6*XE+45W-IYcKGuOi93#H6% zZX8!TRF5#kSl|4q|{&DJ=M@h`CMUse67lZpx6y(0!LU*~yKyDVio^@bk2w z6`sY>CBT7h87SI0!{OQQMUz_;@mhxu>fQPb*JKYOta1hqgVp3`v=-jfg<_CH0rNa4 zLF3%*@WgdvVQ&+B=_HBh<7eBNZE3;q(!HBkwO>TbO{MYD)T!*&bscVV?I|#kaK$fPviMp3 zA`mA_xd%3aouRO+fAtMyU&X@s31#R%p;Po?$X~YN%uW$z3;l@u%^_9D;*OX)j%Es| zsIFLl-Xlt$-u8cC4&fKrpN*$k-Fu{M-_@CJyD?_B=&-vvI{4f+iVUW!;jm@{RNYyC za@Q82Lf{3~v3@g2jvI<2pA4`N&&}Xeu$+~6ap0j>%j|!Rgl1z`cBcM3NVd-;t6y)R zyZkasdnxpq>3?JjfxYa7fjCvvRSG>;A7O{-Usk*R5qtec1?THafWxu;o)wY zX&uZy3)#hPlPT0}Ii3!!JWi|9^l|Jxp^wYuE(`g1h9xOIg22R5=C`2-rY=9gzlvod zC6iFd8+Vs&@hTT3&vSr~ywf!J_CB&pc*s^b#FL%uT+*%DM6VJvX-!)m1>7?If;Rf9^h_`iB|s=nOMEZABjqqniT7shf8kz!@99O zV0L*fE*vYzf=>AIGgm9)$f3_c;rt_ZXZ(1KDT>7ymjVovUJ1JsLM^R=r7%ZKj|)$1 zLGjWU{5Sg&{M;4|V|gI&Wq+9dyJ*;vc)|Ij?YbKGrqc|-b}GJ?61CLszt#NKQNB% z>F}d)jq%v?JQ>FsX>c>HX<+Lzfkhb?hv8CUB!6=@87?Tq334Bpd;DWwY41I5?(tHj>2VB-pxbENRu7)1}kpbouZl%9)u--#-i9)2?=|6RL1j?rzSd z>l!wio#RAi4KVU{G~2oLA~a?{0kw)^sJrb%ih}-`Q5y|fk0t8%|Fy0AHo+1uEt(6M zdH{ZhZRS*7cvHWNHQjkvN&92WYMtL6gu5$M@b;~>H2%IYkF5)(hv7q^`Ti{m|LZ{q zCtFaS-8tsc)K#l8DT`YEK4M==uQKkZ0k&OGu^O}?5FG~AQb_(etgu$M+MO?L<*>CC zHGQ&CPh$tITWO6KYD(ZuhB*8(%&`oAD~obL_WT{&M&3=fgeLpVg-?RVp}FJ;u)z5w z^>qy;ub4@So}YyM-)(5Ay$bS2)0oA#XPiT;8%C~JAY{RcDfrF?+Vx-wHL5nV6AEwm zRrPL!{k61vYd`?6=Zmu0pTo~_pvDl#OHUwvN*&vEen0p| zJJP{R$7rNaC|A&WSS0Ryms8&sMn%d2kRDLTHpjQIIcodp!_-t}VtbD{zT8cVb4!@7 zQ#W7cASq~IsjNBa2K<)XMwT7v>__L{+8c6u6g}i5Hu!Jh7erp-rLNs&OPq7@g4I)8 zR{Q~62dbH3{aDogQHIsl<7hzNl!Dfjk>fpWT3ZCdo>-GE3OaZ*lT8*CFkp zMQlS|9-a&ND)ft1QB>dI+T@PG_;Ow+{@8XB_Z~OJ?hmq7&yHlF<@gDxpQLD&(&%kv zE$)J^kMpz<#Om5RJ1AN84-GG|r)c#%WNljk>3kh4xSuZM{~{<>-2(rtaHNrY>%kZz 
zu=U9nJZF3obR>+~y~u;?d&^yJI{yrNr7Kus%s!fOqKGC3wy~vZ3g|M&4bQqB<$C_< zVp8aNmJpxFp52{;AG%y<^79XD&CJ6vcE)2^Fd`jxuUd#ht>&}1Mc483m=^RrTL=dN z{1~@Cmi=6`4$tp8NEuq6$f2ir`(54hwRypzj_!P{2s*j zNO2yqS!A?InoaIXG~Ls*D?f*_rOxmgsTovJ zX^gF7W4N(qJaZ6BqBr{_X!O_DU}f}&T`9~)@#uf-=%u+>;+IMX*Y`4sCCPYqhY~1k zK2$3&bO|l_nhl>$L@`%YfqSv&SIvk|fLkvX)t=KiL#x#^a9LIZ7MRB3#-M#{vENS8 z`?8;Drj?R|y5PGg`Num+x?tJyMw-|3kIkQ_#Ch2cfufJknD_iN=$o~IV#a@msaXkZ z(!xtDc}5aVx04{N^anH~9E473xw_csnBUz3aCnvv;}6;6=dfSs zG&cov15DWZ0YRHHIstj1>Ue#V0@-KA3ZCI`+MlmP&)*!Tbgj?yVPZW!e4K`sH^#Be z*{yu5zk%0f2b*TQY1P*yo4eI^fz`4m#Mf6Gvyrd=M>J~Da+|%%IZamAME=C$N z7O^PWiU#Y@B>$B-#8cgY#`Y{J&I1tJ{jYR#Wwb=hEyR?xyi?+;nCXo+>Yy7cA=qQM@_swQs~hD~@1$(LVHVizCYxMM{nyXO*x_0$ts^;Z5;$`fL0X zuBq&RCnwi}+|?bl>o$W67^T)97+? zJ63ybMa3sTL8E0+A>b8;UeL0NSP6vWQ6KNob!%sbqwb~`Rtn%4R3NyO| z?ZszN*`@t_`Q1ufTXC7SH!mb_HiIP(w#VPfu~=)YOpz0W?A6*VEcp7H4a`p^XXkz{ zQ#^>0{@K9Oi^d>bFb-|b?qGB6KVhrfbEmw!y*YgSFq^e!&-a526yn?S3A#$m$ZLnz()6s-#_;DPQ&LckndM=+RFNVZ{Q|wt4722q2S&sj?u1ru^H!LSVJeM469*#MHcvW z;eBr6oC~ZXO`L7WPXx8{Gt9R*fquS{qeZ{Rg3c-@C~MTgj++UzQzL_V)x=oPvH#Iu zrihk`@8|P3uAp-_6Hu)3Eb7lt61>RA*`vsE9CrCE=Bs65=U!vHk}O00sXxGK64j=L zP9>M{e-M>03FoUPV#JI@7NJ|k<;pKWkIA2So9=vA>r;uB@{eP6PzGE-{u}D93VXB* z+OTXxHLKYVQ1o9aRV$1n^xJ_J2lWL6=~0$wc#i+NRDn%2yTmQ`+r$b+#FO$MM>_dR zyv_;l(R<+@@yv55*EiuaQV?jNZJPPb2ULm@G}j%byIZhs~@fCUyj=+uLYmd*SzGMrR>=0KU`&@ z;05kCB-hW9V4^k~XZ7yJ8`Uwue{ttWrpQuU{0;t#{0yA7UCzq-%qt8VWP)nH0x-Na zmNLsPFpVd_xat|jDEg~PlRbqw zT^IC~-K0O~GIhN*K#>zi#gmNDGNqSAD`=v*iZ=#p?n60?P<*gsDVs8F1x;OW8rsT7 zqJKgjJe<4(I{)2=Aqwl@kU|uuKXo9twm@Pb`rNyrLZ+oL9-C5%F(o~bUDVVgBiZTn zZPFC-aTzA)LGmpA=nnSEpbS@D?1taz2k4}43>nyFlYz27?pv6IzcmwZDKF&v7G0pK zo-ueYWGs%_!C>!$!F<`aOs?_cP%K_mf+G*Cz{Oti(0gM#IY{r|mcH@@!)#^2IQ^Gy zT))L6hAidBg*Bq*qyxBmyB=m=UrM`HSV8;CWZdq$4UevswDR_O14oXX!3~<@@bD`$ z%(?R(OP)<+^|~(jKFAB_k9p5Z?+(KC(c@6!m^HWGbsnoZwpYk1AEm;^D5kVv6j{j< zy)^v9eN0cMhr;F|qNj=ni(;oG?;)XaGRX4ey#|I#Ido!+bbsFnn_C_jqJe)HT? 
zwCX#f^l9ki9*gFeudoV}SC)DDgXyQiZcrAu1qvRmC{k}>igFj3Wnv=E^vU9i_hyS` zj87MI;}TkHPyy%nn^V$xRcv3=$`)Cb)6F9_Y@*~m+GLhT)8b6%xN9y;iG2(Wn~v8u zyjCH*b;I!0Y8$GuO6I?<+D*rF6zRXehuPmRBGeMz_g<=2@SVHP!az?BoDcEm2PZDY zHBP2@(QhhkzBrWBRT8o8Ss9m}Xht8yM&a|9dcys5HAoILvNJ>e(X~m(=>DoFbRw~m zI!up{)%85O9Wa~*wkNS`4HHSntAti5mr=EhJ1Q>7!_Z%jv@Pg2JA7V)M8+?;HTLsq ziTFan3y=%vbZ@YvNF_}2b7QU-9O3&~Vb1$wLi2u3;TJ3pwp=0XyX*!9a7X?W(xY3c z6dUACQ+61FcWD3xot{C?^O8wZ!}LT{lW5^Q?2ONMK;EwA4{4gtZ3v>%o$VzWmO;f zmF+F8!NL+7msy@a(Y~3Lg#LoHKff~D$!Rz}VGk~;p9L?o)^JCrDdXi6K4{lbOl2!# zX@#9O8pmDZn-l-Se|8;k&Gj|k9`+u$SKq_uv@UN(&j z8CXFhI=gA-t&#XjGSf2hhZN0mi=iRMj6pnEgPqwEgkuZ5h$M>z##l0$EH=ZUQ9igU zIgYJ+b(mR%%iz=bJ-9WshQIJnmGS~)LAmS}sQKR~r(ZF2ru`%~YCmV5sYo>wohVQt zkc_UKrcIkInD5cuyo&1voU&^?{dl6srArwZ8fvq1d#pd42T9lZ*^Ptar2VU(ScjrAMdaOewPSpH!+ z4i$fbaitgW*;-@x7Bw2@n(W7tmJsae`~!u)NswTZj4IcTqE6d(T=A{Ia_qkVVRtda zs&&2-zMs7x@9QU{(P0NXeNF``>~FyOfh?S0y`S4w=SIrw?AhCIVpJcuo5oM<0ww!x zblX}F9cA+9RYfP1OQ_T8P-*`0;7M@N<`4wE5m?kmV##8JKYdIoV-q)R!FEHw_N288 zO`GmV7Au$X``&jjgXsO1s}Fjixu6Zp*(sroN+vCSDn{cL^RR7yCInsFN|$vO(|<-D zT=U+3+OVmJBDC9R`QJ0tx8^;kGTDO7S@njEdOn-ItNjRPo))2*mJjZDFYIZ=_OSSg zi}1&rzwlJcksDNcgN*Cvkk^L2G%3)ZUhE!*@n4VAH|aEdh2YhpY-Q2p^g<}j?BWo&I{D@UZDuXZv% zi`>f`)z!#RYaJJ)y$cJgMzex64fr^02-!4BTg4mCvf8C=VdeI5iPdJQe3CltM9GmN zoR+o;UG&|K596LQT*2VS)fuH{q4WOoeK-33a?*a`!E+4 zSNrpwX;HYzXB-X@JdPb(`rvTeev+kG%_}sjQ+xW?VEh@Ig5nTd@tLOxm z{@ntTA1ct8-#0Mxp@gVvdm1fvIY#fai^Ycap3mGw z3%!ZDc4&cvwHjWWKZ(Xzp8#_Mb=2NCl4=V!)1KkTr1Lhgk<098_@IwalQxLrLl@Io z)fUm{6;afwF$^7>#LzVLA*nW(f^u;dCT)ma>tzM7*Db0cN7YwoI;c^NU z&Bln5humZb!G9?uUsp2X1x*aHx4P*sv>@i1LGFo5_}>(DtCgdVf?C5gL7$7DhcObU zULeokDtZQQ-xjmeC1<#AZ+}qZZ-ctkO`A|bQX04Lxn#3Kj(xj&05&Km@S$(^V$@d; z90x;jm9h+;oEFEPhVG;jRtNEC$UYYRcpr@`@W-{II>FXD9ygtECueRcx7Ioz?;lTR zA7AQ&*JuvA8fL?vjqjmopAPEn52LEOKuk>t!T;z!OZm5lhJAVqBNjb{%Pr238b2Su z7_6g9+p6GZqyc?Dxs!rp%h89W!G(Ydnw!X`y^Y?Yxu@OZAkCvkz+{5_WNTQC1 z?C68pC;i5q(cQ^|64|K8rIo>7DQ{CqO4vsDvM1mV@H_BAio(c%v+z{j#+o_W(=!vm}KAN9HiY zdWo8*7l*lP5(WGh(>Hwl-Z|#lx_aDbNj0i2h~a*W`j5-lcaXa%8H{tQ8Tekl%!i*; z#+s>LnTOO6dacEa8XQ}|D)u-u?(>KB`to=r>KeZzq?j*$I|}Q}s-}M{-^AaHbY+>1 zP0ZTW8Kza9V_omx;H1^!3gmJWFD_YXF0Y#ghx{*N0{{gz*DKaLy~ zQt0y9JzVnD>tHoR2ILm%;a#nCxZGw;ISvt`fSuPM_~94sY5ZXPcKA4-T@ugdut_ld zrx8t$R)g@me)il_&%=_-MyR9?%Y<~dDjW!_Y{%oq|co5_gl~r zumLWvw8Edk)-X6^Def3*j&9Op*ubA_tRr*~bFq!*@bVPPD!vXTX*SpPlukpFa%0*( zcrB(KAx>9JcYj59ihja%jL7cJn;e`F#__Ccvb z3GmAm@LQiOco&RE_h0TPcmFVRQ&Fv}`CZAj?2hMt8HSSogk+jl(#}-QnbW5Hu{iEx z5loNhV=jWH>u|FN-Prb(b6Q@)WWGDm+%MB`gu*!L%G=D|eS6Ga%q)TOW@HuaHS{?C zBs@H?MR7&CEWkY))MfP9!ZJk~eAY+gT(<_<^$|Gus2fyg7_#c@(d4SUfL-z4$%&a| zv-UODVbyUnd~(Pb0u4{Hl?$e`L`w~7S^uYIf4Dy_KUh&~Gn7vgP`|ZKdh<>8w!jbZ$w>XK(r#$wuv>?ooreo_K5Aa@ZNO20XDtVk780 z{>I(5eM^@=_|VS}C|g*_ z_6M{ud2daeRqX)Z%Ocn*WmT3@kF0%PDwo&%543i~(9--_pzt@GO|tdFk*7yd^p$7e z<-dxeRn0LN^w<~P2ptplu~&!1xWKWl>`;~>lu!E%Iz~Yde11C>3jRA^(LV6|tc?<{ z+-ZkqG`Y?k4La)!;a_VqH*4`1aIUUnr-b}<)7WOTls?bLEgnOj3j^qZ;4z%m-^LFN zzeamcHZjf31=Kjtm0C8Gkqr)_F?PG4Lts61ceb(@o!{68@A2qqA4>LdvMkCbpSA4G z$B5pS?B;kMHmoe3O<(nbjSGFkw5DBP0ZzzDPiP+0B zBL%-(x)`=aj=^rxaVV@!1B5#a|#?}ja6--f?{l-9fO z>xU9u=<2QYnmmnL@^Bh!+8aQVw=AT0{=@~rNR<7khqgBIbX;yMRrX4P>nRmnWTk+y zOJ-4QyBe;O?tz8sYFJe^j|JNuU=J#%*Q}pXgvn<#Y0N)aw62gNDfc^=zpoUY23-P! 
zTsyY-@&mSM(Fx|n2^!(f{rs{ks(4fATHgOKmpP{BV0h1aD16Za>vmp%;yv0pLhl)z z&zvA~IUWNMeTmSh*3FL&v}3O(D3I;a6s~0NM(mf;gtSp|=vq<+x^KN;V%$g;6TOE$ z9&-X4t={2lw~71`>w7e;JCAuphr(jz!?5(_6k(s0#2&x1g-d6q;hLResNH!u<~d)0 zwmEm$n8G~*gGzx8{fR~0zIvusIg5=L`G-FlU@BZ3UwLWUd*C}f0X6*BZ z{T}#X5x(*x`8`~Qoj!5+YOPe=#_kDJDE|uv-#u4bs~~VvKgQ$A>ftcu=SC*sc@4HZ z2jLH~F8F%>sK5@@rxQ=_P~e%<^u%&L-u{;YR+&-M{AC779Xp1h_U0JhG>&4T?!Y%e zZw*@^?0AQY@RExOCA|(s-3^&^z@>)uu5071ZI(gY4q--{orr(refepv9h`jbAlgzO zXln<|@OEAR$%Z@^ojoTFK51Rtm7)avmXm@}pA#|kR2o}e*#)M52T_lIAgy2JDssAN zg_*}!z^)0J)b(o;+mTVoOkSq3x3kN*rEAOazQ*U;H`Tvb{fQ)A_u?eDVBZMm)cZIE zsWkdyxqwt|EkoKc0o2+&XwDSD`){xhuAbMztFv@S?`;W&u`*ye-sF&PLdOORxiRho z(<*(+SKOB;d!H;`J0cg4)O*r?^K$UC(uKsOF}UW946PKq2_54*vFX1DxVeX8I-gI| zUj1qeK9Mi#R2@tWHVkOiWlR`Kt*FVb4@AY3=GUL zuKzjL=kOmygg4Ps?=~=&o=9b4QgG-(b?u5#HYgW93>CstLH6$w_;bjJ>HM2Q4S9p; zj8O>6XhLZ@&us$1M! z+q5pW*5%J7n(*HY`t>P^Rv1iV6JMX=Mr&{5rLAvM?HzS8uXqD{g^b0L<2B^_J&8G( zOvj=uXE^l1k&V2pM*aUb@vUnNkrR4a6jQCRL3unbd*96m<=%#bQblOYI>e@pAU>+- zKI=@_2=S47c!%>p_?3(7$mv)RX55(qiF$@9#p0Yi=9%jSci+jktq*qhv>8niY z^QYRrE%S(T3&dO7n}qMU1ipIl0h-&R(A`%If;$VyfYXJA!ugpMTM6|_582YQXQ4M( z55L?#g;9lrF#FCY&e!D&c)Q*ZvO6(!J83ZOU9^J?vR7i3trO0)9zl~nS%PxpuX9>g zg2DB7H(Yh`B{jJc{=Urw&}-=DRYoep(P^d3?VTl;f9f=Lx=T=t&ntHB;u8E}ZA7Q@ zRdLlN2PkQpgzvUBuq*!&kI&OYvPtH6U}Fefm0L@9muusglqPPFLOE;O@CzoKZQxG1 zxiYdGf=bTfa5v}^Oc2$uDOt+kyIGREw4<5b9cqg6)x_Yj=X18>)IF9x=P}HhG=nx4 z4Y%|b<%m{peasgbf+S3z5^Rp)6ij23A-0V&tC;L%#WheN)k%MDnKEi@sUtxC2X*j4l zgaXCXP~~+Xh2?g!;H9ged)+FUIX;R#kZb}q@7sK|Nj$>}9f4{42V8QMF+$)v&$$`P zUN04=vrE*-Z?-185t6~n_1cnx|6rWGY(3kbwH^XPWbm)KAFdsv$o|#ZQ25tI@V^y; z<45el_w|Fh1&>G2lH(jT^!w4P`$AuFlqv3YtUyP>4;wf7Fgl3M!S>&|B<+3~?8jW? z4Sfz$tF|OuZ0Ti12j<}eK}YFVYU50loB6!5MJT>$G`-gl{B_r4NyGI3>~XcB=U?)8 zUT+LFD*0fNts`C-JwniN^-#<|8q&i4ifSJVy85$xdTqG|D(qa)ajp?veDNGFm~OXR zo8rxK#+owg3T;k%*95W)nZu-xeS=jaOSm z5z`cEl|Fk?Wk4ox^)eQpjO=4?YdxWU=1PbkW6b8r9by|E$kWw5vmqvDajjWX3?2BY z!)M;>ga_(F@Iuxubo6S#do!+p-vlw5F(Vfi)|~{4kq_yZ;4S^36pz|^0*`4yKlh{` z;fHuKzjW$+m}6YT`iIEj@Ae21-#UymJc2N`N0U7|sDs(GW99kn*+o$0+#E0^vi z)IB+j|Y@l%dVSluZ+z^W=5 znS$pQzTwF;wlv-h&bdvXv#*WWr|OF=z;+*-_2dS-n^*@mlU}ed+V1o>nAnjT!Rs*R z3s<{F5tsBA!=dNXS+0LD?-g+jk+vm!XM!$vDJT>yRi37P!t5~LE z6qxAF!JS7Zpx-SK9jTnn4kst0w21`H_1%h%Tr{U=GJ|*M+RwBioM^$edn9*aHa|Gh z9!AW30Y&#XHn7f_St?Ycy__#bed=N}1V76_^eyhhfx zI(?d2#!O?e=gz>BYJn@`DGi-jd$IR}J}&ss4F{YZDDRy+zMnT74`v6kjl%yYW9Jp1 z`Jb3_TPDm|m`G)ED|oG4!w61h62GUE-*cpk{dhc&1~`3`Q!t{>znIXuavJYlnuEXY z?qj2Gy@aN^M_|@cF7#5jvJd-0nSGowdt_+|5~dqSQ&Qj&#-42K`(tt>pKzfxbF-E`?uHXV!PbSz@9GMhwcJr^bWe zc#xoNOEf^|<`R+)y$T+MyO{N+c>2CC9(;_V=)teu=<}-zeow9dJNe%%_=zb$V~rHf z$R7!h{^l^ZC-qD@q@0^;Fpo;zTuCWz1zeFUXFV+nZ2J)ney*zsJo8N0f#FJ`7XhQ; z{q|QZeXJPziw$P4$DiSv;=C#2U{vjUnJ{X)|KH80sHXJ~ihHN;w={EZz zctU2DcJsxNtI=?&G=BFmg8esKxUU%_@M={&n>irK{kD|j=vo7B-tm?*UK%3Ih~k{` z?VYSLyOg(5x>ZBoWpvB>3#`!EfjMU0WF7Cwb@gS!=XfD=lX(lAu1eymrC<2|uhJ~$ zr#Ow8hWNO{57jNs@E1?LgU8GKxe&w6WFB!&$cY-WnCFx4mVsrm82^2Tz+||Dw!tuNwoVTm&fu8n95b<(=y%x^N)vc0fT^zth?zLd5%?$JFH7$E$zE3oy-O=Cc#Zl4)NxOjf>lS8{h0CKe)9};NXFqJX(K%0Y0P&F zDdn`x-*aQVts$et2-6By&_LKFcA!gz?J!T`u1Md)?=ro(0G9EUyB(=GG@63=C(K&H zjB@7P;njAnp#6I`vaS2#*q+pUHs?zlXK?WhcDN|wr8Z?6*yF{Tqb~DF_D4Bmb2YG) zxWpG9EM@mqHsb8Loj@-Fz+>P$XLk1oJJgfUuCxmKp3`%MEN2!`)ajP)PXaP76LT+q>Ykbgdf-fY@UQ#7M7Y1KZg`z*_j zhYi8P@B)~~D}&LE^E7ow1k@j0H-^*xH4a78c};2Y}XE_#xcd*m$k{9)%Y|D_tb~mA!G2u3!%So zY#yBo*vL5+%3Hme?u~}i6X|uH1l`hArG-ZFIJZu&Zu^Qymd^&}^Ey+<)N=NL6d`4U z`_otAsqM$$@x9NW^~{3K+);)QK|3MU~k&PGnto-QnDwgKWyB>&!*2<7m$S&3|Fw4kM%4Z%g6nvl>uq59SBYy~a6NQC* z_n;woLP-i9T#q5`fvYgg_y}9%Am|;r{Y)n;l=LpnBDdD5IPA+4cpa_=YsEDwv*Zq( 
zdZR*3wGGg5(2})ODAST2XFh!AJ|<@!j>SEP>F_ZbGSX0`cU=jzrr|Erk5s3R5-*^| zY$0aNJPL2YSMVI%6G_N5u&vU0XsRed8PY(JFBzP;KOfrLv#_V`xZr_XOhZ2J!04J! zeB{UiIO7}&DK`yK>)<_h?c-?bT5$_6+RCtg(Z|}-z-%t+yy1 zf*K9eCp90#hT{F?R<6u;jKI_02;GV$FlKBa%~+~RwKJb$_o>UQI=zjJl88q$yW8x! zsy5y1*h}$&!!W$Cnr-njpv(i;A#bt3bZ?l?t9hpJ9`6Rz*JMA6%+g|)yRUOj`v5O@ zo&x`S1MJX1DQ8sOB>H=E54;K62NHKa!qMSg=p1$k7A4g&&6B>6o-Ip`Q{pJ@zbX)m zn#R6(ZbD@v4YI!>hwW-$)Q`0suMK09s<#lrj-x|c%>w;UmoZ^Ct&H}m6T z+iMqz2)=DU#B%*~;GKmyW(EuMky{`yqnOO%KN`}9#RFjQFOKcMew^uftB~(QBV4y} zJ@4_TlwIF&hW^*Zk4b+dPf{yCYmgDTUI`R_!F0#7#n1JB0ao`r{b z!9S8#$wqbTz=B@JT86Em|0YZz>$UG7%|sfT)rZioHo<2T=!1#%qiEdz8&H1i2RB^3 zhLh;)V+%$(Fq<47bd&UlmWQd-ut8GLi-V|lR42PXwG_^)Zo%(kWT;zN8jlXS#hMlg z+~;R%XdYt8cDXh~)ZMG>!bx8;)8VMX&<;Npi3^=Vk*rUq8N{C2lF!P8@Ub`zVY|SS z>$(L)7HG5L_qkN|@glo+c}nfc`#rqmp-8UY={oxrd=iGtA@aT_Q#WzOU*;I6z}C%c zW0}`?(zz2#7+*D)W_7Pbdm)ei%xyT93p3@bwNd19qJcZ|CY8PF+D@NqvUvOdzKS|@ zRIwHRFbS_?qMO#Qn2zfsNbe|O?^d2*W+|2|_Uusl&?{(GG7JXkc~EnHEPRa)1n(Vs zlyl64OHhyJv(MR!#=aI|p}>t(4PC=UeL2SIY>dFY0h3AQ>Pm=GUxqz_rg&l7SqjRS zNfTBFu;7aWd~Tu%E!y*u#d#@H;Ii-hY^9-~6*LVWNcOY-Y3bl|R`8OJRHJp-VeH4{ zPPkK*!70s`7Jcc;!0qB&XT|H*E6`SlZjTtc6g!Y-T#Ma=oqQr<_Z z07MSk*n==7T$L0-pSl8}>%l)r5xRaR<=QiID}DNI<3m0HjyNakH|VS}pks>;a`Rj&Z_;H^!JE>R*9bOK2ZTWI=wMph4_37cky&+)jmP%K>g!hbaeOi|q zjHd!-K-LEhc0#h<@`!vb^QblEue>T?3PJbaNrAu~`F)$ae1O9vT9NGShD-diFTUK> z{t>j(E(A_e5{uTo4WCuKS?zuYSZpvAY}yN$)9j0I+q8_GyPbg#9!g->4-ZfrYF>M1 z-#XS8sYNx;W6-}u@P#SNM72-qxN@z~iE~DqO|Z^^)w;3RcNU@lvkzqqE(D8-2VlWX zd3<2k4vSMaqSO;P?AH|hW!v8KZB?gPnuQKKt@)h|tZ}B+BZJ`8MpHI>rY(Xrha(BInmQHpsy)uxcrC~-EPn!-?Os+hoUnLr|RwEupvVw zX{1q#j3E-vUQ31~L=mM)rKFMO(ufQtLYjn%N=Y+`v)7W6N~vfNY1BkTN&PkV?)Re) zxh|c3o&7v({eJh2#xtjo*34aesnnbm4~)Qyk~6}rv%krz<7iGD%NV?*m$0_Fl*`pS ziYp}Uk@}~6vG9r#R}I#{4sj}+aPkP0WLyLL={k7ipFO`i5r_-lE#km^-EsatH6b#& z3&vP!v)=o3x)X5?Y9AkfCg;9*dEQHSII|BM*+}ffA5-{9dKwLi$)F+A{OQq!Vi>qE z1~2Olpjp{2yuWA}MZa@{um@e}qk%7d@4f@fQ}^-AiKD@=s#>s)I*Er|)uf-5q%gG( zi^&Th%zRey*T5Jt^MDRJg^m-_Cb!_j=XdaL=@N>r19nSLgZ1O*!2Hxl@X*SZdW0N@ z;;W-r<=+k39KM&-R$k&TLv0xN{Qz`$GYikGDI()3%UH`Y6;)NngYrL_Ape;|($pVf zXO1ObMQuD@JAxzLU!nB-z2RbK6IvYltV=*JhuaTX@;LW^sDpE10zJ zlVJU~JJzn>&GN5>fT=Fj-fyPddg?yeosRK%_iO}Z#(amln@hn#i=eBzDyPPtq%Eta zg2P*aoBJo@US&tP`*b$;e{oaDZ0(1EOLxH4abc|bavudwx(;fcqd4W%e6jJzUrKt~ zmFGB=!nDD+xL0u#ebqQgQKiGUeScr}Q1nC1U>ocn^cqIrlUQ&UEog~?5)S@yfo_`? zVg9cW+WvYu&pLM)Z;p|2;?Jag-iT)L;I;uAB-pct#H>r*k_9F)<0$5;rhL*J6`Xr@ z7`*uQ0B$H53v1q#)56E9Y*T)KOQgQs_b3%{+qW##^4^aJmi2`Hs=5`wTE3Y4oWF?f zJsRa(K90rV#nBK2Z|R40j!kO|QF91sq;Tk7j;I<)YsJwsXR z-Wln8I*%V_nSkP9JLFU~Y;(IIhMmo&@|G9$)+-6d`6=Rk^V6JMbBZh5x4@|0dvVR} z?RG_0(Fe5*t{r2ZJ%b8kbKjt2hW39^a~dC*hkFz*&M*&wJA#w+cF z;YCZprcxbEJIT<>XEE-!@Z*T>eq6RJj{h4w5RdsQ(WCE)U{G%Z&(~ORxN$y|zPKdN zDKqRi{U+V)FY&wkbz=Ko?xdV%FTd!g1K-}H!A9R&d-3{z5*sTTd_q&fYX-O=K(asF>uE>86 z)5Ni%ljI{>^rReAUp`Vd66~_(QQ?U^8uBfG#{}dEGYt%3zk! zXAACe`Tz=*I`sKG%lFik!{l3*c>J#h`>be%{ViR&c*zhf)!GY#@-!sYaU9S8u?DAw zG)Z&5BdDd*k4%<*mY3*Ayt^VbNIS6|RSK0cWaT*YJbr}M4!I3Y0gC)}vK4m99nH%P z^gv^+278Qo4J|G<;<^XFXfQib(cus%oFv1kHAkT=z=V%WED7Zi1!TM`l{8fw8FIr`jg(jWBtz7%cJPjQHCZ`L}tUkFdJqZJj} zV%&K}%vhR>Mx*25@_`sG$xoy=W+U;J+5uh{x&w*>yTHT%17UX;DZgrY5Y8P*qjNo@ z(TF_Jcus#dFLmaL^&@fdzhJNwn_=AF50s~;D_l$KO)O-^3;9Bl#)(}$vLd97R@*WK0=AC+$5>-Br!beB`` zFu;gyRRxagWX&5ko`v|Kt4QaOG?TIQBcC&QaQ)6#x|i-D?N2)hyT2~y{!-U*L!ccU z8PgR;kNiwyvl@zfWL@XD&B`UF>H~#5Ed|>M2a98sa742zo>+55*fu*&*iL(3K*2%! 
z9AShzavFt|dne-;=P_8GHCg`Xd7JdPxx&3;Mf|pLfzaCOMRO(Ry|cvX3A+4`jFz-Q z#I^)^!GF)`^39Iet=64w{|v^%(jGx4yaJO`zRP{2{Y%C3X1@7T$_geV^6nlgC28YS z@XJG~3p9Tq-jw(mJ?B~CuY}z~zeSO>FvL|_<}Z=|@Er-$%D2<)MWNjD&n4JDL|Z(3 zySMPge;1Z!yrmRlX-9jl02rnI=Wy7^!^SMsXmtG-+wC%yAlsApDsa2w+=XC z>3S)LR!P_0hX~`UQ~5%G3gdhWS01_I&UJ~R{IWAY^evXJt!jjJ;|f8yXgtereG=1q zSYtw83pSk+2Zg8Nz{gb?tp^$5?@^t&A#@uT|6Ywv3CDm^Mq>6`H~e_#f|&9rf~Gdd zz+(G6!DDD1{7rpAdds!ha8*C-*f^0C)ve%^H~?zhUV~<22QhXgVo+|6wDfs4a+@UOWPinrw zuTl@_Ja!DMoG^$sn@*w9rekb=F%)Ke{!Z6dX9(-t7DAe`K0Y*66i(U=!iEW%5Im{B z zBv$lR5(XcSC7a8OaLL>}Znf!v`C6ChnBG+i?p7@XAM~dUvmHt%&R{9~|57-!E4k>z zp)wv~{F^2fbjG^7Q|bKR4DvVHPaE`}N~~gEEXWPy&;gN9kP?Pn;$1l?@+B!8nT3m- zZQzaKbDHqL9&IInWVOa4!oL=Dyi@_Z8%jIs6jR~8mlFPn=mN<@kH}o7_JoRaE5UmB z4!rHLln+=P<1@QfgL}RX>HfGYM)#

X}Lyy=;z{c=IIvXJg85W(}g^9&@<#q7Gg> zo&=He&f?&pHc+293$~oN%x`Ti(WCDZS~XP}oA=BXJ|4XQdKV7x!_>VzG*ZS7SIlIM zA!7vR*q&mnH{twqo)}(q7+>6y?to>bl;2}3jv4oeE?)Z#M{4KrD!ab)^yeP#FnBfV zt+p&#Stf^n6UyNIhu!%3O&sW0{Ddd-enKykj-1_jB>#D8h&tH=1wr)`JinVk*@|Ou z@A=0ctF+6>gAJnJ!pI&~a3;(dtJP8k zhX)d?C4M>28?!(-bs6~4ty$uFUy$9Q3OgZw2D_^8E*6DB#x?R}{-){8Bn z_5(T=Y{S8`i8`0rVXMsontJsmO>qZyOiQK{f2VD^6~PbZrM#ub;zZ9zAgK>*E+c#}}%$nzF+8cA& zgdQEVd6~y)?4UATC|`7~xGuUQTlQYhhmFgHjUU6o0M3c}0R&;K26(pVl3=whgjL66 z@>RWwFf3IM)`xdQSEZ}`Sjyzw{5wcExYhw*J4TXG&3L@v5CJ+@4$y#pb8vo6&*H8j zuCVjp19|m@XF_Xa0Uo~NLbJOkVcCYGY~Og44h+jIh#Ch2xcr=~P?YnU#CHQ+&7klDF_M?kO+@p-6J<84r~DgK>E5{CCl< ze=04$I)$u09g{DsIfxpXXXs~`H&`p~#n#>#RQYAM2rEXT@^M4bOg&4#wx5G@)@ppl zCl;36`77?beu0KmFQAq;3s85sKBg^wFJe;xoKn3a`*z0@4{TDvqht0{_vOP#H!cVq zcgWCmt|NLhAEXy2Ex9r+kl$LT)As8nn9;WY>MHtxTyn^*3qMbe+NQj2jRR+zePD0J zII42e<#R?VG%le;aymSNGGAkyuyqcVJeRo?vSxC2)lIsBM#DD4wq1dAa0So15Ch?_Hqcn_E#i$X+o`#|QZ}_x z+V9ldCja)X{NMKX!nTF?D7)DN(<@Ujd;C(=KW2ydxfu|9w-IcQ3m-8!Ln$uk%gBd5aqGX=F7+W2#r&Lmz6Sx;=@!C1W?|kFkJGy`1syJX0ylG>A*78Sb8!_bjcB@wo#Ea zRqNrYof$l6iWFY^$8h|v*lHD3&eh( zp29f8Z0rh>d#2ZFXj@sqlhjjTRF=fg(I#4SrmQ%)bRAwedPX#CZ3R2my}a5oP$=7a zSXeOq5e&DerM6Ec>{ip6KSZ5E^(nL9YkLy~xzTsbJ-_xTTCT}N_^>CZp=H9^bMa?-mnoG(O1QTX?>;+M>LR36p> z2d1g;vy~%Q>x?qS7w(3wxfRgp=|bjTI&;GFFbvD>DY-~{iXBo$37TCasVOu|xIMZR z{M(Y?u8IS!`lZ2jqa#@HqU028GlC~K%TPVhAD`WMAeP^*5!S3}ptnj}F!|ClUT3S% z3JWzPZbkrk3@qeHhcnP*)=Esc(F5htjx2k#2`ays&^Yfeg13|@@C(vch9D zxR(b!XxK-oVF&2tnWu2;;VwGpok_FCD>(EWeOz$%nZrIkoO!(4C9Dm0qkWT{glVaL zIICZ!@GQasH6NLAVP}+hOz~jewTZkdhLG&VWuX2J81JD@JGWf~*Pbl?coaynb9M_~ zmu{z`#13FNTcu=INjVp;wSe9GyRcJ(y2Rv~C?xtvW8En&ocyudg^kmlY3CJbPnf30yZ4`mw03P;lWT>(6VAhIqw7?# zMx75QDAT)xN721}q!2o1J`Ktoi&;P4lKrd?)MrB=?wgV%yfI%xi%SR7-t)I<_=eed zJhLy@^@=5DGkZ36`9i~+9l;xvG4)3hXey>+(#BozI(aw@7-oWNPJ~cHjt*~`*NX=U zOT{u}2U@;hI<_qJ#+6|mAob5#SQYp{rug3;Sif-%9ZkFj-CVcB`N}$~?p;GZbq=D7 zvnnaYZ;(%pzb*NZJ_*SR1rWE}30`kqMhAv>W(9{>!NXYMz%Dr`WZcr>RbL~~v$_Uw z=6HNLe-BO`S0tJr-ay^n*Yo-=$!zLwfcgLWu+dxWjYm}H>W}@|G5szJ5$A2@w$`#Xt$oYOK-rkxEUw57trj~Za{j10E zwqZ^@XIT~wDk%j0pg4JHZ&&zIbyR$k*%Q?^Y~|FstE8Oo9L`T(0i-)OVUQbG%dKakR7QD6FgIyHDc=eqt z^w!Tp)bIa@F4v4g<STkUbJG}1$)UEd~w~Cp4ficne@9yvFzC{`IHzHbbm1u zF5cM97nZHy>$=~CqMv&FuUUl6Jb>*ES97rYF*c4=P z^xD1$J{)d@4yj?h#lI74N#AQF2T#L*L;5)RTLGN#H^Q-l_P~sr9eC;LdN@(rMyDg* zP~TL2%s(&i$?HAvfyPiCZ@XSNSJ78orul*L4NicXu0Iz?o}#Uzp2~F79jIf5KR1v2 zL?=B4@UcZ5;7#{vaYL3HJ@i+@E>9NY__rRy`a!|i7=D9mTE- zJvjKg4c(D;Pc!NjIDg|rI43cN`UNKP9j|1zZ`A@T@1=aBVhG-Ac7RAzCp^E%iQc|O zUOYz$o(ErLcYP+DZ4=hhO)u4YMQLF2g8+O(OK&p-CE|y_5Vc)QMZD4(aF6uWXTQc zyK+7cX^NwH&rZXX+6?L$*G$oB5)1W0xzsh)VZBaOLh$M>p!vc_Br7C!BECEr1b@Of08U>O=gq2EgQ$#4t2 zez1k~<$Fn`K19gq1=9209X+)Ni1cgXFG%YSgUZ-m|=9j zp;u`HOVDq#O)F3Fl+rPH2FM~zv_9Ryw@!n(WjV>{&9nYCwHT|Pb8&0)Z?uNM`>+D zcMi$6$D|`k!sF{++~;AfG>5nXeNOxE;e}x|sjW>A7w>|K`ULjsw31BP&BXT5Gjwfq z3RZ5(#eKOO`MlRE9yMqO>kTa+jRYIqu|EiwseGgEzuwWGwJQAjq$34yoK6=q&x0vU zV6&@ZSY{i}TG=T znXsngAMwpEXZ#^$;V-Oj;=(QUFexH}T8k$0s#hmj+0+#WPHiQ{1qr;;BAqWyuZDR} zz4?=H5VGB4!R>H5_%Bn$AJ??GZtpg@w^#|Q#(Prr8Y98&qc2&=zKIPV@@aWDInC66 z1}&>k3SYj}%Cqmr3wL_`5+l(WzGrmj836_SZp~BLD%ZyS;|5W_rxv>8^h5hCV`$x{ zKB!tW1kYNv2%B_9a&F#z@y#nQ3e9R43a2WIi@fcb?tg=4i*De5YL1xi+<`u4nxazx>SN)d>MTf|a8x)v^&k9a zHHj}5noE2IMQpm^#{8&^YmS-l*;Ij>-}ypHP%kWYGp1zgWIX*)6S~>{6TBPl(fEYR z;2-}8&aaa3O%F9*a6&-at&$rux18@mAD%Zc1D5o#jx;*uI;8Pglfu5fRWn>=IP{3P8^XQBW{+535Xl0770nja}4_gOnrTSGhYI zPBg^bU-r|-*LLE?3#xeWZ>xAEDUb^O>cOEyQf@%yHkowoB|CL=E{*SM1)sV_kW$4=mm5_630Um^@nN@Z`?Wd5wb zflm&uqHj8B_ttilmQ@Njj<%4_Vj|5z576o}jzZD_&BA=? 
z#llKa?aU-v_bU}F3@1a6?>@A7?q@o%X&d@?eni^aXJfRdlm|$xB(0DD+WB)9&Og+P zn-^V|d0R6TO^64_pAJ%fql5;YIm+SNUJ5;?oAD9-SYdI~O9~0e6KqSj!f?5OV@wi+ zz6I_StUX`s?;B3f{5H|FmMBb;Tn{gfbd{f|@Z)Rz4dQxjl6q~OIQWs|?C37d`E?EO z+n@DtGu(!|6}%H3d7LH%R|jGI;5BGyv=FPk+u_(RdroPIrLIH9V%kSfp7iQ7_wCvu zbWq=eGZx;5Or2rWM>UMaZ+g6Q<##CiI|;Aet$;K4?NM)jop|S9Htm`{o+1uU!l1(^ zg@opnErt|-O&F)cg*>CmCk4y^4{`7+WYD>e7;~T?T^;N2FbBxwQdS~P1?c_4Wp^s=H(vjFGZR#aL+TtY2l^7^{Ec?j}r5?Y-_XVQdt)Kme(q*{Q#sb$5 zOp?dH=+4Kw>Py{4btu7aLUxcde|~p}X!a%1&stlW$+&QS>}>h7v`AWEvj#Su&g4l+ zUf`$R7ti+1p!;)5x!~tVsA*H@TNlfO%*KQEg`2j~_O>pZp=~Pvrm2FT3#;jE$S*S2 zn$0s`^g+kbHh4jD03~$6L@phQ{q43vr;KvJvZEFoC9Go~?+?P= z;1uZK_Eyj{PNlGs6@099JM?}29f!!;Y5Si-UNk?7pMM5c_jiIO**O5cj=W~sc@nj@ zf@l2}NXeQ`^)_p9k=%sS3R_^DPXVn>-++zXQ_*ByGG|x5g838MNHp9_Rw4JvZP79M zTCLCO0iC(OI0l2PLfBv4g`WLz!Q8O}=<*axj`$FVio;G}w~%tOG`kOp$H#z$&gcv4 zs?0g>$bPDNv<-4HhVhUM$$YBU1dJah&0WuIqN`h@FlVqO>TTYJ7kzrO!O16N;1Uf| zBM9nmneemK#VoG-KqzI@HLQ;cb$O~ZcD)U^>yPC6xtiEz!&{i@d>)i{To;dCsuaH+ z-OCfl=+L|O`)Q-Sl=Dqapsa}gkRbQKo;jKr_%r|wz01KdBZl3ACEx0nQof+BPbaP0 z$t!gY&v?|6Qd$Bada{PZcD2El7EOmV|BYh!BaoP@z4_{mc4+CnkadIa(<{k8)4snW zzZv?Is)kB4^5M6ITJ=cW^(U4s=1;|^=AocqtV2V69nec|inBWnM!UhoC}j3j{NiLn z-nyUR=|343jM2dEy6(jcsV|4uOXxhHO!y$%}>E=1D;9XugF!b_w+v@ATD@oHz>u{s3q zXn65t>He>7n9g%&^+6%Jp3colgVsf{m>4{VhmOA_9zAPGCb#YiRiV?xyS_KcTwR<0 z^j(B;FF(Q$lRAh$c#VuE#?unLH_&hSSghHh!ij~0c+Q4ByuLy&%eIhZ-7GR3Ex5UR^P97U1w*9B; zIJEH$IJ|yGV|~r&^0$H9Yx^@Y_*PD?r3vI~8wH9?dLF|+m{=(Y{Fldbz>3@Nt!5XEY!xkdnRM zDeQ|O|Gm0ONa$0KKiV8{#yNfVUUVLz^G%3TaiLExRkVFjs_^iaBRedNgn4)U+2Zs* zT6MPywisuKGspiHO7-$NCnE&E*?)teT}NQ0pEh0{5ya`QBo|}v<6PolBXru8KsqD4 zV!_F`(3CcwPk0oQozVx#uYW04CeOiO7bAfO?h%Wn{@e13I}qpN1YO)@!em83D7-OA z)b~n)4D3V%{%Ld^c2pnb z#Tid!ml~tN^R^Lm8rM!~=K`>5i5gzgIuGk|9VGrlC>~S#Pb?{}Bd;Is>?X10WA^+4 ze6RvjoK3iS+6)>TXojb59t8D$A3^?gH+|eA?ZYB-pkGD=b<2JZC712++P9%(_9PLG z1cvauyA8tlh7~yYkTP%en8}ILd$RAPfn5L4m7Kh`L0s!wQA=1rOCLUf(zp+hZRUU_ z-vPXuFTv=zQy77jUmn!Mc#xXSec^l_@DQQ2F0xp0l7oiEw#KQEuwwi8bC59oP8 zkrx*4fm45Tz_^E$M|hA|Y^0b6ncL%VW4IlcoA1GIvu;ABr#jaC&gIPMY68E~#BCeY z`M`5mGP>Cv{ah|n-Cw|`7b|GRjN#(MmP)wuv!JL`+X9Kvx1Wc_?G=^ptHJFmOWOEr z25q`!g8RE3frih4P}WUb9$RF>?xh38Wwt3`qPs(u)2UGQ{&@`zJylK3f5u_jizVWe zdHuLoljJX&vxhP+sPp3w_uRvxn?g9<D(oD9wf}%AoaUsa#`+ixr?ggdo?>qZ`$?X=!7&llx&Dk!oumj zfKNvb}QUZV7n^c)o%8`@L^0X0e zg~={4;Qe1G?&-asH%a}bF_Uj%&G}{cXGsIJKetb+FyWkJI9 zTpB%72E~yB`7Umt*55IBEWiuz`r7cI>fW^NuH>$rx(63eu_g~2X0?xHWVkjF!}7A( zM*FMm{Z3`xuY83z=S5&+o+p|e@!`?xd#E=2mvG8(J0y+wg7hgJc%POdk8(|+osoUe zMg0VOG-Q$Dts~$d?1kddN%&z{H-6(ho1gnjxxfKZPb_C2G@Z-jW5PsERkoMj{oc|u zF`o4^cB5W=J9*TffT#O2g`#mo=+814Y8%G!q)bi&&$NJ$ISt z1!vV;p)UI_tt<-0kt=)Aul3_m__ayAlBdb#t*eCH6QgnWIZMv|T_FrwI8}0>y{4t5 z=i$5dT*{I^5gb>Y!Z(pF*w*tNEM4-D&K&$?UM>8YuoTJy2XIroHAmbT2_eNrBy;eGH#Z*%ZnX~t7vH|nmT{H; zIiC=fL$c^*XeZt`V-yD`dgGI!mh||10j=>{i{T1iVe9LT_`2GZ&#Jmow_F7{c6bwQ z=oLoI&gn4nSRb@{)d42`)|Fd~wqz&&Ym&8HkBR!?YT?7G>FFQ)>HE0ayS_H6Z$RL4_&3~-QVIYP|_a` zQ~Xy7M!`|+ciaf>Pw$GCPMw7Xt*Z2|{xEIqSxJ9-*`k|mDCIPa7cD03#-i7?5^LlR z^?C0GMSF+Q?oR{QId>k{+}Fkg$8mV0VKz#XSvaU)3r>}#kk&tkR9en*$-R!e?Q>Vq z*s&EGirlc}Rswu@xQsKEOeBA_9*)tNNp0LJ8XNfW_1nuK(C{odYuaOwQcM-PCLBf>9sijJ}9>Gw1OAjxLzi zQHlA8!3YGt+Q_DYHx;nQQ zEC*i^Z3??m*{ubzC{1F{MOMR+N#DgQkBo8dlmfasset~5Ph*)lK;ppYU;QT7$dLkd*Z_RmE1x{U>Ey@+$zlV*3`eCBYUGb#k0lwaB%k@7^*)nwqD__ik zIhQJE*$!(Ej?Tb$yN`@D@@-kvjupBE26-HK^Aq;4~!e}N!Hi=q44MDTJTywoi?nq=ft*1uC)T0$=z)@%}t-&?>MpR zW_vuh_5#n|qDLCnx}v%FLmICAM+mg*V{cw~S2Ti4uw&5@?9)SoRReOcZ_Q)4te;I~ zSNn_3<#uGPyOwR|jUks1>E7BtUi`Q6gNXB{W8j=jh_RCk&Q~WxL|GO@f2?Ag8xdmF zpLg_3-%fz-N9n^xeLU0cDQ$Kb#xo)|nTH>YI^!e^lKeLvilfkKL@2s7TjR1F 
z_k?+mr8D)!NO3_jlIN5z*k|}L{`92@Hu&bz1il1op6(W&?z2JFx`_~7(1o%-_oU$l zOJSr_2kbJZ2<(RFQ>b|bog8}re$zO7(JzjdmK4#hn0@%Fc%PI%kT?@&C&|C8RdgO2 zDS8iz=i#p7Q2DS0*BHl(aZ66Xwq8Sd-?VSS7X=42*gq1-d8% zXN8N&n7k$ezTYzEkwHZeuwf6R^qWMA#S+&uPLGG=I&;#MEciKSi}=MnMm)8=18*Cc zOvgga(Y-xccJO{VN6of{Z4!&pU27ipyHLmm^YwAN`fOf1_7hw@d6(Bk+OnBzmT>?0 zV7x!8U3_p_TXeF2O*L&!=vU#-B~!NxAC`9EiHlEC+dmW0+i)VSoh|*_2`jFeKm64J<7Ae&w^bL6oCouB>CT~2{hz3jK7)2-W(nJBlqJ9PX41bA!T;=1alnca z`C{+BJgIRD*Tx)x^ogBu=9PP*Ppvvv_(q6AwE;TLBTDx@3HI}7#2^6O-!{PR>If|9PD6&bDTWZy)W7Jil{zYd8hviDJH*6G#<|G%-a+DlJ zk&8*uOzOTg_2%LstKp#d2`*GkC9NKb*ex@KR1}8stIg7$-RYmu_Wp!e-sw2~$sWkx zCHCD9*RSG*ftFmF*`MpD*j(l*REs3h*QE7I|qbVJsxzifF5m97NhMF=gwZZ9aT`2EE7LV%~L=wpr5}VF)_zoi;ykr*ZtRI9Udnw@5wd&a4AcmXm zUeL9QvHb0MJ3v}3Ox1o$aUWjNVI>oM*)GHTjkUtD-o9-7VW8xq>JRT!-pOyDi-9)i zD)pP5a+Ogo&zo=tN`Edw)ut7={I)q{4OS4Q9+)bwIM$bo!W3}b^GM0}Rsg-f%w^fZ z78usk13lix;EnQqV(;$2>vaui4~o*N%>BLu;kY!xEh4q z_ytABgt@fL&X6ZYN&o*U5?ABJX|`QyBR85}Per-s@VEM0R5~{fTh?3^Z%^F;>z$v% zo<1S)eAHMz+-D76bG}QPW=sNw3U%&91@=EipW*mqYxvaL0nKkc!GKFYf|E7qdY2qmm3NadZQoOtv~$^G!uh{vA0N;ijH5`TSHg05?m=)#E%*;p0A zO1;zUVXJ_lE1uH5L4KgC)DN^v6NL?4W4TAhC#ZjrBYUp<3R>5`r$kN#kB3({W#lH_ z?d-)>n$cW-{wVitUIp8)j>cCbOyN$x5%ga-AyDqYj^ftTOQLF>EyR^eorj&Z z!uZfcPKmUjcr!%hf0rOHUaiDOu`ArJ%oN0;DAX7GVEFgH6x;o{7}xR^Y(6IoFOm}B z>Q_5i*2^${xTj9)>&J5CxsO6s=2JYHKT^o(p~PxEp24JzyYazgLlEnesHXOvOmX`; zg29c@ZQE+TWa5ju=5bu!Q3LfZ|0RbvrKF?r1J*-bA>k8tyc&OEj* z9eoWlsN3n`;^+J?@L-`QIz)t#^1=Ibiu*G)&tWHn!Sv(5p?ss(70uilDM;rBEzL8f zfhNu3Zree^?@nVycaz0@;%g5uf8d1yz7IsZqHD$XPOPC5JI6uz^KzI_6%G0pF*t4Y zOH|x5kZwkWfMx3*8oApKSMO@@tA94QeT*ek|LubB_xbW<7ay2Z)t>{SyQ20Id#bSBMBZh= zX!84#ux3skZ8MU5&a>v@NR>7kyYvjz{5VSsPOYW)%|&E4B$A^xkD;b+4)OyNBBkEm zVA1klEH3eocE|6!;GWHmG-99RVeNf_4rc9ORp}jB_RImJZBOHrA5n}J{b1MXRgivd z7_XnGg!B6A!`)TNbYYz;+M9{6V^AR1l}X(5HBq>vtq_OlZNS?Mg^^?(Kag^2jIm45 zATei833V(NP&o9OzQ4Oe{+^dOeBL>(>Ae)g+d87pQ0d-o9R#~GnsUL%4!xW%2e2YLNEp_c?8YfZj$!f1gcs78e+SI zv!ZIXc+YP=hEKRfQL|p~qr@|OFl+=5==Cir+OHVPSe1EiKn2oZ5+gyy_5uRi*ZIv6c1C)<{vr9%uaxc&muU? zEtwPDngy3v^Vuj`6MIB|gmSxb%pPq+^LN|h#is5K(;li}s=qyl9kyhx-|A@4{9ZQk zkCbz=>xA$BxTA0HClC_2sCfEwl!ZEk(!j7q6goia-k*9&R!Mnq=Z~vsu3W**j~|F( zi{@fu`cc#=F5;fIbRg-<7E*9Y$8iQ*;bz5lHgy_;SI0euzy3$bV1NV5@-6W0@Qb`@ ze0TOcD>=iYT$rO(Cw^|T6s}(n#VGrJ7(YXSJ}(YM_wSu~%K9oHJLVf*X}%2wQb*tA zK{~$*h~*V+%ILM@0c<+zBfMRcM;D*F%aXEtp|{s#SQ;72i3c{rjlybh9siXS6nY5_ zujUK+*RzE?r&UY*E8BVBn@8lXqs5*RN^tKmE4;$(bmv97*fv0c&jv|3X!~(s*&yvG z5)E*#BXH3xLz;j6G53FZj^})RN4tW3+57HP@x6kj?Di@RA@{*cm^CGkQrvrCr|Kq% zi!Y*Ly`lK-Y&k6ceVA7@+`z3}JK%@Y3LIs)j|bG|QisR$VVZR2Y&B7()izRR`+Z;T zddQQ$^;72^)AwP)GHb|*-!4p(a_W`hVth4J%5`@wht;pY(0kQ3Qrfc^(prwgyn7L_ zp0Cg|AsUJ+FwO? zM+LB{fgCOK{HWA5k^1e(flo^a*KM_=mIn#4F>f^3ZEPc0mesPEjY>(+@d4PU=Ky|r zt5_%*Evegvy5pYz)}!v(r?hZF8b9hJ$`hAppwK^wo+h86j!m6uLYNj0e;+G1ZB4+^ zX#%7K%%z)s_eqZMrSxBj87=yjEv`||6la}Gr6rTJa7D^inwB{Q_x0+D9}az|m+FV$ z)VNc;>)k@0JiVuEQ+Gew)8nq(Y_vIsN}VsAHR?FfY79Hfz94MKngX^{wej()IC!6X zfORfLa=<6aJz`t~>(3tJ*2;r4?2{MPv<1+pcfMGg?JArwn~EB*y>X~fA92Oo7nGxw zO0Qo!^Hj$;VQ-5T{u?}7;u1<+yL=_+eP#!&Y#+&PKQn||v!y=4ITL)>i1kotK{c!0i%@Ti}WFf+paO|MqaAe2d^TcO_P5 z{DV~i6Tv4a27apgppo=G7$9*wPju_c=gdaZ{<;p>%M`Hp)D`$Ge*q3%Ys9Zc7k znuK+ZLp$fK!mD8mXnS8xC_b%;w$31VJ3PtzX#(D_8bQ5AXF#=rK0l&n$ox1EwKOD_ z`1=vU$f;Q{GGjV;E|juWS_ElJ9HLI_~gVaQkmWzqskh2geWmL zCU|4Uz24mDsfb?ZN5huQE#%*FQR<%<;n~Xf!s!iX>E!)?Wb=6+o>z0HM*}-!ai4m4 za4dwzbvw`1=jF6~@egu+)tRF`%Bc8gJ8{JP0&+%Ppt%dem{iJ8hRkXKNrE}vwMJ?&vQ@U{@y~v*qp|ebx?n=I! 
[GIT binary patch data omitted; the start of the ddp_slurm.sh diff is lost and only its tail survives below]
0 ] ; then fi # launch training -TRAINING_CMD="train.py -s ddp" +TRAINING_CMD="train.py -s ddp -c config.yaml" srun --cpu-bind=none bash -c "torchrun \ --log_dir='logs' \ diff --git a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh index e0326c98..9063271c 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh @@ -1,12 +1,12 @@ #!/bin/bash # general configuration of the job -#SBATCH --job-name=Torch_DeepSpeed_tutorial +#SBATCH --job-name=Torch_DeepSpeed_tutorial-1 #SBATCH --account=intertwin #SBATCH --mail-user= #SBATCH --mail-type=ALL -#SBATCH --output=job.out -#SBATCH --error=job.err +#SBATCH --output=job-ds.out +#SBATCH --error=job-ds.err #SBATCH --time=00:15:00 # configure node and process count on the CM @@ -55,7 +55,7 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" export MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i export MASTER_PORT=29500 -TRAINING_CMD="train.py -s deepspeed" +TRAINING_CMD="train.py -s deepspeed -c config.yaml" srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed diff --git a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh index 32e8112f..585308a2 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh @@ -1,11 +1,11 @@ #!/bin/bash # general configuration of the job -#SBATCH --job-name=Torch_HVD_tutorial +#SBATCH --job-name=Torch_HVD_tutorial-1 #SBATCH --account=intertwin #SBATCH --partition=batch -#SBATCH --output=job.out -#SBATCH --error=job.err +#SBATCH --output=job-hvd.out +#SBATCH --error=job-hvd.err #SBATCH --time=00:30:00 #SBATCH --nodes=4 #SBATCH --ntasks-per-node=4 @@ -49,7 +49,7 @@ fi export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training -TRAINING_CMD="train.py -s horovod" +TRAINING_CMD="train.py -s horovod -c config.yaml" srun --cpu-bind=none python -u $TRAINING_CMD diff --git a/tutorials/distributed-ml/tutorial-1-mnist/runall.sh b/tutorials/distributed-ml/tutorial-1-mnist/runall.sh new file mode 100644 index 00000000..7659bc83 --- /dev/null +++ b/tutorials/distributed-ml/tutorial-1-mnist/runall.sh @@ -0,0 +1,4 @@ +# Run all versions of distributed ML for MNIST +echo "Torch DDP training: $(sbatch ddp_slurm.sh)" +echo "DeepSpeed training: $(sbatch deepspeed_slurm.sh)" +echo "Horovod training: $(sbatch hvd_slurm.sh)" \ No newline at end of file diff --git a/tutorials/distributed-ml/tutorial-1-mnist/train.py b/tutorials/distributed-ml/tutorial-1-mnist/train.py index e66007f8..d9da7676 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/train.py +++ b/tutorials/distributed-ml/tutorial-1-mnist/train.py @@ -1,144 +1,493 @@ """ TODO: add description """ -from typing import Any import os import argparse +import sys +import time +import numpy as np +import random import torch -from torch import nn -from torch.utils.data import DataLoader, Dataset, DistributedSampler +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from torchvision import datasets, transforms +from torch.utils.data import DataLoader,
DistributedSampler + +import deepspeed from itwinai.torch.distributed import ( - TorchDistributedStrategy, + # TorchDistributedStrategy, DDPDistributedStrategy, HVDDistributedStrategy, DSDistributedStrategy, ) +from itwinai.parser import ArgumentParser def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser() + parser = ArgumentParser(description='PyTorch MNIST Example') + + # Distributed ML strategy parser.add_argument( "--strategy", "-s", type=str, choices=['ddp', 'horovod', 'deepspeed'], default='ddp' ) - parser.add_argument( - "--shuffle_dataloader", - action=argparse.BooleanOptionalAction - ) - # DeepSpeed: needs to be removed - import deepspeed + # IO parsers + parser.add_argument('--data-dir', default='./', + help=('location of the training dataset in the local ' + 'filesystem')) + parser.add_argument('--restart-int', type=int, default=10, + help='restart interval per epoch (default: 10)') + parser.add_argument('--download-only', + action=argparse.BooleanOptionalAction, + help='Download dataset and exit') + parser.add_argument('--verbose', + action=argparse.BooleanOptionalAction, + help='Print parsed arguments') + + # model parsers + parser.add_argument('--batch-size', type=int, default=64, + help='input batch size for training (default: 64)') + parser.add_argument('--epochs', type=int, default=10, + help='number of epochs to train (default: 10)') + parser.add_argument('--lr', type=float, default=0.01, + help='learning rate (default: 0.01)') + parser.add_argument('--concM', type=int, default=100, + help='concatenate MNIST to this factor (default: 100)') + parser.add_argument('--momentum', type=float, default=0.5, + help='momentum in SGD optimizer (default: 0.5)') + parser.add_argument('--shuff', action='store_true', default=False, + help='shuffle dataset (default: False)') + + # debug parsers + parser.add_argument('--testrun', action='store_true', default=False, + help='do a test run with seed (default: False)') + parser.add_argument('--nseed', type=int, default=0, + help='seed integer for reproducibility (default: 0)') + parser.add_argument('--log-int', type=int, default=10, + help='log interval per training') + + # parallel parsers + parser.add_argument('--backend', type=str, default='nccl', + help='backend for parrallelisation (default: nccl)') + parser.add_argument('--nworker', type=int, default=0, + help=('number of workers in DataLoader (default: 0 -' + ' only main)')) + parser.add_argument('--prefetch', type=int, default=2, + help='prefetch data in DataLoader (default: 2)') + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables GPGPUs') parser.add_argument('--local_rank', type=int, default=-1, help='local rank passed from distributed launcher') + + # DeepSpeed parser = deepspeed.add_config_arguments(parser) args = parser.parse_args() - # os.environ['LOCAL_RANK'] = str(args.local_rank) # may not be needed + + if args.verbose: + args_list = [f"{key}: {val}" for key, val in args.items()] + print("PARSED ARGS:\n", '\n'.join(args_list)) return args -class UniformRndDataset(Dataset): - """Dummy torch dataset.""" +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 10, kernel_size=5) + self.conv2 = nn.Conv2d(10, 20, kernel_size=5) + self.conv2_drop = nn.Dropout2d() + self.fc1 = nn.Linear(320, 50) + self.fc2 = nn.Linear(50, 10) - def __init__(self, x_size: int, y_size: int, len: int = 100): - super().__init__() - self.x_size = x_size - self.y_size = y_size - self.len = len + def 
forward(self, x): + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = x.view(-1, 320) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + return F.log_softmax(x) - def __len__(self): - return self.len - def __getitem__(self, index): - return torch.rand(self.x_size), torch.rand(self.y_size) +# train loop -def trainer_entrypoint_fn( - foo: Any, args: argparse.Namespace, strategy: TorchDistributedStrategy -) -> int: - """Dummy training function. This emulates custom code developed - by some use case. - """ - strategy.init() - print(f"{foo}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " - f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") - - # Local model - model = nn.Linear(3, 4) - optim = torch.optim.Adam(model.parameters(), lr=1e-3) - loss_fn = nn.MSELoss() - # Distributed model - # model_engine: ModelEngine = strategy.distributed(model, optim) - model, optim, lr_sched = strategy.distributed( - model, optim, lr_scheduler=None - ) +def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): + model.train() + t_list = [] + loss_acc = 0 + if grank == 0: + print("\n") + for batch_idx, (data, target) in enumerate(train_loader): + t = time.perf_counter() + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_int == 0 and grank == 0: + print( + f'Train epoch: {epoch} ' + f'[{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' + f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\t' + f'Loss: {loss.item():.6f}') + t_list.append(time.perf_counter() - t) + loss_acc += loss.item() + if grank == 0: + print('TIMER: train time', sum(t_list) / len(t_list), 's') + return loss_acc - # Data - train_set = UniformRndDataset(x_size=3, y_size=4) - # Distributed dataloader - train_loader = DataLoader( - train_set, batch_size=10, num_workers=1, - sampler=DistributedSampler( - train_set, - num_replicas=strategy.dist_gwsize(), - rank=strategy.dist_grank(), - shuffle=args.shuffle_dataloader - ) - ) +# test loop - # Device allocated for this worker - device = strategy.dist_device() - for epoch in range(2): - for (x, y) in train_loader: - # print(f"tensor to cuda:{device}") - x = x.to(device) - y = y.to(device) +def test(model, device, test_loader, grank, gwsize, args): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + # sum up batch loss + test_loss += F.nll_loss(output, target, reduction="sum").item() + # get the index of the max log-probability + pred = output.argmax(dim=1, keepdim=True) + correct += pred.eq(target.view_as(pred)).sum().item() + test_loss /= len(test_loader.dataset) + if grank == 0: + print( + f'Test set: average loss: {test_loss:.4f}\t' + f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') + acc_test = 100.0 * correct * gwsize / len(test_loader.dataset) + return acc_test - optim.zero_grad() - y_pred = model(x) +# save state of the training +def save_state( + epoch, distrib_model, loss_acc, optimizer, + res_name, grank, gwsize, is_best, strategy +): + rt = time.time() + # find if is_best happened in any worker + if torch.cuda.is_available(): + is_best_m = strategy.par_allgather_obj(is_best) - loss = loss_fn(y_pred, y) - loss.backward() + if 
torch.cuda.is_available(): + if any(is_best_m): + # find which rank is_best happened - select first rank if multiple + is_best_rank = np.where(np.array(is_best_m))[0][0] - optim.step() + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} - if strategy.is_main_worker(): - print(f"Loss [epoch={epoch}]: {loss.item()}") - print(f"NNLoss [epoch={epoch}]: {loss.item()}") + # write on worker with is_best + if grank == is_best_rank: + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} ' + f'in {time.time()-rt} s') + else: + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} - # Update scheduler - if lr_sched: - lr_sched.step() + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} in ' + f'{time.time()-rt} s') - strategy.clean_up() - return 123 + +# deterministic dataloader +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) if __name__ == "__main__": args = parse_args() + if args.download_only: + # Download datasets and exit + _ = datasets.MNIST( + args.data_dir, train=True, download=True, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + _ = datasets.MNIST( + args.data_dir, train=False, download=True, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + sys.exit() + # Instantiate Strategy if args.strategy == 'ddp': if (not torch.cuda.is_available() or not torch.cuda.device_count() > 1): raise RuntimeError('Resources unavailable') - strategy = DDPDistributedStrategy(backend='nccl') + strategy = DDPDistributedStrategy(backend=args.backend) elif args.strategy == 'horovod': strategy = HVDDistributedStrategy() elif args.strategy == 'deepspeed': strategy = DSDistributedStrategy( - backend='nccl', config=dict(train_batch_size=4) + backend=args.backend, + config=dict(train_batch_size=args.batch_size) ) else: raise NotImplementedError( f"Strategy {args.strategy} is not recognized/implemented.") + strategy.init() + + # check CUDA availability + args.cuda = not args.no_cuda and torch.cuda.is_available() + + # limit # of CPU threads to be used per worker + torch.set_num_threads(1) + + # get directory + program_dir = os.getcwd() + + # start the time.time for profiling + st = time.time() + + # deterministic testrun + if args.testrun: + torch.manual_seed(args.nseed) + g = torch.Generator() + g.manual_seed(args.nseed) + + # get job rank info - rank==0 master gpu + if torch.cuda.is_available(): + # local world size - per node + lwsize = strategy.dist_lwsize() if args.cuda else 0 + gwsize = strategy.dist_gwsize() # global world size - per run + grank = strategy.dist_grank() # global rank - assign per run + lrank = strategy.dist_lrank() # local rank - assign per node + else: + gwsize = 1 + grank = 0 + + # some debug + if grank == 0: + print('TIMER: initialise:', time.time()-st, 's') + + # encapsulate the model on the GPU assigned to the current process + device = torch.device( + 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) + if args.cuda: + torch.cuda.set_device(lrank) + # deterministic testrun + if args.testrun: + torch.cuda.manual_seed(args.nseed) + + # read data + mnist_scale = args.concM + largeData = [] 
+ for i in range(mnist_scale): + largeData.append( + datasets.MNIST(args.data_dir, train=True, download=False, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + ) + + # concat data + train_dataset = torch.utils.data.ConcatDataset(largeData) + + mnist_scale = args.concM + largeData = [] + for i in range(mnist_scale): + largeData.append( + datasets.MNIST(args.data_dir, train=False, download=False, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + ) + + # concat data + test_dataset = torch.utils.data.ConcatDataset(largeData) + + # restricts data loading to a subset of the dataset exclusive to the + # current process + args.shuff = args.shuff and not args.testrun + if torch.cuda.is_available(): + train_sampler = DistributedSampler( + train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) + test_sampler = DistributedSampler( + test_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) + # distribute dataset to workers + # persistent workers is not possible for nworker=0 + pers_w = True if args.nworker > 1 else False + + # deterministic testrun - the same dataset each run + kwargs = {'worker_init_fn': seed_worker, + 'generator': g} if args.testrun else {} + + if torch.cuda.is_available(): + train_loader = DataLoader( + train_dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs + ) + test_loader = DataLoader( + test_dataset, batch_size=args.batch_size, + sampler=test_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs + ) + else: + train_loader = DataLoader( + train_dataset, batch_size=args.batch_size) + test_loader = DataLoader( + test_dataset, batch_size=args.batch_size) + + if grank == 0: + print('TIMER: read and concat data:', time.time()-st, 's') + + # create CNN model + model = Net().to(device) + + # optimizer + optimizer = torch.optim.SGD( + model.parameters(), lr=args.lr, momentum=args.momentum) + + distrib_model, optimizer, _ = strategy.distributed( + model, optimizer, lr_scheduler=None + ) + + # resume state + start_epoch = 1 + best_acc = np.Inf + res_name = 'checkpoint.pth.tar' + if os.path.isfile(res_name): + try: + if torch.cuda.is_available(): + dist.barrier() + # Map model to be loaded to specified single gpu. 
+ loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { + 'cpu:%d' % 0: 'cpu:%d' % lrank} + checkpoint = torch.load( + program_dir+'/'+res_name, map_location=loc) + else: + checkpoint = torch.load(program_dir+'/'+res_name) + start_epoch = checkpoint['epoch'] + best_acc = checkpoint['best_acc'] + distrib_model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if torch.cuda.is_available(): + if grank == 0: + print(f'WARNING: restarting from {start_epoch} epoch') + else: + print(f'WARNING: restarting from {start_epoch} epoch') + except Exception: + if torch.cuda.is_available(): + if grank == 0: + print('WARNING: restart file cannot be loaded, ' + 'restarting!') + else: + print('WARNING: restart file cannot be loaded, restarting!') + + if start_epoch > args.epochs: + if torch.cuda.is_available(): + if grank == 0: + print('WARNING: given epochs are less than the one in the ' + 'restart file!\n' + 'WARNING: SYS.EXIT is issued') + + strategy.clean_up() + sys.exit() + else: + print('WARNING: given epochs are less than the one in ' + 'the restart file!\n' + 'WARNING: SYS.EXIT is issued') + sys.exit() + + # start trainin/testing loop + if grank == 0: + print('TIMER: broadcast:', time.time()-st, 's') + print('\nDEBUG: start training') + print('--------------------------------------------------------') + + et = time.time() + for epoch in range(start_epoch, args.epochs + 1): + lt = time.time() + # training + loss_acc = train(distrib_model, device, train_loader, + optimizer, epoch, grank, gwsize, args) + + # testing + acc_test = test(distrib_model, device, + test_loader, grank, gwsize, args) + + # save first epoch timer + if epoch == start_epoch: + first_ep_t = time.time()-lt + + # final epoch + if epoch + 1 == args.epochs: + train_loader.last_epoch = True + test_loader.last_epoch = True + + if grank == 0: + print('TIMER: epoch time:', time.time()-lt, 's') + print('DEBUG: accuracy:', acc_test, '%') + + # save state if found a better state + is_best = loss_acc < best_acc + if epoch % args.restart_int == 0: + save_state(epoch, distrib_model, loss_acc, optimizer, + res_name, grank, gwsize, is_best, strategy) + # reset best_acc + best_acc = min(loss_acc, best_acc) + + # finalise + # save final state + save_state(epoch, distrib_model, loss_acc, optimizer, + res_name, grank, gwsize, True, strategy) + # if torch.cuda.is_available(): + # dist.barrier() + + # some debug + if grank == 0: + print('\n--------------------------------------------------------') + print('DEBUG: training results:\n') + print('TIMER: first epoch time:', first_ep_t, ' s') + print('TIMER: last epoch time:', time.time()-lt, ' s') + print('TIMER: average epoch time:', (time.time()-et)/args.epochs, ' s') + print('TIMER: total epoch time:', time.time()-et, ' s') + if epoch > 1: + print('TIMER: total epoch-1 time:', + time.time()-et-first_ep_t, ' s') + print('TIMER: average epoch-1 time:', + (time.time()-et-first_ep_t)/(args.epochs-1), ' s') + print('DEBUG: last accuracy:', acc_test, '%') + print('DEBUG: memory req:', + int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') \ + if args.cuda else 'DEBUG: memory req: - MB' + print('DEBUG: memory summary:\n\n', + torch.cuda.memory_summary(0)) if args.cuda else '' + + if grank == 0: + print(f'TIMER: final time: {time.time()-st} s\n') + + strategy.clean_up() - # Launch distributed training - trainer_entrypoint_fn("foobar", args, strategy) + print("TRAINING FINISHED") + sys.exit() From 203e3db82cc23d11266aa2ee4e6d941254e8e4ae Mon Sep 17 00:00:00 
2001 From: Matteo Bunino Date: Tue, 12 Mar 2024 18:23:12 +0100 Subject: [PATCH 037/171] FIX small details --- .gitignore | 1 + .../tutorial-1-mnist/checkpoint.pth.tar | Bin 180274 -> 0 bytes .../tutorial-1-mnist/ddp_slurm.sh | 2 +- .../tutorial-1-mnist/deepspeed_slurm.sh | 2 +- .../distributed-ml/tutorial-1-mnist/runall.sh | 1 + .../distributed-ml/tutorial-1-mnist/train.py | 2 +- 6 files changed, 5 insertions(+), 3 deletions(-) delete mode 100644 tutorials/distributed-ml/tutorial-1-mnist/checkpoint.pth.tar diff --git a/.gitignore b/.gitignore index 7f714a0c..2d422c8d 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ pl-training.yml *.csv *tar.gz 0 +*.tar # Use cases files MNIST diff --git a/tutorials/distributed-ml/tutorial-1-mnist/checkpoint.pth.tar b/tutorials/distributed-ml/tutorial-1-mnist/checkpoint.pth.tar deleted file mode 100644 index 264bb34c2b236ae1229f45f4a0ef49429c9269df..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 180274 [binary patch data omitted]
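For reference, the updated train.py in the patch above follows one recurring pattern: instantiate an itwinai strategy, call init(), wrap the model and optimizer with distributed(), shard the data with a DistributedSampler sized by dist_gwsize()/dist_grank(), and finish with clean_up(). The sketch below condenses that flow. It is not part of the patch: the strategy class and method names (DDPDistributedStrategy, init, distributed, dist_gwsize, dist_grank, dist_lrank, clean_up) are taken from the diff, while the toy dataset, model and hyperparameters are illustrative placeholders.

# Minimal sketch of the strategy workflow used by the patched train.py.
# The itwinai names come from the diff above; the dataset, model and
# hyperparameters are placeholders for illustration only.
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, DistributedSampler

from itwinai.torch.distributed import DDPDistributedStrategy


class ToyDataset(Dataset):
    """Small synthetic MNIST-shaped dataset so the sketch is self-contained."""

    def __len__(self):
        return 256

    def __getitem__(self, idx):
        # 1x28x28 input and an integer class label in [0, 10)
        return torch.rand(1, 28, 28), torch.randint(0, 10, ()).item()


if __name__ == "__main__":
    strategy = DDPDistributedStrategy(backend='nccl')  # assumes one GPU per rank
    strategy.init()

    # Move the model to this worker's GPU before wrapping, as train.py does
    device = torch.device('cuda', strategy.dist_lrank())
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10)).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    # Wrap model and optimizer for distributed training
    model, optimizer, _ = strategy.distributed(model, optimizer, lr_scheduler=None)

    # Shard the dataset across the global worker pool
    dataset = ToyDataset()
    sampler = DistributedSampler(
        dataset,
        num_replicas=strategy.dist_gwsize(),
        rank=strategy.dist_grank(),
    )
    loader = DataLoader(dataset, batch_size=32, sampler=sampler)

    loss_fn = nn.CrossEntropyLoss()
    for epoch in range(2):
        sampler.set_epoch(epoch)  # reshuffle shards each epoch
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = loss_fn(model(x), y)
            loss.backward()
            optimizer.step()
        if strategy.dist_grank() == 0:
            print(f"epoch {epoch}: loss {loss.item():.4f}")

    strategy.clean_up()  # tear down the distributed environment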
zm&Ff`N3H>j!2FvIJ2XC7to@IqL3|C@dbJRC)_1_h%nWFoA4$ynxLTTGz0Jpn`gpd%oV%~>Lui1 zXd+Pz`$o(*wUgw7vbc9^2!SznWcll8XzG!Kpqcsn_4kDCa;xM57fI9NcwboW{tiqd zC!_sKe^T$2@igmE2d$R7g=;QMgc_;6VC_8#j5TLrKSxRO(b9?fKmNkotBN?hdJ4@E zjixRBa^&XoxA;?NBQLx267?5KK~nS)+PKY0X#KF5IPE_}h6j%)9#bE4y}<{t-Uf4$ zpYevIm=uxXv6G0-O9LWIdynH{wa5&KIYcErlcXFkA|X$#NtKix$@M--LSOI3{=O@r zGU2qq*iQ><`bOc;Vtun_do{NuV;-1qF@ha0TVxG5AGWB z6~kUq>vVV8Y21a>6wA;Ui4?qPd?tE!a z^}hBI>ES;7TJ0;2@O%a)_l848x(06F;E8mMjc81kD%P1j1@{~7KqWi3VR?<6IOb~t zQKX-7PoNz+^L`#_Uw(@m{&-XDk-SZernqsf3C;CcA6v$Tf#%P@B<}jPoYl!4NS{U z0eS0qes!%X6wHz!*9(k^te7`{XW3hP{*MBw+;YgO-`xRsX&4fz+->Nura2yNZ;ErD z$&x|kC~UBHDc(2t(OYYLby~S`>J{$y@jQWu- zx@T?#G|S8dXNMEC`Og$^eSR7`jrDNXN=yFExe;Lg^bXDUvI4J9uDs8F5%sCOLG{Nh zC#yzm=9{hJdGcfw-#ipTa&^Q!M9)ZSUooD)Y_SP$c9~JTA8Yx`dFT1Q)zhG%_!N3Q zXB{QWTKKkho(~VcD0m>3Kzpqwa2MTPa3+a|7dQ&0JW>p0qK!LRQ=O(dNpi6%viID^0ja2BJ=$;NvjH4mu-fUEnaZu zS2;~wyaol&sYUzr`-E%%uEpn)U9sM_`{+sAe|W~_P|mPmII>-%C+5x_q_J0=kZX!O zAwNuDxZF2%xM_x%4H=H7+p3ZcV{?e()@I^fbRIt{IZECi=pw$3e~8420OID+Mb0OD zBb_ZaubPR1ba-u=|4}f;d{BpJGWZwCMBvO0P!uObFBTSsg zrJK$qe8Cc7>wazWBy}=rdKN>+$$3E5!?9#*z8v7D>^+f1zSsXEZdFgn0jWo*UH%$Wr#uJs6j|`LG(yi1@jhIiTm*v! zqe#{J%V?9^X+CaCHEuJ^$L%XBaQH6{{cP9|uI5Tus5B9dEvw_yZRKgf4+UBjAdgg& zBzY%)J#O7>dnO&b8dSur3H#jTU{-R1<-K3Zmh^|xD~;yt%Fk_VzTqgg^^P{H&l`l3 zcidT_;&C{<{~HSWsDr*H9)V)xAtbFNNj%o*lCaHJuvbDel^MAYpR`&Gcft!n)8ZUz zxsi@;{xpG(^Wv4qXVm34WeTIF(gBF9VtlvN_?L$A@`fpsHpK7)^&T%oq6jA z?79?Owz*37YpB>b#nlrv>&wJw{zGhbKty`CoFN0JbxFhP23%SCAHi2v6RrOuNI?2A ztfzgH9PUdc9;Ta!?T6vSz4#Qi-yqIs?LISY&Ukx{wZj&+CyB2KgFy3 zB2kRqYrO7_iP(>N5zJ3G;ogNy&{ELh%{zyco7RtjWFxV^cKSyuvTnscLZ9MO!xP9g z0|EVcUd%MJjKEhWchS;0m$7BGBW^V2px5v_4IAr@m2NLZHLubH?e!_w2CtmvCDwbE zzj=R~8U|Ni>s1txomSy^Tv-`qdp!kTe_(PQ>KNI9QW#{h&7u|VV$E7K(BT- zuXWmy8R*HeuIn3_{OMc#W2J3O?@ljU)9k>;Rr}DNV^} z2{2a7n>}&Vnb)p)LVut4;=W9H#M`q$x=WCc{t7Qb;*(irU*n{it9~FeU0FdF7-%z> zka=vyS_v`7X$=nYOW{@DC8El_YtWDp1*0E)q$VAkQQEuHxWrtNq?ldBwD|@8;E{xU zhfC0v2S-A}`aAsA>vQp)AWP!hc?~~yorzxUNGIxI1F_E0W?k+n8PK-nrK-{o(?y_oE7` zGh%RVOeWJf`GSudaNm_k(xIS)Y6t|^Xkf8TFiSG7Okb6}fFB~eS$45kz(C^$4kz~;_NT+fp4;tvqS+7t{&~9t>nm<@{X(7pLF(;O9bE(W57n;x2b48ZBIPX1be4Dlr>Q1=E zeOVqM@I0AGFWxKT1%>k{{c8)So~P6Did@jycmNJ;FaR~FA9U!IIc%Waur?|g`l{UN zsrRa|_isKzr^U|MMOrB1p8_7T5NpVO>LR41CVDf|lR2wPik`fc6;0IFV);8CvZ&eD z*#6FmG{2^bX0MiGaUVj^@Z?mEpnKL&CP=f1>2c68YXv*5I+;c0yo0W)d{B$_VY(4x z0llU?`*9qeNUZ?l8FTmr-CJOfnk7v2&)|EDWBDyAIsA?*8r-yZWt{2UGO_orjlVgj z1R2zngFtc&abMZX{q@=gtphpui@|H$jDzXL&e6Q{;}F$F?8&n zi}adNGS4PS({9;mbVE)SH99}Yuei+7HvMFJcbPBf--w}kD>bN^_i@SxNPyGJZu)G0 z0R$e4XE)EbGo7|~w9LF8u4&mbs6N4DwIyM7bssFMwqdOu^V#{ynr!iCcedSjHS06b zf$4+tC_v)@EK(5ny&C$^o%0P;6yjLiiDfKQoMBS_wUgz(oWb6C&BHo3cA=z-Fjg+^ z1NN=zq4|$}`OA+)_?*o*9MmP=xw2>>(SF*+(+4j2nD1EZqGW=VOR~`A^^wTFOA`gE zSD<&bV&8|9E|Duak4G)oidVLm;qR%A?Bt0AHhsl0cBM3#H3d?Z5@N@s9}UqRgAT0b zu^9@9%7w+s>ChWxilwqDVV1X;t==({R^;i>#z)tww~G{H4$Y;e!9_IKP6LXzw7_VY z2Uzek6>>&QV2O{UY32cKba<8md#qN<9`^)@e6Km!G*cvD2dc1 z+P1(Fg{H2=1Se2gUsas>&kBV+zJy$51Ly~7eH^#fNwEFg8VFs#6@@WP;f&}RLML9H zbD!qU%UzC!JHdlAp;H@O(6NH}RtI`OQVXOShr@BB)}!!5Zw%?`St zMh%US%LFf>A(PNE#V*6=!Q~C6kTjso_B}{r4gdZyL5QlTu{Mr9t=AOoFr6im&~*~! 
zcYI_MI8)IFX=jmF+da1Bww|c$tG&q9>n>COwUD_6J>ZpGDRp>y8Q7&;R3*o;yt((N z*aT_?eW6~EW$(bZ35SH1&lXeDxK`@%#!)y%YmdM!-iKb?X+%G5H-Mzr*Jz(k8g>>j zEHr-!kaie0YTXvxzTJbmhu)!C+m_&I?h;si!W8QIQyu-;zYbH0VM0r12_)nl_%RGZH_*4;!aO)Xip3;NHnH@OEhWc21nZbNClnK zIEp@gTY_eq9HFT#h~Dw&<&JIGjyzIS`6(ZAx!%L)P*i#amB+*RgRADE5AI2HwT~*d zkBBuvx6hn>MUxX~TiGGHDc*vr-Jip)i+ajc;(yk>XwX_hDbhN4Z@0C@e_sX81$tE4 z)*RK%+#|e77>Y}O#rYOw(fr#Fga?f-Bc&f!Xjb!eKI@q(mC(wun)Q7M{Y;)A6sLQ| z8I4F(+Tq6)^o*k+TFPkoyLIS|-$8z+r5>_;lgCR09HP6_CkWe@k-S*_XX9wuf_xPbk1Ti5AO@g z`*olH+5Zq3-~Wj2%xOmQLHp5*l=F1-x(~eZfz{ks#kXA8s_*>uPcONYTSv>i1mkF& zlreqEALerUhRVJB!|4r$NA&hQA9PY8e&&%#E3|K_B3^Pu7T=R7L6wT)YA@(Ea>;B$ z&kx)}5`Sb-o?bb+R3(e|UnxVO0)4EQxCD>YHpC|jQqTkQGQ8EV94EaF!LR4bqwyBX zWS>|IzI3{ZOUp_X?*ocNrn0GM2b?Y)Snr|v}ohwpQa_f2WIm`xqIP7WJ; zWFqx{5;%TykWkKmS^gYlg6aJUIJ(wE&}jAg>ZCgg_{OqtD8xIzEUi@k>WZS}xaRjc zH1*nGd6R*HrOl-M_=8I(c6;H3Z#??NA9@poy_^)h9K`>a(8ssm(oHt*MR+3?-3#h633&YaMyj zF5anPp1^^yjPs7QNBv73k*4fMF7aRzi5Su(Do#<@(B~fN>EwlRzozp;3NM8TW|z52 zw=LF6pFeT>PMs*GFA*K-=J;>PZNjOpXRYkq^@UzfZ_=fk3g~^)Otk3JVRX`_5A_{P z$HP7);IdPJxOzt|8g}R!4hwF?4-?{XyW9x;>hcX7_vaoyIoOC2mmI)Zi8t{R?fba= zjy6tsSYUlwR4rHxv(di$>(TN6KdX<0--Rm@T##hyM^4^b*ZO+v3t_hXAafr5cH4`g2Eg%$;PbFaAfXin%ByeIh}ma&+Le1GPlQDQyM)b0gec}}qX6B=mBLF(!f~u%B;K&Qk;_i0<>slJ<(%q#(Kol{-0#Ji+)(l<@pUQY z$3@Gc124{T8QW)|_O3~I9USHo4c&!0gVnrf-WhJBQ91-noWKIUxw5HZ|8&5jjZ}7z zB5cWZhBxzonkRbB zTZ@_pJ}|xXnW7zjwxYGKelth25hAs{<3zf<^hN0-YuSg0NurdAUZRKhez8en7P-dl zW;PXRiMZK1q6FC&EE;KxOv_Y6EqBI=EIbpLjGU}U_L;FLd!mHM`-~j3oBEU;%TZ^g zbzk|7*A4jfwyDg_M}_r!U1sXF|JaQ=+nL_QD!5!9!M;~eAh*?-i=!p0Hu**;$vB~c z&+C}o?Q2lt?7|s6tL4tGPqwaH?o79)tb&>mGnlaO4gKop2oKZ*V4fPo+OGcQZ?ChW zW`PE*U7;7Aty%)In?^FfRo`G*LlN{^eFBdg&*9RXP`Iczp2c~{Fv}7%7FT%}bW;y7 z*@bHW$9S`-N+Yn{TFz7#q)?3^b@Bd0M%^{Utj_~m*zMv@RyuCD$R=a8$hS3vHOPEm z7BekGPmMi9&sD8N{+ioGn_pOn^4%;%1^=dq92+K!>O>|Y$5nqAS9Ab&ULlOQ>|=!w z@6cf%=d-rv9`K!R&9vjvX|_oGxtPb%JN6!A-;trNwToEs+%CvHFF;PuR|-z*cY-6` z&TNmgG4LNQQqnVK35|Vh$9j2@#LGLZ^`nSw`mcsP;;LE5{Rb@m;~3E!ahD{4{G(r9)e zZZVzo98)>rTK+^>C9`{L$(n;J_#Y7?1uYOsfR*b-Pi?pXscs z9U#u*E2#XJ49&q>O!@gR@cxv`UmoobUye)%MQ;oKYJ4Y@hunn7U@ho(cn>IL0sF0b zoE|%14vFU+**nij=$ak|GgK|8usevcw*`EU*+go$y<4zRT76qF%_Mi*Tj@MW7rb429WYp0Fx9a zw)fNsR_IkVzV%0gYIwjW2Pe#y>M~>1zV`uX#ZO+jo-C>|Cuw^L&18ib_6H|;5 z?{)a;$}Ga-*shSB?3ueglYdXxv^fUM*;LG@O`>ei(p5~e^fEZ2Jv7+%4Ub7_=y1``bRi?3`nQeSp1hUPZu);kJx{MbwZU3X}Mr9m(x87RpmqaWT>iz*uIP=!wnK}^Smk~KQk4N zP01z(Q}+_-6&<+YYc7s@7=hhG#q(}C0U3WKUaU71)4?e7DP{7%r?e;9&KO#tELC1QS0B-GseP27S?ZgQo|q6Awnxo+Wo_)SgV}5wp^c%_^h2tfc@c%!4SaE$D;P5S=l17RW+77g%c{ zjA?a%fZdCzm1n&${#1(f`@|vs+*v~usw;)gNQOd3V=12%6NmnueM0lzorMX19r<9< zU3hO$3X)6{{IiWAKt_U14{QaGaDR{$o}sm`uTigfvBzwk2J~1&&=n1i)Uo*-ZFD>X zx9&R7h8@dzdS@y1Qa@ zY^}aPgJXoWa()Z_@`b?A^`lVdD}Y*~ov_#97FD$wMdJ&9@Y>5wAW)S9tp)z{XZu@P z@~)0bc#g%xUOJ$`>j`vdoH{+8nU5wAEjadYnRsueJTms~MJ@-E0jDm2Du*A)YE%zD zPnE&m6M+!$T)Z<$aSHYoYS31*ySzjCpHi198932kO7{d^;gkF=OTBI$AinE#IP3pT z@)19e3J)(lLW({OQ1d0q^#7vhOyg?$+A!Q)2u;!?NhMT@)Y)r2J4q!ZMG}%ZBqUQZ zg-VemO-Pa^M1$1XYpn($B!q-yh$Q_<2uXPN`?W8pbI$&qz4r6m_jO&)KS$?_@94_52ABoac@8LK5rqUm0_tAUb6X*mXf#~<7(5%HrsF_D938?xb+{#@+^+MdK z%d6Yu-EB z-Gl6@0`nqc?&Ts_H3H?`Ea$@HPZ7QU5Ds#;5`tIzV?E>HIPKXpRAt#rTn&G6#q}|Q z-PpwfwCf_{2Yb<&cPEh3cunFTjcIq)N2;gznWl}(q`kM7&>bx!>7|jHlwTswQua@$ z(|vrY_X|v$SQ34C)&|ubPb0xYpNNsxecZVnGQyj0_mSLe9(P&HfbGX7qnLyiWbvpO zsoCz7-iqD@!|yDGmEYB&;-?Be^LPW*6l6i(Yyud$1BA~E=i2|QEO~lsuJqmKW?vaVwM+A1@ZU{XM<$)t74!;UFR#K0L=Vq6Vkz`J*}y#?vETlZun?WE zTZfm$%%lB|RH4tkukgW9rKo#m8x0@u1g5H-yI^0k3J=l3v0=yDO)L0%qQdb|3atBKjRl`ozR*K zBRC7s!`LJ5Beqa~MGA#pblE|f->Er-w@BJR_D|LWlhhWFCZvMVtwT`#yAAXw9DsyB 
z!{Nu1XRx7Imfzv14q{aXr~T@~m0NPLN`8V=%kK>Ncxfv1ds~Tmn&Q#7xM!HQ7{M&p zY@|Qw1a6#s8+|#|j=PJC@yO}%_(7byFkg=E5j+!~RY33v(Cmn;OhYukApA;7aCE^8k z&1CuA7&PqNPLPa#i}%f6Bq+2v!TPEi+9<2(+EnrekmqrZR@o>L{&X4sVb0Smo^K9n_}yKTy_ z5%>$G-IvKSk)>d5{ardH!b{{znGj#SapX&e*xg!p51qa;o-Ar9AuTS7cWaikDTQ6O=seas%zZVo#Dh-(K^HEU&k2g+4UAjOpk>xrSn+jFmV=GauA<<*$SL$C#28Y3)W-1 zv5L)E2o$}`nFkYL+Qo8s-#-|p|BMEYKkD>L#x?l+@CrDtJ_8LV)7kzxtLU1gbLkt= z#}l3=Y9ZCqY4jf-TJlp7`PO@oYl>f`&liYu=$Fa>pVtsmkCZZXw=D9#MGwDKP^Y72 z9iTaprL_M0S=wD!L?$|0(DJ3)VvjbSrWwDX_x;1jjr>;9(49oXpFW|Hsl6mi@fo?4 zxR&0zoJ8vneU^M0CriWoyre&tbl?HL=LIV#RoFLHeDh~sz=UIWkl*G+{5SVF_6UE5 z-e?3tVv;KKNz3uo7%zx6?+?v~CPLOXUtC~4QTQafwg1@ku@P_nuV-kl5K)s2C8=s2;H`C%cv@zUTE)(z$R>P6?dbrd*Sd4sjc zGks5Q!nTddyz@XW-po{mcllNX-Lp;jx)C4Y#mOSlea;I!B2PgzUJM!hNVqj28Vq9H zAUPahT04MtnF(ZVzXRmUP1rZS61(!Pm^_$=71wCsn(8&&oGN2D_MbCkI)=l_C%NFT zNsGTNe%{k|?gNeT2z+tI5peldjemTb0=3RQ=t;RIZOASbrZ4;}yvrDjEVHvXXT4A& zqgDym8ZW`_42CByio&Ixx4_?X3va3}b{|VZ@sZI(_}-P_yi>9>w5CUbLqR_DJUb8a zy_a!&_duLiRVf@A;0XR3BS5d?7;gTuP?Di6vZ0P^!0o>Ccr_n`j(OFC`R8`YwfBOc zcjYwaf9WbbSz3?Xw}*qHf)a20*PQPkJ_5q?mcXmVI%q#f!=bxsJi0P?z@ty=o3)I z3jd`dt&pE6P2~GAArnIUb@1{BbMWpkh6`_;=jh4@*jVK1s$?Xie5?KB`A=_rbx$(L z8fD-NmtfoYFEKcGMys%<>;h(cAEW3$C2&)zgeYusMm9F}l9{V5c&U7KnVjkgRJqny zYMiDBKc)`Fja`XEJufJY$I0te+fa-K-hgf3L=gTgg$;bOq~@8bs^;> zHtaBQnQcNf9Ul`n&6l`!zC0ZH`VQ2}E5vIg7)&3l!`rOobmqfr5Wi7|zi)L2=5KR^ zJp2%{cgpj3ub+Zl7v=boFXEx0QjxctI*{LLq{4@49feDMioE`UpD;Tt56|0_!=*O) zpy44+*zwpGJRv0+PZ)R?g>W0O&%#75PdY)Uex6F*dgl;a{!JRQ$Wp4ktcIwcP@>9e z7vWphT7LAuSboQ`BmAYXFVK}_%kM0-ljVJ8OzSY{2T|~ zd_oc&&Z;317nHe=Yu|9oEaK6U(J#=HCS7W6J_Y%n+l|8Jj6=c;4_s%}fCE+L;FvRC z$ww!IlW*<92A^H=#{*k&K!g&WWH6LgAB!V~clyMR;uuPPxY8Mka`aV~xsZ8NUUIy~ z#J)?x0PVBPkXCqo!&fa*=oc}&xoAW!Rp{)d>$s6PbhX&S{W+-oYk?LR6)ynX*-l?) z?jU6wWHCJ3%5R_#>j){?7(g4= z-KM`^h`yx4L>hLeUFe)zM{PZw*|lji=#4N9x?bHJl}1YF8Qp#)QriJ+hW&$96eujsO&`n2ol95iQ{4cm87lbsuRh1%v`q$-xBm~48EQw+z`o0Im#p=Y15 zb6f|`e5V06&$htksw6zs)EeJvasZh+2T1QH3*tymNi5(B;nYPyZxhH2Ebo{f*!r$esa}SwE1B!`8C- zk}L?fI4fNKS;^B+9vzD;!fr(x z=*84zaQxOL_1t0#109?Mw=L;#X>&RGxjLNkTf}*m?`4Ui{%T+&BeA)|MBIP+OJwQl z52rh3iCjEQmSXXix*xuX_oZ$H>8Cuda|cD20}c}Nk%`o<_yAqM>p7V-z?oERSt(uQ za9jEzvRe2tB)p9O6^Pb7up&Czz2w9ZPgwe7D!(D&6Cllb*n68ip8ebb#ebWFB1?CY zim(Pe=luwHeRD0&IgO54h#%}cw@Smm6kYl9- zZVWt!&nL{nqXzuJavqn-OY<<85}^iV?+1z5Wn-~_5DgkefsnHMP}!zw!#O@y7GCR{ zflH7+HH+TPlDCAj!|BVJo8M^CW+O%IqiyJtqgQG0u}hrHY&WUlRB=}9rN!*Kk277Q z$0Dw}!xUW=taf`Cm#sOO)C>=yx4i4<*C(+A*2&Y$^DEew<;R${t26m;k`i^Q9L6g5 zhv3PN?~qT+pHn-NJ}N(a7~Ak&7JS>Sz+`osFiTm1oV?JBZm&9qJaY#Lvn#1kk<=rCtL9U2yb4g&ft-0*XmFvtbD1ouU zR2CPrm>vF;Lt=ws+3WLrn8%_(`l9tH!i)P;y%iCx#nYB}+jWox??xP4nTlVPMuKZ` zCDyrH1V&E+(a{8Jh`#(3JqdorY8vgnlI0!o0=;*u2h+1f+FivpFN-?vRV%x6%l8o{xms z8-_4tQU>JhScm1hLLp|$Z5%zl7I&Sf6MDxNW2>rRFmy`^E=YJlG|YZcrDK`wQtecR zzOH1WQ$z5_%K1!wqYaDpn#~eb^-;0zVmx8{Vcg3d}$h=M|{WS0w z?q{Py6FbjRhhr*YZ$uUkos$Bx1!Ex1;~4qA=^v_iw;rpcXOXSz^3g}lBJ}m(TUr$7 z$9_FOj{n+jC#O?v*o%gVXvqkb`Lv4K8P$GzaJfFv%Ng4(KN&dulyE<)46zc z*9{WKLg@JCn9dALu=97(W@f|3u|OR|Y<1vm*>w9H5+n7(vqw)tHYaPPd2!=FHen#n z)jvusw_lasEj~=uPK2}k_Xb3`e*~G;%!RPn547W?EaX0CL}!5!H0u6`1~s(f&d2eB zWA{TzndL27UtS}usk%pW-oB-31{O?KBMz;Qm1ok93MzYFnR+E2BboP~lWS>j#r$iL zpkV!)ybhm)RP5i1-s~CZz5fB)v)Y}m*d2j&y8UtOtBoY5LrVN^GrD=tBv!Vgl;%Ef zq&GiQq6V`LiH&z4TGB99a%XZAdm$%?Op(ttTKgY;p>05FG~bX6-77-Yk`OVQ8jj5B zj-iY(5yU;*m2UcSgeX*oE!_0Sq|{~aT6$ht%U$)kE%=h z(ALdySkH1Z`~9$>?8K~x$Y|VUd_9_D%O-}P(t*2$j<=Gk@8*$ifQ9! 
zemsWV6`v6HrW-G7&;%9|jw4)sA+$#a6b4Vjf9EcvX(KAJbN6JJn>_-IyrdsF@zdjU&%ew;*o-r3)1_qgc#=b5WATQG+dkshK6ipvq$xjD3b&1;Lmxi zwPi0WoTow;Jl1F78T*A=`;+wiuTMg+t2SMqQi_Yx=Hu#3uhG8)n(zoEK>qL;fNj%J ztjcPf6i@NBHaj6nbt|~$)IyHa03N-H1kWu52JBeP2g$_p>)&MaH`5gP_T%GuKXYSV zf1nOO)k2PMky#GsOrOJpkCov1XAJ-Oauh$wHI+9#G=zUW!D zDpdRnM15z{@T|Fe(6SLdI8f#<9#u$@GI@uuZ=Qj}JHDX9z3sR;-3GcR8K8%cH$eQz zB4~IVfgkmzgJbDh>^|O^AbhTwN1TSr)`uRKyo}mZ^ zdyh$U4$mY}q3W!ktS{>NJcjHWmmvBy!)d2Pm9w3A5nD$0hpMJ?kUdF`?@9AQ#Y2o? zt?m-=nNB#zUb&xkQUa7 zys=z(f2bUXPEmq)-p8qo<~6AHAICpkQ3&Hqi_sC&wXkN(Xb5`pTl((KY#dUdLO*Wt zL^;_L*ldMjx+D5N{rPtcoJ3Pub>9QBV!?L$eacBJY>;6)9Yl{;lDc$y>_BWVauoZ< zi!P<~`QWTM4y=u8gtyB_lm=V~0405YSaV#g;ff|h`}mS$>;>Q zq}W9+jZYKQ+RmUN<6>xhmjPRvyJ^dZcr@qTPx!m*99pw>1Uvjl9k=le23qUD-ohX( zdt4hPh-|Qj@p*WRRRcLxdzoxUMs&)}P|9xHAQewe(ZI!KRN=V-MYH--%b{*8zHSOv zl95EGK6=Z^ubjbxV5f|bZZ!+nz++Q&b?SDoM8cmruA57*Jx&cO`J2DU4?H;L-@tx zw(-e^BYAQ_j-RGBncrgQ!Rt+%!WUhg$j=I|0w!z77i~7>T^9jgK5RJuXq+i;bm1KA zlT#LQF8`4(xM~5T%|AoJI5}bKfE`G+us^D=ON4tyNo8wL5)RuU@|6}$fcVd=q=&aH z6T&VG#W&U!2)jk5@qz_8p!TH<-gF8uuSXl=M-77EH!R_KxhKphnE-y*_rR>c)qwpx zU=gYSr@ieE{?P~T6}b>|muH~Y|9wG21v|L#IRo0XOi<5IUx< z!@m6aFuhltQ=xe9e7_7}wkzD!YJ_<^F3?MAgpQMh;DQhNh;E4lp|ljVU2hg=J2#VDdd4 ztf;VquCT45fA*(R$DhXb+rM_x;0ev*96ErIf?Q;+bXxT69i#~_1fn8WENRyheQsW| zY}^_*R@-LFwniPLmn`(T^*ffco~DCT`ez>f_sD||7?4ZqG+QM2W*>d8cuVZlE#N|Q zbA>gU0m8joD@oV1J?MG6Gt!?EjJMC6f)5%UqxHgC*wD8Ynfy&53wr7?sF{d!rd2r0 z!4q678Q$y`g1jG=A+0O-IA8UxlFP|$!sf*7SXsFMnRaLqy=w{dplPNwdZ#VgtTPvN zh#eEp>T%R0>W(z`$rY-!;V>C=YBK%MX3q*ARI$!257~^fadhpbfy`-j1XJ}+WX7p7 zRJU#vOM7R>J{ssTCELB?=N8AV9!E^IK#Q3jJ%>Yb5Oo+VdTu+#`=+H4-Q(;-)ie!6 zZ{AKgpDT~M6XwC$mVe@No!&`_G9*U>mK8e2mj%P*3kb<=5-T_H`{;{Xs6L8Y0DoO>Hidt48`&1&K^AV?A^4_kWJWWobh++jLAFdrs8&D0 z>b4lt-uN=9XR&grQa-Rdy#~yEm;qg;bBOT2=df^pk=MiZ(A1wvSVezC*Zw12fN|)wC~ZFwGtlM`Zo}(A(EGa@KT=^is$XYVr6V z@z0K-+1jJgys6r3!-~VyVRnL`U2%pwS=q3GV@A-ech=C*`z z;3xVA_p*_T-Z zi?Flb7jzoDVX&2T%{t_R{UrgJ4VoCM=C+JpPQ(4%DMtrdFjIis-80?U-NGNy` z0kb}zgfW%o@Lbayu8)@oi-OxIT+SQkM2Fx`-F=wmx>3b_y3Ba6qi}NO9#H7k2fsOU zBs+_yNVA|@IIsGQ>#hAOq+?>c(LNe4xO)~`3{=8# zk$!Z$g81DX{ULEYe@v?QJ`G)4N~vMjB5E6~%Mz!LB}ZFt(tC0TNRTLpED4Px^1VOk zuLfPz*5S_{$f@E5<}O_6QUUL3RAn7Q2IBX;JD83w0gu3i@WR7}2F+E0^ob78?Wu-u zkKMsuK8U1Ug9|wSz;HTnaS~gqvy*)nTxethqP0y%wE58(rvCX53;Tb*-;~hq5fQ9G zGnJjI-$zaIx3Mms()6QFY>~n|GB9~QTdQ%Hw*4nzA1^17@WrWIpT0Y`7t*oM<}%b~ zR*6!&pW+ABN^oc8APC&9K&zXN(|gsKBv5t`w(u43ODi7#m4uUt3w=po`9-3%yOG?G zFG0Kz^w5lF16QNRP4= zo7V11w`nh<2Vz!ps*{~rY{PG)q1;H7uI$I_mOrMiPt3*{U$5Zc(xZ4rjrRIihdH4v6um z6Zq4e92~xGI7)15AdW#sSYnuoQ?vV{kB?+P=T|sx=MQrkSKiU{GqbTq%yvA#p9`M9 zpU^QZTQGRtpSWJm#*xR*Vwu-Jr25=cV)yw!p`Vz?X{kD4zh$-@?p~ULtj*+v%Bd41 z;R8;f+Rs!tE^AGD(mvuhyOJT@>p!?w*8(xB=V(XiY+4l~52}s#an8#hWMj@l>Zic7 zhXHC7mkxs3onz?O06Vt7Z!scC4a6<%gz#CbhujoDbYR*jb}m|< zO2<5=57P$I>aGp6$%%=1%t$Jk6G?6NV`i}Rio~+!w4gA^k-PV?m^)ajK^^bRr#IGG z(THT%GPN!H$s{NscPiA#k3ZJjl^Od9N0j-SZ#VH_La94f|CZ&_5>X|%lZh8Jdi2!mRI33#=3Zwx46r@uLtMj z^&v6*8~&?49+d8g{09f|tQ)QfcP5xZt6wZ$t(b(bhF`$5e72$D9Bcg0un6^@9fY#I zWAUH~KSaLFd!!ar%q_6=!~a}{V_V#&$K$7i<_aBKWN?%10<2@DkG1Q%h?2)|?!WL~Xvx7N95>-6 zaosouo&UNQ2XsC_uVbf^nK)l`h1|pqIeU>$AP2WPKLtz0sO)G9#Gqy0?Q1};9Aj(w!&vBWKa6S0E+5c^$J##yHeCD93=@#@j5uuM(~agg~cX~NDR*DnX# z-8sd{D9qr3(?6l;>>Q+%Rd4TTnasUZZ$!&v7(QD&0`)YcU?=|?JkG-jwKYuRuI*BQ zmp-{TZq{J@SJ=<#G>rtP*~UHf_9mo!9KJr)0^Se+=W>lGkSTMn`ooy42h6yRAYX>f?`g+Y6+<4=a`>7;wA z=(g7_Jf*4;vxQq|$DEt^#Gm~*9&4aS$3}}Dm0o;!{XwjhKxw_$mGm9-AGTQ(gxCMG zp}#DACBC;~(VuHhcoII2A4k_q9DA=wN)p4c-iZO&;ORw)$=69pQ?SO9RC91nZ48$u z>xycHVAK}(2{knt;I&pNpfko2hJ`4CX=Ny0UQ>q$Pk4zvl5gT0(<1QJGYtO}y(1z0 zH{jS)rTD}+V>q|^E4B 
zZ#o*eXD`_s9xoL8|KFuOB&xhKy}1Iw{99$z1btQZ@O#$>dbE1XiB^GJg z#o^uMF8J=9p>ReSp|B@QK&t!#JMVahU$78ZQC^RummMTcszTKSO7a znu%p*+>;(}jz*&=jKk-?%86daMH5X`lf;=_Huk=q%B^rXFfn9C8Kh3!C6O$s7S_A_ z3&|zfuu{dD3|O^Spo2=J$KOXl?Q9NYTPEWZYcqrmdEsE2{ul3FD*E#B%7x=S6gC~t zgy0)H(D#%BcliyCnLDUsvlf+b)6&)|u7D1Grbm0Xe) z_v~K=lGU@k@TeyaMEF&QjTR_E;j0ZK>)AvIBYv=|NR5P7D3TQ_zlC1+4xuHXLgeuD z;85*kicCi});~1um@-@Sn&Sv#CCaXDvN~EABtQ&%BRA z)e(JubZj)JDvaYDCobdz0;ljnLx%IN^IdtbLyo+~k!%S5?#jp3&)^^Ry9&G(%`Nj&lEYF@8CuK*&}B1L5VfC{#3wD5I&MA%OHcHY z>IpOHJdY*N`(z<(Uo?~cDEo!i_8$ZN)4j0akzric1yxDmlN_P>{yZ|b*+$Gt{1)`o zhYLo%C%BA+F(@)T9qoRY#|=;M~zxbTs9o3kvPW0eG2Gs7E}ecY_v zHB@C_5t(f!!Ip;Ubp7Le;w5t4YCcPFu;);!c_Ie4Oj3f8`X5Q!v6*!7q2(ZZWDcC$ z+$as$u?b~tZWA`@D&qt{0c-8{M7J`&`f3*;9auB?7)tIYk7o3TRhsYPb+{JH2xNf*OH&q)<73Hb$O#3OB`SK1L(PBqA zy%o6CSqYLeO~6q@(az&i7?>uDd-KY_)UfCj3(`wtihuSp5IYjF%|+~^&sP@o^aLAn za|O#gd6=E>D`Kyf<}n+a=S=qQpmN)<4b16kA|nzZW`#RPmn|i70TZ9@&3#1w?#P10C`Fq5H2t;l_!-NweqTlp)VRZ~IuD zKj4p_7Td!^mwx=!aYK0B@-C>oZo`jCli`~RvPE86G_Ro_#0!plK&w0e*SBne$3YLk zzcd*n&(7nrf7vkR+!D|oBf5zcwxK0n4v-p5fJB~zgT8jCu@HkKr~@s!S&Az@PR7mI zvuN#wfizRxd2f+#rPWy?J7er1GX0-~NFBug<%$&?OCLa=wVQ+6lUZcjFCMCv48(yN z)wFW)NGfNb3FKxpOwQKD!)9qrwCwnRr#Z*N%J@6d!!t);X&qieQ}Q>VlrBTNsuzBC z9ogT7Pn-5oZ;NIj^NFRT(Cstq8lcU)Z+a&7e-(wM8(GlYcnIE1KZdpH)W}hP1^)Gb zJ8)&4LVjPU{4TFc+dv^qJ(7i{D zeP__kGmL1(<>h32|LNrZky!dOU}aErwQ9-I@O2iLt^vNdGI<*IyzYvB&&@+WP4nOD{QA&T69>_e zw2I4(PsT=Jg-FFElhcW?1*Bbb;Nr9j4kqM)e^PwWT22_I&!w+L?e(ghdYQJbMwIBMDR(?7|r}b>7bJ2X- z_PbALPFcX2sC}Sm-=9)m&rZ;|hq#XhTgX6%#mHfK8_L=u_A5uVvq)Rp^4%Bam)A6# zmQQ|rn;950Gs4j*M><}YWB<)tj=>O~fIG@1=a&8FIg7uoQs z1Iy>Eu`i!e(8kQ2Et&oXE2cEog{g>4TwQIR?OIa6R-gaE&5r&-vQ192+7&z5+ie%A zz;w9Ig!}BCLo-WOuVk@1*R!tYjbxd7qHt?aJ>9vbhQ%Hf@KNY_YuEpZC1+2X|hiz_2Y`v$S;k+SFO+nAQqI1>2gEX#5J z%x-6_V7ghunBUAcR+*U1W(L2dnsS3!a7;Mq(t1P|I4+~MFDBE?`C-IGWD>|nVzzm> zA2n*dE$UIJ4!bW)Oupc4QSpWANiyjum+8&K# z?SBH;C9MKB_Tgo=R5^{h8=Pd`7XO%;yeIo@zgZC6>e-q1gn1b?(g+`8p+#igk6#+h z19rg-gs=LH} z)%_^;@n{7*t$UjJWGk~&^90sqd59798&uEk7fpdMHnlE@z4JLkLp}Sm27@v-gB!(^ z{%Em5S~X%v`VcZVS0{1BmbBPUo^GE0i<@Z>L36ts>9)LTa!}V9?OJq+9)B+OuWyQ+ z9f$8kuJkAEb4;M?_MDY2Z{eAxd>PS_T#Tz;Ef$TcAMn8c4Ei+}0 zzFieG+l!d%(mr-l{x)43x}+?>K)L)<{w;R;$4S!h_oe;p274wM`;8jx+eHgM1k$o; zS7=1^5$<(^wM1dfak@=Qo=&-!g~I2A3k~~*uwB7Jm=?$guLI0j9`k0)GF6!0BRh8Q zmn=I!Y6Mj~Jc?PqT*CTv!{~rj?c~n;E-D-Pl)jp{h$e{l0griJH2caA+S)jP4VsWh z-TS=Ah9)0UabQ0A)z(PE!XD7n^VRfsTsiH(LchVCbtLUXMD`;%hNT$%U zl1idS(bi+$RBuQun=|GM8Ug`A;8K4tiGQ1~<&h_uNln!!D?n*KPAG zSDV($CdzcOxgJx?os3P&2fy9RY96!-Q-7P&-ll^r=|xk*IyrC~8@VEi)!KQ9d(f}ao&aYSS2d7%?Y}`6#y$}G8HSO?+)*;uFO=9{ za-m(luV}sROy)cIHoMoqj@4T1WP=;BnN#{c)^I$OISGm^Uuw+0nz^wRb_dvLOMN8$ zc%Cyc$-?cgjghs+X^E0*?8qq;=b6D*&%+-CSXfa{2v=Uu@nI&GHM~%`9uoBdQkELvG$U$DVxr zLqCuHO@|-1U{ee$$i3v<;;i%?i`jmO^=3N~JDU#j>a04l-H565ck!f zf1+PAoN?pTyjLr|E0r{e^z+J0bLju zx(sIo_ermhn?N4gAB4XX`k?c@9v>#J&40{^!#!8xK{BxzXk!X=y6l4*uQa&zLJjik z<8Xv-1gr>3fVYPBVCy;r*ySCNRbm67>BjK>ej{o4@(ro`|HGHQ$^w+k=GHyj4>P^b zLgdZ!c!2J1{CvSd2*08teQTg7o?An(N{TiLDH=iU`KA$zOoX-DZjf<#XN9h%n#64X zTzFZgLoctLfo?5OqoXud(AQI@;GRtbxl20kbk)TsBC~7_+4uRN@KIw?*};}qr4#(M z=xTLNXdNFzJQ@`23on_{o8xkXI=K`<)vQ9On_EcM!U$?|=pX0J=h~;ohA=HnFDCCc zlvGVRCWIy|WIk_R(0xyxsH*O9E_n7!KmPFU)og{tT>C}19S-SV>2O{%+AnBYuiLShmOt^&u=)B$x zk~_bTy6ktPyIUTRuI;+S6}6!&r*+`|*CRNF=+ct^d|_q4QSgmSmV~x9ldbLd$rG(* zD4@0rsf0}6S;XlG@d-- z!ZXUrt>|)UU~rBycRQ*YafzJ&beQIS3LrzLp2Wso2ZRCBlS$yhV0JU+4BL6R)Ba&p zxa8MtM%DP8tnN^~q&6a1ND8=2|HPi73auiO+Bb<-?nz*jn*ygs5z$$Lyz>5L?I;^o24WhL&sBZsS> zuElE##v#5l3>`dJM0T#~;hx${XkKsvv3t5gD5rwt#sp(r`Vr&O;Q^@2e~JBg=NH`B zvOC<0t&1eB?$@}+BZfF*Dv#$Lm<$cPE!0ep6lap_arE7{++jCcth)L*7p?GFP}!P9 
z(I)8_g{Q2X#_xYK4kVz8WI<^ImGCL)Bnr{c@yBdt=dBfw=hM+T70Zkoy3QhkS zjW_is3wdKH-h8V9_uEgAxGus^y4=B|)e!E;o{_qDWufzP7jP~bztOUR1;W$5DB+XG zBB_u1KF)2CF?7FC6my6BxM5+k;8SG=dn5PaUAnoPhM_v>)%#)N2${AC&W~FHjS{5p|ZOTX_s3Lh1JUJ zUr!b-J?lxcnrvuK;$w2*x*^RQC$ds)K9RrkwP>93INIu^PZplvM_a01kU7&n5v?6+ z)OPX%vS{rs;&9T1I(u5u9=o%2zCk0cZJ)*#5iRyMT#5bry@?Io`>>q|zGgwZ4DkK^CSpErGEN&XK~ zTDgJxE{&uC8$D^*gaLH?(-2yfw2P0TezyL?!Y~NZW@L>$K;}* zL3wDy&aKEUX_T<^qAOlxIs<#O&iWrkXC6q^_w`{3AqgQ#k_wd&;-01JKkq%~p0n3lpY=SrY_t_; z6&xg)>u=A?sH{Z?=59i#e^2J)zen-vp*(uEydKS76paqe?L##h(!xG}Hn(u4B5HmU z&1=nEA*wdEN0$z|6-HF!D5DFQ$c>HbJ zE>3wvA97x$jIVtR!eM?7@y;R{e$25-Ixx2zAB!o(I&KlxeW7jCIpQV%$MYHXZ`R_q zVhBwcGaPJ{Zy+>QQc@6A)TLBk5dg$Le*y{f^G5NAL$B9ggOe<08IULu3O*Rdk=MEBohLb309 z@Fn7pvPz!?qJKZIZf*)uD~`a@Gf!jVhCiq`As(MRq(Y{qy~8zk z4rBM$D?;C`T)+bT!ao0XqS$P0(p-9$4;w#(chmh>Vz+51$?BK~QWF<4?XFl>GyExy zu&3ag*9j8Oanw2aA~HujxqbgCk!$;ab*fzOUB`0rLM@EQrp$E@hu&Nun>`B2u1l_DSNthFdinsFH!+1|?XVRO{V`44mh_Z--6zbx)})AM z+&UpXPt3*A&+^FPA47?Q$`IW8!I4FrK23Z#1afX?ilEcUo(wyxPyE-^@rOh6g)`}Jyku=RZhkrsoTa+(T{uSb_ME56U!=*Dz5uM-w$!4%zn%{=*vS3T zu}ACN(~;=|OJX8Fj)qpb(-yBbtBlfBBrW_Q7JIvMW0bwAY~3AjyxT;}Wh&v{l}b@V zMjBo6Yd&Nce1NRJDx7H@iZU;sqjra-@Vu&Zc)_lR;LgdgTa}OD%A6&9>dX_IQq@1a ztv!}%oKzsCYWwiHT116TC55s3&!F-ud) z0reGdBwLeBI<^EH*GQ~em2=_AyQ{2D_8gd>A4F@nO%_KU@F3$;9-=Q|OLRW$Cz186 zCt=OU@XZ%bY0J4&H2uLLtS)dkX4b}__c7}9{TW3--4SqNoC$4FSpuU!WrIQMS^6#V zBu;m_Lec|N#oKOwB1e++NUOtriX02c=c&D<%3&6nyqDoT-Fy~rz#1)#ze{U zN#&%?<0tKGUcsLla~wZ9T!H;(O@V+dp*a3+7JPRhP)A2$b$d5#oGVX@qI^lfh5~vd z;Uvx3mrWb)%;t>ihro>sT5PS_8`$&gCpYTtLXoaijKrxv%sQy$WjC@kz++~qqSbR|srZ(i{&s?WT4Ta~OdnAidboqhZ<|x<;UizV zR!OTg_S1&aPIb%D&AQRd$$kVgPT9g5x(>1Vwrkn;zpGiix|j_+FbJ{61Z-FDXX}J` zy=(9i_EkI+DvH!uUyL1dRg#BklcnkH(RrZWD=>Mr&Ow^fN|qxN4_ePQQkhAc(W`np zn6*6+4#h>m_05C)oF$d~u@lE>f!;~pjF+cL$|ctH-7Ef(|9;AOctgaIInbAnz~?4{ zlwu(pH?oZz|KCa4s1QrtA15F!7b&EBjM5EuCU{<_IX+UT2EE^Hna%#mY}2Kws6z8A zH+heUjq{hmBZthR*Uc12;B;H+V4IE?-O_{!>r6c#AcNO*(z1(4ZCV)h9N&f(&-jR@x}U&ZcHY>w zv>II>I0?opH&gvOp*FW6kl;0j7GnOs`+DC%7;pr7gPt|QYM1({g0Hr z>O-UEEyKf?oA8~L>HPgy8t~G49QzZylifbC5xRRDp(-m_nRG?f0hM*G-mbp!?8*a&pEj4;h+DWiE2oMz#yE6o==)Z%Veff2Q(5V z6_g-nwV$*ip@y0(%);gWy+t*>8gMr(0(tH*D!FuFH`uLz3^ji(c&Ytm{G{QDpi=0B zQhwBdQPwrEU0XqaM~{K?X|AyHW;SdKlY*Hq1CX`0fiP>_#CkJVF{QIg;Hq}4F=XIJF6$6$b$IjC!d3?)Mpc=Nl^RTN^$H^_Wm?IN)RV);VW&W4O)RcYeTf&%-v*uu zD!kd@7Myis5Vt(K$(2ThW94VzSgNVo+GfEs!LxCU*P9j0DLBiq<=e-=RAKHHZ|A@{ zM4X3!gSR9TT4eYi2eNp3y@TAE{|zfvaB>&bO}@!T+!#VD?w*IS3g&E?K@jU79>F4< zf>`a~3YHwv1B1&kJAah0l5+{{>)<&ycl>JhuJtmzEYM}uylh!`u>qK$&Zb`yhQJyq zgrcp-neJo{oH|ip(_UD@@=EKNN_8^zba7`n-uqee@>g&>ZaS@~8OHXd7r{t+lKtL0 z4J~PrN5>^kVPSV29F91I0-kP1G4~Sqx-na=PYOP{l}CggmBRqP_Q_hp!7Rhz6@Ov}Dy7 zxcjdV|NJ8hd%8tPE%G{&{~n2x&&K2DLz{4Y)<=Bqq&?OeHw2y~&Vlu59^kfb2f8G< zYrcJT!1_Nd&|R}m^hxR)VjeETciNj^`Bz@!j~v%>1FU@Lk0Gi6YdI7 zL*=((`5K>4T-161UXt9 zLK<>TPbL$K-r}y`j2)*IO?xq8z?v1w~d{#BATr+~n*w;hV$xK@AavQ!ZISKz$E`_r{Peafdj@c~GW68@0 zsF9HS3Vsts8|U=#yPlilkTSvtW&h?EZu7^#zPV#XuTHulDIb@0NReN=33l!q&)Lnm zg#CgxV1wx%5V7D0H{L-S9WL?4za9yBr*%t_-Tq{dOFawoW?JLZAu70Ox(2!)^OwH$ zd`k~2E8u>c1!%9~F$h_G8{BkFcsaQnz$Y3Adnn3m*0l{Vaz&Bwe|ZCEj{JswF6}hy z<1`^(+W>MS5|OV)I(=1i3Ksa>hUl*nx_udfCu@XVv?T-Zv*SGjWHy)P^$Zcr%3h9&^KMMsG!HYfRC^y7^#GzMF0_ zN`YdlgP>*bf~t*4g0JPzVT|2Hh>eS;?Pq$R=|cjWQjx$i%8Xg9MFF!*u4dL}7Bf$B zhNUUrVwT)FX0|bywfi1nCmJ%@wB8%+$ExQ{cUwBUzik%h?Q@&k9p8wninTP7aTOC9w}Znq?Vqisa|Fk8dSQ6 zqrTt8N90e@8S?KTNAn!)nNbXT7Q8_6MgO7Y5tBga#eT@!EQiZZUXmpfylM;1r2u0h6_e3K+#bO z4edP!w~7#a%$NvoMKzFPLduVu^6?+6R{rxSFU@XIAjEAsT z?NHfp3{GfFW3QA(vy|Z1aDaW}rGAZIH$Mjme&a+o&)J<-oWBJontvfB@CisQ{ahTO 
z_Y;D*7Qpe|TC{R|7W&a#3cYg-A${60lofwOQtmW~jq7z}#@Fqc`iGbBE8hVoFY^aw z>HBbDju~9v2hf)81QkF3LygmX_$G~UY@J*!T>gHPdtA1R9&wA|x}S&AqlZH2q_G{m z##(23v~n9XpVo!P_E(7~3!}ynWdf6RECHt=V&##8@s4R^^Yq~`bn-cL;}3)TEk^KE z{h-9jKMXz14}&)OW|9B66e3@6OQ^fk%aBuiMd31l6*AuZ%UgaYi@| zF7P16Q=Q4T1;e?Z{}%EQv4@GO0tUIk4iq>f4s~YE;m4S8&|7W-V;AkimxX#x=}ik1 z?=6SofQw}1j03cLmnayf+4zr4E`^bH^WyEWS#yn$rN{Xz?+J~#1<%X&3SpA2$%4A;;*)sH_1k1*0B8vmGCfVW)8#jZJ;L|;<{A77h`raX7X(lKlB6xUc`HzNr9 zIK+_h+U=xu-FR}UT(~}#=Sbewow)y3HU4vF97_13!alngv7QYjoZj+j;Nqb`f@cik zW8y$C87+r>deh1KNp@s+!&0oE^^;Z})bxDrF>b@iAl&f17_!_!Y2UK36AhuJOMY6@wc%U(iWJG@EG#M?JQGNVQTcwoH9Z z+A9LMC1q7)-dZ68sehTI58US^j)Ey(vI_nzm2h)A)3Ch6n4zDG+3zlU7XM)p+c-W0 z4-965`NCXa4<@kRl96ElE|S{qI7M69LPS}%t*Cxk0D3IUs(OU$@Jaa#-jsL~)-O)S zgTtjrzQD!w{}IbQQd&tC{`V5M2z(`12Su``b|Ef4cnPU!WmEZy?y&cw8FAFug&lqd z@@$kc^QnRrm(1)L!0zsP^ne5A*(qG9i!D`I*u1wC9qd?dwaJk-*IPYex*#`%3oCuHj#O zwq-8!w5j;?Y|>u!5vhK*WSXl3vCEsSkCAZ z3bbz7R!(L0P{GOa1bY^%T6@tRyffq>mgbw0WW;WMN9#utZu<`N&s2%;j~D#W|76A6 zZ=bbJ4j)ZEOw8pmQ5I|L9LB}BeWPQQhr;L7@x0N!DKJ`k2jmMp_VLk2AoE2Ec51H1 zeuuT;%%x^(dG`=%etnwIHK*~y!NV}^g(vD4I2Ybe?a4&_ZhU{!KdQtXmAD&UHHgbMWA~|qwEtw?kLss53Bq!bOlb-`6q}ZYY%XDqPalQ}9;}0by zcJ5QOJTeif`QAnGRtbXB&z#gKi@6ns`Mgm_67m;VtaTRzkLXMrP+T^iEn20>P{t;# zH0cpnc`z2dA9+9&PUT(7=F@@tER?_Q5*jzY2Cq1>l<1$=C6j+#lzg<9&8eS$YwgnT zt3>AgFl*-{@36g=Fu$1*jAl&!E1a#(;b6rnn6lRoA`X0nz>W5FsP9xf!Se{(GVCd8 z9DW{R#|SQVw;jyk^iNp1-ibXKWyc)!-I%PG8-)F}XOaG|*erp^Z@;*JJx@5m7QYN; z756qXoii_C3;brxQNFaK;Rd_hmJF#E^O?0~K8ubMnofp?CF;x#j>nyV+@wU%y15m$ zFEs`^Z9n*v=}+SZ?ZLX?IxS81rS5}gMbGn_1fOvk9~fUJQa6m?(^kLY0`5)Z|7b_^ zdwRce)s3C>xb`I8e|!#qHBERA$LW!Q205x|ae*f1%93JTHR3h>3HSC^8ZNe3jpyCC zLON5K#-J_p}wWpck1K0m?E!GFo>F@${x6U2A=}u=*$6%?32E ze0FoqbT%^bG@CRxls&h&$}&DHmR^5S#}e1gW^OjC*x0@UP?H!5Klcxy@^};Lg%E?b zoSF*{4pu<-Elcpsn!@|gL|EyO4ts9uV7*n_v8?1a+7%XxHNTkRJRJ?#;yV#^GoOH| zFprzF+zRe*j|2902+sZ_11_zN&@y}uzcacTo(qgdzd1sGccdcj4D#Yzx;OI?mHQ!l z%ur@1tp{T3fm?e_PJ$w@!t%<7(8XneB|E&{W1~Zw;B3<-x#MxzYQ@E~_}${y^f7%X z`7ql~k|9`C)z+J_^=Yf%w<#(4@W}xig&zm`d4{;L$A(5I&E@*P{^D1D&7|{&4oJc` zpTZ^|1c&*aD9J||LoREf4R~uQvfGy9z|2$d(F=KJgGJI1HdY%-6-xN}wi{H}L*V#5 zxw>+V2T!ER)-Gl0rT-)vti4pdiqn zM`@Z~CY8T1muuSV#yz_59UL@nqN($}V9JldYJVXonN zEUj0AeOf(?U9fg$4;2d8pVKL9sedkWZvr+T|B(4KB(T7qGwezD8CIp~&pZd7^4Weh z{Ko(ne3hMIw=UZA_a0~P*H5P)vlX+LH_M}H1tF4Aec?#ba2c77wl98s?M?{|xWy;T zPonKd-k|Mc?_tdu?ReaRH2mPwG}x;#6AJ1(`O7vT^pvp?Zdvbyzg_)|Hp!(3iIgDh z);=2#Icg2s`!>?D&S*6Ig#$|bC(r6WyaBZp&0xBv5l%0Qf%ued%p<}a%7pXz-0pF} zMiTb&`Ee+W3S`!Ov)TP%QzjL!>(=-AUV($Cp?LtrkwjfqK9p_U)GoTf(xSepwRe9$f%B-HRciB8%3`4&z>&^uv3)P9fV~ z0&|3UUr&uI@wK@EpSSn&FM|vr^L#SasT@i&#;XYX19CVv+!Vgl`csGC!<>N0!OiAD zoWnB(ev`)n=$|j_lKu`y#_u1%byCf0dkR5+_a|;%h6URsyOKFDi+8}x^BjkE~r(oxHUt%`= ztE6+j25%~kKn|We$R}MjRv7dH?z$^Okg)6QT3v%1=gvnh_&qwZJ`ML?l*ThJE0R6y zdXT(RDb;pRBUyLX6RlnQ$o9YE$Sx~s9HK4EU={zv?#oYc3*XBUSR~EJ-GgZBk}Ife z>Qt7Im&=^z55WWWPtcF69Bxia#%JS>W1nwpQGl{F%RhXPtv`Q)J$p8a#Z1#;)dSJ& zT6QMeKYRoG`(-%0KK~@k^9*ONjLMl~?G<+S;~i!_>ljn36tT9~x;U(DEa?$?g~zRf zh}86@BvVZj?~+jQ*&HQ<8)V5!h4G~0S0T2PzJ?D>qxk1fjHe2` zEWZs5mqnW5D$bZlznjb)pH}d>3NMh7qAdBO@dO&<)^K-MoJ2wDWt?y48Nm@Bf{&+Z zavuHT8Mn`YjEoz z!>H%$YqZrR5sFte(?#@>DC5^Ps^?)#?~hKWgTHjJ{<8J>&7@Wu7W4$Ynmz&h&$i*d z*sef_@-&fS9z!(?jli(w1ozSS4qg55IliC2k;xgGuxN8V`epJC-aMgNVBig>{{5MB z=j4;{=Cr^=(;vmY6pdjss6JcS;Yj)yWfE=K7W`LT2FlMciKpxT_)ib5gl2Rrcf(MF zb=4M#f{d*B!x@**;IedH`g0&3vB90ceEbPGb`JsgoJMk17GoScinR`l#BLKVBR$Q# zk{Pu+f*Yq2&qx|e+C3bx)}zPt$d7PrRc44=3VTF32M?g>AI9Ly+$$|tGWAWQ`oS+be!0EgR38SjW^x71?E3vanRyy6p%lZBnlpL@pYM!vST&H zeiKh%W#?Ax8XGHcx;=2gg4NXQMFWkSF_n+mZVVNzr_eO@d)W4?8-4iQ3KGZsMSD1T 
za=EFT3o5&5ZTVmu>QeQ?A%{}wSo7PoTC18O}adVDY21@sVRAu)(%`r1h^D$a!^izi*+?@9hyO^Y@UXr-@IgyN~DIip1lz z!$j$;$KY-D-N@Na4yQ?Nvff`EOqLfNA|D1O5+99CI45H$Hr`>2-R4dq(<=W7-~AjG zn6X zyA(Tk)Am*Tq04D>(G^peF|3^a@B16Dnf(xQQ`S+HAQ^g~VLWssEu%iq&Vts45!n0@ zqD_W&Y{R0-Z0Z^{Jo)f2rroND*1vs^8n5IbQOp}C$gM(IBZEb@-oGPo7&OLCu4Bx=3ChMlmwT2nSf=y z`VsZD!g8DmmOc6fWu%&t^Eefu=*uNaT)yAM$3^XtJ_sM|fm_HlZ9h4)52cCv26e z1w=1ufTYu&?AJyY_GCgQTivQz>i+gM+c;_nySrPpG}}d^barP1t2!)j$0{P&P8`R+ zxNT)xdY@>w`7zdg`yNC^#zCmJGF4qNl9fkYV-|A*nDYDu5Pfkvdlb*1lnWfV3B9>3 zqL1{|0T+5>NH9J4VGOJ~y9mX-dq8V6rci=+z`VKEc!E?O>>bw!`-Ahj;;nyqw~tX! zUbz~c#K@sp9wyXJa~L%XtE5?qiNe`BhjwLF!|+uxbjQ{(>h{-+1Qm4Bzj-IP1{n{D zPopKNWxXgh6Ocn?u|&o>3~uNQ2dT&9T&df9DrHepQe0w0)sn6vsn&8Nxv7hCZC`>Y zp^eYVAHwAYjAS~qo0N9c0>4nHk$7`Vhf|kE`eX?jA%%4oeSTJ)NRV?Fz*I_ujNcS;P#VhPhJiyUq#R*7ce)YSf5sP{^s4B zWa+gh|M7$U6Z!jV<57gfMid{Z&lxYgh}f#bT>ru=w35%qTKz-F`SKrlw~{BGVmg@= zr|l)%c5fk(()uLH@(TGElugz@JWPsH5{UN4%cLXd6fxVGLHds+lQ{_-36S4MuCA7l zl$~KDKIbpD#p(qoGKr=!%i_`RXX>IFMIA1#atci-9ZBWY---O^j^?swZAZf5N3DN$;k?vVTrygbER~igLk|AnbOsOM_E%rAUuqwA-9B8Hmu^Ry zZ`9E+NfT=S{XZlh{0Z;gR)|;Z>)@OkRbW!)UHWL%6{sAi%96Amqr7H0ree{|erx^{ z9Cvr=1MMpCjk^tcF&vbCFJ~vc{aK%sD%(1L4Qn-Q;0s57jQJ%R1SWL_rc5JEpUJRReoTrIt#r!ljXZDW=%!= znaFS&bK7FW7S9V}`(BP`0mkE)Nl!PV4D4i9%#r=st;&?A9AeRJ2Ux|~hcFuRa4mE% znchakyJw5VDIrNDcGO=oWyv>ktLY%l9ULoOJN6z4-6so^6Su%9?=@ty3LMNy{{GSvD% zj&l-1NWad0`m3eyVjqawuA`u&=oumcNXW%&o+F1YXd^mGj5yn3KO-rI%AUMz+ zPMO6}&Gu$o9G+r5;nG3AsagbeLtf%9UzEA{J1Mll(Sjz5))9FJDL51{9+H&QAuwnw zZr2LOwb3{5K7U1G@pd?wX`Bq@g4bccq9J8?-QHm0 zvf~4h_7Z$1QxR#t@|sw*){{ktbICgI@#2g-O(bQ{N`j9Q@$)m&#D9$U5~s3<*ly|} z(*0aU9K5`O_)S_!>cd*d+Jsu-bt040E{{P{0}se~-4=3iYCh?Hu>}W>9YOm0g>&Am z5%{}tBl7aGA2gIZKm8^wLe)J96)e{Zk>MBu5nQpQO3QCA8eFO%zcQLfii2 z@bTxJBr2`F+}xHKG&fPiZ|xYy+edCf^RJB}cK(i-Z*+tQJ__Iz{S)7w@er1O?S~m| zLf-y>3jO*&3A_pRhbtNeP_0RE+;LOx(IN@wx=)u*UB4XvPF~B`OiM&&&6;RYwH1mi zn9iH&hwxRi!)U9iBSkM&XwT}Y!dyLzzw)AzDvx+bH@p)(oytYDFz*R1$TNUv<9uk4 zjD3l{^hvyUoCq(tatKd7o&YC0r$g?jHPGI&0**D`!cj+F(cVlo>@zO_OI^r89wWwM z@7iEkIX4b&9NmZIU%TPQ3MZk;4d!kp=fpRNr+F_Y^?fU;^>{&~;#9;Y>vKuOtbTID?mdb5C&8CycaTq- z-9+Tqgc>geqkzNfVf;i3$srAU`hE5^BCfm$#ow00pC8gRt=FC$yE#>;PlxhmgC}X- z`>kMnH&&wbPM4SDj4h3k zj4S=IYFKGz(RHR5TFDl!k+6-|4zqzThFH3{kbXO9gG0Zr2RDP=Y_f0;(u|zK7XR8u zf32-zg*VkoH_b9FJ$K?E%Mw_289#(9h3jJGkevgCve#i-ybumq-7Yg(|BO>cjTH3DhY26E*%l9v&K-!(}po{<77k zKSLW~%|~@Ow`~aTIJJvDtzFBPj*&>q1=7YJzb`<2s(=Tft3AwK*xLP)q9r~9D{sqC< zKK(9NwfGqw{EtOaaj5&5CXl^=+x$BR9|F_*2RCM zpQhiY`)7DFePYNumX?8~Sr$Ft)Cnn{Ur^b*3!p`)TfU6$vl-?ij&T&b0Q^etxFxEg*tP2ksEIAy(h z*Drj#B^5+nk0phjp`z@MD~askDj53UB{eS_F3EZSj0L`TWGUyBxz|^TNPW@;cGmP7 zY|Os~rX`9z7wEx07i2NX88^_1?G^1h`-=I!zQ7ipeoB{)jxLE4yeHlb7__`AY4Qve zYP&=Ve|;Py(c3(VIz&9=Y!hZ8size~%Z5>eT zN}_59Q|YXZH&kuM08Pxk59XuNV990%H34T}*v#4RXA=)?J8r>8|7Eb<^E^dQ6S&LC zR*>|>iymB6O3P2&X9q^>l_qW1Dt&Eyhh^O~DmAvAS{jlvwlsb~v($I}S=QI`h8?=I zwA6O`#L}t4f5+@Un^L`#uBAG4`Ao)iAOCmBd1@u}uHsTt*}$l?qKE%G%P*>$&MwGo zWG@G#Bu$pt{8y)H2yrNY5#9OxE}cXe_{>AU!zf7hT*yRY-m}Bfs-=z_HB0wInL$Y2 zLV>mLitSqy!xnB*WjEg}XXNKw2pQ)ATNW5IUGv3kWML?5EdRmj4L>NUv$KW{jiIcd zQl3543p3<~;Lc3lhxO9AVcnK~XuKP7&n z*RxA$j<^8-8lz2=zD_6lrO~9bN||hwZNd*cx^b@N8GLuhShCpNjjU5?Cq}C-k{=&W z6AiVOBxnpH&NXAO!RS7$KXM|;xhKJ|@6W;kk&8%Ink<=BznQ4~&?ie5x8jEva*6u% zD@0my9j|+I3o~bdb<4k}mttE*7gFTNt8zKKw@#hfH3X5IyZN}>`6OPLF&#bfPQ<8o z51%RzIK=J`_ER%O-_QRQ4f|Xx`Y(0^CpJ0CO~@TjQJaErB?*jZ(W9TkM6>+g}WD$2NP#uqxQRioW=od{%Pv&2jzu_+jokJC+ z+Gs<`PxR+)I-dKi8IOGZA9i)-u$4~)PW$^64;7p;EB?EP(f#juT#P%O^g9l(t4qXv zms4<-qXvFzu?n4jp@X;leu}sLmw}|rtF7KxT*Rj*HsLO-P}ID}76<0v$6KEa!&$4B zU>(id$YD(+DzsmVEVEyuET`w#!K;=8%~uq!FxiFuDjZ1|M@Y1nJ_(X}Lt3(U@=hk4 
zTzJ2L+?v=yb{19>uF93%+O&iycUj<+1}|Lgi?PD+y>xCtnee-h#_sN_NI{23i^CJC z)iFD~>bWj-sf`5PmkD^%&uKVEuOG#{dxQp@9w0wS2-=21kkZt%*zMVL{B3In3U>*? z^GZ*XH!IzmCS zsa&$a<$ht0FE151eoqm^FSDh?dU`~`+iqEn8=L~+ZU3WM>I&#*QD@1&o3GKmU=3^; z)4@+Jet;C#r68r2(da|vYrIElELr8i%He!{fQh1}i0`8LA0GscOBIMg9vNiW3mN6ZI-(J~;XWfyJG%dN#AM*(+ z>Cw_bO7avxvsWO}uPTYzs7&H#oX&cf4%l_lhOxpXzO7rV}C26})WgK562<%0~LQG83h}_275yI| zo^-kE7%;FL3bXvZklkJj*y2}3vxbQgOsGVTBOAD}b*|jH-}5-TFKbZDbQf%*uoY*G zUC6obo`_sUeVo3;l%_a1^13t9se{!pex>S6Qnu?FdD|?oZ`Tc{MvCvaaWAKmoPE!c zwW=m5m-8k)v1drU;Y{M67(t?Y^u^NOJjEY>IE!Bemk^utJ>>qB9P+BDU&t&RB)6}c z5ZkDwq~?`2FWq>?z_~p+mmhG$h%d-(#)f zQMf_rFRrO*!{wo{$)x!sLCysl3)9lwaS9Zn|SOGAm%f;_5vWd*tZ zw;0*m+~(Zl?D)s~Yw_K_QcgbRlO$)-I~4XyO8D#~`g7+rK4H>`r8D}m%&|1dq5mx; zD<2La?=w1Z+UiX5(r=phP?=c#y+WHj4!%jk#H}PcR8xGT(^$MyLtf0f){vBE=kU|c zbzRi? z^#yveV-fmr?;SpLdY|BG-Ao&N?dZZ*KjC@lB|(1&@%umLh~~KyV6x~OeQGv>Om_7r zR%@S;Y0F9p792EIe<#z37e7h<&MuPQFp{MGRK?E^cakGZddS#>d&G|$NrJT)F=~s( zdxM2%_m`u*&9YkrP8)~|a+}E`aX6Nkdyw8pS#o%W7hTx76Dj?3gtHBEpe-q%E7!|F zHUf9o>Bt38akCXNtXq(k>k#32xfhHsE(Pt~(X{2jWu&CTsIJlq2pz8o38!Bo6nuyC zQ{5!WR`av`8hDq^E-}H&Eu<*YPrX4vH19EODLTY&6jL$ zK|5=OLHZa8DG_+wkw2%R>e3hFv*#Fc#AY(~vp-uBTXl`Jb!U(de@Br~`;5uhrfX#N zCr#p(gz<3a0zT~24y-*i7uUzk!z=v5B!A{iz)POXp`+U~DO3E5q{&;}`EHS@UP(kF zvhUD6A3orx8MFA%bVKr+P;oH@PGn!Afn|$0#dijD z`jf5Tghzu-?^q}b)}ng88;@|nyTEDSFvh!~` zfe>l&sh;yC^7S`G)uHdv>#hHh!GA`QXRB0j+}RVN%lB4etvj*&(y`%KTO5d=HT|Ur ztE9=FJ%U?oaStgBkrO97PDE*~y%MG0n#6WuCe}ZrPF^Zn6SvG=c;CrIIOS9!uDPK_ z?r7^1P@IW(RSu`B%BDd4hM~+eiMYH-N@6!#hG-uxAUjwI8NcWfof_OF+_&{y%qd6u zJ82tv-_nA8R3q`mku!+lL{oTf`H_$F5_%lZPV%Y8FC*h`4JadKqV}aA4yGUlSj=n1_XiOHf+%uD16?m6%yY7H?q$)$bd%(W)iU`fz zKqm~F2^+m4K&jn@k0CV>^s9wt_b-6F(niSpR|oxtH@L(h8<`7(#qRGoli!C&;FBN@d-WQgj1EQalCjk$`< zE&S?Z$$XuCA=f{zo=c`h$g1xO{sx`>t=?>0__x_*5(Qrf)P`5c_3a9Y$0Mu+~ujZu~?dRzfrqgw*G$upVDJwH}(A`C*$zlFBA zPjD~El~qYO&<7euEKc8o4H&qve9YxxEw57Gd8|xK~@#G??R_C3Kc_7|l8G0{ojtGViz$e$oRw)Z*Gs8<%AYY^g-a{!mq{ zzhV+3$}WPc@j<-PXlI&a(n+P7JLnekbKodG&Q$`XcYH>`;!q`KKJF4HeRvUQc)Qb+ z_l#NE)qf)1^#tttdH~z0YqPYy`}qu|2=uPu7h3f;or{w>z|?lyGB^Rkd*~H46!t#@ zhqplO{utiPdIU-=>teGPbh2wMcI-SG1ujm`v{&~czah1oU(xx8g2oxzV&V*~vlPMn zLV`@rW|u$ zsL9&&+F{2x9|-y;g0$_{@JHb$-2cbmn?eF;tXE^#MKfmd}n%*wL{_ILoi4zH(O#>?58bMsiVVpqw(&%=dz^A(8R_{)s@!M;!=eh6e>VTI!PLtui$C2CRhv?gc$B2pg0(e9>EIyhi65izVjvEvq z*Yg#mmW-!bzR_U$ZWBk#AHwU*sk9}~6gH2NhoSs8B+1YYR_Nx#4C`s6JKTwk2>8i6 zBfi3wy-y+BXD&5z<{bsTG2p#I9Wq^XU}XFWP_H$hvj_VG3q)&Rr}h>$bAK#*?(PLL z$>l%NsU+q)^9{H#YZ+Lr>`fNU;m>}MZfZBcbTA=3o42hpfU~ z?Kd~>y|w}Vc)kFCn7;-WX_ce&rn6{0Cy(ze{ER-gcah~oi}-h!B)leNAMUF6LM=VI z*g@qzid>m1xO@K;5>K`ygBF3zUY>RLaaStzu5%~4&#yt^yb^YYg)w@$ZYO-1WR2WR z51~=07ZklT!RO6!WOFkdHrC#Uk%Oh^<4*;=zqW(ixj75J`_1pZ=TF0iA$hDrd_4YI zHX7wb@XY#=o@~F#a-3*%0Zo}Rozsi+N4fHc@zc>qNs7}Qc;+cV4Kx078)}W zPl!Pq*-|wA_Zjf>nSsZ7Mc`leCGjauQ>rBO3@ukZjdgG*a_~h={HFl?s>Tr?nAS>s zcUxlnep4LUFNgQMng?qnmgB+U|BzdlD%ykNL3mFC2e=vIP3zj(fJ48Cg0V9|jRd{D zc^YkdQ3oMmqv37MNU+^@4K_t(z_b2bsLD`*L~%C>x~mLOphqWHnNbf389FP~4T|6( zDv#t5Fk|;Y^vq)HJ@6Mjdo~8Wc;<~KHJ-+m>V2qe@;=a;lYx&kZNbft#^9>DSzJk# zCsOT7K-0R%lIZjw?D#OieU_fY@Wo#=T(=!&{>#A|&mUpR;*MZVm9uc43C8VClR2e+ zNgT~{q4z94iN@}#Wuz3-(CEZ^oRGF34g6bnE#*}tI;53|mltFckM5l!6=zjpe+s{g z|LRJdhqO7pcb3AfOE2Q{WD2Kk)m5>)r4t!zH=w&;6u^4>NV0Vc&rAO^0qux8&TdYP zA)!@9NYhgRU)o>+CDSBHY?eM&8(qQk!-53&(QZ+J{7=-mP8H9ztRPP=N?|1h79T%8 z4x)@Z$SC^-c>9-B6tu((<(~URHsAdZhdAs<8s*bSLBBGxKTwE24vR@s=Q5C&QG^vn zd}b?M7q1U^&Q4wv#QI-OV%3vNuwu+CJVqjdl-Em(o;gkyvig?7S6Y79??4@!AWURU z1_em!sxG@eCrc3i+kovKkYL94onm}6D@D`(yTe%vf0EYjeFQ$(v(@og=;=Zi;hio+ zVPBX6&!Rp7K~-tg>s%=vefSu4adDvi>vE}TZaY=q%hAuhsnpUxoyr9_QP;LBwDVyZ 
zE!%dU-#0eUj}=GgVaur~I<$eOtZU&}YzfZa{|_l$yC*v6Z-HgBcjEuPK1YeldPw8@ z8QeT?3{G*^fpk=!roN`({Q2I@5HKY+Cr3Z5_D~MQ9#I@$qYj(*0YU!2CQrf4n_w zu;V&0lP_mFRMiC$DslolWxfv;^p3UOxL;Iq%9;dkya$qj<7oWe4*2Qo3ptWM(c90p z=!)T5{6dDsKHWX+)(vVrb43c>Oa6*RTsjB|U9;e*XEVf}Jxi>6XR$FszGU{fCib?3 z1N%VpD!N#<3M+f$;M1!M@WL^JXsY2jWc6k%EWct-AFViwIxp%X_2-x1qwGTXRbtQj z2dd-#juPZj+&I(uuRU1vd%La2uCkEM&{@VpErUFj6VQ&PTsmO? zjd<)n?k>A1nF+sN$n2`H z3O+j0TOOZa<~j+Q%=-a7t)!`0_fxX# zSqZ$%)`6|}Dv6^)1yT1-BGu&!AkC^9wLG{E7QfHIvUj`TQ21z?zc7w9JU*IzKJOYB zPESLv*AiLQ>JrPQ*?`BH(PU?A6tmd>0FqHOLJ<#Sk(EieDBR)`>k+vYJr>zI=kcFK z^0+O`T)1n+RiQ@97oqWr55mFu-oo}haY9pw5*{B~Av_yxCH!~LSh(TR7~xZg2HcQw zPH%KZqBn71)I`o(yUD$nNg!KiSv6ZxScC-mHCAxvl*#u)}r@uzX6*ka;z+?kw)^}QQV z@0w(cfBi$k`;nkJB8!xTXW`0DRjgL?iqo5^Moj&*x%kHySl8BAcEhYQNPpILwj@Q& z4t)KHVx(T+1C=+~_qXn#@?+`Pcx4JkaZee!Ws|ViP#V6K_X;iUe24ydAqxI*TlDf!6npoI2c#&-Ai3RA=!K6xtE1bDHhzgiD=2H&S%fcD{zHQY>d4ldDxy<(lTmAv zrlIFo#uw0HkR<3tOtf~cOzY;e0wGYYEtLr;FwZgE2m|R( z`0cHKoR5qhoN%3kS$kQutSSnb9~3~`PBpCGoB*rTW5G3E0W9rZu`Y4IR#B&zSG^t6 z{Tc+=N+N~CKn&l>Jb!-lVx;LfGH(a)%jjLzEo_)x+I9Q95K=f0>#l{R*`Va_qMit~Wx zW7oLmOEXa3Zg*DcVJY*p;~8iA%@`)S$a3`Q1ae%IMRd2mWb-;x(b_{TT)vDFE8BAi zO*buO^)INfA)`m2#-6!oLt`Z>wPx|Eqcd^a`pYo6+Zg5--9X3w8-bUO4aLC)kCB~Q zFj{W<6)D{Nj(57m;0~80q;EVLIZvF;?k>`V&wc^it_KE0Ib<=+O^A*Xq{wSc{+XVL=T5;%e31P{N{aj1MBKVf^8&7ge6q+wi7yi2MB^+m}D%3j^ zB)qvYUHEy-VPQ@B8e#n48evLzyfE-ss4!!4ny}jPq_F0Dtg!NyjL@KZkD%oHEbh(= zjHlO02$SEW%?Q=K%&PPM=T&}9cbj2b`|6`?zXkzZ z`wCybGhNtdyII(gx>i{HXg&@Yw-1-@F2>lr6gR(1#Fh`=;l$Q-yddro!p#Htzc6*- zoTv9t!I4G8-L{3Foyzg~^_d_A31~)i9=f!I#j|iBUhq{Fr(GIkoboMM@eBv3KYjxW zlGSO&*17ENX#psz%@(`tdd$haC%g8&2j_FM$nJy&I_LQq zGBtJ+-50a*Cc$`IaVwpz*t(Yu@HB~t&eFj2eE3W{YV}L@ zs?!OyozL9A=W~8q;a}053%Ajxr;qWbe+gK*b_m&Qmc(&leL7QDi+W!E1P2zk+Fv`lvm(R!8w$>Y4ht!y)EXStJ%D;tj+ zH)pY82`jW*&luD`YhkH30RQ$(!i^CNz%U1aflXH+PfLweP`gA9+!_NZ zVaHI!>J=!vd@gr%*=YLH(;a$8^PfMBW7X9}An>waZohG?Y;jm@y{3q7dju4ah{1Ce z8_OqfmoFO9`9^Qqtm7TX;=(ibS6CMWo!!Om81)Z1WdB1;e=5^KinZ;A*B%x+) z18Ncc8d{xlNr`+sXTCfdgwj$da%mf5bhM6rKeLC_#o43wk-@B-Ndn56w1bsQ+JrpM zII<&d9!Hiz(y-;%Zd8(~Nqil5B8N!~nCHa7@|(+H+IgN$`{gklJwKjqwsZyQnRUER zR1e~2tS61(|2T&cr)@9%t|cpP{vv(}4BU#%B{tVDlb7E6kzKDQzE$%Gxyn(9T(S-# z)SPWQPk0L0o&4|HqEFrQDUA5Gjh%-0JGV*`-gMIPT_%T<-oVd2kIwe9v4*lzsF#GpTe0v+vX+ z;=lDL6!5PuC~i!RO$cXKaO)`teNYg==hOxHtEEj0ER=d?vR?dK>RoC>8zM zdz@QuJIt!T3K4mV@9<2@hoVvFD-^`~61}Otw2B#3?6joj z?g8+p#v8exTZYBs4$_{-b0}Fei|*msCrY>dw|C%LI z*3sdH>zmQv#B~ zlc~FCJZ?B_M9mMcL2osqMGLGQaeR^%Rgl%gE08Ylg4>HDQn||d@SS+R{}5~ZT@&@^ z+~uB5$>tIQV@1>Dk09ym1+1&bK5{%Tfpv;{h|&X6Q1+}1*jFTLJC;uOEy|*OU9;G*@+s80O`F!9PC${JEHch}4wuDk@Y*ID9xWj_ zc_;7mQ&dD9V-4`*3u#z#|8lu?|TyTa3S}1#+iWYZEu;ueM%H2rhj& z6B|#cXZo6axYB+CcaIyi{8buY0!&JP$a7)aWC)Ik6XayeLMhaqG#&W7Z@tZ~-h|xg46;N<*4nBlmEA zwoTe(|H^6;e^J$>X7KoZlN~_? zf-!B(JAD{&_%}QURD$6B_Z~>^I98g_ZXv1-;HUmxiuVmejUI5%k#X7z1eK- zAhz*>G1w&^r%P^WimP}IZmNU?4x1m3B>qg}tXAzrfv+w=&g))yJlBLqWR@{m>FdCB zlQE@#BX4AS`wj_;hAuk1=>7=%N>b0|hobuNL>08OvtZI-765qk{(52M! 
zCjiTrgCcKh->ait>=rlyOjbv_=-UJTA3UqDReeYPN;-&GVv!^FTm z6gs?s9Y{2XSa%03j-HJTpX@?Tr+EgY>V?Wn(PHX+{02QO$RVG)wh|}*OLShJq*(4; z2+Ucvh35Y2gxJa_)cmL(oS&;nvws}oV$-(L#za37s`VdjKa)jg&iG0FOp~dE(|zja z!}r-vDv1kSFVGwM&SHU&P~3g5h4xM>qcTNN;zi`R`1l8B@&3oR>F=I@pf|Sy+&X;G zjAlhz@9P6{6Rtr4tcL0XI@H;U&jt@|pl<}hG|QFm#Y|VFGs1NtZGJcRS{zS42A?I# zQU;_n_zT6$^T?dKeW2Krz~{ODQEYyQIHgRdu?~7f^-wc?AGQ&`JrKa8tOcw(JAux= z<^gxCkFm8~CsESScTwruQk#Y9?mP!_3^T>Wf=oR6i&JSS0+|hE(6b}ZHsi#|%EDPn zbc@kC*kv-0*))0`n=hw^trV+K(eq?R+KtbZV?VsbqY+K($>bek77&t)QU#f)wk51;J|9on--H7~Ds%FB=dl+MbPU0rmFgQ)X!3kCM~qw+k^yLiTUv;<8?XHXbH2jVeTH?s8D* zyaS(arV!6J*6f}4zu5X>-o?fH2lvPwf>2fit}^_&wzw5Ox6Pn#iM3E=VM}H9%8T9d z%*EH{mDA{)1iCU`THJSNtXMGpJZ&30Kr_awiftYV#fh6Hh?{OIiB;zsh=0n2iQh!n zihZ;*DyIaSvp4m6Q3UUm{;;H6WMoxSL3?7D-xy8u>H8d#9P^lo`j#)q zTl-FQTHTbm|B{EFzuM9458-$t@9Zn^pMVpu-GH^;!?0_@9yB5A0~%ZXAEmK=^x3b; zl*x%iAN3OOaHt#pRKAnSdiOx$`^~VxCkFgV@3P)2-xAS0U-DY*0cUZ(8=o=j#QAm( z%_`T zhS7T7#dKd^LHyo3vkg;ckWI7oN!^qJ7$0^MOx_-VF(1r8c3U!tKTFV2{e$p5wFq$B zJ*Zr(OI;gBQr+(baNU+b#l;~I-MRpc|E9s>^#m@>hyYCwbq+4+)%H$>zWboejPyHltMm5+?PDOK2J-Qt_4+D} z`CAv3hBiQX{6qM5)c~a05>XQHGaLmWY*iga%cbcC;m$pUVyY_Y1 z?(J<{+dWk{Vp%)ZnD7wyG|357W!DRJzK#>lvMCw?pNHZu^jGZ24c~c z^Z0$Yq;QO`EIQp&kDb3gCCvxFXN{ODErAE)~(xsm~M$*PICHiy8WGWXmmd^hpqyhJAsqalIYN@MCx3u=N ztww|_t~f{hfzQo8cR+U=&VpubA{yd%jrRA9rDz$Iou-`Yh zLpS>9`xt~sSm$h*l4A^Pa_3C?+y|zxiNTHJ_Sp+ ztYNIoE5Y%WGa9s-iyLQ~F;-nu;Cz+`TH+Ls6g>Kv0cn5KH`y4^RbgOSl@k7ZM2er8 z$1+l{a+s{BR(zuc&_z!rY_eaKl=8Ff>T8)KWThp3<~A9omMRD}&qj#Mym>GE0c*~^ z@UTemsaUl2q#wDRHxmBtae?GHcBssy8TsY9Le_*VVwK*8diXi?j?W3G&ZB|geqB-a z*Ha?Vt@k1YyGoIJcQbct_BPQSy=2Z*#+0!(;hAW%YvAjnx5VSwI1ouKgp$3c!qR`z zLY1|(=xvWON~yMjJ%)>@($e*GF}caSa+*lfncZ}Fu`|tCqE16{<01eKF(gx1x^fy*GEMj*j6xzCfzsi~Y z7-H7%c4I?d+X>QISJ_s6Yvn$;9${A5|3I@>dZEzg<;bN}19zx|VJn5TSe5jlkY|dx zLbwdC%74z&NJMy2k}2ByU@M9>ZNeo*ckqD^n(WVohG=f03~7z5Wd9s{g^r)y!1|4t z!RmBgWSefRMS<@GbRxm3@GN?qu?IDNy2$AIgt1>n z?PHh1J8n*@zHQ>NA2t!w*V{-1JtR*54X`f#l-d2}IuYs~M^(nxS*5BTPIM=c&?)V# zw_g%A@|%Sh@b`W@CI^Rc1^9BM82ypH#m+5!j}Kh^gPZfF;hDbGtV*66Zg6;v5B$4< z7fo@)4qdYac0Z?~px+0{k_1u zcY|0wH3HWf2{d6S73n27GAZ>BNtc{88~bxF>ejPoWz1e6r;^!7(?JKHJMGE-3;d4C z-EVWsEnYZO;Ts#h%LFI?xWImGkH)zPyG3CO7#8P^=avZ9U=Ma5YtRMYad|4_4xdGz zLyn<*Frg|RL!oNoQ+B~)AzrM02sy0o1c~!w1+RBq#h)MW4rEJTWcVwLJ!$ZbsGf4g zrvD{lr@~AwvuQL~6tBSYW(|0K#wNrzj>J`}o#X+Z*}fJU!(Mz)j233*k&Ri=*mmY& zR^h@~ve;}A#B}}R^anh7zp^s&8!Uv(|MYa*9={e$-xdF&Ex@jfzZWn21*shVFgu678}Rll7I57-+?YRfrR3^h&x>TsG$m<5DCV6Pzz_8CI~)r zn<@?eGsj-fd`XyEE~8*6YpwGt0V`R&L+Q!`{P})fTX8NTgbn#{K!Bgc|lpBAw_EQFNw07=5!s&(Dnl zAE!NhXEYEVX})AH>Ye67y;fogB_rFUy*|j{-oc9aj%B#PrjkQC68O%VgQUqd7W*p3 zaQH+77f~Qhvbv>7u$vKXi8Chk(&{Al`c3Yhzdvp;)JGO$0^x6C9x3#Fi?@jyao0Sa zE!~_zE^kq?ahqB%sLOI-vTmxdUK`@Lj@>f_dNMO@B_4jg7I|d?$-j1iJvB9#`4y=T zCzs5{_q`2;>(gFh`I=JXqc+6ehq?Il%N2NL5&yjyDPlQ2JKXrg5tpBJ!7{Gfapc{_ zxP3zu&ON*r`+g6^W~MeMZq7n6WZyS}q*=S2HbdZACjW&oh-4$I<2-ZIJwsfi5pO!Or{kg}ED|O3bIYbJYtD!^ggB$bVrR z^fEkGZJj9@KSjU}3#_2Udjjl;3)!d(HNW;!Jf;Z*61sF%g%z#PIzo+CB-3kOf~j}LRdUrXnasJ| z3Rh-%(=2`#=o)6kIecqF2@495o6K$8`>Gf(I;w-)W4v*6Oc}1`v$;OW{y49870ywy zK`I;S+4Zm+)#=osCWiN2>sS-dK0kI$#dTWWcZzl&kpZ;=Iq{{x3AAXk3NjxKL~g+x zWqMoS`Pwy1jQbyUn%-m7+Acy*{06yP7ct}HH$%{O<8bm%UIQhdgk8e0IvR36I*=@ z6Xoy^taECbsOpR|DsZ1o*0jfVBn;*W|=PS$#S&Dv16N z7?H}ZL3G9V1kQT54L@1hkGfQf@sx~A+<{K8X3gz*y`TWMy6=Kh%U-e{w>`r2^DMr< z50#ww6C0L)3X;kYo!?hRT_%-KLE$?dnwd%2o-%r9Bv9!t0gW}bqoX?e;m93T=+GIa zamy9NlRZ{Zr=9@PT4@HFzWfkmo~v?q3?U= z;KbYss3t-gTiZvX{FOJ+3bpxg^szIW`mq-D3P;eEeV4f=zaoP zqNAN6K~S`n?hB6v3HBA4Y0dXr<#M3%Q4P5uIgttn!|1Xt`sn0nNA{O&Elg?mr4D8G zP}aYkodfgOE21ILo+Jq>yIjb~5EU`FX(Ge?9k%N?P7st_`M|iEo6tF-TgjRaM+la; 
zq%momsdKXoIJ#aUzf79x;N?}YG5aAgTK$i)(DOp&nM>JT{W0u_Z^zk~{vx#h!(rsw z9L}~$&p^U?F{t1~8rkZg1`^uQDBHA+J7RK?4F>@zPD(&uz4>m8V;fVkY9=x})q~P5 znt@#L2q>91hPv1K(}2UdT#&sxtSuNzj~~i`)vFb$|GF z`tpjZ3CQ7@FMjx47U#ZH6npNTh_=ZWa^pl-h-bxQXlNTpA1)jr$~2UOC!JfsaFrAH zq5M7f#H=w@{ATolf(>%jMn(%6RG_fUec3b#-1#LIeau>I#6yLcTgf}l(fj2_Cd zhIc=bzY`YG>Sdm^!S5JtQvU<*E7#KQ#!YnAqBGRtcN+b+XFg53eV>)NdyV|LEKNO) zOzDLVU*3@&$#tftk>Z5!RLX@B6Kh6%fUTxKO60{KZWPjcHWTSO>lXSUzL`oK(;(Sr zR-^5%Q8d15H`O2Lg!O|N#PLHc`oqkmvu8hq?T_QoK>bpx{dow&bPNO?B@uMRSyk|mGN2OFYkGagXmYZR_sPoKqhm&OJ z&2?(3`$4pxKrLIoVC(~WBE z<&pMsjQ#d@p|V5YxbgdXQ23EpWL#Q;Txkq9+`JbRDcolBM--vx9u+urXgXdb4#b1` zyIFbu{d#Md4+!5x(d7$e#cd;aeud9rD$>#r{{(^fUBq1R!rE=Le#dnh6ZnOu-;@y_ z9kp8Q_{K%->5@rnv}VzUujg6+&F1iGl_EVba~9e)Hy{3NGo}f9^>NbqGw7|$XBJ#l zv6reIHv4#zRez@qbz8qdR+cYBtv$hReNn>AJ)jKf#Q}8n+W046KLV*AcMI(1<=bzKinrNunTqs`AJ7T8L=7 zN~iAc%YNng196)n;&g)6P6z2YNMWnN4fZJwxieXo; zV3l62U=yV*pw#d^1euuAvh6eIcaEjMZC+BdH)85_jrY8`D)Ov*7qmi@z^+pE1TqvL z{%=t>o!>Btb`4emE|y~c-1Z{BcZx{Jjv{j0WFq&}Pai&5hf>yO7td{&L8ku~3vRQX zaQ#CekasDQI&51+g>LIm^ujHuaH|Cv&8Bo+@GzAwzDVDxyaSh$_h{aqemddoY3jT0 z3hn=PmL{JhRA*`l5~e+;mu*Ifx7D`N7_Am|$IUvylZPhkCrJWYH@c}rTe8iePm`F6 zS#4}`N&x+OAWc;KB@wWSD{YX`q{|~?IjeCcB3}AQj>n~tldDFdx+86Dt1y+&@Ns0> z3s2-fdNz9FHxtR)&O=P~7kK1w1KRuEl564%(2ynvQr+7?|D6Tx9DV_vw~tkZ44K2l z>cx;PzloKI@DNa+B35Fr61f;0MoSba?D&~Pb>^2*gGKzo8K}j!lu&4Uauk-{YJ+Ht z`;5x6RHl<>lq@LyUa8~xoT=!pX67l4GCA-uKYr5r|LrWizav{FrZUUO`vWDcF@>#60mUu1hvSk2@KC>k^`F(%R14yqCfzw#0m z_;?>b5A}xpIup|1?ZGMf<-w>O$>8Is!R|77!`*x!!CZ4FCbvH|a$bpYMC11i=2vMy zyRq;QjNK+L1twRJ3fmrsX^L#HA2TMBV?dQEH|t%Y808#F7L zLC5z8B;?y}kU3Nj(Lpca&i$#dm3OZiP7Wi|4u^>490yWlBm=L!6Up4m{JcVUB^>XT zgm4)TC|VW(KekSXD(lM1_i8st#O7Yo({hf9WSWRscM`PSvxkl>7iQD~X^=e_2TT95 zq-mTnk^FE9te;81oLECPzsM5UgmYk#slwJw`OVC|)JDQ?SBw5lt>kj@?@o4IU(O9B zyAZb#WefyQ5?p=tn&i57kb`HXSSMixqgNkFj-V1ES1C!_r>jhluaTz(yyNX8b>w$8 zZnkEWT1a!Xnmu;x<8{t2{gIoh&F>SwLj2zTlv~ zEnMtR0L_VwOuO+>Rr#-zpE5n>O09=A2=m? 
z_T7k`{agq+t@3=1CxFO|8A0-i;$(7`$!=QAc1EW&PVQAqb{z;D zb`6MfEjyU}lc!1P2@BA4nhkRMzH@t%r-G&n3%R}=&;A;1W7Z%Avlr^oVgJSSZKq1* zv9r-6@?j{Xysd=a$+e>SpVgR#yv;B1KZag1+{u*$vdBB2d8ar`nK5_W?fR(y;h868N$1We& zDcW#kE_t3SN2~9BOJ`xAZb(frn}I` z?Zc3t@*h>oQKN;CTWI3iW_nh1km{^m4_l|MqsKFk(x4?QUC(Gz|Kv&3;B5?;@UFib z&oiO>-a1ysUzzI0zJdA+(V$zf1XNGUA;UIjPJg=#88XqOR<#OLW&b4l$7mM>J-7fv zB?|CS?0$duj$q!0268*W&;k&|nRC+Yo#;6HK)NZ#lor&l$jvuF6d{M9Smp2A=> zs?QCkl_kQy{pX>Dt7WG@5W$5NhoSYmGgUo58*bI*feSN%_!PR*aX#CbsCrJ|Q!tsV zIQ1P$0s%~SD$v$NjIF%Y`KI^p}uQm%&2242^$Adw-L*a%q*^u*)= zm-p>%xz%3r`9goz=qQ_pMdJ}c7Ws)_g*twnWu0)8u#95+) zCW;-|Zxe%HfW^jk}Pf+QZ zS1W5eIFMG2VS;xbrOL8y#9vdMq=aI;{CVj3A&Ig`BWu%wpRA+)V@Jhe!^0#l^!!Lqtu(EIQYHX8ywa5kV}9*e2w zCH_to;|v3~qiE_|N2+?vk_zZgSh~C!>Ux#wz!U@8IyjnM99c_VepZ9;2kYQ@emAka zFTw9xE|BX>NYT4&Gj2Am zVLfT?c1#oErqBf=G-&IoJ5Y5{jV8v8qVD0D^vBC!`k`?abRNA59cNZki>LBbX^blg zf3cj|m*<9@mfk`c_9bjK&+U7`=wp?<)%bDVVeGu+27B#M9y)in8imWKkg`#hczxYX zbVA)31x-ds)95a@oP={aW1^5xt(fe8aRvLk$O?b)To1PjcOtt$8~QuXk#ph+5GvP7 zBKf&Yv(zNIVoD0MiXW2NkQjRGWdl8^JyqQ3Y#?5AP)v6%d_i?b-lXwG>*y*PL%WA^ z=zi-Xv}QV zx}qg?^x{;yfALfJ+T}>RlGf6N>pnxY(Pg+bv!6@eR7~}XvuR=CQF=LO4pqL90DIQR zfVovC49E--d%gP*Ft!KuZ%hZh!zQq2#S zu<9$1k?3usK&n_oa#Nq<(2Z?e-LWmOvo5&3uhM$gV@LQq)%zo{KQo#f;`dUk-X+*=Xhi0_P>p6%Go^c1Fh4gOW zZ|L|mntqgurPVt` zcW8g)UTU?*o^ExT1nas&K+E7FniAxKJLjB&#ly{H>x9kt*njJA%(gdfK6FP6V68;@CMrh~p3Psvp#4F5iVb8uKY#^(K zkNzA+eV-q)ecA@-Pvbmvac%~myLyHg_t99@y&uguvlvy5C_~b>)L?_xKN$L$$cplo zfS(y5sYn&u6*sU`{cGXG%2Vi2<#DX1_J?!O#K^{HHt!UdAyJp2xb*r_Y~YCxtm{D^ zFuklutTyjpl~EL?`=&$82N||~+-M|z7Y6RDqR^6*QRwwedGciTYS=Nt9waKfNCo{| z(L1iHqN7fRc2_?pQ~xB92#2Xi==M5zF!>zOR~_JrH~WGtpG6zkRL^}~YbZE9 zHkr=PP^51S-wDR`RB~&s2hh@vr*M}Ih9&RkkwsmO)Xm79em=X9eB5Xx*#GGW?K?e= zwvPWmxR7zg;kOaJmEQq5!#t~_?xtvC`zzQXlO;;qGauv@+mb%Xv`YPt@?=J%x|hL7sl9M_*#=l~<}Mk^P^a(pIv{F}JGgGqf)0b7G=(IB zgCHO5p9{d(^Cq1)Yd+nXu1PnII1Ep^6wr+-Ay%oWMza=oa5;a&@k`~iI6aLc@4i-( zf17&9mPAUN(Zl9 zr6GxWljSI=3k(f}k%5rMK!}cE?kES6N9Q`$?k)Bz$mMV|+68WWjkUec<=*NOp z^pRbUAYgMh*LH9pSeI$it4#{r>(^04CjT~s$BIDlw1H^0UIgr!eGMitOQFjA4kUFJ zLg=^}64NLNUA^0Bfd6Bjhq#vbWt@bI9tKF^!ZTDOg>h@(cgQ$kiTpNtLgf7=^m={) zsA}90J=e}<3cR(!(uDW4_KYS)22aUeo_jFUkAJp*T?-dp<-!fKR8W2K0{%F>h1>d* zY1{3wv@3ly6-*O}#x`DH23IGtdHaT#qE)SxJF2zV4=+C1{>#@TRth<+Uqd&$tNjIg zSzn2rBk#=f9Zj+JM}+a&Iq2uZ1t>}W4O(LDg13kZ@c%gb@@TB0_g^6yDwKpqQ-;Vq zyl3x6MM=_}Xp*E!isn+uJVr<;G8dI0D#LsBL!{C~MbS*7=12og_k4eU-ut_E-Fx4) z&L94<_S)~e&wifgGkr4|OdlsW(v5y2Xs!V-I(OWHDjYaMhVR`;YqPEB>aRZZUHN_L z>QqAgOmC16X2S5QG?xaxJ4l~KThPRXGE|{Kk;YH8q+CHI`B&LZI)3}nBC9Gg&0-B_ z=xs-P&6-J)*qa;a+9~nRoQ=c5`Nw8v#mz{X-+Z&QF_8*a0mcv>8b|d!7 z3kbxlB3!;6Np{U7GWNBCmvI4kJmr;W*#{%KYojJfpivcKLoc#Hp`T3CG$a!X8b!e; z=krd#hLL376mFkJEFU6KB$n`*Msjw`(V`d4QNO_d*nCx=P7>}p{|o21 z-2%br^lKy1(U(EqnmiE)xgM?8h}g^5Uk@O@1A?lfb_s?m zUyS%V|2VRuIh>nVyorzc(!ek25xjR3tN4@q()g4KLxnujk*w4D3IFbc2~*Z`XTgTv z>~!jIR+6aAT;fWaolCEWyJh(M0LmW z22Om!m1OPrB`aSAh`Wvsq#tq{xPTfLPCjEHH+s){@#0lRtFC>d+Zp}@|SU4hmVrS zd#4c1`)A10%4qV)`!?MmGN73mQdDy7cTPWQR+aw2pVb*hCB?&Zw}`E*R#%tzFBjXb z%@%)cNf7^KKH`aU{KWEWRK$zdnTrpeu;aQsWavhjWa17+VyC~8`QqokE;E{ z^!bv7itN*IUG^+}ef9JCgTx;DllUhy&hzHWn=9W=4ywL#GKLQu7R>+iY_#r5er+4G zaGlsAON&pMAy55y59*!1i^hH0Pt!(+(2o+nR6bRWs+ugONwR`Byxd)?6Iwu%Pc;$u z@^C8i;w`Ow1@q+??byUV869OcJ?mNJ zm1WFyr@-Gfwq$dsmGKrdhO#{Nvi19u9M=v+O^zxtq@!5G*xbj z#8bX$Y=Wrqmq&Gd@*wVjz!kPm-N*e-wX%J!Z$Y-+G-B;4nzU-BJ2lpgrxy+heh8nP z_`gcUq~!Q4x`DHx{qbG_a zY$`59eAKU&nWVuFdKpK8<7D{Iw$b7*A@zi;(5GjW4EQ;kGW@*ZcSYArzj5C+^Y~Yd zI((0VH&J_ZPzaG2!JhV+utMt0UwT%;n?9S*jMHbZ@s)=ehNUs7h!t$g$2{@tAY0aH zxR2RzgP6Z#nfTG*nJi68iM=~Ko44B>Db||(o)@nb_~Ba3{Hf#yUjOtEey5xXzhL4A 
z@z?-o-bv0~Z1(b)pzGZrCOu1el|9vB@r`(~-nbO;qV{vuZ^HE0e2-SLvaN@cJn)8B z@_o;*m-8Xx?TYw*>3m+>VjSo8zLC$eT*I{L()jP&KX4Cjf8eyGV{AhLCRqkKzZ6$r zR3II)&nuohkF|AfUPf#y+DMm!WOYuiZdKjX9#W$jMb=n4iY^_s5*eB+vMmW&Vz2xa z;*g?Y)eqGl@a})7a@9&|yvOmc)jubjaD)4EML!f`gt;@Tj5Lns+{qq(@F3(Dj;j}s zuan}Vd_IXR?`*A>kD13yfS2v4c}GMa)L3=?<5cl6dl@b&fmUx=mQ?Nd*ou#TeTCBq zvaF7XLT=T{K;9=novtW8O|CUF;XG~R&Z$mhH)Ccn;^W85-OJ@{X&Lvm{R#Q1a)PY+ z^nlc!R%ag34g4aB9)4!74L@elckZph5pLh^3)R0J>%@&!*GYW$>+0_hOF8N-%*V?z z#4GFe@R|4c>NXtkCS|eNkD=o830`8)qQRtpyBnXp`Vtp+ z*N!|IFo{R&(L{Fnn(B7_qh!&dJ=GCLFT~}p#iIPWG%mMKk_?_Oh2u7?;y>!vig(JD zi0|(|B3>_B$nBgHCjQSpPQ0)c8iif^&+~uUN6Y_yA1xj2P5+^mPZPiLpAEanw?hlr_5*AAMY2~g#rzxpt8X@UXu_uI zO#U=$+*Uy*w?*<&JEc)6?-RTEiDTpEU*s}E=5n_rl*yg%6)bn~R=z3Cko#S+gL!7H z7DxRM@}DxAxgn$Hld3)X#Cv`t7Zf^;AM2jS9sW3!xTKbFrzI}&5hW$!?Z;(ts)e9u z6LPQSHost2x1N*z8JVQqVKq1U_Ze1M_=LNIHiTS=;E$h|;gqA0U%W+)S6X(k>P$=o z=cy=9pV*vZfhLJ;=-7dz_3Q?&FF}C}bI9i1XNy=)SC6Rca1cL0H=UI|IK!_!QO9Lw zt8&@bQkeUl3!LioJg#n4Aus!WB`zV7MEXQAzwOfuu2k@aEF7B8p3NOZM(0@)$_$vN zK~uH+>Ow-EpX0ua9mDN0@8b49m`KFGe7SFLt;JK+{YY<~mgsBaMb3J55=rQtMV{wr z@V@O;%<-ruF6~U=r@KqBr+;*b+Xffbu9e1loZ3&etXW2s(TSXm^&{uXZ}6w;!-acY znyNPBgG-@k8GR?E=Mw7Jmc>Z8uiAHtR4HAx90n4=@%Q$s>XqK-DnX14){Y1S7var z@5;EBp8`0w)w{_)r@f>!VGsG@+h|)Ue~avwbS6quJ-DxBPw6Qcdv40C3XXF-BrZT> z(b!O-pS0aXhPxIBzKcUyu5AiGyT_huUVV@#`JE6C{d$k6Gz=%s?GEHhnl*{M5hJoZ zm`$4O_gJak>GETA#`Mzc|>u zTs)Vv*Nmm>8oIdC>>Tr){Dhe9o=L80>Qbj6Z<(LKuh4Hf$Cd6qNx!)C3q0*E{+Qi* z^8J=4Gjdj;{p*(!|B58eu)BqMEO4abL%wk>bIghV^%1bRljrWbH<7n*|A@DKujGP8 z^i?HE3tWJ1C)SxWhU~d04H5?zLh6~VWH583#d5~<;G4v1uhTEZoNX&N#C|wc=aSg% zYJXyCd7gYacY>Y|S|PSySxdHC#e%gRQ-!8+r9~6(5$8AWE5U0xm(q7Q;C>~Q z3lR1}}=;}moZpZlL zROwO^J8K6*AIf-Nw|G(VqAl#vpAykLxeIjU>bLwxDI5OF!fAZeWqba&{|2VBAcG7{ zf6go2I7!^5>#)0d!Axn1B*>U0adxwoaT)Jj1P?bEqP|y(%edVx%uE-NOn$ler<=Pt zXyL2s*Bf7mH)s3t6P%0qjSG^=1q&VO<~DkL;(qZ{$$RAf zr{}zXOcQnbe4Wd8@aE?A1n{k6TFJhE1>E6fX86u`1r3!xOUB-Q$c9BcB_Tdi_~C^N z-LO{?*9{WpCZT&t)XOx^C(D*9Npy>Mttw_qO`lS3-6Sqz(I~FiCy_bVCoQZtjcsXt!_RZ_6Xgd8xgpvyq~Ts7kq}RwC_-0!Zi45H@|RK1Tfg%`~oTBw3%aky10lBvaNl5(X`9IYWM*n>sVdZGA|9@Ra zbQW)6+Rn~=d0DEce%c9=5;~t;S}>Sjp{hzp>uumZj~0`r=?DL7T}@JM6ZD1J=0DH> zeqAFZM*O$yGIz9B{$H0h=c5IR+p=+it|L63ZVeB&XTbLZxommqdKf%u6@33}O24d1 zqK*&cYdYGGpkLrI++}qM)raQNOp6itpiLL=KUoDET5aL(@o#+foC?5sdm#39B3vDB zf!g}_aKh9Ks!-)ZZ8By+^`dmrVm+9J`j?}DTqOGSOduKNr7XH^HQmcu)4AzUYDPRh^0k*x|IyXR@Kn)^JehN4V>8N`a3jzZWK%|ItshJ z6+!dyBYJiJKl)5z4|O}g5cDK7V0-6bxNW`(7hKp0PA!+|>*Z-w(R>|9u33gkD8<|t zMAN`lH`*^Z0lhwkVD+zRIzcBM@m?@<@Lh~8fqT(t!B#YOGDVx01M$>{BBI~g#`m5~ zBIlo{(L$pWY;yK6p}g_u{&~Kn;Sa*;MdvY~zi|RH>e~kAZoX!Ldma&O<&|_y`((Un z`-^!d2s~TaP_Pc%1A$TS_^FpL~|R%}zvBzr)PA)Q@gGn+9o_6)E$MIsXf7%ofeCsGovx6B3BeXvxqLrTmVi#EMSm_4rE6( z!Px{3R-8)(?J08Nw>o*SK*0%O=7rLW2j-x9@LBfs@nIa$R>#~rT4?d)Sk$sV%~mVw zq20cFpuc65o$ra)kmOVer2z0#ox`?^lF*x3$kt|0#b0-pqjuIVJazdZ9sDYemh8&n z`afIXCmSnJ5N7vZYrRoI=*%7~E70kW&Qpgb4Kyo|#?HDc%v-}7d+sWO(>Z_a=$?fS zW}e32;zf8~?-%>GWga>NJi~$%8`$l30zUT*z*jRmSc%~*cyKNseq@Uv=gC++=N64g z;bpkf_a*Okw}>fq+0tW+6v-mNe?MsPSlIL@4^M6z3J3OBGxarx*r@B}XgFFDPycv? 
Date: Wed, 13 Mar 2024 15:59:02 +0100
Subject: [PATCH 038/171] Update distributed.py

---
 src/itwinai/tensorflow/distributed.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/itwinai/tensorflow/distributed.py b/src/itwinai/tensorflow/distributed.py
index 83eb4fa9..a89b47b3 100644
--- a/src/itwinai/tensorflow/distributed.py
+++ b/src/itwinai/tensorflow/distributed.py
@@ -2,11 +2,25 @@
 
 def get_strategy():
     """Strategy for distributed TensorFlow training"""
+    cluster_resolver = tf.distribute.cluster_resolver.SlurmClusterResolver(port_base=12345)
     implementation = tf.distribute.experimental.CommunicationImplementation.NCCL
     communication_options = tf.distribute.experimental.CommunicationOptions(implementation=implementation)
 
     # declare distribution strategy
-    tf_dist_strategy = tf.distribute.MultiWorkerMirroredStrategy(communication_options=communication_options)
+    tf_dist_strategy = tf.distribute.MultiWorkerMirroredStrategy(cluster_resolver = cluster_resolver, communication_options=communication_options)
+
+    # task id from cluster resolver
+    task_info = cluster_resolver.get_task_info()
+    task_id = task_info[1]
+
+    # number of workers
+    n_workers = int(os.environ['SLURM_NTASKS'])
+    # list of devices per worker
+    devices = tf.config.experimental.list_physical_devices('GPU')
+    # number of devices per worker
+    n_gpus_per_worker = len(devices)
+    # total number of GPUs
+    n_gpus = n_workers * n_gpus_per_worker
 
     # get total number of workers
     print("Number of devices: {}".format(tf_dist_strategy.num_replicas_in_sync))

From c1af5423e3181fbeac3c72403d5a2390e8ba6a20 Mon Sep 17 00:00:00 2001
From: r-sarma
Date: Fri, 15 Mar 2024 12:34:14 +0100
Subject: [PATCH 039/171] Added TF tutorials

---
 .../tf-tutorial-0-basics/README.md           |  18 +++
 .../tf-tutorial-0-basics/tfmirrored_slurm.sh |  65 ++++++++++
 .../tf-tutorial-0-basics/train.py            | 113 ++++++++++++++++++
 3 files changed, 196 insertions(+)
 create mode 100644 tutorials/distributed-ml/tf-tutorial-0-basics/README.md
 create mode 100644 tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh
 create mode 100644 tutorials/distributed-ml/tf-tutorial-0-basics/train.py

diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/README.md b/tutorials/distributed-ml/tf-tutorial-0-basics/README.md
new file mode 100644
index 00000000..b4715895
--- /dev/null
+++ b/tutorials/distributed-ml/tf-tutorial-0-basics/README.md
@@ -0,0 +1,18 @@
+# Tutorial: distributed strategies for Tensorflow
+
+In this tutorial we show how to use Tensorflow `MultiWorkerMirroredStrategy`. Note that the environment is tested on the HDFML system at JSC. For other systems, the module versions might need change accordingly. Other strategies will be updated here.
+
+First, from the root of this repo, build the environment containing
+Tensorflow. You can *try* with:
+
+```bash
+# Creates a Python venv called envAItf_hdfml
+make tf-gpu-jsc
+```
+Please note this is still to be added to the make file in the root. Contact RS in the meantime for environment source file.
+ +If you want to distribute the code in `train.py`, run from terminal: + +```bash +sbatch tfmirrored_slurm.sh +``` \ No newline at end of file diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh b/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh new file mode 100644 index 00000000..7d6dfe2f --- /dev/null +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=TFTest +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:15:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gpus-per-node=4 +#SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +set -x +unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY + +# set modules +ml --force purge +ml Stages/2024 GCC/12.3.0 OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py CMake cuDNN/8.9.5.29-CUDA-12 + +# set env +source /p/project/intertwin/rakesh/T6.5-AI-and-ML/dist_trainer/TF_runs/testAI_hdfml/bin/activate + +# sleep a sec +sleep 1 + +# job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo "DEBUG: SLURM_NODELIST: $SLURM_NODELIST" +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +COMMAND="train.py" + +EXEC="$COMMAND " + +srun python -u $EXEC \ No newline at end of file diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/train.py b/tutorials/distributed-ml/tf-tutorial-0-basics/train.py new file mode 100644 index 00000000..6c9e6d7e --- /dev/null +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/train.py @@ -0,0 +1,113 @@ +""" +Show how to use TensorFlow MultiWorkerMirroredStrategy on itwinai. 
+ +with SLURM: +>>> sbatch tfmirrored_slurm.sh + +""" +from typing import Any +import argparse +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers +from itwinai.tensorflow.distributed import get_strategy + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument( + "--strategy", "-s", type=str, + choices=['mirrored'], + default='mirrored' + ) + parser.add_argument( + "--batch_size", "-bs", type=int, + default=64 + ) + parser.add_argument( + "--shuffle_dataloader", + action=argparse.BooleanOptionalAction + ) + + args = parser.parse_args() + return args + + +def tf_rnd_dataset(): + """Dummy TF dataset.""" + + x_train = tf.random.normal((60000, 784), dtype='float32') + x_test = tf.random.normal((10000, 784), dtype='float32') + y_train = tf.random.uniform((60000,), minval=0, maxval=10, dtype='int32') + y_test = tf.random.uniform((10000,), minval=0, maxval=10, dtype='int32') + + return x_train, x_test, y_train, y_test + + +def trainer_entrypoint_fn( + foo: Any, args: argparse.Namespace, strategy +) -> int: + """Dummy training function, similar to custom code developed + by some use case. + """ + # dataset to be trained + x_train, x_test, y_train, y_test = tf_rnd_dataset() + + # distribute datasets among mirrored replicas + dist_x_train = strategy.experimental_distribute_dataset( + x_train + ) + dist_x_test = strategy.experimental_distribute_dataset( + x_test + ) + dist_y_train = strategy.experimental_distribute_dataset( + y_train + ) + dist_y_test = strategy.experimental_distribute_dataset( + y_test + ) + + # define and compile model within strategy.scope() + with strategy.scope(): + # Local model + inputs = keras.Input(shape=(784,), name='img') + x = layers.Dense(64, activation='relu')(inputs) + x = layers.Dense(64, activation='relu')(x) + outputs = layers.Dense(10)(x) + + model = keras.Model(inputs=inputs, outputs=outputs, name='mnist_model') + + model.compile(loss=keras.losses.SparseCategoricalCrossentropy + (from_logits=True), + optimizer=keras.optimizers.RMSprop(), + metrics=['accuracy'] + ) + + model.fit(dist_x_train, dist_y_train, + batch_size=args.batch_size, + epochs=5, + validation_split=0.2) + + test_scores = model.evaluate(dist_x_test, dist_y_test, verbose=0) + + print('Test loss:', test_scores[0]) + print('Test accuracy:', test_scores[1]) + + return 123 + + +if __name__ == "__main__": + + args = parse_args() + + # Instantiate Strategy + if args.strategy == 'mirrored': + if (len(tf.config.list_physical_devices('GPU')) == 0): + raise RuntimeError('Resources unavailable') + strategy, num_replicas = get_strategy() + else: + raise NotImplementedError( + f"Strategy {args.strategy} is not recognized/implemented.") + + # Launch distributed training + trainer_entrypoint_fn("foobar", args, strategy) From 786e79adfa70ff76480686a54cc325f890549f0a Mon Sep 17 00:00:00 2001 From: r-sarma Date: Fri, 15 Mar 2024 17:25:44 +0100 Subject: [PATCH 040/171] Fixes to tutorials --- src/itwinai/tensorflow/distributed.py | 24 ++++++--- .../tf-tutorial-0-basics/train.py | 50 +++++++++---------- 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/src/itwinai/tensorflow/distributed.py b/src/itwinai/tensorflow/distributed.py index a89b47b3..e6c5f28a 100644 --- a/src/itwinai/tensorflow/distributed.py +++ b/src/itwinai/tensorflow/distributed.py @@ -1,18 +1,21 @@ import tensorflow as tf +import os + def get_strategy(): """Strategy for distributed TensorFlow training""" - cluster_resolver = 
tf.distribute.cluster_resolver.SlurmClusterResolver(port_base=12345) + cluster_resolver = tf.distribute.cluster_resolver.SlurmClusterResolver( + port_base=12345) implementation = tf.distribute.experimental.CommunicationImplementation.NCCL - communication_options = tf.distribute.experimental.CommunicationOptions(implementation=implementation) + communication_options = tf.distribute.experimental.CommunicationOptions( + implementation=implementation) # declare distribution strategy - tf_dist_strategy = tf.distribute.MultiWorkerMirroredStrategy(cluster_resolver = cluster_resolver, communication_options=communication_options) + tf_dist_strategy = tf.distribute.MultiWorkerMirroredStrategy( + cluster_resolver=cluster_resolver, + communication_options=communication_options + ) - # task id from cluster resolver - task_info = cluster_resolver.get_task_info() - task_id = task_info[1] - # number of workers n_workers = int(os.environ['SLURM_NTASKS']) # list of devices per worker @@ -22,7 +25,12 @@ def get_strategy(): # total number of GPUs n_gpus = n_workers * n_gpus_per_worker + # get total number of detected GPUs + print("Number of detected devices: {}".format( + n_gpus)) + # get total number of workers - print("Number of devices: {}".format(tf_dist_strategy.num_replicas_in_sync)) + print("Number of devices: {}".format( + tf_dist_strategy.num_replicas_in_sync)) return tf_dist_strategy, tf_dist_strategy.num_replicas_in_sync diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/train.py b/tutorials/distributed-ml/tf-tutorial-0-basics/train.py index 6c9e6d7e..ee29bca5 100644 --- a/tutorials/distributed-ml/tf-tutorial-0-basics/train.py +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/train.py @@ -9,7 +9,7 @@ import argparse import tensorflow as tf from tensorflow import keras -from tensorflow.keras import layers +import os from itwinai.tensorflow.distributed import get_strategy @@ -35,13 +35,17 @@ def parse_args() -> argparse.Namespace: def tf_rnd_dataset(): """Dummy TF dataset.""" + (x_train, y_train), (x_test, y_test) = \ + tf.keras.datasets.mnist.load_data( + path=os.getcwd()+'/.keras/datasets/mnist.npz') - x_train = tf.random.normal((60000, 784), dtype='float32') - x_test = tf.random.normal((10000, 784), dtype='float32') - y_train = tf.random.uniform((60000,), minval=0, maxval=10, dtype='int32') - y_test = tf.random.uniform((10000,), minval=0, maxval=10, dtype='int32') + train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) + train_dataset = train_dataset.batch(args.batch_size) - return x_train, x_test, y_train, y_test + test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)) + test_dataset = test_dataset.batch(args.batch_size) + + return train_dataset, test_dataset def trainer_entrypoint_fn( @@ -51,31 +55,24 @@ def trainer_entrypoint_fn( by some use case. 
""" # dataset to be trained - x_train, x_test, y_train, y_test = tf_rnd_dataset() + train_dataset, test_dataset = tf_rnd_dataset(args) # distribute datasets among mirrored replicas - dist_x_train = strategy.experimental_distribute_dataset( - x_train - ) - dist_x_test = strategy.experimental_distribute_dataset( - x_test + dist_train = strategy.experimental_distribute_dataset( + train_dataset ) - dist_y_train = strategy.experimental_distribute_dataset( - y_train - ) - dist_y_test = strategy.experimental_distribute_dataset( - y_test + dist_test = strategy.experimental_distribute_dataset( + test_dataset ) # define and compile model within strategy.scope() with strategy.scope(): # Local model - inputs = keras.Input(shape=(784,), name='img') - x = layers.Dense(64, activation='relu')(inputs) - x = layers.Dense(64, activation='relu')(x) - outputs = layers.Dense(10)(x) - - model = keras.Model(inputs=inputs, outputs=outputs, name='mnist_model') + model = tf.keras.models.Sequential([ + tf.keras.layers.Flatten(input_shape=(28, 28)), + tf.keras.layers.Dense(128, activation='relu'), + tf.keras.layers.Dense(10) + ]) model.compile(loss=keras.losses.SparseCategoricalCrossentropy (from_logits=True), @@ -83,12 +80,11 @@ def trainer_entrypoint_fn( metrics=['accuracy'] ) - model.fit(dist_x_train, dist_y_train, - batch_size=args.batch_size, + model.fit(dist_train, epochs=5, - validation_split=0.2) + steps_per_epoch=2000) - test_scores = model.evaluate(dist_x_test, dist_y_test, verbose=0) + test_scores = model.evaluate(dist_test, verbose=0, steps=500) print('Test loss:', test_scores[0]) print('Test accuracy:', test_scores[1]) From a55994da329ac6591148561414c62f9687bfdcf7 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Mon, 18 Mar 2024 11:24:27 +0100 Subject: [PATCH 041/171] Add files via upload --- env-files/tensorflow/createEnvJSCTF.sh | 101 +++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 env-files/tensorflow/createEnvJSCTF.sh diff --git a/env-files/tensorflow/createEnvJSCTF.sh b/env-files/tensorflow/createEnvJSCTF.sh new file mode 100644 index 00000000..977427f3 --- /dev/null +++ b/env-files/tensorflow/createEnvJSCTF.sh @@ -0,0 +1,101 @@ +#!/bin/bash +# -*- coding: utf-8 -*- +# author: RS +# version: 220302a +# creates machine specific python env + +# set modules +ml --force purge + +# get sys info +cDir=$PWD +sysN="$(uname -n | cut -f2- -d.)" +echo "system:${sysN}" +echo + +cont1=false +if [ "$sysN" = 'deepv' ] ; then + ml use $OTHERSTAGES + ml Stages/2022 GCC OpenMPI cuDNN NCCL Python CMake + cont1=true +elif [ "$sysN" = 'juwels' ] ; then + ml Stages/2022 GCC ParaStationMPI Python CMake NCCL libaio cuDNN + cont1=true +elif [ "$sysN" = 'hdfml' ] ; then + #ml Stages/2022 GCC OpenMPI Python NCCL cuDNN libaio CMake + #ml Stages/2023 NVHPC/23.1 ParaStationMPI/5.8.0-1-mt NCCL/default-CUDA-11.7 cuDNN/8.6.0.163-CUDA-11.7 Python CMake + ml Stages/2024 GCC/12.3.0 OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py CMake cuDNN/8.9.5.29-CUDA-12 + cont1=true +else + echo + echo 'unknown system detected' + echo 'canceling' + echo +fi +echo "modules loaded" +echo + +# get python version +pver="$(python --version 2>&1 | awk {'print $2'} | cut -f1-2 -d.)" +echo "python version is ${pver}" +echo + +if [ "$cont1" = true ] ; then + if [ -d "${cDir}/envAItf_${sysN}" ];then + echo 'env already exist' + echo + + source envAItf_${sysN}/bin/activate + else + # create env + python3 -m venv envAItf_${sysN} + + # get headers for pip + if [ -f 
"${cDir}/envAItf_${sysN}/bin/pip3" ]; then + echo 'pip already exist' + else + cp "$(which pip3)" $cDir/envAItf_${sysN}/bin/ + ln -s $cDir/envAItf_${sysN}/bin/pip3 $cDir/envAItf_${sysN}/bin/pip${pver} + var="#!$cDir/envAItf_${sysN}/bin/python${pver}" + sed -i "1s|.*|$var|" $cDir/envAItf_${sysN}/bin/pip3 + fi + + # activate env + source envAItf_${sysN}/bin/activate + + echo "a new env is created in ${cDir}" + echo "activation is done via:" + echo "source ${cDir}/envAItf_${sysN}/bin/activate" + fi +fi + +# install TF +if [ -f "${cDir}/envAItf_${sysN}/bin/tensorboard" ]; then + echo 'TF already installed' + echo +else + export TMPDIR=${cDir} + + pip3 install --upgrade tensorflow[and-cuda] --no-cache-dir +fi + +# install horovod +if [ -f "${cDir}/envAItf_${sysN}/bin/horovodrun" ]; then + echo 'Horovod already installed' + echo +else + export HOROVOD_GPU=CUDA + export HOROVOD_GPU_OPERATIONS=NCCL + export HOROVOD_WITH_TENSORFLOW=1 + export TMPDIR=${cDir} + + pip3 install --no-cache-dir horovod --ignore-installed +fi + +# get rest of the libraries$ +if [ "$cont1" = true ] ; then + pip3 install -r reqs_TF.txt --ignore-installed +fi + + +# eof From e8c349ac4e3de22a5efe05ae4763e151d04fcd26 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Mon, 18 Mar 2024 11:28:23 +0100 Subject: [PATCH 042/171] Update Makefile --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 019fb452..206ce597 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,10 @@ torch-gpu: env-files/torch/pytorch-env-gpu.yml torch-gpu-jsc: env-files/torch/createEnvJSC.sh sh env-files/torch/createEnvJSC.sh +# Install Tensorflow env (GPU support) on Juelich Super Computer (tested on HDFML system) +tf-gpu-jsc: env-files/tensorflow/createEnvJSCTF.sh + sh env-files/tensorflow/createEnvJSCTF.sh + # Install PyTorch env (CPU only) torch-cpu: env-files/torch/pytorch-env-cpu.yml micromamba env create -p ./.venv-pytorch --file env-files/torch/pytorch-env-cpu.yml -y From c718fe2dd9c9def0e7dba9b6fd6ab809f0b3e590 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Mon, 18 Mar 2024 11:29:08 +0100 Subject: [PATCH 043/171] Update README.md --- tutorials/distributed-ml/tf-tutorial-0-basics/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/README.md b/tutorials/distributed-ml/tf-tutorial-0-basics/README.md index b4715895..983aee69 100644 --- a/tutorials/distributed-ml/tf-tutorial-0-basics/README.md +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/README.md @@ -9,10 +9,9 @@ Tensorflow. You can *try* with: # Creates a Python venv called envAItf_hdfml make tf-gpu-jsc ``` -Please note this is still to be added to the make file in the root. Contact RS in the meantime for environment source file. 
If you want to distribute the code in `train.py`, run from terminal: ```bash sbatch tfmirrored_slurm.sh -``` \ No newline at end of file +``` From 428874e3569f9c0d053f9b5623b67b83b3edd96b Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 18 Mar 2024 13:52:13 +0100 Subject: [PATCH 044/171] UPDATE tutorials --- .../distributed-ml/tutorial-0-basics/ddp_slurm.sh | 4 ++-- .../tutorial-0-basics/deepspeed_slurm.sh | 4 ++-- .../distributed-ml/tutorial-0-basics/hvd_slurm.sh | 10 +++++++--- tutorials/distributed-ml/tutorial-0-basics/runall.sh | 5 +++++ tutorials/distributed-ml/tutorial-0-basics/train.py | 2 ++ .../distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh | 4 ++-- tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh | 6 +++++- tutorials/distributed-ml/tutorial-1-mnist/train.py | 9 ++++++++- 8 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 tutorials/distributed-ml/tutorial-0-basics/runall.sh diff --git a/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh index 8cf0280b..7e293321 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh @@ -5,8 +5,8 @@ #SBATCH --account=intertwin #SBATCH --mail-user= #SBATCH --mail-type=ALL -#SBATCH --output=job.out -#SBATCH --error=job.err +#SBATCH --output=job-ddp.out +#SBATCH --error=job-ddp.err #SBATCH --time=00:15:00 # configure node and process count on the CM diff --git a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh index e0326c98..89c88fc6 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh @@ -5,8 +5,8 @@ #SBATCH --account=intertwin #SBATCH --mail-user= #SBATCH --mail-type=ALL -#SBATCH --output=job.out -#SBATCH --error=job.err +#SBATCH --output=job-ds.out +#SBATCH --error=job-ds.err #SBATCH --time=00:15:00 # configure node and process count on the CM diff --git a/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh index 32e8112f..4b2279e5 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh @@ -3,10 +3,14 @@ # general configuration of the job #SBATCH --job-name=Torch_HVD_tutorial #SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job-hvd.out +#SBATCH --error=job-hvd.err +#SBATCH --time=00:15:00 + +# configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --output=job.out -#SBATCH --error=job.err -#SBATCH --time=00:30:00 #SBATCH --nodes=4 #SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=8 diff --git a/tutorials/distributed-ml/tutorial-0-basics/runall.sh b/tutorials/distributed-ml/tutorial-0-basics/runall.sh new file mode 100644 index 00000000..b197f3aa --- /dev/null +++ b/tutorials/distributed-ml/tutorial-0-basics/runall.sh @@ -0,0 +1,5 @@ +# Run all versions of distributed ML +rm *.out *.err +echo "Torch DDP training: $(sbatch ddp_slurm.sh)" +echo "DeepSpeed training: $(sbatch deepspeed_slurm.sh)" +echo "Horovod training: $(sbatch hvd_slurm.sh)" \ No newline at end of file diff --git a/tutorials/distributed-ml/tutorial-0-basics/train.py b/tutorials/distributed-ml/tutorial-0-basics/train.py index 4fb71cea..dac07f09 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/train.py +++ 
b/tutorials/distributed-ml/tutorial-0-basics/train.py @@ -167,3 +167,5 @@ def trainer_entrypoint_fn( # Launch distributed training trainer_entrypoint_fn("foobar", args, strategy) + + print("TRAINING FINISHED") diff --git a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh index c4784976..07c6975f 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh @@ -7,11 +7,11 @@ #SBATCH --mail-type=ALL #SBATCH --output=job-ds.out #SBATCH --error=job-ds.err -#SBATCH --time=00:30:00 +#SBATCH --time=00:20:00 # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=4 +#SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=32 #SBATCH --gpus-per-node=4 diff --git a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh index 585308a2..48bf41b4 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh @@ -3,10 +3,14 @@ # general configuration of the job #SBATCH --job-name=Torch_HVD_tutorial-1 #SBATCH --account=intertwin -#SBATCH --partition=batch +#SBATCH --mail-user= +#SBATCH --mail-type=ALL #SBATCH --output=job-hvd.out #SBATCH --error=job-hvd.err #SBATCH --time=00:30:00 + +# configure node and process count on the CM +#SBATCH --partition=batch #SBATCH --nodes=4 #SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=8 diff --git a/tutorials/distributed-ml/tutorial-1-mnist/train.py b/tutorials/distributed-ml/tutorial-1-mnist/train.py index 84645587..975be604 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/train.py +++ b/tutorials/distributed-ml/tutorial-1-mnist/train.py @@ -288,8 +288,11 @@ def seed_worker(worker_id): print('TIMER: initialise:', time.time()-st, 's') # encapsulate the model on the GPU assigned to the current process + # device = torch.device( + # 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) device = torch.device( - 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) + strategy.dist_device() if args.cuda and torch.cuda.is_available() + else 'cpu') if args.cuda: torch.cuda.set_device(lrank) # deterministic testrun @@ -372,6 +375,10 @@ def seed_worker(worker_id): model, optimizer, lr_scheduler=None ) + print(f" DEVICES: DS={distrib_model.device}, " + f"TORCH.DIST={strategy.dist_device()}, " + f"ENV={os.environ['LOCAL_RANK']}") + # resume state start_epoch = 1 best_acc = np.Inf From b9538ca931eec182b5d9fc8ad76dbc5f4c1d66c0 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 18 Mar 2024 14:15:29 +0100 Subject: [PATCH 045/171] UPDATE documentation and improve explainability --- .../tutorial-0-basics/README.md | 15 ++++++++++- .../tutorial-0-basics/ddp_slurm.sh | 2 +- .../tutorial-0-basics/deepspeed_slurm.sh | 2 +- .../tutorial-0-basics/hvd_slurm.sh | 2 +- .../distributed-ml/tutorial-0-basics/train.py | 3 +-- .../distributed-ml/tutorial-1-mnist/README.md | 27 +++++++++++++++++-- 6 files changed, 43 insertions(+), 8 deletions(-) diff --git a/tutorials/distributed-ml/tutorial-0-basics/README.md b/tutorials/distributed-ml/tutorial-0-basics/README.md index b3f121d4..a081f954 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/README.md +++ b/tutorials/distributed-ml/tutorial-0-basics/README.md @@ -1,6 +1,11 @@ # Tutorial: distributed strategies for PyTorch -In this tutorial we show how to use torch 
`DistributedDataParallel` (DDP), Horovod and DeepSpeed from the same client code. Note that the environment is tested on the HDFML system at JSC. For other systems, the module versions might need change accordingly. +In this tutorial we show how to use torch `DistributedDataParallel` (DDP), Horovod and +DeepSpeed from the same client code. +Note that the environment is tested on the HDFML system at JSC. For other systems, +the module versions might need change accordingly. + +## Setup First, from the root of this repo, build the environment containing pytorch, horovod and deepspeed. You can *try* with: @@ -10,6 +15,8 @@ pytorch, horovod and deepspeed. You can *try* with: make torch-gpu-jsc ``` +## Distributed training + Each distributed strategy has its own SLURM job script, which should be used to run it: @@ -30,3 +37,9 @@ If you want to distribute the code in `train.py` with **Horovod**, run from term ```bash sbatch hvd_slurm.sh ``` + +You can run all of them with: + +```bash +bash runall.sh +``` diff --git a/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh index 7e293321..530733a1 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh @@ -1,7 +1,7 @@ #!/bin/bash # general configuration of the job -#SBATCH --job-name=Torch_DDP_tutorial +#SBATCH --job-name=Torch_DDP_tutorial-0 #SBATCH --account=intertwin #SBATCH --mail-user= #SBATCH --mail-type=ALL diff --git a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh index 89c88fc6..81a7b3e3 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh @@ -1,7 +1,7 @@ #!/bin/bash # general configuration of the job -#SBATCH --job-name=Torch_DeepSpeed_tutorial +#SBATCH --job-name=Torch_DeepSpeed_tutorial-0 #SBATCH --account=intertwin #SBATCH --mail-user= #SBATCH --mail-type=ALL diff --git a/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh index 4b2279e5..620241a9 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh @@ -1,7 +1,7 @@ #!/bin/bash # general configuration of the job -#SBATCH --job-name=Torch_HVD_tutorial +#SBATCH --job-name=Torch_HVD_tutorial-0 #SBATCH --account=intertwin #SBATCH --mail-user= #SBATCH --mail-type=ALL diff --git a/tutorials/distributed-ml/tutorial-0-basics/train.py b/tutorials/distributed-ml/tutorial-0-basics/train.py index dac07f09..648f05e4 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/train.py +++ b/tutorials/distributed-ml/tutorial-0-basics/train.py @@ -140,6 +140,7 @@ def trainer_entrypoint_fn( if lr_sched: lr_sched.step() + print(f" - TRAINING FINISHED") strategy.clean_up() return 123 @@ -167,5 +168,3 @@ def trainer_entrypoint_fn( # Launch distributed training trainer_entrypoint_fn("foobar", args, strategy) - - print("TRAINING FINISHED") diff --git a/tutorials/distributed-ml/tutorial-1-mnist/README.md b/tutorials/distributed-ml/tutorial-1-mnist/README.md index b3f121d4..f0466a2b 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/README.md +++ b/tutorials/distributed-ml/tutorial-1-mnist/README.md @@ -1,6 +1,11 @@ -# Tutorial: distributed strategies for PyTorch +# Tutorial: distributed strategies for PyTorch model trained on MNIST dataset -In this tutorial we show how to 
use torch `DistributedDataParallel` (DDP), Horovod and DeepSpeed from the same client code. Note that the environment is tested on the HDFML system at JSC. For other systems, the module versions might need change accordingly. +In this tutorial we show how to use torch `DistributedDataParallel` (DDP), Horovod and +DeepSpeed from the same client code. +Note that the environment is tested on the HDFML system at JSC. For other systems, +the module versions might need change accordingly. + +## Setup First, from the root of this repo, build the environment containing pytorch, horovod and deepspeed. You can *try* with: @@ -10,6 +15,18 @@ pytorch, horovod and deepspeed. You can *try* with: make torch-gpu-jsc ``` +Before launching training, since on JSC's compute nodes there is not internet connection, +you need to download the dataset before while on the login lode: + +```bash +source ../../../envAI_hdfml/bin/activate +python train.py --download-only +``` + +This command creates a local folder called "MNIST" with the dataset. + +## Distributed training + Each distributed strategy has its own SLURM job script, which should be used to run it: @@ -30,3 +47,9 @@ If you want to distribute the code in `train.py` with **Horovod**, run from term ```bash sbatch hvd_slurm.sh ``` + +You can run all of them with: + +```bash +bash runall.sh +``` From b7a456def0bdfb80f7fe9fff6d279a7ff2b3964d Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 19 Mar 2024 13:50:20 +0100 Subject: [PATCH 046/171] UPDATE SLURM scripts --- .../distributed-ml/tutorial-0-basics/deepspeed_slurm.sh | 9 +++++---- tutorials/distributed-ml/tutorial-0-basics/train.py | 4 ++++ .../distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh index 81a7b3e3..a76625ed 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh @@ -7,13 +7,13 @@ #SBATCH --mail-type=ALL #SBATCH --output=job-ds.out #SBATCH --error=job-ds.err -#SBATCH --time=00:15:00 +#SBATCH --time=00:05:00 # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=4 +#SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 -#SBATCH --cpus-per-task=32 +#SBATCH --cpus-per-task=4 #SBATCH --gpus-per-node=4 # SBATCH --exclusive @@ -57,5 +57,6 @@ export MASTER_PORT=29500 TRAINING_CMD="train.py -s deepspeed" -srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed +# srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed +srun --cpu-bind=none deepspeed $TRAINING_CMD --deepspeed diff --git a/tutorials/distributed-ml/tutorial-0-basics/train.py b/tutorials/distributed-ml/tutorial-0-basics/train.py index 648f05e4..9a17bdb4 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/train.py +++ b/tutorials/distributed-ml/tutorial-0-basics/train.py @@ -117,6 +117,10 @@ def trainer_entrypoint_fn( # Device allocated for this worker device = strategy.dist_device() + print(f" DEVICES: DS={model.device}, " + f"TORCH.DIST={strategy.dist_device()}, " + f"ENV={os.environ['LOCAL_RANK']}") + for epoch in range(2): for (x, y) in train_loader: # print(f"tensor to cuda:{device}") diff --git a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh index 07c6975f..c4784976 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh +++ 
b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh @@ -7,11 +7,11 @@ #SBATCH --mail-type=ALL #SBATCH --output=job-ds.out #SBATCH --error=job-ds.err -#SBATCH --time=00:20:00 +#SBATCH --time=00:30:00 # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=1 +#SBATCH --nodes=4 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=32 #SBATCH --gpus-per-node=4 From 82621f4e5603e89fedb4ec2a04d89821b336660f Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 20 Mar 2024 11:17:06 +0100 Subject: [PATCH 047/171] FIX local rank mismatch --- .gitignore | 2 +- src/itwinai/torch/distributed.py | 6 ++++++ .../distributed-ml/tutorial-0-basics/deepspeed_slurm.sh | 7 +++++-- .../distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh | 6 +++++- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 2d422c8d..d0086ae1 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,7 @@ exp_data/ # Custom envs .venv* -envAI_hdfml/ +envAI_hdfml* # Logs logs/ diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index 72447dd2..13b089b0 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -2,6 +2,7 @@ from typing import Any, Union, List, Dict, Optional, Tuple from pathlib import Path import json +import os import deepspeed import torch @@ -259,6 +260,11 @@ def init(self) -> None: """Initializes the distributed process group and the distributed package. """ + # https://github.com/Lightning-AI/pytorch-lightning/issues/13567 + ompi_lrank = os.environ.get('OMPI_COMM_WORLD_LOCAL_RANK') + os.environ['OMPI_COMM_WORLD_LOCAL_RANK'] = os.environ.get( + 'LOCAL_RANK', ompi_lrank) + # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization deepspeed.init_distributed(dist_backend=self.backend) diff --git a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh index a76625ed..afe18f5b 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh @@ -7,11 +7,11 @@ #SBATCH --mail-type=ALL #SBATCH --output=job-ds.out #SBATCH --error=job-ds.err -#SBATCH --time=00:05:00 +#SBATCH --time=00:15:00 # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=1 +#SBATCH --nodes=4 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=4 #SBATCH --gpus-per-node=4 @@ -57,6 +57,9 @@ export MASTER_PORT=29500 TRAINING_CMD="train.py -s deepspeed" +# Run without launcher: set --ntasks-per-node=NUM_GPUS # srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed + +# Run with deepspeed launcher: set --ntasks-per-node=1 srun --cpu-bind=none deepspeed $TRAINING_CMD --deepspeed diff --git a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh index c4784976..727604e7 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh @@ -57,5 +57,9 @@ export MASTER_PORT=29500 TRAINING_CMD="train.py -s deepspeed -c config.yaml" -srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed +# Run without launcher: set --ntasks-per-node=NUM_GPUS +# srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed + +# Run with deepspeed launcher: set --ntasks-per-node=1 +srun --cpu-bind=none deepspeed $TRAINING_CMD --deepspeed From c6ba6f56e627e2a46be09a43dcf389b54ef4c21f Mon Sep 17 00:00:00 
2001 From: zoechbauer1 Date: Fri, 16 Feb 2024 16:51:15 +0100 Subject: [PATCH 048/171] fixed distributed trainer in cyclones use case --- use-cases/cyclones/cyclones_vgg.py | 176 ++++++++++++++--------------- use-cases/cyclones/trainer.py | 11 +- 2 files changed, 94 insertions(+), 93 deletions(-) diff --git a/use-cases/cyclones/cyclones_vgg.py b/use-cases/cyclones/cyclones_vgg.py index 79e4136d..a2272505 100644 --- a/use-cases/cyclones/cyclones_vgg.py +++ b/use-cases/cyclones/cyclones_vgg.py @@ -582,94 +582,94 @@ def VGG_V4(patch_size, label_no_cyclone, channels, activation, regularizer): """ -# def ModelV5(patch_size, channels, last_activation, kernel_size=3): -# # kernel initializer -# initializer = tf.random_normal_initializer(0.0, 0.02) - -# # input layer -# inputs = tf.keras.layers.Input(shape=(patch_size, patch_size, -# channels[0])) - -# conv_blocks = [ -# ConvBlock( -# filters=32, -# initializer=initializer, -# kernel_size=kernel_size, -# strides=2, -# apply_batchnorm=True, -# apply_dropout=False, -# apply_gaussian_noise=True, -# ), -# ConvBlock( -# filters=64, -# initializer=initializer, -# kernel_size=kernel_size, -# strides=2, -# apply_batchnorm=False, -# apply_dropout=False, -# apply_gaussian_noise=False, -# ), -# ConvBlock( -# filters=128, -# initializer=initializer, -# kernel_size=3, -# strides=2, -# apply_batchnorm=False, -# apply_dropout=True, -# apply_gaussian_noise=False, -# ), -# ConvBlock( -# filters=256, -# initializer=initializer, -# kernel_size=3, -# strides=2, -# apply_batchnorm=False, -# apply_dropout=False, -# apply_gaussian_noise=True, -# ), -# ConvBlock( -# filters=512, -# initializer=initializer, -# kernel_size=3, -# strides=2, -# apply_batchnorm=False, -# apply_dropout=False, -# apply_gaussian_noise=False, -# ), -# ConvBlock( -# filters=1024, -# initializer=initializer, -# kernel_size=3, -# strides=2, -# apply_batchnorm=True, -# apply_dropout=True, -# apply_gaussian_noise=False, -# ), -# ] -# x = inputs -# for block in conv_blocks: -# x = block(x) - -# x = tf.keras.layers.Flatten()(x) -# x = tf.keras.layers.Dense( -# units=1024, activation="relu", kernel_initializer=initializer -# )(x) -# x = tf.keras.layers.Dense( -# units=512, activation="relu", kernel_initializer=initializer -# )(x) -# x = tf.keras.layers.Dense( -# units=256, activation="relu", kernel_initializer=initializer -# )(x) -# x = tf.keras.layers.Dense( -# units=128, activation="relu", kernel_initializer=initializer -# )(x) - -# outputs = tf.keras.layers.Dense( -# channels[1], activation=last_activation, -# kernel_initializer=initializer -# )(x) - -# return tf.keras.Model(inputs=inputs, outputs=outputs, name="model_V5") +def ModelV5(patch_size, channels, last_activation, kernel_size=3): + # kernel initializer + initializer = tf.random_normal_initializer(0.0, 0.02) + + # input layer + inputs = tf.keras.layers.Input(shape=(patch_size, patch_size, +channels[0])) + + conv_blocks = [ + ConvBlock( + filters=32, + initializer=initializer, + kernel_size=kernel_size, + strides=2, + apply_batchnorm=True, + apply_dropout=False, + apply_gaussian_noise=True, + ), + ConvBlock( + filters=64, + initializer=initializer, + kernel_size=kernel_size, + strides=2, + apply_batchnorm=False, + apply_dropout=False, + apply_gaussian_noise=False, + ), + ConvBlock( + filters=128, + initializer=initializer, + kernel_size=3, + strides=2, + apply_batchnorm=False, + apply_dropout=True, + apply_gaussian_noise=False, + ), + ConvBlock( + filters=256, + initializer=initializer, + kernel_size=3, + strides=2, + apply_batchnorm=False, + 
apply_dropout=False, + apply_gaussian_noise=True, + ), + ConvBlock( + filters=512, + initializer=initializer, + kernel_size=3, + strides=2, + apply_batchnorm=False, + apply_dropout=False, + apply_gaussian_noise=False, + ), + ConvBlock( + filters=1024, + initializer=initializer, + kernel_size=3, + strides=2, + apply_batchnorm=True, + apply_dropout=True, + apply_gaussian_noise=False, + ), + ] + x = inputs + for block in conv_blocks: + x = block(x) + + x = tf.keras.layers.Flatten()(x) + x = tf.keras.layers.Dense( + units=1024, activation="relu", kernel_initializer=initializer + )(x) + x = tf.keras.layers.Dense( + units=512, activation="relu", kernel_initializer=initializer + )(x) + x = tf.keras.layers.Dense( + units=256, activation="relu", kernel_initializer=initializer + )(x) + x = tf.keras.layers.Dense( + units=128, activation="relu", kernel_initializer=initializer + )(x) + + outputs = tf.keras.layers.Dense( + channels[1], activation=last_activation, +kernel_initializer=initializer + )(x) + + return tf.keras.Model(inputs=inputs, outputs=outputs, name="model_V5") """ diff --git a/use-cases/cyclones/trainer.py b/use-cases/cyclones/trainer.py index 2fb3c1bc..1c47819b 100644 --- a/use-cases/cyclones/trainer.py +++ b/use-cases/cyclones/trainer.py @@ -44,10 +44,10 @@ def __init__( self.regularization_strength, self.regularizer = ( regularization_strength.value ) - self.loss_name, self.loss = loss.value - # Optimizers, Losses - self.optimizer = keras.optimizers.Adam(learning_rate=learning_rate) + # Loss name and learning rate + self.loss_name = loss.value + self.learning_rate = learning_rate # Parse global config self.setup_config(self.global_config) @@ -86,9 +86,10 @@ def execute(self, train_data, validation_data, channels) -> None: logging.debug( f"Model loaded from backup at {self.best_model_name}") + optimizer = keras.optimizers.Adam(learning_rate=self.learning_rate) metrics = [keras.metrics.MeanAbsoluteError(name="mae")] - model.compile(loss=self.loss, - optimizer=self.optimizer, metrics=metrics) + model.compile(loss=self.loss_name, + optimizer=optimizer, metrics=metrics) logging.debug("Model compiled") # print model summary to check if model's architecture is correct From 70a09545be5a33598a928e3ae271980982c264ba Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 20 Mar 2024 15:50:25 +0100 Subject: [PATCH 049/171] UPDATE launcher --- .../tutorial-0-basics/deepspeed_slurm.sh | 16 +++++++++++----- .../distributed-ml/tutorial-0-basics/train.py | 2 +- .../tutorial-1-mnist/deepspeed_slurm.sh | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh index afe18f5b..d7b3dbe4 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh @@ -12,7 +12,7 @@ # configure node and process count on the CM #SBATCH --partition=batch #SBATCH --nodes=4 -#SBATCH --ntasks-per-node=1 +#SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=4 #SBATCH --gpus-per-node=4 # SBATCH --exclusive @@ -58,8 +58,14 @@ export MASTER_PORT=29500 TRAINING_CMD="train.py -s deepspeed" # Run without launcher: set --ntasks-per-node=NUM_GPUS -# srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed - -# Run with deepspeed launcher: set --ntasks-per-node=1 -srun --cpu-bind=none deepspeed $TRAINING_CMD --deepspeed +srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed +# # Run with deepspeed launcher: set 
--ntasks-per-node=1 +# # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables +# export NCCL_IB_DISABLE=1 +# export NCCL_SOCKET_IFNAME=eth0 +# nodelist=$(scontrol show hostname $SLURM_NODELIST) +# echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile +# # Requires passwordless SSH access among compute node +# srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed +# rm .hostfile \ No newline at end of file diff --git a/tutorials/distributed-ml/tutorial-0-basics/train.py b/tutorials/distributed-ml/tutorial-0-basics/train.py index 9a17bdb4..3a19fdf5 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/train.py +++ b/tutorials/distributed-ml/tutorial-0-basics/train.py @@ -164,7 +164,7 @@ def trainer_entrypoint_fn( strategy = HVDDistributedStrategy() elif args.strategy == 'deepspeed': strategy = DSDistributedStrategy( - backend='nccl', config=dict(train_batch_size=4) + backend='nccl', config=dict(train_batch_size=32) ) else: raise NotImplementedError( diff --git a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh index 727604e7..c54e6fe6 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh @@ -11,7 +11,7 @@ # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=4 +#SBATCH --nodes=2 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=32 #SBATCH --gpus-per-node=4 From 33e7d73a3306de093a22d92663c7de0afef7b254 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 20 Mar 2024 17:26:33 +0100 Subject: [PATCH 050/171] UPDATE linter --- .github/workflows/lint.yml | 2 +- env-files/tensorflow/createEnvJSCTF.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index ecadafb0..54022934 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -49,4 +49,4 @@ jobs: # Fail on errors DISABLE_ERRORS: false # Skip linting of docs - FILTER_REGEX_EXCLUDE: .*docs/index.md|.*docs/docs/.*|.*ISSUE_TEMPLATE/.*|use-cases/.* + FILTER_REGEX_EXCLUDE: .*docs/index.md|.*docs/docs/.*|.*ISSUE_TEMPLATE/.*|use-cases/.*|experimental/.* diff --git a/env-files/tensorflow/createEnvJSCTF.sh b/env-files/tensorflow/createEnvJSCTF.sh index 977427f3..cc014cd3 100644 --- a/env-files/tensorflow/createEnvJSCTF.sh +++ b/env-files/tensorflow/createEnvJSCTF.sh @@ -15,7 +15,7 @@ echo cont1=false if [ "$sysN" = 'deepv' ] ; then - ml use $OTHERSTAGES + ml use "$OTHERSTAGES" ml Stages/2022 GCC OpenMPI cuDNN NCCL Python CMake cont1=true elif [ "$sysN" = 'juwels' ] ; then From da3696c5de3490c4bd291c6c7205aa8cc220fc67 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 20 Mar 2024 17:31:38 +0100 Subject: [PATCH 051/171] UPDATE format --- .../tutorial-0-basics/ddp_slurm.sh | 2 +- .../tutorial-0-basics/deepspeed_slurm.sh | 2 +- .../tutorial-0-basics/hvd_slurm.sh | 2 +- .../distributed-ml/tutorial-0-basics/runall.sh | 1 + .../tutorial-1-mnist/ddp_slurm.sh | 2 +- .../tutorial-1-mnist/deepspeed_slurm.sh | 17 ++++++++++++----- .../tutorial-1-mnist/hvd_slurm.sh | 2 +- .../distributed-ml/tutorial-1-mnist/runall.sh | 1 + 8 files changed, 19 insertions(+), 10 deletions(-) diff --git a/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh index 530733a1..fcae9ab0 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh +++ 
b/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh @@ -46,7 +46,7 @@ echo # set comm export CUDA_VISIBLE_DEVICES="0,1,2,3" export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK fi diff --git a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh index d7b3dbe4..29cba4ff 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh @@ -46,7 +46,7 @@ echo # set env vars export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK fi export CUDA_VISIBLE_DEVICES="0,1,2,3" diff --git a/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh index 620241a9..23682ed5 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh @@ -47,7 +47,7 @@ echo # export NCCL_DEBUG=INFO export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK fi export CUDA_VISIBLE_DEVICES="0,1,2,3" diff --git a/tutorials/distributed-ml/tutorial-0-basics/runall.sh b/tutorials/distributed-ml/tutorial-0-basics/runall.sh index b197f3aa..17c0f190 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/runall.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/runall.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Run all versions of distributed ML rm *.out *.err echo "Torch DDP training: $(sbatch ddp_slurm.sh)" diff --git a/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh index d672183c..1821a760 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh @@ -46,7 +46,7 @@ echo # set comm export CUDA_VISIBLE_DEVICES="0,1,2,3" export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK fi diff --git a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh index c54e6fe6..8a28b978 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh @@ -12,7 +12,7 @@ # configure node and process count on the CM #SBATCH --partition=batch #SBATCH --nodes=2 -#SBATCH --ntasks-per-node=1 +#SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=32 #SBATCH --gpus-per-node=4 # SBATCH --exclusive @@ -46,7 +46,7 @@ echo # set env vars export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK fi export CUDA_VISIBLE_DEVICES="0,1,2,3" @@ -58,8 +58,15 @@ export MASTER_PORT=29500 TRAINING_CMD="train.py -s deepspeed -c config.yaml" # Run without launcher: set --ntasks-per-node=NUM_GPUS -# srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed +srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed -# Run with deepspeed launcher: set --ntasks-per-node=1 -srun --cpu-bind=none deepspeed 
$TRAINING_CMD --deepspeed +# # Run with deepspeed launcher: set --ntasks-per-node=1 +# # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables +# export NCCL_IB_DISABLE=1 +# export NCCL_SOCKET_IFNAME=eth0 +# nodelist=$(scontrol show hostname $SLURM_NODELIST) +# echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile +# # Requires passwordless SSH access among compute node +# srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed +# rm .hostfile diff --git a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh index 48bf41b4..1682c244 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh @@ -47,7 +47,7 @@ echo # export NCCL_DEBUG=INFO export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK fi export CUDA_VISIBLE_DEVICES="0,1,2,3" diff --git a/tutorials/distributed-ml/tutorial-1-mnist/runall.sh b/tutorials/distributed-ml/tutorial-1-mnist/runall.sh index 62ed1eff..b1470d75 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/runall.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/runall.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Run all versions of distributed ML for MNIST rm *checkpoint.pth.tar *.out *.err echo "Torch DDP training: $(sbatch ddp_slurm.sh)" From d00d41767271bbd7bde154c6c2e17bba855dee55 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 20 Mar 2024 17:39:07 +0100 Subject: [PATCH 052/171] FIX linter --- src/itwinai/parser.py | 239 +++++++++++++++++++++++++++++- src/itwinai/tensorflow/trainer.py | 16 +- 2 files changed, 247 insertions(+), 8 deletions(-) diff --git a/src/itwinai/parser.py b/src/itwinai/parser.py index 7c0e7b5a..b74613a0 100644 --- a/src/itwinai/parser.py +++ b/src/itwinai/parser.py @@ -10,7 +10,6 @@ from jsonargparse import ActionConfigFile from jsonargparse._formatters import DefaultHelpFormatter - class ArgumentParser(JAPArgumentParser): def __init__( self, @@ -65,3 +64,241 @@ def __init__( "-c", "--config", action=ActionConfigFile, help="Path to a configuration file in json or yaml format." ) + + +# class ConfigParser2: +# """ +# Deprecated: this pipeline structure does not allow for +# nested pipelines. However, it is more readable and the linking +# from name to step data could be achieved with OmegaConf. This +# could be reused in the future: left as example. + +# Parses a configuration file, merging the steps into +# the pipeline and returning a pipeline object. +# It also provides functionalities for dynamic override +# of fields by means of nested key notation. 
+ +# Example: + +# >>> # pipeline.yaml +# >>> pipeline: +# >>> class_path: itwinai.pipeline.Pipeline +# >>> steps: [server, client] +# >>> +# >>> server: +# >>> class_path: mycode.ServerOptions +# >>> init_args: +# >>> host: localhost +# >>> port: 80 +# >>> +# >>> client: +# >>> class_path: mycode.ClientOptions +# >>> init_args: +# >>> url: http://${server.init_args.host}:${server.init_args.port}/ + +# >>> from itwinai.parser import ConfigParser2 +# >>> +# >>> parser = ConfigParser2( +# >>> config='pipeline.yaml', +# >>> override_keys={ +# >>> 'server.init_args.port': 777 +# >>> } +# >>> ) +# >>> pipeline = parser.parse_pipeline() +# >>> print(pipeline) +# >>> print(pipeline.steps) +# >>> print(pipeline.steps['server'].port) +# >>> +# >>> server = parser.parse_step('server') +# >>> print(server) +# >>> print(server.port) +# """ + +# config: Dict +# pipeline: Pipeline + +# def __init__( +# self, +# config: Union[str, Dict], +# override_keys: Optional[Dict[str, Any]] = None +# ) -> None: +# self.config = config +# self.override_keys = override_keys +# if isinstance(self.config, str): +# self.config = load_yaml(self.config) +# self._dynamic_override_keys() +# self._omegaconf_interpolate() + +# def _dynamic_override_keys(self): +# if self.override_keys is not None: +# for key_chain, value in self.override_keys.items(): +# add_replace_field(self.config, key_chain, value) + +# def _omegaconf_interpolate(self) -> None: +# """Performs variable interpolation with OmegaConf on internal +# configuration file. +# """ +# conf = OmegaConf.create(self.config) +# self.config = OmegaConf.to_container(conf, resolve=True) + +# def parse_pipeline( +# self, +# pipeline_nested_key: str = "pipeline", +# verbose: bool = False +# ) -> Pipeline: +# """Merges steps into pipeline and parses it. + +# Args: +# pipeline_nested_key (str, optional): nested key in the +# configuration file identifying the pipeline object. +# Defaults to "pipeline". +# verbose (bool): if True, prints the assembled pipeline +# to console formatted as JSON. + +# Returns: +# Pipeline: instantiated pipeline. 
+# """ +# pipe_parser = JAPArgumentParser() +# pipe_parser.add_subclass_arguments(Pipeline, pipeline_nested_key) +# pipe_dict = self.config[pipeline_nested_key] + +# # Pop steps list from pipeline dictionary +# steps_list = pipe_dict['steps'] +# del pipe_dict['steps'] + +# # Link steps with respective dictionaries +# if not pipe_dict.get('init_args'): +# pipe_dict['init_args'] = {} +# steps_dict = pipe_dict['init_args']['steps'] = {} +# for step_name in steps_list: +# steps_dict[step_name] = self.config[step_name] +# pipe_dict = {pipeline_nested_key: pipe_dict} + +# if verbose: +# print("Assembled pipeline:") +# print(json.dumps(pipe_dict, indent=4)) + +# # Parse pipeline dict once merged with steps +# conf = pipe_parser.parse_object(pipe_dict) +# pipe = pipe_parser.instantiate_classes(conf) +# self.pipeline = pipe[pipeline_nested_key] +# return self.pipeline + +# def parse_step( +# self, +# step_name: str, +# verbose: bool = False +# ) -> BaseComponent: +# step_dict_config = self.config[step_name] + +# if verbose: +# print(f"STEP '{step_name}' CONFIG:") +# print(json.dumps(step_dict_config, indent=4)) + +# # Wrap config under "step" field and parse it +# step_dict_config = {'step': step_dict_config} +# step_parser = JAPArgumentParser() +# step_parser.add_subclass_arguments(BaseComponent, "step") +# parsed_namespace = step_parser.parse_object(step_dict_config) +# return step_parser.instantiate_classes(parsed_namespace)["step"] + + +# class ItwinaiCLI2: +# """ +# Deprecated: the dynamic override does not work with nested parameters +# and may be confusing. + +# CLI tool for executing a configuration file, with dynamic +# override of fields and variable interpolation with Omegaconf. + +# Example: + +# >>> # train.py +# >>> from itwinai.parser import ItwinaiCLI +# >>> cli = ItwinaiCLI() +# >>> cli.pipeline.execute() + +# >>> # pipeline.yaml +# >>> pipeline: +# >>> class_path: itwinai.pipeline.Pipeline +# >>> steps: [server, client] +# >>> +# >>> server: +# >>> class_path: mycode.ServerOptions +# >>> init_args: +# >>> host: localhost +# >>> port: 80 +# >>> +# >>> client: +# >>> class_path: mycode.ClientOptions +# >>> init_args: +# >>> url: http://${server.init_args.host}:${server.init_args.port}/ + +# From command line: + +# >>> python train.py --config itwinai-conf.yaml --help +# >>> python train.py --config itwinai-conf.yaml +# >>> python train.py --config itwinai-conf.yaml --server.port 8080 +# """ +# _parser: JAPArgumentParser +# _config: Dict +# pipeline: Pipeline + +# def __init__( +# self, +# pipeline_nested_key: str = "pipeline", +# parser_mode: str = "omegaconf" +# ) -> None: +# self.pipeline_nested_key = pipeline_nested_key +# self.parser_mode = parser_mode +# self._init_parser() +# self._parser.add_argument(f"--{self.pipeline_nested_key}", type=dict) +# self._add_steps_arguments() +# self._config = self._parser.parse_args() + +# # Merge steps into pipeline and parse it +# del self._config['config'] +# pipe_parser = ConfigParser2(config=self._config.as_dict()) +# self.pipeline = pipe_parser.parse_pipeline( +# pipeline_nested_key=self.pipeline_nested_key +# ) + +# def _init_parser(self): +# self._parser = JAPArgumentParser(parser_mode=self.parser_mode) +# self._parser.add_argument( +# "-c", "--config", action=ActionConfigFile, +# required=True, +# help="Path to a configuration file in json or yaml format." +# ) + +# def _add_steps_arguments(self): +# """Pre-parses the configuration file, dynamically adding all the +# component classes under 'steps' as arguments of the parser. 
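+
+# For the YAML example in the class docstring, this pre-parsing step
+# roughly extracts the mapping below, and a subclass-argument group is
+# then added to the parser for each entry (sketch of the intended
+# behaviour; see the implementation below):
+
+# >>> steps = {'server': 'mycode.ServerOptions',
+# >>>          'client': 'mycode.ClientOptions'}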
+# """ +# if "--config" not in sys.argv: +# raise ValueError( +# "--config parameter has to be specified with a " +# "valid path to a configuration file." +# ) +# config_path = sys.argv.index("--config") + 1 +# config_path = sys.argv[config_path] +# config = load_yaml(config_path) + +# # Add steps to parser +# steps = filter( +# lambda itm: itm[0] != self.pipeline_nested_key, +# config.items() +# ) +# steps = { +# step_name: step_data['class_path'] +# for step_name, step_data in steps +# } + +# for st_nested_key, step_class_str in steps.items(): +# step_class = dynamically_import_class(step_class_str) +# self._add_step_arguments( +# step_class=step_class, nested_key=st_nested_key) + +# def _add_step_arguments(self, step_class, nested_key): +# self._parser.add_subclass_arguments( +# baseclass=step_class, nested_key=nested_key) diff --git a/src/itwinai/tensorflow/trainer.py b/src/itwinai/tensorflow/trainer.py index 35e544cb..ce6bab15 100644 --- a/src/itwinai/tensorflow/trainer.py +++ b/src/itwinai/tensorflow/trainer.py @@ -4,7 +4,7 @@ from jsonargparse import ArgumentParser import tensorflow as tf -from ..components import Trainer +from ..components import Trainer, monitor_exec from itwinai.tensorflow.distributed import get_strategy def import_class(name): @@ -60,12 +60,14 @@ def __init__( # get total number of workers print("Number of devices: {}".format(n_devices)) # distribute datasets among MirroredStrategy's replicas - dist_train_dataset = tf_dist_strategy.experimental_distribute_dataset( - train_dataset - ) - dist_validation_dataset = tf_dist_strategy.experimental_distribute_dataset( - validation_dataset - ) + dist_train_dataset = ( + tf_dist_strategy.experimental_distribute_dataset( + train_dataset + )) + dist_validation_dataset = ( + tf_dist_strategy.experimental_distribute_dataset( + validation_dataset + )) with self.strategy.scope(): # TODO: move loss, optimizer and metrics instantiation under # here From 84795173b051838f6bdda02c6a223f4ef4b8c526 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 20 Mar 2024 18:22:04 +0100 Subject: [PATCH 053/171] FIX linter --- .github/linters/.jscpd.json | 3 +- .github/workflows/lint.yml | 1 + src/itwinai/torch/distributed.py | 945 +++++++++--------- .../tutorial-0-basics/README.md | 2 +- .../tutorial-0-basics/ddp_slurm.sh | 3 +- .../tutorial-0-basics/deepspeed_slurm.sh | 8 +- .../tutorial-0-basics/hvd_slurm.sh | 5 +- .../distributed-ml/tutorial-1-mnist/README.md | 2 +- .../tutorial-1-mnist/ddp_slurm.sh | 3 +- .../tutorial-1-mnist/deepspeed_slurm.sh | 8 +- .../tutorial-1-mnist/hvd_slurm.sh | 5 +- 11 files changed, 496 insertions(+), 489 deletions(-) diff --git a/.github/linters/.jscpd.json b/.github/linters/.jscpd.json index 8a003c54..1a035770 100644 --- a/.github/linters/.jscpd.json +++ b/.github/linters/.jscpd.json @@ -1,6 +1,7 @@ { "threshold": 2.0, "ignore": [ - "**/itwinai/loggers.py" + "**/itwinai/loggers.py", + "**/itwinai/torch/engine.py" ] } \ No newline at end of file diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 54022934..ce2dbd83 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -50,3 +50,4 @@ jobs: DISABLE_ERRORS: false # Skip linting of docs FILTER_REGEX_EXCLUDE: .*docs/index.md|.*docs/docs/.*|.*ISSUE_TEMPLATE/.*|use-cases/.*|experimental/.* + BASH_SEVERITY: warning diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index 13b089b0..4ef4900a 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -430,485 +430,482 
@@ def par_allgather_obj(self, obj: Any) -> list[Any]: """ return hvd.allgather_object(obj) -################################################################### +# class TorchDistributedStrategy_old(DistributedStrategy): +# """Abstract class to define the distributed backend methods for +# PyTorch models. +# """ +# @abc.abstractmethod +# def init_backend(self) -> None: +# """Initializes the chosen distributed backend""" -class TorchDistributedStrategy_old(DistributedStrategy): - """Abstract class to define the distributed backend methods for - PyTorch models. - """ - @abc.abstractmethod - def init_backend(self) -> None: - """Initializes the chosen distributed backend""" - - @abc.abstractmethod - def distribute_model(self, model: Any) -> Any: - """Distributes a machine learning model. - - Args: - model (Any): a generic ML model to be distributed. - - Returns: - Any: distributed model instance. - """ - - @abc.abstractmethod - def broadcast_params(self, model: Any, optimizer: Any) -> None: - """Broadcasts variables from root rank to all other processes/ - - Args: - model (Any): distributed model. - optimizer (Any): optimizer. - """ - - @abc.abstractmethod - def distribute_optimizer(self, optimizer: Any, model: Any) -> Any: - """Distribute optimizer. - - Args: - optimizer (Any): optimizer. - model (Any): distributed model. - - Returns: - Any: distributed optimizer. - """ - - @abc.abstractmethod - def dist_gwsize(self) -> int: - """Returns the total number of processes (global world size). - - Returns: - int: global world size. - """ - - @abc.abstractmethod - def dist_lwsize(self) -> int: - """Returns the number of local workers available on a node - (local world size). - Usually it is equal to the number of available GPUs. - - Returns: - int: local world size. - """ - - @abc.abstractmethod - def dist_grank(self) -> int: - """Returns the global rank of the current process. - Rank ranges from 0 to world_size. - - Returns: - int: global rank. - """ - - @abc.abstractmethod - def dist_lrank(self) -> int: - """Returns the local rank of the current process. - - Returns: - int: local rank. - """ - - def is_main_worker(self) -> bool: - """Checks if local worker has global rank equal to zero. - - Returns: - bool: True if main worker. - """ - return self.dist_grank() == 0 - - def dist_device(self) -> str: - """Device used by local worker. - - Returns: - str: torch device in the form 'cuda:N'. - """ - return f"cuda:{self.dist_lrank()}" - - @abc.abstractmethod - def clean_up(self) -> None: - """Cleans up resources allocated by distributed strategy.""" - - @abc.abstractmethod - def par_allgather_obj(self, obj: Any) -> List[Any]: - """Gathers any object from the whole group in a list (to all workers). - - Args: - obj (Any): object to gather from all workers. - - Returns: - List[Any]: list of objects gathered from all workers. - """ - - -class DDPDistributedStrategy_old(TorchDistributedStrategy_old): - """PyTorch DDP distributed strategy class. - - Args: - backend (str): Name of the communication backend to employ. - """ - - backend: str - - def __init__(self, backend: str) -> None: - super().__init__() - self.backend = backend - - def init_backend(self) -> None: - """Initializes the distributed process group and the distributed - package. 
- """ - if torch.cuda.is_available(): - dist.init_process_group(backend=self.backend) - - def distribute_model(self, model: nn.Module) -> nn.Module: - """Achieves data parallelism by synchronizing the gradients - across each model replica located in each available - computing device. - - Args: - model (nn.Module): ML model to be distributed. - - Returns: - nn.Module: Distributed model replicas across all devices. - that are to be synchronized. - """ - if torch.cuda.is_available(): - # device = self.dist_lrank() - model = model.to(self.dist_device()) - dist_model = torch.nn.parallel.DistributedDataParallel( - model, - device_ids=[self.dist_device()], - output_device=self.dist_device() - ) - else: - dist_model = model - - return dist_model - - def broadcast_params( - self, - model: nn.Module, - optimizer: optim.Optimizer - ) -> None: - """Do nothing. Only applicable for Horovod. - - Args: - model (nn.Module): ML model - optimizer (optim.Optimizer): Optimizer - """ - pass - - def distribute_optimizer( - self, - optimizer: optim.Optimizer, - model: nn.Module = None - ) -> optim.Optimizer: - """Returns the optimizer from argument. - - Args: - optimizer (optim.Optimizer): optimizer. - model (nn.Module): ML model. Unused here. - - Returns: - optim.Optimizer: Distributed optimizer. - """ - return optimizer - - def dist_gwsize(self) -> int: - """Returns the total number of processes (global world size). - - Returns: - int: global world size. - """ - return dist.get_world_size() - - def dist_lwsize(self) -> int: - """Returns the local number of workers available per node, - which is usually the number of GPUs available. - - Returns: - int: local world size. - """ - return torch.cuda.device_count() - - def dist_grank(self) -> int: - """Returns the global rank of the current process, where - rank ranges from 0 to world_size. - - Returns: - int: global rank. - """ - return dist.get_rank() - - def dist_lrank(self) -> int: - """Returns the local rank of the current process. - - Returns: - int: local rank. - """ - return dist.get_rank() % torch.cuda.device_count() - - def clean_up(self) -> None: - """Destroys the current process group.""" - if torch.cuda.is_available(): - dist.barrier() - dist.destroy_process_group() - - def par_allgather_obj(self, obj: Any) -> List[Any]: - """Gathers any object from the whole group - in a list (to all workers). - - Args: - obj (Any): Object to gather from all workers. - - Returns: - List[Any]: List of gathered objects. - """ - res = [None] * self.dist_gwsize() - dist.all_gather_object(res, obj) - return res +# @abc.abstractmethod +# def distribute_model(self, model: Any) -> Any: +# """Distributes a machine learning model. +# Args: +# model (Any): a generic ML model to be distributed. -class DSDistributedStrategy_old(TorchDistributedStrategy_old): - """DeepSpeed distributed strategy class. +# Returns: +# Any: distributed model instance. +# """ - Args: - backend (str): Name of the communication backend to employ. - config (Union[dict, Path, str]): DeepSpeed config. Either a - dictionary or a path to a JSON file. 
- """ +# @abc.abstractmethod +# def broadcast_params(self, model: Any, optimizer: Any) -> None: +# """Broadcasts variables from root rank to all other processes/ - config: Dict = None - backend: str - - def __init__( - self, - backend: str, - config: Union[Dict, Path, str] - ) -> None: - super().__init__() - self.backend = backend - self._load_config(config) - - def _load_config(self, ds_config): - if isinstance(ds_config, (str, Path)): - with open(ds_config) as fp: - self.config = json.load(fp) - elif isinstance(ds_config, dict): - self.config = ds_config - else: - raise ValueError("ds_config is not a dictionary not a path.") - - def init_backend(self) -> None: - """Initializes the distributed process group and the distributed - package. - """ - # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization - deepspeed.init_distributed(dist_backend=self.backend) - - def distribute_model(self, model: nn.Module) -> nn.Module: - """Achieves data parallelism by synchronizing the gradients - across each model replica located in each available - computing device. - - Args: - model (nn.Module): ML model to be distributed. - - Returns: - nn.Module: Distributed model replicas across all devices - that are to be synchronized. - """ - # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization - distrib_model, __, __, __ = deepspeed.initialize( - model=model, - model_parameters=model.parameters(), - dist_init_required=True, - config=self.config - ) - return distrib_model - - def broadcast_params( - self, model: nn.Module, optimizer: optim.Optimizer - ) -> None: - """Only applicable for Horovod. Does nothing. - - Args: - model (nn.Module): ML model. - optimizer (optim.Optimizer): optimizer. - """ - pass +# Args: +# model (Any): distributed model. +# optimizer (Any): optimizer. +# """ - def distribute_optimizer( - self, - optimizer: optim.Optimizer, - model: nn.Module = None - ) -> optim.Optimizer: - """Returns the optimizer from argument. - - Args: - optimizer (optim.Optimizer): torch optimizer. - model (nn.Module): torch neural network. - - Returns: - optim.Optimizer: distributed optimizer. - """ - return optimizer - - def dist_gwsize(self) -> int: - """Returns the total number of processes (global world size). - - Returns: - int: global world size. - """ - return dist.get_world_size() - - def dist_lwsize(self) -> int: - """Returns the local number of workers available per node, - which is usually the number of GPUs available. - - Returns: - int: local world size. - """ - return torch.cuda.device_count() - - def dist_grank(self) -> int: - """Returns the global rank of the current process, where - rank ranges from 0 to world_size. - - Returns: - int: global rank. - """ - return dist.get_rank() - - def dist_lrank(self) -> int: - """Returns the local rank of the current process. - - Returns: - int: local rank. - """ - return dist.get_rank() % torch.cuda.device_count() - - def clean_up(self) -> None: - """Destroys the current process group.""" - deepspeed.sys.exit() - - def par_allgather_obj(self, obj: Any) -> list[Any]: - """Gathers any object from the whole group - in a list (to all workers). +# @abc.abstractmethod +# def distribute_optimizer(self, optimizer: Any, model: Any) -> Any: +# """Distribute optimizer. + +# Args: +# optimizer (Any): optimizer. +# model (Any): distributed model. - Args: - obj (Any): Object to gather from all workers. - - Returns: - List[Any]: List of gathered objects. 
- """ - res = [None] * self.dist_gwsize() - dist.all_gather_object(res, obj) - return res - - -class HVDDistributedStrategy_old(TorchDistributedStrategy_old): - """Horovod distributed strategy class.""" - - def init_backend(self) -> None: - """Initializes the Horovod distributed backend.""" - hvd.init() - - def distribute_model(self, model: nn.Module) -> nn.Module: - """Only applicable for DDP and DeepSpeed. - For Horovod, returns the same model passed as argument. - - Args: - model (nn.Module): ML model to be distributed. - - Returns: - nn.Module: ML model passed in the argument. - """ - return model - - def broadcast_params( - self, model: nn.Module, optimizer: optim.Optimizer - ) -> None: - """Broadcasts variables from root rank to all other processes. - - Args: - model (nn.Module): ML model that is to be broadcasted - across processes. - optimizer (optim.Optimizer): Optimizer that is to be broadcasted - across processes. - """ - hvd.broadcast_parameters(model.state_dict(), root_rank=0) - hvd.broadcast_optimizer_state(optimizer, root_rank=-0) - - def distribute_optimizer( - self, - optimizer: optim.Optimizer, - model: nn.Module - ) -> optim.Optimizer: - """Constructs a DistributedOptimizer, for computing single-process - gradient values and applying gradient updates after the gradient values - have been combined across all the Horovod ranks. - - Args: - optimizer (optim.Optimizer): Optimizer to be distributed. - model (nn.Module): ML model to be trained. - - Returns: - optim.Optimizer: Distributed optimizer across all ranks. - """ - distOptimizer = hvd.DistributedOptimizer( - optimizer, - named_parameters=model.named_parameters(), - op=hvd.Average - ) - return distOptimizer - - def dist_gwsize(self) -> int: - """Returns the total number of processes (global world size). - - Returns: - int: global world size. - """ - return hvd.size() - - def dist_lwsize(self) -> int: - """Returns the local number of workers available per node, - which is usually the number of GPUs available. - - Returns: - int: local world size. - """ - return hvd.local_size() - - def dist_grank(self) -> int: - """Returns the global rank of the current process, where - rank ranges from 0 to world_size. - - Returns: - int: global rank. - """ - return hvd.rank() - - def dist_lrank(self) -> int: - """Returns the local rank of the current process. - - Returns: - int: local rank. - """ - return hvd.local_rank() - - def clean_up(self) -> None: - """Shuts Horovod down.""" - hvd.shutdown() - - def par_allgather_obj(self, obj: Any) -> list[Any]: - """Gathers scalar objects across all workers to a - list with size(#worker), uses horovod communicator - - Args: - obj (Any): object in a worker. - - Returns: - list: gathered list with size(#worker). - """ - return hvd.allgather_object(obj) +# Returns: +# Any: distributed optimizer. +# """ + +# @abc.abstractmethod +# def dist_gwsize(self) -> int: +# """Returns the total number of processes (global world size). + +# Returns: +# int: global world size. +# """ + +# @abc.abstractmethod +# def dist_lwsize(self) -> int: +# """Returns the number of local workers available on a node +# (local world size). +# Usually it is equal to the number of available GPUs. + +# Returns: +# int: local world size. +# """ + +# @abc.abstractmethod +# def dist_grank(self) -> int: +# """Returns the global rank of the current process. +# Rank ranges from 0 to world_size. + +# Returns: +# int: global rank. 
+# """ + +# @abc.abstractmethod +# def dist_lrank(self) -> int: +# """Returns the local rank of the current process. + +# Returns: +# int: local rank. +# """ + +# def is_main_worker(self) -> bool: +# """Checks if local worker has global rank equal to zero. + +# Returns: +# bool: True if main worker. +# """ +# return self.dist_grank() == 0 + +# def dist_device(self) -> str: +# """Device used by local worker. + +# Returns: +# str: torch device in the form 'cuda:N'. +# """ +# return f"cuda:{self.dist_lrank()}" + +# @abc.abstractmethod +# def clean_up(self) -> None: +# """Cleans up resources allocated by distributed strategy.""" + +# @abc.abstractmethod +# def par_allgather_obj(self, obj: Any) -> List[Any]: +# """Gathers any object from the whole group in a list +# (to all workers). + +# Args: +# obj (Any): object to gather from all workers. + +# Returns: +# List[Any]: list of objects gathered from all workers. +# """ + + +# class DDPDistributedStrategy_old(TorchDistributedStrategy_old): +# """PyTorch DDP distributed strategy class. + +# Args: +# backend (str): Name of the communication backend to employ. +# """ + +# backend: str + +# def __init__(self, backend: str) -> None: +# super().__init__() +# self.backend = backend + +# def init_backend(self) -> None: +# """Initializes the distributed process group and the distributed +# package. +# """ +# if torch.cuda.is_available(): +# dist.init_process_group(backend=self.backend) + +# def distribute_model(self, model: nn.Module) -> nn.Module: +# """Achieves data parallelism by synchronizing the gradients +# across each model replica located in each available +# computing device. + +# Args: +# model (nn.Module): ML model to be distributed. + +# Returns: +# nn.Module: Distributed model replicas across all devices. +# that are to be synchronized. +# """ +# if torch.cuda.is_available(): +# # device = self.dist_lrank() +# model = model.to(self.dist_device()) +# dist_model = torch.nn.parallel.DistributedDataParallel( +# model, +# device_ids=[self.dist_device()], +# output_device=self.dist_device() +# ) +# else: +# dist_model = model + +# return dist_model + +# def broadcast_params( +# self, +# model: nn.Module, +# optimizer: optim.Optimizer +# ) -> None: +# """Do nothing. Only applicable for Horovod. + +# Args: +# model (nn.Module): ML model +# optimizer (optim.Optimizer): Optimizer +# """ +# pass + +# def distribute_optimizer( +# self, +# optimizer: optim.Optimizer, +# model: nn.Module = None +# ) -> optim.Optimizer: +# """Returns the optimizer from argument. + +# Args: +# optimizer (optim.Optimizer): optimizer. +# model (nn.Module): ML model. Unused here. + +# Returns: +# optim.Optimizer: Distributed optimizer. +# """ +# return optimizer + +# def dist_gwsize(self) -> int: +# """Returns the total number of processes (global world size). + +# Returns: +# int: global world size. +# """ +# return dist.get_world_size() + +# def dist_lwsize(self) -> int: +# """Returns the local number of workers available per node, +# which is usually the number of GPUs available. + +# Returns: +# int: local world size. +# """ +# return torch.cuda.device_count() + +# def dist_grank(self) -> int: +# """Returns the global rank of the current process, where +# rank ranges from 0 to world_size. + +# Returns: +# int: global rank. +# """ +# return dist.get_rank() + +# def dist_lrank(self) -> int: +# """Returns the local rank of the current process. + +# Returns: +# int: local rank. 
+# """ +# return dist.get_rank() % torch.cuda.device_count() + +# def clean_up(self) -> None: +# """Destroys the current process group.""" +# if torch.cuda.is_available(): +# dist.barrier() +# dist.destroy_process_group() + +# def par_allgather_obj(self, obj: Any) -> List[Any]: +# """Gathers any object from the whole group +# in a list (to all workers). + +# Args: +# obj (Any): Object to gather from all workers. + +# Returns: +# List[Any]: List of gathered objects. +# """ +# res = [None] * self.dist_gwsize() +# dist.all_gather_object(res, obj) +# return res + + +# class DSDistributedStrategy_old(TorchDistributedStrategy_old): +# """DeepSpeed distributed strategy class. + +# Args: +# backend (str): Name of the communication backend to employ. +# config (Union[dict, Path, str]): DeepSpeed config. Either a +# dictionary or a path to a JSON file. +# """ + +# config: Dict = None +# backend: str + +# def __init__( +# self, +# backend: str, +# config: Union[Dict, Path, str] +# ) -> None: +# super().__init__() +# self.backend = backend +# self._load_config(config) + +# def _load_config(self, ds_config): +# if isinstance(ds_config, (str, Path)): +# with open(ds_config) as fp: +# self.config = json.load(fp) +# elif isinstance(ds_config, dict): +# self.config = ds_config +# else: +# raise ValueError("ds_config is not a dictionary not a path.") + +# def init_backend(self) -> None: +# """Initializes the distributed process group and the distributed +# package. +# """ +# deepspeed.init_distributed(dist_backend=self.backend) + +# def distribute_model(self, model: nn.Module) -> nn.Module: +# """Achieves data parallelism by synchronizing the gradients +# across each model replica located in each available +# computing device. + +# Args: +# model (nn.Module): ML model to be distributed. + +# Returns: +# nn.Module: Distributed model replicas across all devices +# that are to be synchronized. +# """ +# distrib_model, __, __, __ = deepspeed.initialize( +# model=model, +# model_parameters=model.parameters(), +# dist_init_required=True, +# config=self.config +# ) +# return distrib_model + +# def broadcast_params( +# self, model: nn.Module, optimizer: optim.Optimizer +# ) -> None: +# """Only applicable for Horovod. Does nothing. + +# Args: +# model (nn.Module): ML model. +# optimizer (optim.Optimizer): optimizer. +# """ +# pass + +# def distribute_optimizer( +# self, +# optimizer: optim.Optimizer, +# model: nn.Module = None +# ) -> optim.Optimizer: +# """Returns the optimizer from argument. + +# Args: +# optimizer (optim.Optimizer): torch optimizer. +# model (nn.Module): torch neural network. + +# Returns: +# optim.Optimizer: distributed optimizer. +# """ +# return optimizer + +# def dist_gwsize(self) -> int: +# """Returns the total number of processes (global world size). + +# Returns: +# int: global world size. +# """ +# return dist.get_world_size() + +# def dist_lwsize(self) -> int: +# """Returns the local number of workers available per node, +# which is usually the number of GPUs available. + +# Returns: +# int: local world size. +# """ +# return torch.cuda.device_count() + +# def dist_grank(self) -> int: +# """Returns the global rank of the current process, where +# rank ranges from 0 to world_size. + +# Returns: +# int: global rank. +# """ +# return dist.get_rank() + +# def dist_lrank(self) -> int: +# """Returns the local rank of the current process. + +# Returns: +# int: local rank. 
+# """ +# return dist.get_rank() % torch.cuda.device_count() + +# def clean_up(self) -> None: +# """Destroys the current process group.""" +# deepspeed.sys.exit() + +# def par_allgather_obj(self, obj: Any) -> list[Any]: +# """Gathers any object from the whole group +# in a list (to all workers). + +# Args: +# obj (Any): Object to gather from all workers. + +# Returns: +# List[Any]: List of gathered objects. +# """ +# res = [None] * self.dist_gwsize() +# dist.all_gather_object(res, obj) +# return res + + +# class HVDDistributedStrategy_old(TorchDistributedStrategy_old): +# """Horovod distributed strategy class.""" + +# def init_backend(self) -> None: +# """Initializes the Horovod distributed backend.""" +# hvd.init() + +# def distribute_model(self, model: nn.Module) -> nn.Module: +# """Only applicable for DDP and DeepSpeed. +# For Horovod, returns the same model passed as argument. + +# Args: +# model (nn.Module): ML model to be distributed. + +# Returns: +# nn.Module: ML model passed in the argument. +# """ +# return model + +# def broadcast_params( +# self, model: nn.Module, optimizer: optim.Optimizer +# ) -> None: +# """Broadcasts variables from root rank to all other processes. + +# Args: +# model (nn.Module): ML model that is to be broadcasted +# across processes. +# optimizer (optim.Optimizer): Optimizer that is to be broadcasted +# across processes. +# """ +# hvd.broadcast_parameters(model.state_dict(), root_rank=0) +# hvd.broadcast_optimizer_state(optimizer, root_rank=-0) + +# def distribute_optimizer( +# self, +# optimizer: optim.Optimizer, +# model: nn.Module +# ) -> optim.Optimizer: +# """Constructs a DistributedOptimizer, for computing single-process +# gradient values and applying gradient updates after the gradients +# have been combined across all the Horovod ranks. + +# Args: +# optimizer (optim.Optimizer): Optimizer to be distributed. +# model (nn.Module): ML model to be trained. + +# Returns: +# optim.Optimizer: Distributed optimizer across all ranks. +# """ +# distOptimizer = hvd.DistributedOptimizer( +# optimizer, +# named_parameters=model.named_parameters(), +# op=hvd.Average +# ) +# return distOptimizer + +# def dist_gwsize(self) -> int: +# """Returns the total number of processes (global world size). + +# Returns: +# int: global world size. +# """ +# return hvd.size() + +# def dist_lwsize(self) -> int: +# """Returns the local number of workers available per node, +# which is usually the number of GPUs available. + +# Returns: +# int: local world size. +# """ +# return hvd.local_size() + +# def dist_grank(self) -> int: +# """Returns the global rank of the current process, where +# rank ranges from 0 to world_size. + +# Returns: +# int: global rank. +# """ +# return hvd.rank() + +# def dist_lrank(self) -> int: +# """Returns the local rank of the current process. + +# Returns: +# int: local rank. +# """ +# return hvd.local_rank() + +# def clean_up(self) -> None: +# """Shuts Horovod down.""" +# hvd.shutdown() + +# def par_allgather_obj(self, obj: Any) -> list[Any]: +# """Gathers scalar objects across all workers to a +# list with size(#worker), uses horovod communicator + +# Args: +# obj (Any): object in a worker. + +# Returns: +# list: gathered list with size(#worker). 
+# """ +# return hvd.allgather_object(obj) diff --git a/tutorials/distributed-ml/tutorial-0-basics/README.md b/tutorials/distributed-ml/tutorial-0-basics/README.md index a081f954..5ddcd635 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/README.md +++ b/tutorials/distributed-ml/tutorial-0-basics/README.md @@ -7,7 +7,7 @@ the module versions might need change accordingly. ## Setup -First, from the root of this repo, build the environment containing +First, from the root of this repository, build the environment containing pytorch, horovod and deepspeed. You can *try* with: ```bash diff --git a/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh index fcae9ab0..301a901b 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh @@ -26,7 +26,8 @@ ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio # set env source ../../../envAI_hdfml/bin/activate -# job info +# job info +debug=false echo "DEBUG: TIME: $(date)" echo "DEBUG: EXECUTE: $EXEC" echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" diff --git a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh index 29cba4ff..570159a5 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh @@ -26,7 +26,8 @@ ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio # set env source ../../../envAI_hdfml/bin/activate -# job info +# job info +debug=false echo "DEBUG: TIME: $(date)" echo "DEBUG: EXECUTE: $EXEC" echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" @@ -52,13 +53,14 @@ fi export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training -export MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i +MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i +export MASTER_ADDR export MASTER_PORT=29500 TRAINING_CMD="train.py -s deepspeed" # Run without launcher: set --ntasks-per-node=NUM_GPUS -srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed +srun --cpu-bind=none python -u "$TRAINING_CMD" --deepspeed # # Run with deepspeed launcher: set --ntasks-per-node=1 # # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables diff --git a/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh index 23682ed5..91becb61 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh @@ -26,7 +26,8 @@ ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio # set env source ../../../envAI_hdfml/bin/activate -# job info +# job info +debug=false echo "DEBUG: TIME: $(date)" echo "DEBUG: EXECUTE: $EXEC" echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" @@ -55,5 +56,5 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training TRAINING_CMD="train.py -s horovod" -srun --cpu-bind=none python -u $TRAINING_CMD +srun --cpu-bind=none python -u "$TRAINING_CMD" diff --git a/tutorials/distributed-ml/tutorial-1-mnist/README.md b/tutorials/distributed-ml/tutorial-1-mnist/README.md index f0466a2b..6f22d3ef 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/README.md +++ b/tutorials/distributed-ml/tutorial-1-mnist/README.md @@ -7,7 +7,7 @@ the module versions might need change accordingly. 
## Setup -First, from the root of this repo, build the environment containing +First, from the root of this repository, build the environment containing pytorch, horovod and deepspeed. You can *try* with: ```bash diff --git a/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh index 1821a760..2ca6297d 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh @@ -26,7 +26,8 @@ ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio # set env source ../../../envAI_hdfml/bin/activate -# job info +# job info +debug=false echo "DEBUG: TIME: $(date)" echo "DEBUG: EXECUTE: $EXEC" echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" diff --git a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh index 8a28b978..e7d6b030 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh @@ -26,7 +26,8 @@ ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio # set env source ../../../envAI_hdfml/bin/activate -# job info +# job info +debug=false echo "DEBUG: TIME: $(date)" echo "DEBUG: EXECUTE: $EXEC" echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" @@ -52,13 +53,14 @@ fi export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training -export MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i +MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i +export MASTER_ADDR export MASTER_PORT=29500 TRAINING_CMD="train.py -s deepspeed -c config.yaml" # Run without launcher: set --ntasks-per-node=NUM_GPUS -srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed +srun --cpu-bind=none python -u "$TRAINING_CMD" --deepspeed # # Run with deepspeed launcher: set --ntasks-per-node=1 # # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables diff --git a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh index 1682c244..db2ee480 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh @@ -26,7 +26,8 @@ ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio # set env source ../../../envAI_hdfml/bin/activate -# job info +# job info +debug=false echo "DEBUG: TIME: $(date)" echo "DEBUG: EXECUTE: $EXEC" echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" @@ -55,5 +56,5 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training TRAINING_CMD="train.py -s horovod -c config.yaml" -srun --cpu-bind=none python -u $TRAINING_CMD +srun --cpu-bind=none python -u "$TRAINING_CMD" From 7b016aca06a2abce05f1b017b2aee38a4b6c8502 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 20 Mar 2024 18:27:16 +0100 Subject: [PATCH 054/171] Update workflow --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index ce2dbd83..2592ee6a 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -50,4 +50,4 @@ jobs: DISABLE_ERRORS: false # Skip linting of docs FILTER_REGEX_EXCLUDE: .*docs/index.md|.*docs/docs/.*|.*ISSUE_TEMPLATE/.*|use-cases/.*|experimental/.* - BASH_SEVERITY: warning + # BASH_SEVERITY: warning From 64953084e73538ba1d32cb6bcf1727af3721bb34 Mon Sep 17 00:00:00 2001 From: Matteo Bunino 
Date: Wed, 20 Mar 2024 18:31:38 +0100 Subject: [PATCH 055/171] UPDATE workflow --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 2592ee6a..ce2dbd83 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -50,4 +50,4 @@ jobs: DISABLE_ERRORS: false # Skip linting of docs FILTER_REGEX_EXCLUDE: .*docs/index.md|.*docs/docs/.*|.*ISSUE_TEMPLATE/.*|use-cases/.*|experimental/.* - # BASH_SEVERITY: warning + BASH_SEVERITY: warning From 01af3c1845df5ac428de0cbb13ee32490e12acb3 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 20 Mar 2024 18:43:22 +0100 Subject: [PATCH 056/171] update --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index ce2dbd83..acb8a735 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -50,4 +50,4 @@ jobs: DISABLE_ERRORS: false # Skip linting of docs FILTER_REGEX_EXCLUDE: .*docs/index.md|.*docs/docs/.*|.*ISSUE_TEMPLATE/.*|use-cases/.*|experimental/.* - BASH_SEVERITY: warning + BASH_SEVERITY: error From 07e009a24630bece7747af9792318869aeaf5ebf Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 20 Mar 2024 18:50:47 +0100 Subject: [PATCH 057/171] Update workflow --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index acb8a735..ce2dbd83 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -50,4 +50,4 @@ jobs: DISABLE_ERRORS: false # Skip linting of docs FILTER_REGEX_EXCLUDE: .*docs/index.md|.*docs/docs/.*|.*ISSUE_TEMPLATE/.*|use-cases/.*|experimental/.* - BASH_SEVERITY: error + BASH_SEVERITY: warning From 09f19ac0479d9bbc5fdc7e628570c9cb7b0ef716 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 21 Mar 2024 07:52:10 +0100 Subject: [PATCH 058/171] UPDATE super linter to v6 --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index ce2dbd83..245b051a 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -30,7 +30,7 @@ jobs: # Runs the Super-Linter action - name: Run Super-Linter on new changes - uses: github/super-linter/slim@v5 + uses: github/super-linter/slim@v6 env: DEFAULT_BRANCH: main GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 7053817b19c86c9461564490ac6fc77422fe52d3 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 21 Mar 2024 07:55:31 +0100 Subject: [PATCH 059/171] UPDATE super linter to v6.3.0 --- .github/workflows/lint.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 245b051a..3defc48b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -30,7 +30,8 @@ jobs: # Runs the Super-Linter action - name: Run Super-Linter on new changes - uses: github/super-linter/slim@v6 + uses: super-linter/super-linter@v6.3.0 # x-release-please-version + # uses: github/super-linter/slim@v5 env: DEFAULT_BRANCH: main GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 395c6a2b2393ad3aa13971b3f5d0cbee4ec2522c Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 21 Mar 2024 07:58:49 +0100 Subject: [PATCH 060/171] UPDATE super linter to slim --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml 
b/.github/workflows/lint.yml index 3defc48b..b67a1e80 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -30,7 +30,7 @@ jobs: # Runs the Super-Linter action - name: Run Super-Linter on new changes - uses: super-linter/super-linter@v6.3.0 # x-release-please-version + uses: super-linter/super-linter/slim@v6.3.0 # x-release-please-version # uses: github/super-linter/slim@v5 env: DEFAULT_BRANCH: main From 9478ff9a538e38361ad8e490029a2054608664a3 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 21 Mar 2024 08:00:25 +0100 Subject: [PATCH 061/171] Cleanup --- .github/workflows/lint.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b67a1e80..1d491f76 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -30,8 +30,7 @@ jobs: # Runs the Super-Linter action - name: Run Super-Linter on new changes - uses: super-linter/super-linter/slim@v6.3.0 # x-release-please-version - # uses: github/super-linter/slim@v5 + uses: super-linter/super-linter/slim@v6.3.0 env: DEFAULT_BRANCH: main GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From db8b9142ece9ab2e407293e781d64bd67aa32f2a Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Thu, 21 Mar 2024 09:36:09 +0100 Subject: [PATCH 062/171] Update tfmirrored_slurm.sh --- .../distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh b/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh index 7d6dfe2f..863fe7c7 100644 --- a/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh @@ -54,7 +54,7 @@ echo # set comm export CUDA_VISIBLE_DEVICES="0,1,2,3" export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK fi @@ -62,4 +62,4 @@ COMMAND="train.py" EXEC="$COMMAND " -srun python -u $EXEC \ No newline at end of file +srun python -u $EXEC From 92eb8eb7366d4e6346531be9eec06dcdb63fef19 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Thu, 21 Mar 2024 09:52:48 +0100 Subject: [PATCH 063/171] Update tfmirrored_slurm.sh --- .../distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh b/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh index 863fe7c7..e1c8d54b 100644 --- a/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh @@ -45,9 +45,6 @@ echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi echo "DEBUG: SLURM_NODELIST: $SLURM_NODELIST" echo From fac21e4ba87e7c9df78c325f296f1db80ff682f4 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 21 Mar 2024 10:01:44 +0100 Subject: [PATCH 064/171] REMOVE workflows legacy --- workflows/README.md | 5 ----- workflows/cwl/README.md | 0 workflows/snakemake/README.md | 0 3 files changed, 5 deletions(-) delete mode 100644 workflows/README.md delete mode 100644 
workflows/cwl/README.md delete mode 100644 workflows/snakemake/README.md diff --git a/workflows/README.md b/workflows/README.md deleted file mode 100644 index 900dd048..00000000 --- a/workflows/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Workflow manager integration - -It is possible that `itwinai` is a step in a greater workflow. -This folder contains examples on how to execute an `itwinai` use case -from an external workflow manager, using its own workflow definition language. diff --git a/workflows/cwl/README.md b/workflows/cwl/README.md deleted file mode 100644 index e69de29b..00000000 diff --git a/workflows/snakemake/README.md b/workflows/snakemake/README.md deleted file mode 100644 index e69de29b..00000000 From f9a94cff6d21447aca0b8c54be85a47cad811e49 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 21 Mar 2024 10:03:12 +0100 Subject: [PATCH 065/171] DELETE cyclegan use case --- use-cases/zebra2horse/cyclegan.py | 507 ------------------------- use-cases/zebra2horse/dataloader.py | 85 ----- use-cases/zebra2horse/pipeline.yaml | 47 --- use-cases/zebra2horse/pix2pix.py | 111 ------ use-cases/zebra2horse/requriements.txt | 1 - use-cases/zebra2horse/startscript | 32 -- use-cases/zebra2horse/train.py | 22 -- use-cases/zebra2horse/trainer.py | 47 --- 8 files changed, 852 deletions(-) delete mode 100644 use-cases/zebra2horse/cyclegan.py delete mode 100644 use-cases/zebra2horse/dataloader.py delete mode 100644 use-cases/zebra2horse/pipeline.yaml delete mode 100644 use-cases/zebra2horse/pix2pix.py delete mode 100644 use-cases/zebra2horse/requriements.txt delete mode 100644 use-cases/zebra2horse/startscript delete mode 100644 use-cases/zebra2horse/train.py delete mode 100644 use-cases/zebra2horse/trainer.py diff --git a/use-cases/zebra2horse/cyclegan.py b/use-cases/zebra2horse/cyclegan.py deleted file mode 100644 index 5dc6b7a6..00000000 --- a/use-cases/zebra2horse/cyclegan.py +++ /dev/null @@ -1,507 +0,0 @@ -import tensorflow.keras as keras -import tensorflow as tf -import tensorflow_addons as tfa - -from tensorflow.keras import layers - - -class ReflectionPadding2D(layers.Layer): - """Implements Reflection Padding as a layer. - - Args: - padding(tuple): Amount of padding for the - spatial dimensions. - - Returns: - A padded tensor with the same type as the input tensor. 
- """ - - def __init__(self, padding=(1, 1), **kwargs): - self.padding = tuple(padding) - super().__init__(**kwargs) - - def call(self, input_tensor, mask=None): - padding_width, padding_height = self.padding - padding_tensor = [ - [0, 0], - [padding_height, padding_height], - [padding_width, padding_width], - [0, 0], - ] - return tf.pad(input_tensor, padding_tensor, mode="REFLECT") - - def get_config(self): - config = super().get_config().copy() - config.update({ - 'padding': self.padding, - }) - return config - - -def residual_block( - x, - activation, - kernel_initializer=keras.initializers.RandomNormal( - mean=0.0, stddev=0.02), - kernel_size=(3, 3), - strides=(1, 1), - padding="valid", - gamma_initializer=keras.initializers.RandomNormal( - mean=0.0, stddev=0.02), - use_bias=False, -): - dim = x.shape[-1] - input_tensor = x - - x = ReflectionPadding2D()(input_tensor) - x = layers.Conv2D( - dim, - kernel_size, - strides=strides, - kernel_initializer=kernel_initializer, - padding=padding, - use_bias=use_bias, - )(x) - x = tfa.layers.InstanceNormalization( - gamma_initializer=gamma_initializer)(x) - x = activation(x) - - x = ReflectionPadding2D()(x) - x = layers.Conv2D( - dim, - kernel_size, - strides=strides, - kernel_initializer=kernel_initializer, - padding=padding, - use_bias=use_bias, - )(x) - x = tfa.layers.InstanceNormalization( - gamma_initializer=gamma_initializer)(x) - x = layers.add([input_tensor, x]) - return x - - -def downsample( - x, - filters, - activation, - kernel_initializer=keras.initializers.RandomNormal( - mean=0.0, stddev=0.02), - kernel_size=(3, 3), - strides=(2, 2), - padding="same", - gamma_initializer=keras.initializers.RandomNormal( - mean=0.0, stddev=0.02), - use_bias=False, -): - x = layers.Conv2D( - filters, - kernel_size, - strides=strides, - kernel_initializer=kernel_initializer, - padding=padding, - use_bias=use_bias, - )(x) - x = tfa.layers.InstanceNormalization( - gamma_initializer=gamma_initializer)(x) - if activation: - x = activation(x) - return x - - -def upsample( - x, - filters, - activation, - kernel_size=(3, 3), - strides=(2, 2), - padding="same", - kernel_initializer=keras.initializers.RandomNormal( - mean=0.0, stddev=0.02), - gamma_initializer=keras.initializers.RandomNormal( - mean=0.0, stddev=0.02), - use_bias=False, -): - x = layers.Conv2DTranspose( - filters, - kernel_size, - strides=strides, - padding=padding, - kernel_initializer=kernel_initializer, - use_bias=use_bias, - )(x) - x = tfa.layers.InstanceNormalization( - gamma_initializer=gamma_initializer)(x) - if activation: - x = activation(x) - return x - - -class Generator(keras.Model): - def __init__( - self, - filters=64, - num_downsampling_blocks=2, - num_residual_blocks=9, - num_upsample_blocks=2, - gamma_initializer=keras.initializers.RandomNormal( - mean=0.0, stddev=0.02), - input_img_size=(256, 256, 3) - ): - super().__init__() - - name = 'gen' - - self.filters = filters - self.num_downsampling_blocks = num_downsampling_blocks - self.num_residual_blocks = num_residual_blocks - self.num_upsample_blocks = num_upsample_blocks - self.gamma_initializer = gamma_initializer - self.input_img_size = input_img_size - - img_input = layers.Input(shape=input_img_size, - name=name + "_img_input") - x = ReflectionPadding2D(padding=(3, 3))(img_input) - x = layers.Conv2D( - filters, (7, 7), - kernel_initializer=keras.initializers.RandomNormal( - mean=0.0, stddev=0.02), - use_bias=False - )(x) - x = tfa.layers.InstanceNormalization( - gamma_initializer=gamma_initializer)(x) - x = 
layers.Activation("relu")(x) - - # Downsampling - for _ in range(num_downsampling_blocks): - filters *= 2 - x = downsample(x, filters=filters, - activation=layers.Activation("relu")) - - # Residual blocks - for _ in range(num_residual_blocks): - x = residual_block(x, activation=layers.Activation("relu")) - - # Upsampling - for _ in range(num_upsample_blocks): - filters //= 2 - x = upsample(x, filters, activation=layers.Activation("relu")) - - # Final block - x = ReflectionPadding2D(padding=(3, 3))(x) - x = layers.Conv2D(3, (7, 7), padding="valid")(x) - x = layers.Activation("tanh")(x) - - self.model = keras.models.Model(img_input, x, name=name) - - def call(self, inputs, training=False): - return self.model(inputs) - - def get_config(self): - config = super().get_config().copy() - config.update({ - 'filters': self.filters, - 'num_downsampling_blocks': self.num_downsampling_blocks, - 'num_residual_blocks': self.num_residual_blocks, - 'num_upsample_blocks': self.num_upsample_blocks, - 'gamma_initializer': self.gamma_initializer, - 'input_img_size': self.input_img_size, - }) - return config - - -class Discriminator(keras.Model): - def __init__( - self, - filters=64, - kernel_initializer=keras.initializers.RandomNormal( - mean=0.0, stddev=0.02), - num_downsampling=3, - input_img_size=(256, 256, 3) - ): - super().__init__() - - name = 'disc' - self.filters = filters - self.kernel_initializer = kernel_initializer - self.num_downsampling = num_downsampling - self.input_img_size = input_img_size - - img_input = layers.Input(shape=input_img_size, - name=name + "_img_input") - x = layers.Conv2D( - filters, - (4, 4), - strides=(2, 2), - padding="same", - kernel_initializer=kernel_initializer, - )(img_input) - x = layers.LeakyReLU(0.2)(x) - - num_filters = filters - for num_downsample_block in range(3): - num_filters *= 2 - if num_downsample_block < 2: - x = downsample( - x, - filters=num_filters, - activation=layers.LeakyReLU(0.2), - kernel_size=(4, 4), - strides=(2, 2), - ) - else: - x = downsample( - x, - filters=num_filters, - activation=layers.LeakyReLU(0.2), - kernel_size=(4, 4), - strides=(1, 1), - ) - - x = layers.Conv2D( - 1, (4, 4), strides=(1, 1), - padding="same", kernel_initializer=kernel_initializer - )(x) - self.model = keras.models.Model(inputs=img_input, outputs=x, name=name) - - def call(self, inputs, training=False): - return self.model(inputs) - - def get_config(self): - config = super().get_config().copy() - config.update({ - 'filters': self.filters, - 'kernel_initializer': self.kernel_initializer, - 'num_downsampling': self.num_downsampling, - 'input_img_size': self.input_img_size, - }) - return config - - -class CycleGAN(keras.Model): - def __init__( - self, - generator_G: keras.Model, - generator_F: keras.Model, - discriminator_X: keras.Model, - discriminator_Y: keras.Model, - lambda_cycle=10.0, - lambda_identity=0.5, - ): - super().__init__() - self.gen_G = generator_G - self.gen_F = generator_F - self.disc_X = discriminator_X - self.disc_Y = discriminator_Y - self.lambda_cycle = lambda_cycle - self.lambda_identity = lambda_identity - - def compile(self, config: dict): - super().compile() - self.gen_G_optimizer = config['gen_G_optimizer'] - self.gen_F_optimizer = config['gen_F_optimizer'] - self.disc_X_optimizer = config['disc_X_optimizer'] - self.disc_Y_optimizer = config['disc_Y_optimizer'] - - # TODO: Define losses in config file - # Loss function for evaluating adversarial loss - adv_loss_fn = keras.losses.MeanSquaredError( - reduction=tf.keras.losses.Reduction.SUM) - - # 
Define the loss function for the generators - def generator_loss_fn(fake): - fake_loss = adv_loss_fn(tf.ones_like(fake), fake) - return fake_loss - - # Define the loss function for the discriminators - def discriminator_loss_fn(real, fake): - real_loss = adv_loss_fn(tf.ones_like(real), real) - fake_loss = adv_loss_fn(tf.zeros_like(fake), fake) - return (real_loss + fake_loss) * 0.5 - - self.generator_loss_fn = generator_loss_fn - self.discriminator_loss_fn = discriminator_loss_fn - - self.cycle_loss_fn = keras.losses.MeanAbsoluteError( - reduction=tf.keras.losses.Reduction.SUM) - self.identity_loss_fn = keras.losses.MeanAbsoluteError( - reduction=tf.keras.losses.Reduction.SUM) - - def train_step(self, batch_data): - # x is Horse and y is zebra - real_x, real_y = batch_data - - # For CycleGAN, we need to calculate different - # kinds of losses for the generators and discriminators. - # We will perform the following steps here: - # - # 1. Pass real images through the generators and get the generated - # images - # 2. Pass the generated images back to the generators to check if we - # can predict the original image from the generated image. - # 3. Do an identity mapping of the real images using the - # generators. - # 4. Pass the generated images in 1) to the corresponding - # discriminators. - # 5. Calculate the generators total loss (adversarial + cycle + - # identity) - # 6. Calculate the discriminators loss - # 7. Update the weights of the generators - # 8. Update the weights of the discriminators - # 9. Return the losses in a dictionary - - with tf.GradientTape(persistent=True) as tape: - # Horse to fake zebra - fake_y = self.gen_G(real_x, training=True) - # Zebra to fake horse -> y2x - fake_x = self.gen_F(real_y, training=True) - - # Cycle (Horse to fake zebra to fake horse): x -> y -> x - cycled_x = self.gen_F(fake_y, training=True) - # Cycle (Zebra to fake horse to fake zebra) y -> x -> y - cycled_y = self.gen_G(fake_x, training=True) - - # Identity mapping - same_x = self.gen_F(real_x, training=True) - same_y = self.gen_G(real_y, training=True) - - # Discriminator output - disc_real_x = self.disc_X(real_x, training=True) - disc_fake_x = self.disc_X(fake_x, training=True) - - disc_real_y = self.disc_Y(real_y, training=True) - disc_fake_y = self.disc_Y(fake_y, training=True) - - # Generator adversarial loss - gen_G_loss = self.generator_loss_fn(disc_fake_y) - gen_F_loss = self.generator_loss_fn(disc_fake_x) - - # Generator cycle loss - cycle_loss_G = self.cycle_loss_fn( - real_y, cycled_y) * self.lambda_cycle - cycle_loss_F = self.cycle_loss_fn( - real_x, cycled_x) * self.lambda_cycle - - # Generator identity loss - id_loss_G = ( - self.identity_loss_fn(real_y, same_y) - * self.lambda_cycle - * self.lambda_identity - ) - id_loss_F = ( - self.identity_loss_fn(real_x, same_x) - * self.lambda_cycle - * self.lambda_identity - ) - - # Total generator loss - total_loss_G = gen_G_loss + cycle_loss_G + id_loss_G - total_loss_F = gen_F_loss + cycle_loss_F + id_loss_F - - # Discriminator loss - disc_X_loss = self.discriminator_loss_fn(disc_real_x, disc_fake_x) - disc_Y_loss = self.discriminator_loss_fn(disc_real_y, disc_fake_y) - - # Get the gradients for the generators - grads_G = tape.gradient(total_loss_G, self.gen_G.trainable_variables) - grads_F = tape.gradient(total_loss_F, self.gen_F.trainable_variables) - - # Get the gradients for the discriminators - disc_X_grads = tape.gradient( - disc_X_loss, self.disc_X.trainable_variables) - disc_Y_grads = tape.gradient( - disc_Y_loss, 
self.disc_Y.trainable_variables) - - # Update the weights of the generators - self.gen_G_optimizer.apply_gradients( - zip(grads_G, self.gen_G.trainable_variables) - ) - self.gen_F_optimizer.apply_gradients( - zip(grads_F, self.gen_F.trainable_variables) - ) - - # Update the weights of the discriminators - self.disc_X_optimizer.apply_gradients( - zip(disc_X_grads, self.disc_X.trainable_variables) - ) - self.disc_Y_optimizer.apply_gradients( - zip(disc_Y_grads, self.disc_Y.trainable_variables) - ) - - return { - "G_loss": total_loss_G, - "F_loss": total_loss_F, - "D_X_loss": disc_X_loss, - "D_Y_loss": disc_Y_loss, - } - - def test_step(self, inputs): - real_x, real_y = inputs - - # Horse to fake zebra - fake_y = self.gen_G(real_x, training=False) - # Zebra to fake horse -> y2x - fake_x = self.gen_F(real_y, training=False) - - # Cycle (Horse to fake zebra to fake horse): x -> y -> x - cycled_x = self.gen_F(fake_y, training=False) - # Cycle (Zebra to fake horse to fake zebra) y -> x -> y - cycled_y = self.gen_G(fake_x, training=False) - - # Identity mapping - same_x = self.gen_F(real_x, training=False) - same_y = self.gen_G(real_y, training=False) - - # Discriminator output - disc_real_x = self.disc_X(real_x, training=False) - disc_fake_x = self.disc_X(fake_x, training=False) - - disc_real_y = self.disc_Y(real_y, training=False) - disc_fake_y = self.disc_Y(fake_y, training=False) - - # Generator adversarial loss - gen_G_loss = self.generator_loss_fn(disc_fake_y) - gen_F_loss = self.generator_loss_fn(disc_fake_x) - - # Generator cycle loss - cycle_loss_G = self.cycle_loss_fn(real_y, cycled_y) * self.lambda_cycle - cycle_loss_F = self.cycle_loss_fn(real_x, cycled_x) * self.lambda_cycle - - # Generator identity loss - id_loss_G = ( - self.identity_loss_fn(real_y, same_y) - * self.lambda_cycle - * self.lambda_identity - ) - id_loss_F = ( - self.identity_loss_fn(real_x, same_x) - * self.lambda_cycle - * self.lambda_identity - ) - - # Total generator loss - total_loss_G = gen_G_loss + cycle_loss_G + id_loss_G - total_loss_F = gen_F_loss + cycle_loss_F + id_loss_F - - # Discriminator loss - disc_X_loss = self.discriminator_loss_fn(disc_real_x, disc_fake_x) - disc_Y_loss = self.discriminator_loss_fn(disc_real_y, disc_fake_y) - - return { - "G_loss": total_loss_G, - "F_loss": total_loss_F, - "D_X_loss": disc_X_loss, - "D_Y_loss": disc_Y_loss, - } - - def get_config(self): - config = super().get_config().copy() - config.update({ - 'generator_G': self.gen_G, - 'generator_F': self.gen_F, - 'discriminator_X': self.disc_X, - 'discriminator_Y': self.disc_Y, - 'lambda_cycle': self.lambda_cycle, - 'lambda_identity': self.lambda_identity, - }) - return config diff --git a/use-cases/zebra2horse/dataloader.py b/use-cases/zebra2horse/dataloader.py deleted file mode 100644 index 0970d270..00000000 --- a/use-cases/zebra2horse/dataloader.py +++ /dev/null @@ -1,85 +0,0 @@ -from typing import Tuple, Dict, Optional - -# import tensorflow.keras as keras -import tensorflow as tf -import tensorflow_datasets as tfds - -from itwinai.components import DataGetter - - -class Zebra2HorseDataLoader(DataGetter): - def __init__(self, buffer_size: int): - super().__init__() - self.buffer_size = buffer_size - - def load(self): - # Load the horse-zebra dataset using tensorflow-datasets. 
- dataset, _ = tfds.load("cycle_gan/horse2zebra", - with_info=True, as_supervised=True) - train_horses, train_zebras = dataset["trainA"], dataset["trainB"] - test_horses, test_zebras = dataset["testA"], dataset["testB"] - - # Image sizes - orig_img_size = (286, 286) - input_img_size = (256, 256, 3) - - def normalize_img(img): - img = tf.cast(img, dtype=tf.float32) - # Map values in the range [-1, 1] - return (img / 127.5) - 1.0 - - def preproc_train_fn(img, label): - # Random flip - img = tf.image.random_flip_left_right(img) - # Resize to the original size first - img = tf.image.resize(img, [*orig_img_size]) - # Random crop to 256X256 - img = tf.image.random_crop(img, size=[*input_img_size]) - # Normalize the pixel values in the range [-1, 1] - img = normalize_img(img) - return img - - def preproc_test_fn(img, label): - # Only resizing and normalization for the test images. - img = tf.image.resize(img, [input_img_size[0], input_img_size[1]]) - img = normalize_img(img) - return img - - # TODO: Add shuffle? - # Apply the preprocessing operations to the training data - train_horses = ( - train_horses.map(preproc_train_fn, - num_parallel_calls=tf.data.AUTOTUNE) - .cache() - ) - train_zebras = ( - train_zebras.map(preproc_train_fn, - num_parallel_calls=tf.data.AUTOTUNE) - .cache() - ) - - # Apply the preprocessing operations to the test data - test_horses = ( - test_horses.map(preproc_test_fn, - num_parallel_calls=tf.data.AUTOTUNE) - .cache() - ) - test_zebras = ( - test_zebras.map(preproc_test_fn, - num_parallel_calls=tf.data.AUTOTUNE) - .cache() - ) - - return ( - tf.data.Dataset.zip((train_horses, train_zebras) - ).shuffle(self.buffer_size), - tf.data.Dataset.zip((test_horses, test_zebras) - ).shuffle(self.buffer_size) - ) - - def execute( - self, - config: Optional[Dict] = None - ) -> Tuple[Optional[Tuple], Optional[Dict]]: - train, test = self.load() - return ([train, test],), config diff --git a/use-cases/zebra2horse/pipeline.yaml b/use-cases/zebra2horse/pipeline.yaml deleted file mode 100644 index ff00ef28..00000000 --- a/use-cases/zebra2horse/pipeline.yaml +++ /dev/null @@ -1,47 +0,0 @@ -loader: - class_path: dataloader.Zebra2HorseDataLoader - init_args: - buffer_size: 256 - -trainer: - class_path: trainer.Zebra2HorseTrainer - init_args: - epochs: 10 - batch_size: 1 - model: - class_path: cyclegan.CycleGAN - init_args: - generator_G: - class_path: cyclegan.Generator - generator_F: - class_path: cyclegan.Generator - discriminator_X: - class_path: cyclegan.Discriminator - discriminator_Y: - class_path: cyclegan.Discriminator - compile_conf: - gen_G_optimizer: { - class_name: "Adam", - config: { - learning_rate: 0.001 - } - } - gen_F_optimizer: { - class_name: "Adam", - config: { - learning_rate: 0.001 - } - } - disc_X_optimizer: { - class_name: "Adam", - config: { - learning_rate: 0.001 - } - } - disc_Y_optimizer: { - class_name: "Adam", - config: { - learning_rate: 0.001 - } - } - loggers: [] diff --git a/use-cases/zebra2horse/pix2pix.py b/use-cases/zebra2horse/pix2pix.py deleted file mode 100644 index 0c94dd40..00000000 --- a/use-cases/zebra2horse/pix2pix.py +++ /dev/null @@ -1,111 +0,0 @@ -import torch -import torch.nn as nn - -OUTPUT_CHANNELS = 3 - - -def downsample(in_c, out_c, apply_batchnorm=True): - result = nn.Sequential() - result.add_module(name="Conv2d", module=nn.Conv2d( - in_c, out_c, 4, 2, 1, bias=False)) - if apply_batchnorm: - result.add_module(name="BatchNorm2d", module=nn.BatchNorm2d(out_c)) - result.add_module(name="LeakyReLU", module=nn.LeakyReLU(inplace=True)) - - 
return result - - -def upsample(in_c, out_c, apply_dropout=False): - result = nn.Sequential() - result.add_module(name="ConvTranspose2d", module=nn.ConvTranspose2d( - in_c, out_c, 4, 2, 1, bias=False)) - result.add_module(name="BatchNorm2d", module=nn.BatchNorm2d(out_c)) - if apply_dropout: - result.add_module(name="Dropout", module=nn.Dropout(0.5, inplace=True)) - result.add_module(name="ReLU", module=nn.ReLU(inplace=True)) - - return result - - -class Generator(nn.Module): - def __init__(self): - super(Generator, self).__init__() - self.down1 = downsample(3, 64, apply_batchnorm=False) - self.down2 = downsample(64, 128) - self.down3 = downsample(128, 256) - self.down4 = downsample(256, 512) - self.down5_7 = downsample(512, 512) - self.down8 = downsample(512, 512, apply_batchnorm=False) - - self.up1 = upsample(512, 512, apply_dropout=True) - self.up2_3 = upsample(1024, 512, apply_dropout=True) - self.up4 = upsample(1024, 512) - self.up5 = upsample(1024, 256) - self.up6 = upsample(512, 128) - self.up7 = upsample(256, 64) - - self.last = nn.Sequential() - self.last.add_module(name="ConvTranspose2d", module=nn.ConvTranspose2d( - 128, OUTPUT_CHANNELS, 4, 2, 1)) - self.last.add_module(name="tanh", module=nn.Tanh()) - - def forward(self, image): - # Encoder - x1 = self.down1(image) - x2 = self.down2(x1) - x3 = self.down3(x2) - x4 = self.down4(x3) - x5 = self.down5_7(x4) - x6 = self.down5_7(x5) - x7 = self.down5_7(x6) - x8 = self.down8(x7) - - # Decoder - x = self.up1(x8) - x = torch.cat([x7, x], dim=1) - x = self.up2_3(x) - x = torch.cat([x6, x], dim=1) - x = self.up2_3(x) - x = torch.cat([x5, x], dim=1) - x = self.up4(x) - x = torch.cat([x4, x], dim=1) - x = self.up5(x) - x = torch.cat([x3, x], dim=1) - x = self.up6(x) - x = torch.cat([x2, x], dim=1) - x = self.up7(x) - x = torch.cat([x1, x], dim=1) - - x = self.last(x) - - return x - - -class Discriminator(nn.Module): - def __init__(self): - super(Discriminator, self).__init__() - self.down1 = downsample(6, 64, apply_batchnorm=False) - self.down2 = downsample(64, 128) - self.down3 = downsample(128, 256) - - self.conv1 = nn.Conv2d(256, 512, 4, 1, 1, bias=False) - self.batchnorm = nn.BatchNorm2d(512) - self.leakyrelu = nn.LeakyReLU(inplace=True) - - self.last = nn.Sequential() - self.last.add_module(name="Conv2d", module=nn.Conv2d(512, 1, 4, 1, 1)) - # self.last.add_module(name="sigmoid", module=nn.Sigmoid()) - - def forward(self, inp, tar): - x = torch.cat([inp, tar], dim=1) - x = self.down1(x) - x = self.down2(x) - x = self.down3(x) - - x = self.conv1(x) - x = self.batchnorm(x) - x = self.leakyrelu(x) - - x = self.last(x) - - return x diff --git a/use-cases/zebra2horse/requriements.txt b/use-cases/zebra2horse/requriements.txt deleted file mode 100644 index 79f66f84..00000000 --- a/use-cases/zebra2horse/requriements.txt +++ /dev/null @@ -1 +0,0 @@ -ray[tune] \ No newline at end of file diff --git a/use-cases/zebra2horse/startscript b/use-cases/zebra2horse/startscript deleted file mode 100644 index d4ccb4a5..00000000 --- a/use-cases/zebra2horse/startscript +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=PrototypeTest -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job.out -#SBATCH --error=job.err -#SBATCH --time=00:30:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=1 -#SBATCH --cpus-per-task=4 -#SBATCH --gpus-per-node=4 - -#SBATCH --exclusive - -# gres options have to be 
disabled for deepv -#SBATCH --gres=gpu:4 - -# load modules -ml --force purge -ml Stages/2023 StdEnv/2023 NVHPC/23.1 OpenMPI/4.1.4 cuDNN/8.6.0.163-CUDA-11.7 Python/3.10.4 HDF5 libaio/0.3.112 GCC/11.3.0 - -# shellcheck source=/dev/null -source ~/.bashrc - -# TODO: test on HPC -srun micromamba run -p ../../.venv-tf python train.py -p pipeline.yaml \ No newline at end of file diff --git a/use-cases/zebra2horse/train.py b/use-cases/zebra2horse/train.py deleted file mode 100644 index c33b9402..00000000 --- a/use-cases/zebra2horse/train.py +++ /dev/null @@ -1,22 +0,0 @@ -import argparse - -from trainer import Zebra2HorseTrainer -from dataloader import Zebra2HorseDataLoader -from itwinai.experimental.executors import LocalExecutor # , RayExecutor - - -if __name__ == "__main__": - # Create CLI Parser - parser = argparse.ArgumentParser() - parser.add_argument("-p", "--pipeline", type=str) - args = parser.parse_args() - - # Execute pipe - executor = LocalExecutor( - steps=args.pipeline, - class_dict={ - "loader": Zebra2HorseDataLoader, - "trainer": Zebra2HorseTrainer - }) - executor.setup(None) - executor.execute(None) diff --git a/use-cases/zebra2horse/trainer.py b/use-cases/zebra2horse/trainer.py deleted file mode 100644 index 1ae74896..00000000 --- a/use-cases/zebra2horse/trainer.py +++ /dev/null @@ -1,47 +0,0 @@ -from typing import List, Dict, Tuple, Optional -import tensorflow as tf -import tensorflow.keras as keras - -from itwinai.tensorflow.trainer import TensorflowTrainer -from itwinai.loggers import Logger - - -class Zebra2HorseTrainer(TensorflowTrainer): - def __init__( - self, - epochs: int, - batch_size: int, - compile_conf: Dict, - model: Dict, - logger: List[Logger], - ): - super().__init__() - # Configurable - self.logger = logger - - # Parse down the optimizers - for key in compile_conf.keys(): - compile_conf[key] = keras.optimizers.get(compile_conf[key]) - - print(model) - - super().__init__( - epochs=epochs, - batch_size=batch_size, - callbacks=[], - model_dict=model, - compile_conf=compile_conf, - strategy=tf.distribute.MirroredStrategy() - ) - - def train(self, train_dataset, validation_dataset): - super().train(train_dataset, validation_dataset) - - def execute( - self, - train_dataset, - validation_dataset, - config: Optional[Dict] = None, - ) -> Tuple[Optional[Tuple], Optional[Dict]]: - train_result = self.train(train_dataset, validation_dataset) - return (train_result,), config From fd9186cc9fc9fcbe7ecef2b5c87ea948c55635df Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 21 Mar 2024 13:37:08 +0100 Subject: [PATCH 066/171] UPDATE dist training tutorials torch --- src/itwinai/torch/distributed.py | 14 +- src/itwinai/torch/trainer.py | 6 + src/itwinai/torch/types.py | 2 + .../tutorial-0-basics/ddp_slurm.sh | 2 +- .../tutorial-0-basics/deepspeed_slurm.sh | 6 +- .../tutorial-0-basics/hvd_slurm.sh | 4 +- .../distributed-ml/tutorial-0-basics/train.py | 43 +---- .../tutorial-1-mnist/config.yaml | 2 +- .../tutorial-1-mnist/ddp_slurm.sh | 2 +- .../tutorial-1-mnist/deepspeed_slurm.sh | 4 +- .../tutorial-1-mnist/hvd_slurm.sh | 4 +- .../distributed-ml/tutorial-1-mnist/train.py | 178 +++++++++++------- 12 files changed, 146 insertions(+), 121 deletions(-) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index 4ef4900a..527a77e4 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -1,5 +1,5 @@ import abc -from typing import Any, Union, List, Dict, Optional, Tuple +from typing import Any, List, Optional, Tuple from 
pathlib import Path import json import os @@ -235,17 +235,14 @@ class DSDistributedStrategy(TorchDistributedStrategy): dictionary or a path to a JSON file. """ - config: Dict = None backend: str def __init__( self, - backend: str, - config: Union[Dict, Path, str] + backend: str ) -> None: super().__init__() self.backend = backend - self._load_config(config) def _load_config(self, ds_config): if isinstance(ds_config, (str, Path)): @@ -271,9 +268,12 @@ def init(self) -> None: def distributed( self, model: nn.Module, optimizer: Optional[Optimizer] = None, lr_scheduler: Optional[LRScheduler] = None, - model_parameters: Optional[Any] = None, **kwargs + model_parameters: Optional[Any] = None, + **kwargs ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: """Setup model, optimizer and scheduler for distributed.""" + if kwargs.get("config"): + kwargs["config"] = self._load_config(kwargs.get("config")) # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization # To prioritize optim in the config, you need to pass optim=None distrib_model, optimizer, _, lr_scheduler = deepspeed.initialize( @@ -282,7 +282,7 @@ def distributed( optimizer=optimizer, lr_scheduler=lr_scheduler, dist_init_required=True, - config=self.config + **kwargs ) return distrib_model, optimizer, lr_scheduler diff --git a/src/itwinai/torch/trainer.py b/src/itwinai/torch/trainer.py index 31794c49..141ed32f 100644 --- a/src/itwinai/torch/trainer.py +++ b/src/itwinai/torch/trainer.py @@ -26,6 +26,12 @@ from ..loggers import LogMixin, Logger, ConsoleLogger from ..utils import dynamically_import_class from ..cluster import ClusterEnvironment +from .distributed import ( + TorchDistributedStrategy, + DDPDistributedStrategy, + DSDistributedStrategy, + HVDDistributedStrategy +) def preproc_dataloader(dataloader: DataLoader, gwsize, grank): diff --git a/src/itwinai/torch/types.py b/src/itwinai/torch/types.py index 6f6e5c9f..614462ad 100644 --- a/src/itwinai/torch/types.py +++ b/src/itwinai/torch/types.py @@ -42,6 +42,8 @@ class TorchDistributedStrategy(BaseEnum): DEFAULT = None NONE = None DDP = 'ddp' + HVD = 'horovod' + DS = 'deepspeed' class TorchLoss(BaseEnum): diff --git a/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh index 301a901b..1b53f04c 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh @@ -11,7 +11,7 @@ # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=4 +#SBATCH --nodes=2 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=32 #SBATCH --gpus-per-node=4 diff --git a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh index 570159a5..b12009de 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh @@ -11,7 +11,7 @@ # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=4 +#SBATCH --nodes=2 #SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=4 #SBATCH --gpus-per-node=4 @@ -60,7 +60,9 @@ export MASTER_PORT=29500 TRAINING_CMD="train.py -s deepspeed" # Run without launcher: set --ntasks-per-node=NUM_GPUS -srun --cpu-bind=none python -u "$TRAINING_CMD" --deepspeed +srun --cpu-bind=none python -u $TRAINING_CMD #--deepspeed + +# srun pwd # # Run with deepspeed launcher: set --ntasks-per-node=1 # # 
https://www.deepspeed.ai/getting-started/#multi-node-environment-variables diff --git a/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh index 91becb61..a2a06e6c 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh +++ b/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh @@ -11,7 +11,7 @@ # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=4 +#SBATCH --nodes=2 #SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=8 #SBATCH --gpus-per-node=4 @@ -56,5 +56,5 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training TRAINING_CMD="train.py -s horovod" -srun --cpu-bind=none python -u "$TRAINING_CMD" +srun --cpu-bind=none python -u $TRAINING_CMD diff --git a/tutorials/distributed-ml/tutorial-0-basics/train.py b/tutorials/distributed-ml/tutorial-0-basics/train.py index 3a19fdf5..614b56e4 100644 --- a/tutorials/distributed-ml/tutorial-0-basics/train.py +++ b/tutorials/distributed-ml/tutorial-0-basics/train.py @@ -1,30 +1,6 @@ """ -Show how to use DDP, Horovod and DeepSpeed strategies interchangeably. -Depending on the strategy you choose, you need to run this script with -different ad-hoc commands: - -Torch DistributedDataParallel (DDP). Launch from terminal with torchrun: ->>> micromamba run -p ../../.venv-pytorch/ torchrun \ - --rdzv_backend=c10d \ - --rdzv_endpoint=localhost:0 \ - --nnodes=1 \ - --nproc_per_node=4 \ - train.py -s ddp -with SLURM: ->>> sbatch ddp_slurm.sh - -DeepSpeed. Launch from terminal with deepspeed: ->>> micromamba run -p ../../.venv-pytorch/ deepspeed \ - train.py -s deepspeed -with SLURM: ->>> sbatch deepSpeed_slurm.sh - -Horovod. Only works with SLURM: ->>> sbatch horovod_slurm.sh - -Horovod. Launch with horovodrun (NOT WORKING YET): ->>> micromamba run -p ../../.venv-pytorch/ horovodrun -np 4 \ - python train.py -s horovod +Show how to use DDP, Horovod and DeepSpeed strategies interchangeably +with an extremely simple neural network. 
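+
+The distributed strategy is selected with the -s/--strategy flag
+(ddp, horovod or deepspeed); the SLURM scripts in this folder show
+example launches, e.g.:
+
+>>> sbatch ddp_slurm.sh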
""" from typing import Any import os @@ -60,8 +36,6 @@ def parse_args() -> argparse.Namespace: help='local rank passed from distributed launcher') parser = deepspeed.add_config_arguments(parser) args = parser.parse_args() - # os.environ['LOCAL_RANK'] = str(args.local_rank) # may not be needed - return args @@ -96,9 +70,10 @@ def trainer_entrypoint_fn( optim = torch.optim.Adam(model.parameters(), lr=1e-3) loss_fn = nn.MSELoss() # Distributed model - # model_engine: ModelEngine = strategy.distributed(model, optim) + deepspeed_config = dict(train_batch_size=32) + # 'config_params' key is ignored if strategy != DSDistributedStrategy model, optim, lr_sched = strategy.distributed( - model, optim, lr_scheduler=None + model, optim, lr_scheduler=None, config_params=deepspeed_config ) # Data @@ -117,10 +92,6 @@ def trainer_entrypoint_fn( # Device allocated for this worker device = strategy.dist_device() - print(f" DEVICES: DS={model.device}, " - f"TORCH.DIST={strategy.dist_device()}, " - f"ENV={os.environ['LOCAL_RANK']}") - for epoch in range(2): for (x, y) in train_loader: # print(f"tensor to cuda:{device}") @@ -163,9 +134,7 @@ def trainer_entrypoint_fn( elif args.strategy == 'horovod': strategy = HVDDistributedStrategy() elif args.strategy == 'deepspeed': - strategy = DSDistributedStrategy( - backend='nccl', config=dict(train_batch_size=32) - ) + strategy = DSDistributedStrategy(backend='nccl') else: raise NotImplementedError( f"Strategy {args.strategy} is not recognized/implemented.") diff --git a/tutorials/distributed-ml/tutorial-1-mnist/config.yaml b/tutorials/distributed-ml/tutorial-1-mnist/config.yaml index 4cbf7354..cb221dec 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/config.yaml +++ b/tutorials/distributed-ml/tutorial-1-mnist/config.yaml @@ -6,7 +6,7 @@ verbose: True # Model batch_size: 64 -epochs: 10 +epochs: 2 lr: 0.001 concM: 100 momentum: 0.5 diff --git a/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh index 2ca6297d..3d5d4bb3 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh @@ -11,7 +11,7 @@ # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=4 +#SBATCH --nodes=2 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=32 #SBATCH --gpus-per-node=4 diff --git a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh index e7d6b030..8e5f7881 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh @@ -13,7 +13,7 @@ #SBATCH --partition=batch #SBATCH --nodes=2 #SBATCH --ntasks-per-node=4 -#SBATCH --cpus-per-task=32 +#SBATCH --cpus-per-task=4 #SBATCH --gpus-per-node=4 # SBATCH --exclusive @@ -60,7 +60,7 @@ export MASTER_PORT=29500 TRAINING_CMD="train.py -s deepspeed -c config.yaml" # Run without launcher: set --ntasks-per-node=NUM_GPUS -srun --cpu-bind=none python -u "$TRAINING_CMD" --deepspeed +srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed # # Run with deepspeed launcher: set --ntasks-per-node=1 # # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables diff --git a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh index db2ee480..3774b6e1 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh +++ b/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh 
@@ -11,7 +11,7 @@ # configure node and process count on the CM #SBATCH --partition=batch -#SBATCH --nodes=4 +#SBATCH --nodes=2 #SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=8 #SBATCH --gpus-per-node=4 @@ -56,5 +56,5 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training TRAINING_CMD="train.py -s horovod -c config.yaml" -srun --cpu-bind=none python -u "$TRAINING_CMD" +srun --cpu-bind=none python -u $TRAINING_CMD diff --git a/tutorials/distributed-ml/tutorial-1-mnist/train.py b/tutorials/distributed-ml/tutorial-1-mnist/train.py index 975be604..365a9048 100644 --- a/tutorials/distributed-ml/tutorial-1-mnist/train.py +++ b/tutorials/distributed-ml/tutorial-1-mnist/train.py @@ -1,5 +1,7 @@ """ -TODO: add description +Show how to use DDP, Horovod and DeepSpeed strategies interchangeably +with a simple neural network trained on MNIST dataset, showing how +to use checkpoints. """ import os import argparse @@ -18,16 +20,22 @@ import deepspeed from itwinai.torch.distributed import ( - # TorchDistributedStrategy, + TorchDistributedStrategy, DDPDistributedStrategy, HVDDistributedStrategy, DSDistributedStrategy, ) -from itwinai.parser import ArgumentParser +from itwinai.parser import ArgumentParser as ItAIArgumentParser def parse_args() -> argparse.Namespace: - parser = ArgumentParser(description='PyTorch MNIST Example') + """ + Parse CLI args, which can also be loaded from a configuration file + using the --config flag: + + >>> train.py --strategy ddp --config config.yaml + """ + parser = ItAIArgumentParser(description='PyTorch MNIST Example') # Distributed ML strategy parser.add_argument( @@ -96,6 +104,10 @@ def parse_args() -> argparse.Namespace: class Net(nn.Module): + """ + Simple neural network classifier for MNIST images. + """ + def __init__(self): super(Net, self).__init__() self.conv1 = nn.Conv2d(1, 10, kernel_size=5) @@ -111,17 +123,21 @@ def forward(self, x): x = F.relu(self.fc1(x)) x = F.dropout(x, training=self.training) x = self.fc2(x) - return F.log_softmax(x) - - -# train loop + return F.log_softmax(x, dim=-1) -def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): +def train( + model, device, train_loader, optimizer, epoch, + strategy: TorchDistributedStrategy, args +): + """ + Training function, representing an epoch. + """ model.train() t_list = [] loss_acc = 0 - if grank == 0: + gwsize = strategy.dist_gwsize() + if strategy.is_main_worker(): print("\n") for batch_idx, (data, target) in enumerate(train_loader): t = time.perf_counter() @@ -131,7 +147,7 @@ def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): loss = F.nll_loss(output, target) loss.backward() optimizer.step() - if batch_idx % args.log_int == 0 and grank == 0: + if batch_idx % args.log_int == 0 and strategy.is_main_worker(): print( f'Train epoch: {epoch} ' f'[{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' @@ -139,17 +155,19 @@ def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): f'Loss: {loss.item():.6f}') t_list.append(time.perf_counter() - t) loss_acc += loss.item() - if grank == 0: + if strategy.is_main_worker(): print('TIMER: train time', sum(t_list) / len(t_list), 's') return loss_acc -# test loop - -def test(model, device, test_loader, grank, gwsize, args): +def test(model, device, test_loader, strategy: TorchDistributedStrategy): + """ + Model validation. 
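+
+    Evaluates the model on this worker's shard of the test set and
+    returns the accuracy; only the main worker prints the summary.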
+ """ model.eval() test_loss = 0 correct = 0 + gwsize = strategy.dist_gwsize() with torch.no_grad(): for data, target in test_loader: data, target = data.to(device), target.to(device) @@ -160,7 +178,7 @@ def test(model, device, test_loader, grank, gwsize, args): pred = output.argmax(dim=1, keepdim=True) correct += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) - if grank == 0: + if strategy.is_main_worker(): print( f'Test set: average loss: {test_loss:.4f}\t' f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') @@ -168,11 +186,14 @@ def test(model, device, test_loader, grank, gwsize, args): return acc_test -# save state of the training def save_state( epoch, distrib_model, loss_acc, optimizer, - res_name, grank, gwsize, is_best, strategy + res_name, is_best, strategy: TorchDistributedStrategy ): + """ + Save training state. + """ + grank = strategy.dist_grank() rt = time.time() # find if is_best happened in any worker if torch.cuda.is_available(): @@ -208,31 +229,41 @@ def save_state( f'{time.time()-rt} s') -# deterministic dataloader def seed_worker(worker_id): + """ + Seed dataloader worker. + """ worker_seed = torch.initial_seed() % 2**32 np.random.seed(worker_seed) random.seed(worker_seed) +def download_mnist(): + """ + Use built-in torch datasets functions to pull MNIST dataset. + """ + + _ = datasets.MNIST( + args.data_dir, train=True, download=True, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + _ = datasets.MNIST( + args.data_dir, train=False, download=True, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + + if __name__ == "__main__": args = parse_args() if args.download_only: # Download datasets and exit - _ = datasets.MNIST( - args.data_dir, train=True, download=True, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - _ = datasets.MNIST( - args.data_dir, train=False, download=True, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) + download_mnist() sys.exit() # Instantiate Strategy @@ -245,10 +276,7 @@ def seed_worker(worker_id): elif args.strategy == 'horovod': strategy = HVDDistributedStrategy() elif args.strategy == 'deepspeed': - strategy = DSDistributedStrategy( - backend=args.backend, - config=dict(train_batch_size=args.batch_size) - ) + strategy = DSDistributedStrategy(backend=args.backend) else: raise NotImplementedError( f"Strategy {args.strategy} is not recognized/implemented.") @@ -276,20 +304,18 @@ def seed_worker(worker_id): if torch.cuda.is_available(): # local world size - per node lwsize = strategy.dist_lwsize() if args.cuda else 0 - gwsize = strategy.dist_gwsize() # global world size - per run - grank = strategy.dist_grank() # global rank - assign per run + gwsize = strategy.dist_gwsize() # global world size - per run + grank = strategy.dist_grank() # global rank - assign per run lrank = strategy.dist_lrank() # local rank - assign per node else: gwsize = 1 grank = 0 # some debug - if grank == 0: + if strategy.is_main_worker(): print('TIMER: initialise:', time.time()-st, 's') - # encapsulate the model on the GPU assigned to the current process - # device = torch.device( - # 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) + # move the model on the GPU assigned to the current process device = torch.device( strategy.dist_device() if args.cuda and 
torch.cuda.is_available() else 'cpu') @@ -361,7 +387,7 @@ def seed_worker(worker_id): test_loader = DataLoader( test_dataset, batch_size=args.batch_size) - if grank == 0: + if strategy.is_main_worker(): print('TIMER: read and concat data:', time.time()-st, 's') # create CNN model @@ -371,14 +397,12 @@ def seed_worker(worker_id): optimizer = torch.optim.SGD( model.parameters(), lr=args.lr, momentum=args.momentum) + deepspeed_config = dict(train_batch_size=args.batch_size) + # 'config_params' key is ignored if strategy != DSDistributedStrategy distrib_model, optimizer, _ = strategy.distributed( - model, optimizer, lr_scheduler=None + model, optimizer, lr_scheduler=None, config_params=deepspeed_config ) - print(f" DEVICES: DS={distrib_model.device}, " - f"TORCH.DIST={strategy.dist_device()}, " - f"ENV={os.environ['LOCAL_RANK']}") - # resume state start_epoch = 1 best_acc = np.Inf @@ -399,13 +423,13 @@ def seed_worker(worker_id): distrib_model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) if torch.cuda.is_available(): - if grank == 0: + if strategy.is_main_worker(): print(f'WARNING: restarting from {start_epoch} epoch') else: print(f'WARNING: restarting from {start_epoch} epoch') except Exception: if torch.cuda.is_available(): - if grank == 0: + if strategy.is_main_worker(): print('WARNING: restart file cannot be loaded, ' 'restarting!') else: @@ -413,7 +437,7 @@ def seed_worker(worker_id): if start_epoch > args.epochs: if torch.cuda.is_available(): - if grank == 0: + if strategy.is_main_worker(): print('WARNING: given epochs are less than the one in the ' 'restart file!\n' 'WARNING: SYS.EXIT is issued') @@ -427,7 +451,7 @@ def seed_worker(worker_id): sys.exit() # start trainin/testing loop - if grank == 0: + if strategy.is_main_worker(): print('TIMER: broadcast:', time.time()-st, 's') print('\nDEBUG: start training') print('--------------------------------------------------------') @@ -436,12 +460,23 @@ def seed_worker(worker_id): for epoch in range(start_epoch, args.epochs + 1): lt = time.time() # training - loss_acc = train(distrib_model, device, train_loader, - optimizer, epoch, grank, gwsize, args) + loss_acc = train( + model=distrib_model, + device=device, + train_loader=train_loader, + optimizer=optimizer, + epoch=epoch, + strategy=strategy, + args=args + ) # testing - acc_test = test(distrib_model, device, - test_loader, grank, gwsize, args) + acc_test = test( + model=distrib_model, + device=device, + test_loader=test_loader, + strategy=strategy + ) # save first epoch timer if epoch == start_epoch: @@ -452,27 +487,39 @@ def seed_worker(worker_id): train_loader.last_epoch = True test_loader.last_epoch = True - if grank == 0: + if strategy.is_main_worker(): print('TIMER: epoch time:', time.time()-lt, 's') print('DEBUG: accuracy:', acc_test, '%') # save state if found a better state is_best = loss_acc < best_acc if epoch % args.restart_int == 0: - save_state(epoch, distrib_model, loss_acc, optimizer, - res_name, grank, gwsize, is_best, strategy) + save_state( + epoch=epoch, + distrib_model=distrib_model, + loss_acc=loss_acc, + optimizer=optimizer, + res_name=res_name, + is_best=is_best, + strategy=strategy + ) # reset best_acc best_acc = min(loss_acc, best_acc) # finalise # save final state - save_state(epoch, distrib_model, loss_acc, optimizer, - res_name, grank, gwsize, True, strategy) - # if torch.cuda.is_available(): - # dist.barrier() + save_state( + epoch=epoch, + distrib_model=distrib_model, + loss_acc=loss_acc, + optimizer=optimizer, 
+ res_name=res_name, + is_best=True, + strategy=strategy + ) # some debug - if grank == 0: + if strategy.is_main_worker(): print('\n--------------------------------------------------------') print('DEBUG: training results:\n') print('TIMER: first epoch time:', first_ep_t, ' s') @@ -491,10 +538,9 @@ def seed_worker(worker_id): print('DEBUG: memory summary:\n\n', torch.cuda.memory_summary(0)) if args.cuda else '' - if grank == 0: + if strategy.is_main_worker(): print(f'TIMER: final time: {time.time()-st} s\n') + print(f" - TRAINING FINISHED") strategy.clean_up() - - print("TRAINING FINISHED") sys.exit() From 447c88ef3fd0885d9abb7e75218cd0e8f856c091 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 22 Mar 2024 15:51:21 +0100 Subject: [PATCH 067/171] RENAME folders with torch --- .../{tutorial-0-basics => torch-tutorial-0-basics}/README.md | 0 .../{tutorial-0-basics => torch-tutorial-0-basics}/ddp_slurm.sh | 0 .../deepspeed_slurm.sh | 0 .../{tutorial-0-basics => torch-tutorial-0-basics}/hvd_slurm.sh | 0 .../{tutorial-0-basics => torch-tutorial-0-basics}/runall.sh | 0 .../{tutorial-0-basics => torch-tutorial-0-basics}/train.py | 0 .../{tutorial-1-mnist => torch-tutorial-1-mnist}/README.md | 0 .../{tutorial-1-mnist => torch-tutorial-1-mnist}/config.yaml | 0 .../{tutorial-1-mnist => torch-tutorial-1-mnist}/ddp_slurm.sh | 0 .../deepspeed_slurm.sh | 0 .../{tutorial-1-mnist => torch-tutorial-1-mnist}/hvd_slurm.sh | 0 .../{tutorial-1-mnist => torch-tutorial-1-mnist}/runall.sh | 0 .../{tutorial-1-mnist => torch-tutorial-1-mnist}/train.py | 0 13 files changed, 0 insertions(+), 0 deletions(-) rename tutorials/distributed-ml/{tutorial-0-basics => torch-tutorial-0-basics}/README.md (100%) rename tutorials/distributed-ml/{tutorial-0-basics => torch-tutorial-0-basics}/ddp_slurm.sh (100%) rename tutorials/distributed-ml/{tutorial-0-basics => torch-tutorial-0-basics}/deepspeed_slurm.sh (100%) rename tutorials/distributed-ml/{tutorial-0-basics => torch-tutorial-0-basics}/hvd_slurm.sh (100%) rename tutorials/distributed-ml/{tutorial-0-basics => torch-tutorial-0-basics}/runall.sh (100%) rename tutorials/distributed-ml/{tutorial-0-basics => torch-tutorial-0-basics}/train.py (100%) rename tutorials/distributed-ml/{tutorial-1-mnist => torch-tutorial-1-mnist}/README.md (100%) rename tutorials/distributed-ml/{tutorial-1-mnist => torch-tutorial-1-mnist}/config.yaml (100%) rename tutorials/distributed-ml/{tutorial-1-mnist => torch-tutorial-1-mnist}/ddp_slurm.sh (100%) rename tutorials/distributed-ml/{tutorial-1-mnist => torch-tutorial-1-mnist}/deepspeed_slurm.sh (100%) rename tutorials/distributed-ml/{tutorial-1-mnist => torch-tutorial-1-mnist}/hvd_slurm.sh (100%) rename tutorials/distributed-ml/{tutorial-1-mnist => torch-tutorial-1-mnist}/runall.sh (100%) rename tutorials/distributed-ml/{tutorial-1-mnist => torch-tutorial-1-mnist}/train.py (100%) diff --git a/tutorials/distributed-ml/tutorial-0-basics/README.md b/tutorials/distributed-ml/torch-tutorial-0-basics/README.md similarity index 100% rename from tutorials/distributed-ml/tutorial-0-basics/README.md rename to tutorials/distributed-ml/torch-tutorial-0-basics/README.md diff --git a/tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh b/tutorials/distributed-ml/torch-tutorial-0-basics/ddp_slurm.sh similarity index 100% rename from tutorials/distributed-ml/tutorial-0-basics/ddp_slurm.sh rename to tutorials/distributed-ml/torch-tutorial-0-basics/ddp_slurm.sh diff --git a/tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh 
b/tutorials/distributed-ml/torch-tutorial-0-basics/deepspeed_slurm.sh similarity index 100% rename from tutorials/distributed-ml/tutorial-0-basics/deepspeed_slurm.sh rename to tutorials/distributed-ml/torch-tutorial-0-basics/deepspeed_slurm.sh diff --git a/tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh b/tutorials/distributed-ml/torch-tutorial-0-basics/hvd_slurm.sh similarity index 100% rename from tutorials/distributed-ml/tutorial-0-basics/hvd_slurm.sh rename to tutorials/distributed-ml/torch-tutorial-0-basics/hvd_slurm.sh diff --git a/tutorials/distributed-ml/tutorial-0-basics/runall.sh b/tutorials/distributed-ml/torch-tutorial-0-basics/runall.sh similarity index 100% rename from tutorials/distributed-ml/tutorial-0-basics/runall.sh rename to tutorials/distributed-ml/torch-tutorial-0-basics/runall.sh diff --git a/tutorials/distributed-ml/tutorial-0-basics/train.py b/tutorials/distributed-ml/torch-tutorial-0-basics/train.py similarity index 100% rename from tutorials/distributed-ml/tutorial-0-basics/train.py rename to tutorials/distributed-ml/torch-tutorial-0-basics/train.py diff --git a/tutorials/distributed-ml/tutorial-1-mnist/README.md b/tutorials/distributed-ml/torch-tutorial-1-mnist/README.md similarity index 100% rename from tutorials/distributed-ml/tutorial-1-mnist/README.md rename to tutorials/distributed-ml/torch-tutorial-1-mnist/README.md diff --git a/tutorials/distributed-ml/tutorial-1-mnist/config.yaml b/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml similarity index 100% rename from tutorials/distributed-ml/tutorial-1-mnist/config.yaml rename to tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml diff --git a/tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh b/tutorials/distributed-ml/torch-tutorial-1-mnist/ddp_slurm.sh similarity index 100% rename from tutorials/distributed-ml/tutorial-1-mnist/ddp_slurm.sh rename to tutorials/distributed-ml/torch-tutorial-1-mnist/ddp_slurm.sh diff --git a/tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-tutorial-1-mnist/deepspeed_slurm.sh similarity index 100% rename from tutorials/distributed-ml/tutorial-1-mnist/deepspeed_slurm.sh rename to tutorials/distributed-ml/torch-tutorial-1-mnist/deepspeed_slurm.sh diff --git a/tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh b/tutorials/distributed-ml/torch-tutorial-1-mnist/hvd_slurm.sh similarity index 100% rename from tutorials/distributed-ml/tutorial-1-mnist/hvd_slurm.sh rename to tutorials/distributed-ml/torch-tutorial-1-mnist/hvd_slurm.sh diff --git a/tutorials/distributed-ml/tutorial-1-mnist/runall.sh b/tutorials/distributed-ml/torch-tutorial-1-mnist/runall.sh similarity index 100% rename from tutorials/distributed-ml/tutorial-1-mnist/runall.sh rename to tutorials/distributed-ml/torch-tutorial-1-mnist/runall.sh diff --git a/tutorials/distributed-ml/tutorial-1-mnist/train.py b/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py similarity index 100% rename from tutorials/distributed-ml/tutorial-1-mnist/train.py rename to tutorials/distributed-ml/torch-tutorial-1-mnist/train.py From a65944aac575b4562948a9cb7f4cfd6ebbee9db1 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 22 Mar 2024 16:48:49 +0100 Subject: [PATCH 068/171] DRAFT torch imagenet tutorial --- .../torch-tutorial-2-imagenet/README.md | 47 ++ .../torch-tutorial-2-imagenet/config.yaml | 24 + .../torch-tutorial-2-imagenet/ddp_slurm.sh | 66 +++ .../deepspeed_slurm.sh | 74 +++ .../torch-tutorial-2-imagenet/hvd_slurm.sh | 60 +++ 
.../torch-tutorial-2-imagenet/runall.sh | 6 + .../torch-tutorial-2-imagenet/train.py | 486 ++++++++++++++++++ 7 files changed, 763 insertions(+) create mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/README.md create mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml create mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh create mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh create mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh create mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh create mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/README.md b/tutorials/distributed-ml/torch-tutorial-2-imagenet/README.md new file mode 100644 index 00000000..780eb278 --- /dev/null +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/README.md @@ -0,0 +1,47 @@ +# Tutorial: distributed strategies for PyTorch model trained on MNIST dataset + +In this tutorial we show how to use torch `DistributedDataParallel` (DDP), Horovod and +DeepSpeed from the same client code. +Note that the environment is tested on the HDFML system at JSC. For other systems, +the module versions might need change accordingly. + +## Setup + +First, from the root of this repository, build the environment containing +pytorch, horovod and deepspeed. You can *try* with: + +```bash +# Creates a Python venv called envAI_hdfml +make torch-gpu-jsc +``` + +The Imagenet dataset is assumed to be already downloaded to some location. + +## Distributed training + +Each distributed strategy has its own SLURM job script, which +should be used to run it: + +If you want to distribute the code in `train.py` with **torch DDP**, run from terminal: + +```bash +sbatch ddp_slurm.sh +``` + +If you want to distribute the code in `train.py` with **DeepSpeed**, run from terminal: + +```bash +sbatch deepspeed_slurm.sh +``` + +If you want to distribute the code in `train.py` with **Horovod**, run from terminal: + +```bash +sbatch hvd_slurm.sh +``` + +You can run all of them with: + +```bash +bash runall.sh +``` diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml new file mode 100644 index 00000000..fe01e6e7 --- /dev/null +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml @@ -0,0 +1,24 @@ +# I/O +data_dir: /p/largedata2/raise/ImageNet_uncompressed +restart_int: 10 +verbose: True + +# Model +batch_size: 64 +epochs: 2 +lr: 0.001 +momentum: 0.5 +shuff: False + +# Debugging +testrun: False +nseed: 10 +log_int: 10 + +# Distributed ML +backend: nccl +nworker: 4 # num workers dataloader +prefetch: 2 +no_cuda: False + + diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh new file mode 100644 index 00000000..3d5d4bb3 --- /dev/null +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=Torch_DDP_tutorial-1 +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job-ddp.out +#SBATCH --error=job-ddp.err +#SBATCH --time=00:30:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gpus-per-node=4 
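+
+# NOTE: with --nodes=2, --ntasks-per-node=1 and --gpus-per-node=4 this job
+# trains on 8 GPUs in total; the torchrun command below spawns
+# $SLURM_GPUS_PER_NODE processes per node.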
+# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../envAI_hdfml/bin/activate + +# job info +debug=false +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +# launch training +TRAINING_CMD="train.py -s ddp -c config.yaml" + +srun --cpu-bind=none bash -c "torchrun \ + --log_dir='logs' \ + --nnodes=$SLURM_NNODES \ + --nproc_per_node=$SLURM_GPUS_PER_NODE \ + --rdzv_id=$SLURM_JOB_ID \ + --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ + --rdzv_backend=c10d \ + --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ + $TRAINING_CMD" + diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh new file mode 100644 index 00000000..8e5f7881 --- /dev/null +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=Torch_DeepSpeed_tutorial-1 +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job-ds.out +#SBATCH --error=job-ds.err +#SBATCH --time=00:30:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=4 +#SBATCH --cpus-per-task=4 +#SBATCH --gpus-per-node=4 +# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../envAI_hdfml/bin/activate + +# job info +debug=false +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set env vars +export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +# launch training +MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i +export MASTER_ADDR +export MASTER_PORT=29500 + +TRAINING_CMD="train.py -s deepspeed -c config.yaml" + +# Run without launcher: set --ntasks-per-node=NUM_GPUS +srun 
--cpu-bind=none python -u $TRAINING_CMD --deepspeed + +# # Run with deepspeed launcher: set --ntasks-per-node=1 +# # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables +# export NCCL_IB_DISABLE=1 +# export NCCL_SOCKET_IFNAME=eth0 +# nodelist=$(scontrol show hostname $SLURM_NODELIST) +# echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile +# # Requires passwordless SSH access among compute node +# srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed +# rm .hostfile + diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh new file mode 100644 index 00000000..3774b6e1 --- /dev/null +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=Torch_HVD_tutorial-1 +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job-hvd.out +#SBATCH --error=job-hvd.err +#SBATCH --time=00:30:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=4 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=4 +# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../envAI_hdfml/bin/activate + +# job info +debug=false +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set vars +# export NCCL_DEBUG=INFO +export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +# launch training +TRAINING_CMD="train.py -s horovod -c config.yaml" + +srun --cpu-bind=none python -u $TRAINING_CMD + diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh new file mode 100644 index 00000000..b1470d75 --- /dev/null +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Run all versions of distributed ML for MNIST +rm *checkpoint.pth.tar *.out *.err +echo "Torch DDP training: $(sbatch ddp_slurm.sh)" +echo "DeepSpeed training: $(sbatch deepspeed_slurm.sh)" +echo "Horovod training: $(sbatch hvd_slurm.sh)" \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py new file mode 100644 index 00000000..5f8b7e98 --- /dev/null +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py @@ -0,0 +1,486 @@ +""" +Show how to use DDP, Horovod and DeepSpeed strategies interchangeably +with a large neural network trained on Imagenet dataset, showing how +to use checkpoints. 
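+
+The ImageNet location is read from --data-dir (see config.yaml), the
+strategy is selected with -s/--strategy, and the SLURM scripts in this
+folder (ddp_slurm.sh, deepspeed_slurm.sh, hvd_slurm.sh) show example
+launches.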
+""" +import os +import argparse +import sys +import time +import numpy as np +import random + +import torch +import torch.distributed as dist +import torch.nn.functional as F +import torchvision +from torchvision import transforms +from torch.utils.data import DataLoader, DistributedSampler + +import deepspeed + +from itwinai.torch.distributed import ( + TorchDistributedStrategy, + DDPDistributedStrategy, + HVDDistributedStrategy, + DSDistributedStrategy, +) +from itwinai.parser import ArgumentParser as ItAIArgumentParser + + +def parse_args() -> argparse.Namespace: + """ + Parse CLI args, which can also be loaded from a configuration file + using the --config flag: + + >>> train.py --strategy ddp --config config.yaml + """ + parser = ItAIArgumentParser(description='PyTorch MNIST Example') + + # Distributed ML strategy + parser.add_argument( + "--strategy", "-s", type=str, + choices=['ddp', 'horovod', 'deepspeed'], + default='ddp' + ) + + # IO parsers + parser.add_argument('--data-dir', default='./', + help=('location of the training dataset in the local ' + 'filesystem')) + parser.add_argument('--restart-int', type=int, default=10, + help='restart interval per epoch (default: 10)') + parser.add_argument('--verbose', + action=argparse.BooleanOptionalAction, + help='Print parsed arguments') + + # model parsers + parser.add_argument('--batch-size', type=int, default=64, + help='input batch size for training (default: 64)') + parser.add_argument('--epochs', type=int, default=10, + help='number of epochs to train (default: 10)') + parser.add_argument('--lr', type=float, default=0.01, + help='learning rate (default: 0.01)') + parser.add_argument('--momentum', type=float, default=0.5, + help='momentum in SGD optimizer (default: 0.5)') + parser.add_argument('--shuff', action='store_true', default=False, + help='shuffle dataset (default: False)') + + # debug parsers + parser.add_argument('--testrun', action='store_true', default=False, + help='do a test run with seed (default: False)') + parser.add_argument('--nseed', type=int, default=0, + help='seed integer for reproducibility (default: 0)') + parser.add_argument('--log-int', type=int, default=10, + help='log interval per training') + + # parallel parsers + parser.add_argument('--backend', type=str, default='nccl', + help='backend for parrallelisation (default: nccl)') + parser.add_argument('--nworker', type=int, default=0, + help=('number of workers in DataLoader (default: 0 -' + ' only main)')) + parser.add_argument('--prefetch', type=int, default=2, + help='prefetch data in DataLoader (default: 2)') + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables GPGPUs') + parser.add_argument('--local_rank', type=int, default=-1, + help='local rank passed from distributed launcher') + + # DeepSpeed + parser = deepspeed.add_config_arguments(parser) + args = parser.parse_args() + + if args.verbose: + args_list = [f"{key}: {val}" for key, val in args.items()] + print("PARSED ARGS:\n", '\n'.join(args_list)) + + return args + + +def train( + model, device, train_loader, optimizer, epoch, + strategy: TorchDistributedStrategy, args +): + """ + Training function, representing an epoch. 
+ """ + model.train() + t_list = [] + loss_acc = 0 + gwsize = strategy.dist_gwsize() + if strategy.is_main_worker(): + print("\n") + for batch_idx, (data, target) in enumerate(train_loader): + t = time.perf_counter() + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_int == 0 and strategy.is_main_worker(): + print( + f'Train epoch: {epoch} ' + f'[{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' + f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\t' + f'Loss: {loss.item():.6f}') + t_list.append(time.perf_counter() - t) + loss_acc += loss.item() + if strategy.is_main_worker(): + print('TIMER: train time', sum(t_list) / len(t_list), 's') + return loss_acc + + +def test(model, device, test_loader, strategy: TorchDistributedStrategy): + """ + Model validation. + """ + model.eval() + test_loss = 0 + correct = 0 + gwsize = strategy.dist_gwsize() + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + # sum up batch loss + test_loss += F.nll_loss(output, target, reduction="sum").item() + # get the index of the max log-probability + pred = output.argmax(dim=1, keepdim=True) + correct += pred.eq(target.view_as(pred)).sum().item() + test_loss /= len(test_loader.dataset) + if strategy.is_main_worker(): + print( + f'Test set: average loss: {test_loss:.4f}\t' + f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') + acc_test = 100.0 * correct * gwsize / len(test_loader.dataset) + return acc_test + + +def save_state( + epoch, distrib_model, loss_acc, optimizer, + res_name, is_best, strategy: TorchDistributedStrategy +): + """ + Save training state. + """ + grank = strategy.dist_grank() + rt = time.time() + # find if is_best happened in any worker + if torch.cuda.is_available(): + is_best_m = strategy.par_allgather_obj(is_best) + + if torch.cuda.is_available(): + if any(is_best_m): + # find which rank is_best happened - select first rank if multiple + is_best_rank = np.where(np.array(is_best_m))[0][0] + + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} + + # write on worker with is_best + if grank == is_best_rank: + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} ' + f'in {time.time()-rt} s') + else: + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} + + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} in ' + f'{time.time()-rt} s') + + +def seed_worker(worker_id): + """ + Seed dataloader worker. 
+ """ + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + + +if __name__ == "__main__": + + args = parse_args() + + # Instantiate Strategy + if args.strategy == 'ddp': + if (not torch.cuda.is_available() + or not torch.cuda.device_count() > 1): + raise RuntimeError('Resources unavailable') + + strategy = DDPDistributedStrategy(backend=args.backend) + elif args.strategy == 'horovod': + strategy = HVDDistributedStrategy() + elif args.strategy == 'deepspeed': + strategy = DSDistributedStrategy(backend=args.backend) + else: + raise NotImplementedError( + f"Strategy {args.strategy} is not recognized/implemented.") + strategy.init() + + # check CUDA availability + args.cuda = not args.no_cuda and torch.cuda.is_available() + + # limit # of CPU threads to be used per worker + torch.set_num_threads(1) + + # get directory + program_dir = os.getcwd() + + # start the time.time for profiling + st = time.time() + + # deterministic testrun + if args.testrun: + torch.manual_seed(args.nseed) + g = torch.Generator() + g.manual_seed(args.nseed) + + # get job rank info - rank==0 master gpu + if torch.cuda.is_available(): + # local world size - per node + lwsize = strategy.dist_lwsize() if args.cuda else 0 + gwsize = strategy.dist_gwsize() # global world size - per run + grank = strategy.dist_grank() # global rank - assign per run + lrank = strategy.dist_lrank() # local rank - assign per node + else: + gwsize = 1 + grank = 0 + + # some debug + if strategy.is_main_worker(): + print('TIMER: initialise:', time.time()-st, 's') + + # move the model on the GPU assigned to the current process + device = torch.device( + strategy.dist_device() if args.cuda and torch.cuda.is_available() + else 'cpu') + if args.cuda: + torch.cuda.set_device(lrank) + # deterministic testrun + if args.testrun: + torch.cuda.manual_seed(args.nseed) + + # dataset + # Initialize transformations for data augmentation + transform = transforms.Compose([ + transforms.Resize(256), + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=45), + transforms.ColorJitter( + brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + + # Load the ImageNet Object Localization Challenge dataset + train_dataset = torchvision.datasets.ImageFolder( + root=args.data_dir, + transform=transform + ) + test_dataset = ... 
+ + # restricts data loading to a subset of the dataset exclusive to the + # current process + args.shuff = args.shuff and not args.testrun + if torch.cuda.is_available(): + train_sampler = DistributedSampler( + train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) + test_sampler = DistributedSampler( + test_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) + # distribute dataset to workers + # persistent workers is not possible for nworker=0 + pers_w = True if args.nworker > 1 else False + + # deterministic testrun - the same dataset each run + kwargs = {'worker_init_fn': seed_worker, + 'generator': g} if args.testrun else {} + + if torch.cuda.is_available(): + train_loader = DataLoader( + train_dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs + ) + test_loader = DataLoader( + test_dataset, batch_size=args.batch_size, + sampler=test_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs + ) + else: + train_loader = DataLoader( + train_dataset, batch_size=args.batch_size) + test_loader = DataLoader( + test_dataset, batch_size=args.batch_size) + + if strategy.is_main_worker(): + print('TIMER: read and concat data:', time.time()-st, 's') + + # create CNN model + model = torchvision.models.resnet101(pretrained=False) + + # optimizer + optimizer = torch.optim.SGD( + model.parameters(), lr=args.lr, momentum=args.momentum) + + deepspeed_config = dict(train_batch_size=args.batch_size) + # 'config_params' key is ignored if strategy != DSDistributedStrategy + distrib_model, optimizer, _ = strategy.distributed( + model, optimizer, lr_scheduler=None, config_params=deepspeed_config + ) + + # resume state + start_epoch = 1 + best_acc = np.Inf + res_name = f'{args.strategy}-checkpoint.pth.tar' + if os.path.isfile(res_name): + try: + if torch.cuda.is_available(): + dist.barrier() + # Map model to be loaded to specified single gpu. 
+ loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { + 'cpu:%d' % 0: 'cpu:%d' % lrank} + checkpoint = torch.load( + program_dir+'/'+res_name, map_location=loc) + else: + checkpoint = torch.load(program_dir+'/'+res_name) + start_epoch = checkpoint['epoch'] + best_acc = checkpoint['best_acc'] + distrib_model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if torch.cuda.is_available(): + if strategy.is_main_worker(): + print(f'WARNING: restarting from {start_epoch} epoch') + else: + print(f'WARNING: restarting from {start_epoch} epoch') + except Exception: + if torch.cuda.is_available(): + if strategy.is_main_worker(): + print('WARNING: restart file cannot be loaded, ' + 'restarting!') + else: + print('WARNING: restart file cannot be loaded, restarting!') + + if start_epoch > args.epochs: + if torch.cuda.is_available(): + if strategy.is_main_worker(): + print('WARNING: given epochs are less than the one in the ' + 'restart file!\n' + 'WARNING: SYS.EXIT is issued') + + strategy.clean_up() + sys.exit() + else: + print('WARNING: given epochs are less than the one in ' + 'the restart file!\n' + 'WARNING: SYS.EXIT is issued') + sys.exit() + + # start trainin/testing loop + if strategy.is_main_worker(): + print('TIMER: broadcast:', time.time()-st, 's') + print('\nDEBUG: start training') + print('--------------------------------------------------------') + + et = time.time() + for epoch in range(start_epoch, args.epochs + 1): + lt = time.time() + # training + loss_acc = train( + model=distrib_model, + device=device, + train_loader=train_loader, + optimizer=optimizer, + epoch=epoch, + strategy=strategy, + args=args + ) + + # testing + acc_test = test( + model=distrib_model, + device=device, + test_loader=test_loader, + strategy=strategy + ) + + # save first epoch timer + if epoch == start_epoch: + first_ep_t = time.time()-lt + + # final epoch + if epoch + 1 == args.epochs: + train_loader.last_epoch = True + test_loader.last_epoch = True + + if strategy.is_main_worker(): + print('TIMER: epoch time:', time.time()-lt, 's') + print('DEBUG: accuracy:', acc_test, '%') + + # save state if found a better state + is_best = loss_acc < best_acc + if epoch % args.restart_int == 0: + save_state( + epoch=epoch, + distrib_model=distrib_model, + loss_acc=loss_acc, + optimizer=optimizer, + res_name=res_name, + is_best=is_best, + strategy=strategy + ) + # reset best_acc + best_acc = min(loss_acc, best_acc) + + # finalise + # save final state + save_state( + epoch=epoch, + distrib_model=distrib_model, + loss_acc=loss_acc, + optimizer=optimizer, + res_name=res_name, + is_best=True, + strategy=strategy + ) + + # some debug + if strategy.is_main_worker(): + print('\n--------------------------------------------------------') + print('DEBUG: training results:\n') + print('TIMER: first epoch time:', first_ep_t, ' s') + print('TIMER: last epoch time:', time.time()-lt, ' s') + print('TIMER: average epoch time:', (time.time()-et)/args.epochs, ' s') + print('TIMER: total epoch time:', time.time()-et, ' s') + if epoch > 1: + print('TIMER: total epoch-1 time:', + time.time()-et-first_ep_t, ' s') + print('TIMER: average epoch-1 time:', + (time.time()-et-first_ep_t)/(args.epochs-1), ' s') + print('DEBUG: last accuracy:', acc_test, '%') + print('DEBUG: memory req:', + int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') \ + if args.cuda else 'DEBUG: memory req: - MB' + print('DEBUG: memory summary:\n\n', + torch.cuda.memory_summary(0)) if args.cuda else '' + + if 
strategy.is_main_worker(): + print(f'TIMER: final time: {time.time()-st} s\n') + + print(f" - TRAINING FINISHED") + strategy.clean_up() + sys.exit() From c26dae40d4869e1e03c85a3346ac1bacf924f0ab Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 22 Mar 2024 17:12:22 +0100 Subject: [PATCH 069/171] UPDATE configuration --- .../torch-tutorial-2-imagenet/config.yaml | 5 +- .../torch-tutorial-2-imagenet/train.py | 48 +++++++++++-------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml index fe01e6e7..0091cc64 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml @@ -1,14 +1,15 @@ # I/O -data_dir: /p/largedata2/raise/ImageNet_uncompressed +data_dir: tmp_data #/p/project/intertwin/datasets/ImageNet_uncompressed/train restart_int: 10 verbose: True # Model batch_size: 64 -epochs: 2 +epochs: 2 # TODO: increase to 3 lr: 0.001 momentum: 0.5 shuff: False +num_classes: 2 # TODO: reset # Debugging testrun: False diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py index 5f8b7e98..798b1c3d 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py @@ -11,6 +11,7 @@ import random import torch +from torch import nn import torch.distributed as dist import torch.nn.functional as F import torchvision @@ -65,6 +66,8 @@ def parse_args() -> argparse.Namespace: help='momentum in SGD optimizer (default: 0.5)') parser.add_argument('--shuff', action='store_true', default=False, help='shuffle dataset (default: False)') + parser.add_argument('--num-classes', type=int, default=1000, + help='number of classes in dataset') # debug parsers parser.add_argument('--testrun', action='store_true', default=False, @@ -292,7 +295,7 @@ def seed_worker(worker_id): root=args.data_dir, transform=transform ) - test_dataset = ... + # test_dataset = ... 
# restricts data loading to a subset of the dataset exclusive to the # current process @@ -300,8 +303,9 @@ def seed_worker(worker_id): if torch.cuda.is_available(): train_sampler = DistributedSampler( train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) - test_sampler = DistributedSampler( - test_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) + # test_sampler = DistributedSampler( + # test_dataset, num_replicas=gwsize, rank=grank, + # shuffle=args.shuff) # distribute dataset to workers # persistent workers is not possible for nworker=0 pers_w = True if args.nworker > 1 else False @@ -316,22 +320,24 @@ def seed_worker(worker_id): sampler=train_sampler, num_workers=args.nworker, pin_memory=True, persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs ) - test_loader = DataLoader( - test_dataset, batch_size=args.batch_size, - sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs - ) + # test_loader = DataLoader( + # test_dataset, batch_size=args.batch_size, + # sampler=test_sampler, num_workers=args.nworker, pin_memory=True, + # persistent_workers=pers_w, prefetch_factor=args.prefetch, + # **kwargs + # ) else: train_loader = DataLoader( train_dataset, batch_size=args.batch_size) - test_loader = DataLoader( - test_dataset, batch_size=args.batch_size) + # test_loader = DataLoader( + # test_dataset, batch_size=args.batch_size) if strategy.is_main_worker(): print('TIMER: read and concat data:', time.time()-st, 's') # create CNN model - model = torchvision.models.resnet101(pretrained=False) + model = torchvision.models.resnet101() + model.fc = nn.Linear(2048, args.num_classes) # optimizer optimizer = torch.optim.SGD( @@ -410,13 +416,13 @@ def seed_worker(worker_id): args=args ) - # testing - acc_test = test( - model=distrib_model, - device=device, - test_loader=test_loader, - strategy=strategy - ) + # # testing + # acc_test = test( + # model=distrib_model, + # device=device, + # test_loader=test_loader, + # strategy=strategy + # ) # save first epoch timer if epoch == start_epoch: @@ -425,11 +431,11 @@ def seed_worker(worker_id): # final epoch if epoch + 1 == args.epochs: train_loader.last_epoch = True - test_loader.last_epoch = True + # test_loader.last_epoch = True if strategy.is_main_worker(): print('TIMER: epoch time:', time.time()-lt, 's') - print('DEBUG: accuracy:', acc_test, '%') + # print('DEBUG: accuracy:', acc_test, '%') # save state if found a better state is_best = loss_acc < best_acc @@ -471,7 +477,7 @@ def seed_worker(worker_id): time.time()-et-first_ep_t, ' s') print('TIMER: average epoch-1 time:', (time.time()-et-first_ep_t)/(args.epochs-1), ' s') - print('DEBUG: last accuracy:', acc_test, '%') + # print('DEBUG: last accuracy:', acc_test, '%') print('DEBUG: memory req:', int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') \ if args.cuda else 'DEBUG: memory req: - MB' From 8f33bf8db466e15049cb71ed0f7844de665ff326 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 22 Mar 2024 17:40:16 +0100 Subject: [PATCH 070/171] UPDATE imagenet tutorial --- .../distributed-ml/torch-tutorial-2-imagenet/config.yaml | 6 +++--- tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml index 0091cc64..f101a083 100644 --- 
a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml @@ -1,15 +1,15 @@ # I/O -data_dir: tmp_data #/p/project/intertwin/datasets/ImageNet_uncompressed/train +data_dir: /p/project/intertwin/datasets/ImageNet_uncompressed/train restart_int: 10 verbose: True # Model batch_size: 64 -epochs: 2 # TODO: increase to 3 +epochs: 3 lr: 0.001 momentum: 0.5 shuff: False -num_classes: 2 # TODO: reset +num_classes: 1000 # Debugging testrun: False diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py index 798b1c3d..d78dc1b3 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py @@ -335,8 +335,8 @@ def seed_worker(worker_id): if strategy.is_main_worker(): print('TIMER: read and concat data:', time.time()-st, 's') - # create CNN model - model = torchvision.models.resnet101() + # create CNN model: resnet 50, resnet101, resnet152 + model = torchvision.models.resnet152() model.fc = nn.Linear(2048, args.num_classes) # optimizer From 89e8097fd9d048628f8f77be49a7d979843cfdd9 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 22 Mar 2024 20:08:49 +0100 Subject: [PATCH 071/171] DRAFT scaling test --- .../torch-scaling-test/README.md | 4 + .../torch-scaling-test/ddp/DDP_trainer.py | 468 ++++++++++++++++++ .../torch-scaling-test/ddp/config.yaml | 23 + .../torch-scaling-test/ddp/ddp_slurm.sh | 66 +++ .../torch-scaling-test/ddp/scaling-test.sh | 9 + .../deepspeed/DS_trainer.py | 346 +++++++++++++ .../torch-scaling-test/deepspeed/config.yaml | 17 + .../deepspeed/deepspeed_slurm.sh | 74 +++ .../deepspeed/scaling-test.sh | 9 + .../torch-scaling-test/horovod/config.yaml | 20 + .../horovod/horovod_trainer.py | 250 ++++++++++ .../torch-scaling-test/horovod/hvd_slurm.sh | 60 +++ .../horovod/scaling-test.sh | 9 + .../torch-tutorial-2-imagenet/runall.sh | 2 +- .../torch-tutorial-2-imagenet/scaling-test.sh | 11 + 15 files changed, 1367 insertions(+), 1 deletion(-) create mode 100644 tutorials/distributed-ml/torch-scaling-test/README.md create mode 100755 tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py create mode 100644 tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml create mode 100644 tutorials/distributed-ml/torch-scaling-test/ddp/ddp_slurm.sh create mode 100644 tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh create mode 100644 tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py create mode 100644 tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml create mode 100644 tutorials/distributed-ml/torch-scaling-test/deepspeed/deepspeed_slurm.sh create mode 100644 tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh create mode 100644 tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml create mode 100755 tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py create mode 100644 tutorials/distributed-ml/torch-scaling-test/horovod/hvd_slurm.sh create mode 100644 tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh create mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh diff --git a/tutorials/distributed-ml/torch-scaling-test/README.md b/tutorials/distributed-ml/torch-scaling-test/README.md new file mode 100644 index 00000000..dcc5233a --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/README.md @@ -0,0 +1,4 @@ +# 
Scaling tests for PyTorch + +Examples of scaling tests which can be used as baselines for `itwinai` distributed. +Work in progress. \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py b/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py new file mode 100755 index 00000000..b7235078 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py @@ -0,0 +1,468 @@ +""" +Scaling test of torch Distributed Data Parallel on Imagenet using Resnet. +""" +import argparse +import sys +import os +import time +import random +import numpy as np + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +import torchvision +from torchvision import datasets, transforms + +from itwinai.parser import ArgumentParser as ItAIArgumentParser + + +def pars_ini(): + parser = ItAIArgumentParser(description='PyTorch Imagenet scaling test') + + # IO parsers + parser.add_argument('--data-dir', default='./', + help=('location of the training dataset in the ' + 'local filesystem')) + parser.add_argument('--restart-int', type=int, default=10, + help='restart interval per epoch (default: 10)') + parser.add_argument('--verbose', + action=argparse.BooleanOptionalAction, + help='Print parsed arguments') + + # model parsers + parser.add_argument('--batch-size', type=int, default=64, + help='input batch size for training (default: 64)') + parser.add_argument('--epochs', type=int, default=10, + help='number of epochs to train (default: 10)') + parser.add_argument('--lr', type=float, default=0.01, + help='learning rate (default: 0.01)') + parser.add_argument('--momentum', type=float, default=0.5, + help='momentum in SGD optimizer (default: 0.5)') + parser.add_argument('--shuff', action='store_true', default=False, + help='shuffle dataset (default: False)') + + # debug parsers + parser.add_argument('--testrun', action='store_true', default=False, + help='do a test run with seed (default: False)') + parser.add_argument('--nseed', type=int, default=0, + help='seed integer for reproducibility (default: 0)') + parser.add_argument('--log-int', type=int, default=10, + help='log interval per training') + parser.add_argument('--benchrun', + action=argparse.BooleanOptionalAction) + + # parallel parsers + parser.add_argument('--backend', type=str, default='nccl', + help='backend for parrallelisation (default: nccl)') + parser.add_argument('--nworker', type=int, default=0, + help=('number of workers in DataLoader ' + '(default: 0 - only main)')) + parser.add_argument('--prefetch', type=int, default=2, + help='prefetch data in DataLoader (default: 2)') + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables GPGPUs') + + args = parser.parse_args() + + if args.verbose: + args_list = [f"{key}: {val}" for key, val in args.items()] + print("PARSED ARGS:\n", '\n'.join(args_list)) + return args + + +def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): + model.train() + t_list = [] + loss_acc = 0 + if grank == 0: + print("\n") + for batch_idx, (data, target) in enumerate(train_loader): + t = time.perf_counter() + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_int == 0 and grank == 0: + print( + f'Train epoch: {epoch} [{batch_idx * len(data)}/' + f'{len(train_loader.dataset)/gwsize} ' + f'({100.0 * batch_idx / 
len(train_loader):.0f}%)]\t\tLoss: ' + f'{loss.item():.6f}') + t_list.append(time.perf_counter() - t) + loss_acc += loss.item() + if grank == 0: + print('TIMER: train time', sum(t_list) / len(t_list), 's') + return loss_acc + + +def test(model, device, test_loader, grank, gwsize): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + # sum up batch loss + test_loss += F.nll_loss(output, target, reduction="sum").item() + # get the index of the max log-probability + pred = output.argmax(dim=1, keepdim=True) + correct += pred.eq(target.view_as(pred)).sum().item() + test_loss /= len(test_loader.dataset) + if grank == 0: + print( + f'Test set: average loss: {test_loss:.4f}\t' + f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') + acc_test = 100.0 * correct * gwsize / len(test_loader.dataset) + return acc_test + + +def save_state( + epoch, distrib_model, loss_acc, + optimizer, res_name, grank, gwsize, is_best +): + """Save training state.""" + rt = time.time() + # find if is_best happened in any worker + if torch.cuda.is_available(): + is_best_m = par_allgather_obj(is_best, gwsize) + + if torch.cuda.is_available(): + if any(is_best_m): + # find which rank is_best happened - select first rank if multiple + is_best_rank = np.where(np.array(is_best_m))[0][0] + + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} + + # write on worker with is_best + if grank == is_best_rank: + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} ' + f'in {time.time()-rt} s') + else: + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} + + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} ' + f'in {time.time()-rt} s') + + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + + +def par_allgather_obj(obj, gwsize): + """Gathers any object from the whole group in a list (to all workers)""" + res = [None]*gwsize + dist.all_gather_object(res, obj, group=None) + return res + + +def main(): + # get parse args + args = pars_ini() + + # check CUDA availibility + args.cuda = not args.no_cuda and torch.cuda.is_available() + + # get directory + program_dir = os.getcwd() + + # start the time.time for profiling + st = time.time() + + # initializes the distributed backend which will take care of synchronizing + # nodes/GPUs + if torch.cuda.is_available(): + dist.init_process_group(backend=args.backend) + + # deterministic testrun + if args.testrun: + torch.manual_seed(args.nseed) + g = torch.Generator() + g.manual_seed(args.nseed) + + # get job rank info - rank==0 master gpu + if torch.cuda.is_available(): + # local world size - per node + lwsize = torch.cuda.device_count() if args.cuda else 0 + gwsize = dist.get_world_size() # global world size - per run + grank = dist.get_rank() # global rank - assign per run + lrank = dist.get_rank() % lwsize # local rank - assign per node + else: + gwsize = 1 + grank = 0 + + # some debug + if grank == 0: + print('TIMER: initialise:', time.time()-st, 's') + print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) + print('DEBUG: sys.version:', sys.version, '\n') + + print('DEBUG: IO 
parsers:') + print('DEBUG: args.data_dir:', args.data_dir) + print('DEBUG: args.restart_int:', args.restart_int, '\n') + + print('DEBUG: model parsers:') + print('DEBUG: args.batch_size:', args.batch_size) + print('DEBUG: args.epochs:', args.epochs) + print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.momentum:', args.momentum) + print('DEBUG: args.shuff:', args.shuff, '\n') + + print('DEBUG: debug parsers:') + print('DEBUG: args.testrun:', args.testrun) + print('DEBUG: args.nseed:', args.nseed) + print('DEBUG: args.log_int:', args.log_int, '\n') + + print('DEBUG: parallel parsers:') + print('DEBUG: args.backend:', args.backend) + print('DEBUG: args.nworker:', args.nworker) + print('DEBUG: args.prefetch:', args.prefetch) + print('DEBUG: args.cuda:', args.cuda) + print('DEBUG: args.benchrun:', args.benchrun, '\n') + + # encapsulate the model on the GPU assigned to the current process + device = torch.device( + 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) + if args.cuda: + torch.cuda.set_device(lrank) + # deterministic testrun + if args.testrun: + torch.cuda.manual_seed(args.nseed) + + # dataset + # Initialize transformations for data augmentation + transform = transforms.Compose([ + transforms.Resize(256), + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=45), + transforms.ColorJitter( + brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + + # Load the ImageNet Object Localization Challenge dataset + train_dataset = datasets.ImageFolder( + root=args.data_dir, + transform=transform + ) + # test_dataset = ... + + # restricts data loading to a subset of the dataset exclusive to the + # current process + args.shuff = args.shuff and not args.testrun + if torch.cuda.is_available(): + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) + # test_sampler = torch.utils.data.distributed.DistributedSampler( + # test_dataset, num_replicas=gwsize, rank=grank, + # shuffle=args.shuff) + + # distribute dataset to workers + # persistent workers is not possible for nworker=0 + pers_w = True if args.nworker > 1 else False + + # deterministic testrun - the same dataset each run + kwargs = {'worker_init_fn': seed_worker, + 'generator': g} if args.testrun else {} + + if torch.cuda.is_available(): + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) + # test_loader = torch.utils.data.DataLoader( + # test_dataset, batch_size=args.batch_size, + # sampler=test_sampler, num_workers=args.nworker, pin_memory=True, + # persistent_workers=pers_w, prefetch_factor=args.prefetch, + # **kwargs) + else: + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size) + # test_loader = torch.utils.data.DataLoader( + # test_dataset, batch_size=args.batch_size) + + if grank == 0: + print('TIMER: read and concat data:', time.time()-st, 's') + + # create CNN model + model = torchvision.models.resnet152().to(device) + + # distribute model to workers + if torch.cuda.is_available(): + distrib_model = nn.parallel.DistributedDataParallel( + model, + device_ids=[device], + output_device=device) + else: + distrib_model = model + + # optimizer 
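DistributedDataParallel averages gradients across workers inside `backward()`, so the single-process optimizer below is used unchanged. If one wanted the linear learning-rate scaling rule that the Horovod variant of this scaling test applies via `hvd.size()`, a hedged sketch would look as follows (this is not what this trainer does; `args`, `gwsize` and `distrib_model` come from the surrounding script):

    # Illustrative only: scale the base learning rate by the number of
    # workers, mirroring the lr_scaler logic in horovod_trainer.py.
    scaled_lr = args.lr * gwsize
    optimizer = torch.optim.SGD(
        distrib_model.parameters(), lr=scaled_lr, momentum=args.momentum)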
+ # optimizer = torch.optim.Adam(distrib_model.parameters(), lr=args.lr) + optimizer = torch.optim.SGD( + distrib_model.parameters(), lr=args.lr, momentum=args.momentum) + + # resume state + start_epoch = 1 + best_acc = np.Inf + res_name = 'ddp-checkpoint.pth.tar' + if os.path.isfile(res_name) and not args.benchrun: + try: + if torch.cuda.is_available(): + dist.barrier() + # Map model to be loaded to specified single gpu. + loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { + 'cpu:%d' % 0: 'cpu:%d' % lrank} + checkpoint = torch.load( + program_dir+'/'+res_name, map_location=loc) + else: + checkpoint = torch.load(program_dir+'/'+res_name) + start_epoch = checkpoint['epoch'] + best_acc = checkpoint['best_acc'] + distrib_model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if torch.cuda.is_available(): + if grank == 0: + print(f'WARNING: restarting from {start_epoch} epoch') + else: + print(f'WARNING: restarting from {start_epoch} epoch') + except Exception: + if torch.cuda.is_available(): + if grank == 0: + print('WARNING: restart file cannot ' + 'be loaded, restarting!') + else: + print('WARNING: restart file cannot be loaded, restarting!') + + if start_epoch >= args.epochs: + if torch.cuda.is_available(): + if grank == 0: + print('WARNING: given epochs are less than the one in the' + ' restart file!\n' + 'WARNING: SYS.EXIT is issued') + dist.barrier() + dist.destroy_process_group() + sys.exit() + else: + print('WARNING: given epochs are less than the one in the ' + 'restart file!\n' + 'WARNING: SYS.EXIT is issued') + sys.exit() + + # start trainin/testing loop + if grank == 0: + print('TIMER: broadcast:', time.time()-st, 's') + print('\nDEBUG: start training') + print('--------------------------------------------------------') + + et = time.time() + for epoch in range(start_epoch, args.epochs + 1): + lt = time.time() + # training + if args.benchrun and epoch == args.epochs: + # profiling (done on last epoch - slower!) 
+ with torch.autograd.profiler.profile(use_cuda=args.cuda, + profile_memory=True) as prof: + loss_acc = train(distrib_model, device, train_loader, + optimizer, epoch, grank, gwsize, args) + else: + loss_acc = train(distrib_model, device, train_loader, + optimizer, epoch, grank, gwsize, args) + + # # testing + # acc_test = test(distrib_model, device, + # test_loader, grank, gwsize, args) + + # save first epoch timer + if epoch == start_epoch: + first_ep_t = time.time()-lt + + # final epoch + if epoch + 1 == args.epochs: + train_loader.last_epoch = True + # test_loader.last_epoch = True + + if grank == 0: + print('TIMER: epoch time:', time.time()-lt, 's') + # print('DEBUG: accuracy:', acc_test, '%') + if args.benchrun and epoch == args.epochs: + print('\n----------------------------------------------------') + print('DEBUG: benchmark of last epoch:\n') + what1 = 'cuda' if args.cuda else 'cpu' + print(prof.key_averages().table( + sort_by='self_'+str(what1)+'_time_total')) + + # save state if found a better state + is_best = loss_acc < best_acc + if epoch % args.restart_int == 0 and not args.benchrun: + save_state(epoch, distrib_model, loss_acc, optimizer, + res_name, grank, gwsize, is_best) + # reset best_acc + best_acc = min(loss_acc, best_acc) + + # finalise + # save final state + if not args.benchrun: + save_state(epoch, distrib_model, loss_acc, + optimizer, res_name, grank, gwsize, True) + if torch.cuda.is_available(): + dist.barrier() + + # some debug + if grank == 0: + print('\n--------------------------------------------------------') + print('DEBUG: training results:\n') + print('TIMER: first epoch time:', first_ep_t, ' s') + print('TIMER: last epoch time:', time.time()-lt, ' s') + print('TIMER: average epoch time:', (time.time()-et)/args.epochs, ' s') + print('TIMER: total epoch time:', time.time()-et, ' s') + if epoch > 1: + print('TIMER: total epoch-1 time:', + time.time()-et-first_ep_t, ' s') + print('TIMER: average epoch-1 time:', + (time.time()-et-first_ep_t)/(args.epochs-1), ' s') + if args.benchrun: + print('TIMER: total epoch-2 time:', lt-first_ep_t, ' s') + print('TIMER: average epoch-2 time:', + (lt-first_ep_t)/(args.epochs-2), ' s') + # print('DEBUG: last accuracy:', acc_test, '%') + print('DEBUG: memory req:', + int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') \ + if args.cuda else 'DEBUG: memory req: - MB' + print('DEBUG: memory summary:\n\n', + torch.cuda.memory_summary(0)) if args.cuda else '' + + if grank == 0: + print(f'TIMER: final time: {time.time()-st} s\n') + + print(f" - TRAINING FINISHED") + + # clean-up + if torch.cuda.is_available(): + dist.barrier() + dist.destroy_process_group() + + +if __name__ == "__main__": + main() + sys.exit() diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml b/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml new file mode 100644 index 00000000..219ae32e --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml @@ -0,0 +1,23 @@ +# I/O +data_dir: ../tmp_data #/p/project/intertwin/datasets/ImageNet_uncompressed/train +restart_int: 10 +verbose: True + +# Model +batch_size: 64 +epochs: 3 +lr: 0.001 +momentum: 0.5 +shuff: False + +# Debugging +benchrun: False +testrun: False +nseed: 10 +log_int: 10 + +# Distributed ML +backend: nccl +nworker: 4 # num workers dataloader +prefetch: 2 +no_cuda: False diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/ddp_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/ddp/ddp_slurm.sh new file mode 100644 index 00000000..b342ceb3 
--- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/ddp/ddp_slurm.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=Torch_DDP_tutorial-1 +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job-ddp.out +#SBATCH --error=job-ddp.err +#SBATCH --time=00:30:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gpus-per-node=4 +# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../../envAI_hdfml/bin/activate + +# job info +debug=false +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +# launch training +TRAINING_CMD="DDP_trainer.py -c config.yaml" + +srun --cpu-bind=none bash -c "torchrun \ + --log_dir='logs' \ + --nnodes=$SLURM_NNODES \ + --nproc_per_node=$SLURM_GPUS_PER_NODE \ + --rdzv_id=$SLURM_JOB_ID \ + --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ + --rdzv_backend=c10d \ + --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ + $TRAINING_CMD" + diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh new file mode 100644 index 00000000..12476f41 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +rm *checkpoint.pth.tar *.out *.err + +timeout="00:01:00" +for N in 1 2 4 8 +do + sbatch --job-name="DDP-imagenet-pure-n$N" --nodes=$N --output="job-Pddp-n$N.out" --error="job-Pddp-n$N.err" --time=$timeout ddp_slurm.sh +done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py b/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py new file mode 100644 index 00000000..e4f89eb0 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py @@ -0,0 +1,346 @@ +""" +Scaling test of Microsoft Deepspeed on Imagenet using Resnet. 
+""" +import argparse +import sys +import os +import time +import random +import numpy as np +import deepspeed + +import torch +import torch.distributed as dist +import torch.nn.functional as F +import torchvision +from torchvision import datasets, transforms + +from itwinai.parser import ArgumentParser as ItAIArgumentParser + + +def parsIni(): + parser = ItAIArgumentParser( + description='PyTorch Imagenet scaling test') + parser.add_argument('--batch-size', type=int, default=64, metavar='N', + help='input batch size for training (default: 64)') + parser.add_argument('--epochs', type=int, default=10, metavar='N', + help='number of epochs to train (default: 10)') + parser.add_argument('--lr', type=float, default=0.01, metavar='LR', + help='learning rate (default: 0.01)') + parser.add_argument('--log-int', type=int, default=100, metavar='N', + help=( + 'how many batches to wait before logging ' + 'training status')) + parser.add_argument('--data-dir', default='./', + help=('location of the training dataset in the local ' + 'filesystem')) + parser.add_argument('--backend', type=str, default='nccl', metavar='N', + help='backend for parrallelisation (default: nccl)') + parser.add_argument('--restart-int', type=int, default=10, metavar='N', + help='restart int per epoch (default: 10)') + parser.add_argument('--testrun', action='store_true', default=False, + help='do a test run (default: False)') + parser.add_argument('--local_rank', type=int, default=-1, + help='local rank passed from distributed launcher') + parser.add_argument('--nworker', type=int, default=0, + help=('number of workers in DataLoader ' + '(default: 0 - only main)')) + parser.add_argument('--verbose', + action=argparse.BooleanOptionalAction, + help='Print parsed arguments') + # parse to deepspeed + parser = deepspeed.add_config_arguments(parser) + args = parser.parse_args() + if args.verbose: + args_list = [f"{key}: {val}" for key, val in args.items()] + print("PARSED ARGS:\n", '\n'.join(args_list)) + + return args + + +def train(args, model, train_loader, optimizer, epoch, grank, gwsize): + device = model.local_rank + t_list = [] + loss_acc = 0 + if grank == 0: + print("\n") + for batch_idx, (data, target) in enumerate(train_loader): + t = time.perf_counter() + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_int == 0 and grank == 0: + print( + f'Train epoch: {epoch} [{batch_idx * len(data)}/' + f'{len(train_loader.dataset)/gwsize} ' + f'({100.0 * batch_idx *len(data) / len(train_loader):.0f}%)]' + '\t\tLoss: {loss.item():.6f}') + t_list.append(time.perf_counter() - t) + loss_acc += loss.item() + if grank == 0: + print('TIMER: train time', sum(t_list) / len(t_list), 's') + return loss_acc + + +def test(model, test_loader, grank, gwsize): + device = model.local_rank + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + # sum up batch loss + test_loss += F.nll_loss(output, target, reduction="sum").item() + # get the index of the max log-probability + pred = output.argmax(dim=1, keepdim=True) + correct += pred.eq(target.view_as(pred)).sum().item() + test_loss /= len(test_loader.dataset) + if grank == 0: + print( + f'Test set: average loss: {test_loss:.4f}\t' + f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') + acc_test = 100.0 * correct * gwsize / 
len(test_loader.dataset) + return acc_test + + +def save_state( + epoch, distrib_model, loss_acc, + optimizer, res_name, grank, gwsize, is_best +): + """Save training state.""" + rt = time.time() + # find if is_best happened in any worker + if torch.cuda.is_available(): + is_best_m = par_allgather_obj(is_best, gwsize) + + if torch.cuda.is_available(): + if any(is_best_m): + # find which rank is_best happened - select first rank if multiple + is_best_rank = np.where(np.array(is_best_m))[0][0] + + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} + + # write on worker with is_best + if grank == is_best_rank: + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} ' + f'in {time.time()-rt} s') + else: + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} + + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} ' + f'in {time.time()-rt} s') + + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + + +def par_allgather_obj(obj, gwsize): + """Gathers any object from the whole group in a list (to all workers)""" + res = [None]*gwsize + dist.all_gather_object(res, obj, group=None) + return res + + +def main(): + # get parse args + args = parsIni() + + # limit # of CPU threads to be used per worker + torch.set_num_threads(1) + + # get directory + program_dir = os.getcwd() + + # start the time.time for profiling + st = time.time() + + # initializes the distributed backend + deepspeed.init_distributed(dist_backend=args.backend) + + # get job rank info - rank==0 master gpu + gwsize = dist.get_world_size() # global world size - per run + lwsize = torch.cuda.device_count() # local world size - per node + grank = dist.get_rank() # global rank - assign per run + lrank = dist.get_rank() % lwsize # local rank - assign per node + + # some debug + if grank == 0: + print('TIMER: initialise:', time.time()-st, 's') + print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) + print('DEBUG: sys.version:', sys.version) + print('DEBUG: args.data_dir:', args.data_dir) + print('DEBUG: args.batch_size:', args.batch_size) + print('DEBUG: args.epochs:', args.epochs) + print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.backend:', args.backend) + print('DEBUG: args.log_int:', args.log_int) + print('DEBUG: args.restart_int:', args.restart_int) + print('DEBUG: args.testrun:', args.testrun, '\n') + + # encapsulate the model on the GPU assigned to the current process + torch.cuda.set_device(lrank) + + # read training dataset + # Initialize transformations for data augmentation + transform = transforms.Compose([ + transforms.Resize(256), + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=45), + transforms.ColorJitter( + brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + + # Load the ImageNet Object Localization Challenge dataset + train_dataset = datasets.ImageFolder( + root=args.data_dir, + transform=transform + ) + # test_dataset = ... 
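Earlier in this file, `save_state()` uses `par_allgather_obj()`, a thin wrapper around `torch.distributed.all_gather_object`, to agree on which rank writes the checkpoint. Condensed into a self-contained helper, the pattern looks roughly like this (the name `elect_writer_rank` is illustrative and an already-initialized process group is assumed):

    # Sketch of the writer-rank election performed by save_state() above.
    import torch.distributed as dist

    def elect_writer_rank(is_best: bool, world_size: int) -> int:
        """Return the lowest rank whose is_best flag is True, or -1."""
        flags = [None] * world_size
        # Gathers one picklable object per rank into `flags` on every rank.
        dist.all_gather_object(flags, is_best)
        return flags.index(True) if any(flags) else -1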
+ + # # distribute test dataset + # test_sampler = torch.utils.data.distributed.DistributedSampler( + # test_dataset, num_replicas=gwsize, rank=grank) + # test_loader = torch.utils.data.DataLoader( + # test_dataset, batch_size=args.batch_size, + # sampler=test_sampler, num_workers=0, pin_memory=True, shuffle=False) + + if grank == 0: + print('TIMER: read and concat data:', time.time()-st, 's') + + # create CNN model + model = torchvision.models.resnet152() + + # Initialize DeepSpeed to use the following features + # 1) Distributed model + # 2) DeepSpeed optimizer + # 3) Distributed data loader + deepspeed_config = { + "train_batch_size": args.batch_size, + "optimizer": { + "type": "SGD", + "params": { + "lr": args.lr, + "momentum": 0.5 + } + }, + "fp16": { + "enabled": False + }, + "zero_optimization": False + } + distrib_model, optimizer, train_loader, _ = deepspeed.initialize( + args=args, model=model, model_parameters=model.parameters(), + training_data=train_dataset, config_params=deepspeed_config) + + # optimizer + # optimizer = torch.optim.Adam(distrib_model.parameters(), lr=args.lr) + # optimizer = torch.optim.SGD( + # distrib_model.parameters(), lr=args.lr, momentum=0.5) + + # resume state + start_epoch = 1 + best_acc = np.Inf + res_name = 'ds-checkpoint.pth.tar' + if os.path.isfile(res_name): + try: + dist.barrier() + # Map model to be loaded to specified single gpu. + loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} + checkpoint = torch.load(program_dir+'/'+res_name, map_location=loc) + start_epoch = checkpoint['epoch'] + best_acc = checkpoint['best_acc'] + distrib_model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if grank == 0: + print(f'WARNING: restarting from {start_epoch} epoch') + except Exception: + if grank == 0: + print('WARNING: restart file cannot be loaded, restarting!') + + if start_epoch >= args.epochs+1: + if grank == 0: + print('WARNING: given epochs are less than the ' + 'one in the restart file!\n' + 'WARNING: SYS.EXIT is issued') + deepspeed.sys.exit() + sys.exit() + + # start trainin/testing loop + if grank == 0: + print('TIMER: broadcast:', time.time()-st, 's') + print('\nDEBUG: start training') + print('--------------------------------------------------------') + + et = time.time() + for epoch in range(start_epoch, args.epochs + 1): + lt = time.time() + # training + loss_acc = train(args, distrib_model, train_loader, + optimizer, epoch, grank, gwsize) + + # testing + # acc_test = test(distrib_model, test_loader, grank, gwsize) + + # save state if found a better state + is_best = loss_acc < best_acc + if epoch % args.restart_int == 0: + save_state(epoch, distrib_model, loss_acc, optimizer, + res_name, grank, gwsize, is_best) + # reset best_acc + best_acc = min(loss_acc, best_acc) + + if grank == 0: + print('TIMER: epoch time:', time.time()-lt, 's') + # print('DEBUG: accuracy:', acc_test, '%') + + # finalise + # save final state + save_state(epoch, distrib_model, loss_acc, + optimizer, res_name, grank, gwsize, True) + dist.barrier() + + # some debug + if grank == 0: + print('\n--------------------------------------------------------') + print('DEBUG: results:\n') + print('TIMER: last epoch time:', time.time()-lt, 's') + print('TIMER: total epoch time:', time.time()-et, 's') + # print('DEBUG: last accuracy:', acc_test, '%') + print('DEBUG: memory req:', int( + torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') + + if grank == 0: + print(f'TIMER: final time: {time.time()-st} s\n') + + print(f" - TRAINING FINISHED") + 
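The resume logic above relies on plain `torch.save()`/`torch.load()` of the state dict. For reference, DeepSpeed engines also expose their own `save_checkpoint()`/`load_checkpoint()` API, which additionally tracks optimizer and ZeRO partitioning state; a minimal sketch of that alternative (directory and tag names are illustrative, and this is not what the script above does):

    # Alternative checkpointing through the DeepSpeed engine (illustrative).
    # `distrib_model` here is the engine returned by deepspeed.initialize().
    distrib_model.save_checkpoint('ds_ckpts', tag=f'epoch_{epoch}',
                                  client_state={'best_acc': best_acc})
    # ...and on restart:
    load_path, client_state = distrib_model.load_checkpoint(
        'ds_ckpts', tag=f'epoch_{epoch}')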
+ # clean-up + deepspeed.sys.exit() + + +if __name__ == "__main__": + main() + sys.exit() diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml b/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml new file mode 100644 index 00000000..5711941c --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml @@ -0,0 +1,17 @@ +# I/O +data_dir: ../tmp_data #/p/project/intertwin/datasets/ImageNet_uncompressed/train +restart_int: 10 +verbose: True + +# Model +batch_size: 64 +epochs: 3 +lr: 0.001 + +# Debugging +testrun: False +log_int: 10 + +# Distributed ML +backend: nccl +nworker: 4 # num workers dataloader diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/deepspeed/deepspeed_slurm.sh new file mode 100644 index 00000000..9ba733f0 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed/deepspeed_slurm.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=Torch_DeepSpeed_tutorial-1 +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job-ds.out +#SBATCH --error=job-ds.err +#SBATCH --time=00:30:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=4 +#SBATCH --cpus-per-task=4 +#SBATCH --gpus-per-node=4 +# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../../envAI_hdfml/bin/activate + +# job info +debug=false +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set env vars +export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +# launch training +MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i +export MASTER_ADDR +export MASTER_PORT=29500 + +TRAINING_CMD="DS_trainer.py -c config.yaml" + +# Run without launcher: set --ntasks-per-node=NUM_GPUS +srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed + +# # Run with deepspeed launcher: set --ntasks-per-node=1 +# # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables +# export NCCL_IB_DISABLE=1 +# export NCCL_SOCKET_IFNAME=eth0 +# nodelist=$(scontrol show hostname $SLURM_NODELIST) +# echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile +# # Requires passwordless SSH access among compute node +# srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed +# rm .hostfile + diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh new file mode 100644 index 00000000..9fff1316 --- /dev/null +++ 
b/tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +rm *checkpoint.pth.tar *.out *.err + +timeout="00:01:00" +for N in 1 2 4 8 +do + sbatch --job-name="DS-imagenet-pure-n$N" --nodes=$N --output="job-Pds-n$N.out" --error="job-Pds-n$N.err" --time=$timeout deepspeed_slurm.sh +done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml b/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml new file mode 100644 index 00000000..28b53b97 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml @@ -0,0 +1,20 @@ +# I/O +data_dir: ../tmp_data #/p/project/intertwin/datasets/ImageNet_uncompressed/train +verbose: True +nworker: 4 # num workers dataloader + +# Model +batch_size: 64 +epochs: 3 +lr: 0.001 +momentum: 0.5 +use_adasum: False + +# Debugging +seed: 10 +log_interval: 10 + +# Distributed ML +no_cuda: False +fp16_allreduce: False +gradient_predivide_factor: 1.0 \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py b/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py new file mode 100755 index 00000000..9ccfecbc --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py @@ -0,0 +1,250 @@ +""" +Scaling test of Horovod on Imagenet using Resnet. +""" +import argparse +import sys +from timeit import default_timer as timer + +import torch.multiprocessing as mp +import torch.nn.functional as F +import torch.optim as optim +import torch.utils.data.distributed +import horovod.torch as hvd +import torchvision +from torchvision import datasets, transforms + +from itwinai.parser import ArgumentParser as ItAIArgumentParser + + +def parsIni(): + parser = ItAIArgumentParser(description='PyTorch Imagenet Example') + parser.add_argument('--batch-size', type=int, default=64, metavar='N', + help='input batch size for training (default: 64)') + parser.add_argument('--epochs', type=int, default=10, metavar='N', + help='number of epochs to train (default: 10)') + parser.add_argument('--lr', type=float, default=0.01, metavar='LR', + help='learning rate (default: 0.01)') + parser.add_argument('--momentum', type=float, default=0.5, metavar='M', + help='SGD momentum (default: 0.5)') + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables CUDA training') + parser.add_argument('--seed', type=int, default=42, metavar='S', + help='random seed (default: 42)') + parser.add_argument('--log-interval', type=int, default=100, metavar='N', + help='#batches to wait before logging training status') + parser.add_argument('--fp16-allreduce', action='store_true', default=False, + help='use fp16 compression during allreduce') + parser.add_argument('--use-adasum', action='store_true', default=False, + help='use adasum algorithm to do reduction') + parser.add_argument('--gradient-predivide-factor', type=float, default=1.0, + help=('apply gradient predivide factor in optimizer ' + '(default: 1.0)')) + parser.add_argument('--data-dir', default='./', + help=('location of the training dataset in the ' + 'local filesystem')) + parser.add_argument('--verbose', + action=argparse.BooleanOptionalAction, + help='Print parsed arguments') + parser.add_argument('--nworker', type=int, default=0, + help=('number of workers in DataLoader ' + '(default: 0 - only main)')) + + args = parser.parse_args() + if args.verbose: + args_list = [f"{key}: {val}" for key, val in args.items()] 
+ print("PARSED ARGS:\n", '\n'.join(args_list)) + + return args + + +def train(epoch): + model.train() + # Horovod: set epoch to sampler for shuffling + train_sampler.set_epoch(epoch) + print('Training:') + for batch_idx, (data, target) in enumerate(train_loader): + if args.cuda: + data, target = data.cuda(), target.cuda() + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_interval == 0: + # Horovod: use train_sampler to determine the number of examples in + # this worker's partition + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_sampler), + 100. * batch_idx / len(train_loader), loss.item())) + + +def metric_average(val, namegiv): + tensor = torch.tensor(val) + avg_tensor = hvd.allreduce(tensor, name=namegiv) + return avg_tensor.item() + + +# def test(): +# model.eval() +# test_loss = 0. +# test_accuracy = 0. +# for data, target in test_loader: +# if args.cuda: +# data, target = data.cuda(), target.cuda() +# output = model(data) +# # sum up batch loss +# test_loss += F.nll_loss(output, target, size_average=False).item() +# # get the index of the max log-probability +# pred = output.data.max(1, keepdim=True)[1] +# test_accuracy += \ +# pred.eq(target.data.view_as(pred)).cpu().float().sum() + +# # Horovod: use test_sampler to determine the number of examples in +# # this worker's partition +# test_loss /= len(test_sampler) +# test_accuracy /= len(test_sampler) + +# # Horovod: average metric values across workers +# test_loss = metric_average(test_loss, 'avg_loss') +# test_accuracy = metric_average(test_accuracy, 'avg_accuracy') + +# # Horovod: print output only on first rank +# if hvd.rank() == 0: +# print('\nTest set: Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format( +# test_loss, 100. * test_accuracy)) + + +if __name__ == '__main__': + # get parse args + args = parsIni() + args.cuda = not args.no_cuda and torch.cuda.is_available() + + # Horovod: init + st = timer() + hvd.init() + torch.manual_seed(args.seed) + + # some debug + if hvd.rank() == 0 and hvd.local_rank() == 0: + print('DEBUG: sys.version:', sys.version) + print('DEBUG: torch.cuda.is_available():', torch.cuda.is_available()) + print('DEBUG: torch.cuda.current_device():', + torch.cuda.current_device()) + print('DEBUG: torch.cuda.device_count():', torch.cuda.device_count()) + print('DEBUG: torch.cuda.get_device_properties(hvd.local_rank()):', + torch.cuda.get_device_properties(hvd.local_rank())) + print('DEBUG: args.data_dir:', args.data_dir) + print('DEBUG: args.batch_size:', args.batch_size) + print('DEBUG: args.epochs:', args.epochs) + + if hvd.rank() == 0 and hvd.local_rank() == 0: + print('TIMER: initialise:', timer()-st, 's') + + if args.cuda: + # Horovod: pin GPU to local rank + torch.cuda.set_device(hvd.local_rank()) + torch.cuda.manual_seed(args.seed) + + # Horovod: limit # of CPU threads to be used per worker + torch.set_num_threads(1) + + # kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} + kwargs = {'num_workers': args.nworker, + 'pin_memory': True} if args.cuda else {} + # When supported, use 'forkserver' to spawn dataloader workers instead... 
+ # issues with Infiniband implementations that are not fork-safe + if (kwargs.get('num_workers', 0) > 0 and hasattr(mp, '_supports_context') + and + mp._supports_context and + 'forkserver' in mp.get_all_start_methods()): + kwargs['multiprocessing_context'] = 'forkserver' + + # Initialize transformations for data augmentation + transform = transforms.Compose([ + transforms.Resize(256), + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=45), + transforms.ColorJitter( + brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + + # Load the ImageNet Object Localization Challenge dataset + train_dataset = datasets.ImageFolder( + root=args.data_dir, + transform=transform + ) + # test_dataset = ... + + # Horovod: use DistributedSampler to partition the training data + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset, num_replicas=hvd.size(), rank=hvd.rank()) + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, + sampler=train_sampler, **kwargs) + + # create CNN model + model = torchvision.models.resnet152() + + # by default, Adasum doesn't need scaling up learning rate + lr_scaler = hvd.size() if not args.use_adasum else 1 + + if args.cuda: + # move model to GPU. + model.cuda() + # if using GPU Adasum allreduce, scale learning rate by local_size + if args.use_adasum and hvd.nccl_built(): + lr_scaler = hvd.local_size() + + # Horovod: scale learning rate by lr_scaler + optimizer = optim.SGD(model.parameters(), lr=args.lr * lr_scaler, + momentum=args.momentum) + + # Horovod: broadcast parameters & optimizer state + hvd.broadcast_parameters(model.state_dict(), root_rank=0) + hvd.broadcast_optimizer_state(optimizer, root_rank=0) + + # Horovod: (optional) compression algorithm + compression = ( + hvd.Compression.fp16 if args.fp16_allreduce else hvd.Compression.none + ) + + # Horovod: wrap optimizer with DistributedOptimizer + optimizer = hvd.DistributedOptimizer( + optimizer, + named_parameters=model.named_parameters(), + compression=compression, + op=hvd.Adasum if args.use_adasum else hvd.Average, + gradient_predivide_factor=args.gradient_predivide_factor) + + if hvd.rank() == 0 and hvd.local_rank() == 0: + print('TIMER: broadcast:', timer()-st, 's') + + et = timer() + for epoch in range(1, args.epochs + 1): + lt = timer() + train(epoch) + # test() + print('TIMER: hvd.rank():', hvd.rank(), + 'hvd.local_rank():', hvd.local_rank(), + ', epoch time:', timer()-lt, 's') + print('TIMER: last epoch time:', timer()-lt, 's') + print('TIMER: total epoch time:', timer()-et, 's') + + if hvd.rank() == 0 and hvd.local_rank() == 0: + print('\n', torch.cuda.memory_summary(0), '\n') + + print('DEBUG: hvd.rank():', hvd.rank(), + 'hvd.local_rank():', hvd.local_rank(), + ', torch.cuda.memory_reserved():', + int(torch.cuda.memory_reserved(hvd.local_rank())/1024/1024), 'MB') + + if hvd.rank() == 0 and hvd.local_rank() == 0: + print('DEBUG: memory req:', + int(torch.cuda.memory_reserved(hvd.local_rank())/1024/1024), + 'MB') + + print(f" - TRAINING FINISHED") diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/hvd_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/horovod/hvd_slurm.sh new file mode 100644 index 00000000..206bc5af --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/horovod/hvd_slurm.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# general 
configuration of the job +#SBATCH --job-name=Torch_HVD_tutorial-1 +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job-hvd.out +#SBATCH --error=job-hvd.err +#SBATCH --time=00:30:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=4 +#SBATCH --cpus-per-task=8 +#SBATCH --gpus-per-node=4 +# SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../../../../envAI_hdfml/bin/activate + +# job info +debug=false +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set vars +# export NCCL_DEBUG=INFO +export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +# launch training +TRAINING_CMD="horovod_trainer.py -c config.yaml" + +srun --cpu-bind=none python -u $TRAINING_CMD + diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh new file mode 100644 index 00000000..8ed39581 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +rm *checkpoint.pth.tar *.out *.err + +timeout="00:01:00" +for N in 1 2 4 8 +do + sbatch --job-name="HVD-imagenet-pure-n$N" --nodes=$N --output="job-Phvd-n$N.out" --error="job-Phvd-n$N.err" --time=$timeout hvd_slurm.sh +done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh index b1470d75..01b2e9e6 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Run all versions of distributed ML for MNIST +# Run all versions of distributed ML version rm *checkpoint.pth.tar *.out *.err echo "Torch DDP training: $(sbatch ddp_slurm.sh)" echo "DeepSpeed training: $(sbatch deepspeed_slurm.sh)" diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh new file mode 100644 index 00000000..beeb0576 --- /dev/null +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +rm *checkpoint.pth.tar *.out *.err + +timeout="00:01:00" +for N in 1 2 4 8 +do + sbatch --job-name="DDP-imagenet-n$N" --nodes=$N --output="job-ddp-n$N.out" --error="job-ddp-n$N.err" --time=$timeout ddp_slurm.sh + sbatch --job-name="DS-imagenet-n$N" --nodes=$N --output="job-ds-n$N.out" --error="job-ds-n$N.err" --time=$timeout deepspeed_slurm.sh + sbatch --job-name="HVD-imagenet-n$N" --nodes=$N --output="job-hvd-n$N.out" --error="job-hvd-n$N.err" --time=$timeout 
hvd_slurm.sh +done \ No newline at end of file From aa1089ac2bccd58ff4d19a8302d71152d1601dae Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 23 Mar 2024 14:35:29 +0100 Subject: [PATCH 072/171] ADD scaling analysis report --- src/itwinai/cli.py | 115 ++++++++++++++++++ src/itwinai/loggers.py | 17 +++ .../torch-scaling-test/ddp/DDP_trainer.py | 6 + .../torch-scaling-test/ddp/ddp_slurm.sh | 2 +- .../torch-scaling-test/ddp/scaling-test.sh | 4 +- .../deepspeed/DS_trainer.py | 6 + .../deepspeed/deepspeed_slurm.sh | 2 +- .../deepspeed/scaling-test.sh | 4 +- .../horovod/horovod_trainer.py | 11 +- .../torch-scaling-test/horovod/hvd_slurm.sh | 2 +- .../horovod/scaling-test.sh | 4 +- .../torch-scaling-test/runall.sh | 14 +++ .../torch-tutorial-2-imagenet/config.yaml | 2 +- .../torch-tutorial-2-imagenet/ddp_slurm.sh | 2 +- .../deepspeed_slurm.sh | 2 +- .../torch-tutorial-2-imagenet/hvd_slurm.sh | 2 +- .../torch-tutorial-2-imagenet/runall.sh | 2 +- .../torch-tutorial-2-imagenet/scaling-test.sh | 4 +- .../torch-tutorial-2-imagenet/train.py | 6 + 19 files changed, 190 insertions(+), 17 deletions(-) create mode 100644 tutorials/distributed-ml/torch-scaling-test/runall.sh diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py index 20977961..d15a26ce 100644 --- a/src/itwinai/cli.py +++ b/src/itwinai/cli.py @@ -19,6 +19,121 @@ app = typer.Typer() +@app.command() +def scalability_report( + pattern: Annotated[str, typer.Option( + help="Python pattern matching names of CSVs in sub-folders." + )], + plot_title: Annotated[Optional[str], typer.Option( + help=("Plot name.") + )] = None, + skip_id: Annotated[Optional[int], typer.Option( + help=("Skip epoch ID.") + )] = None, + archive: Annotated[Optional[str], typer.Option( + help=("Archive path where to backup the data, WITHOUT EXTENSION.") + )] = None, +): + """ + Generate scalability report merging all CSVs containing epoch time + records in sub-folders. + + Example: + >>> itwinai scalability-report --pattern="^epoch.+\.csv$" --skip-id 0 \ + >>> --plot-title "Some title" --archive folder/archive_name + """ + # TODO: add max depth and path different from CWD + import os + import re + import shutil + import pandas as pd + import matplotlib.pyplot as plt + import numpy as np + + regex = re.compile(r'{}'.format(pattern)) + combined_df = pd.DataFrame() + csv_files = [] + for root, _, files in os.walk(os.getcwd()): + for file in files: + if regex.match(file): + fpath = os.path.join(root, file) + csv_files.append(fpath) + df = pd.read_csv(fpath) + if skip_id is not None: + df = df.drop(df[df.epoch_id == skip_id].index) + combined_df = pd.concat([combined_df, df]) + print("Merged CSV:") + print(combined_df) + + avg_times = ( + combined_df + .drop(columns='epoch_id') + .groupby(['name', 'nodes']) + .mean() + .reset_index() + ) + print("\nAvg over name and nodes:") + print(avg_times.rename(columns=dict(time='avg(time)'))) + + # fig, (sp_up_ax, eff_ax) = plt.subplots(1, 2, figsize=(12, 4)) + fig, sp_up_ax = plt.subplots(1, 1, figsize=(6, 4)) + if plot_title is not None: + fig.suptitle(plot_title) + + for name in set(avg_times.name.values): + df = avg_times[avg_times.name == name].drop(columns='name') + + # Debug + # compute_time = [3791., 1884., 1011., 598.] + # nodes = [1, 2, 4, 8] + # d = {'nodes': nodes, 'time': compute_time} + # df = pd.DataFrame(data=d) + + df["NGPUs"] = df["nodes"]*4 + # speedup + df["Speedup - ideal"] = df["nodes"].astype(float) + df["Speedup"] = df["time"].iloc[0] / df["time"] + df["Nworkers"] = 1 + + # efficiency + df["Threadscaled Sim. 
Time / s"] = df["time"] * \ + df["nodes"] * df["Nworkers"] + df["Efficiency"] = df["Threadscaled Sim. Time / s"].iloc[0] / \ + df["Threadscaled Sim. Time / s"] + + # Plot + sp_up_ax.plot( + df["NGPUs"].values, df["Speedup"].values, + marker='*', lw=1.0, label=name) + sp_up_ax.plot(df["NGPUs"].values, df["Speedup - ideal"].values, + ls='dashed', lw=1.0, c='k', label="ideal") + sp_up_ax.legend(ncol=1) + sp_up_ax.set_xticks(df["NGPUs"].values) + sp_up_ax.set_yticks(df["Speedup - ideal"].values) + sp_up_ax.set_ylabel('Speedup') + sp_up_ax.set_xlim((0, np.amax(df["NGPUs"].values+1))) + sp_up_ax.set_ylim((0, np.amax(df["Speedup - ideal"].values+1))) + sp_up_ax.grid() + plot_png = f"scaling_plot_{plot_title}.png" + plt.savefig(plot_png) + + if archive is not None: + tmp_d = archive + os.makedirs(tmp_d) + for csvfile in csv_files: + shutil.copyfile(csvfile, os.path.join(tmp_d, + os.path.basename(csvfile))) + shutil.copyfile(plot_png, os.path.join(tmp_d, plot_png)) + avg_times.to_csv(os.path.join(tmp_d, "avg_times.csv"), index=False) + archive_name = shutil.make_archive( + base_name=archive, + format='gztar', + root_dir=os.path.dirname(archive), + ) + shutil.rmtree(tmp_d) + print("Archived logs and plot at: ", archive_name) + + @app.command() def exec_pipeline( config: Annotated[Path, typer.Option( diff --git a/src/itwinai/loggers.py b/src/itwinai/loggers.py index d04becd7..e553a1b0 100644 --- a/src/itwinai/loggers.py +++ b/src/itwinai/loggers.py @@ -1,6 +1,7 @@ """Abstraction for loggers.""" import os +import csv from abc import ABCMeta, abstractmethod from contextlib import contextmanager from typing import Any, Dict, List, Optional, Union @@ -448,3 +449,19 @@ def log( batch_idx=batch_idx, **kwargs ) + + +class EpochTimeTracker: + def __init__(self, series_name: str) -> None: + self.series_name = series_name + self._data = [] + + def add_epoch_time(self, epoch_idx, time): + n_nodes = os.environ.get('SLURM_NNODES', -1) + self._data.append((self.series_name, n_nodes, epoch_idx, time)) + + def save(self, csv_file: str): + with open(csv_file, 'w') as csvfile: + csvwriter = csv.writer(csvfile) + csvwriter.writerow(['name', 'nodes', 'epoch_id', 'time']) + csvwriter.writerows(self._data) diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py b/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py index b7235078..e318474e 100755 --- a/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py @@ -16,6 +16,7 @@ from torchvision import datasets, transforms from itwinai.parser import ArgumentParser as ItAIArgumentParser +from itwinai.loggers import EpochTimeTracker def pars_ini(): @@ -374,6 +375,7 @@ def main(): print('TIMER: broadcast:', time.time()-st, 's') print('\nDEBUG: start training') print('--------------------------------------------------------') + epoch_time_tracker = EpochTimeTracker(series_name="ddp-bl") et = time.time() for epoch in range(start_epoch, args.epochs + 1): @@ -404,6 +406,7 @@ def main(): if grank == 0: print('TIMER: epoch time:', time.time()-lt, 's') + epoch_time_tracker.add_epoch_time(epoch-1, time.time()-lt) # print('DEBUG: accuracy:', acc_test, '%') if args.benchrun and epoch == args.epochs: print('\n----------------------------------------------------') @@ -454,6 +457,9 @@ def main(): if grank == 0: print(f'TIMER: final time: {time.time()-st} s\n') + nnod = os.environ.get('SLURM_NNODES', 'unk') + epoch_time_tracker.save( + csv_file=f"epochtime_ddp-bl_{nnod}N.csv") print(f" - 
TRAINING FINISHED") diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/ddp_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/ddp/ddp_slurm.sh index b342ceb3..e85b9f1e 100644 --- a/tutorials/distributed-ml/torch-scaling-test/ddp/ddp_slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/ddp/ddp_slurm.sh @@ -15,7 +15,7 @@ #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=32 #SBATCH --gpus-per-node=4 -# SBATCH --exclusive +#SBATCH --exclusive # gres options have to be disabled for deepv #SBATCH --gres=gpu:4 diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh index 12476f41..44522766 100644 --- a/tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh +++ b/tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh @@ -1,8 +1,8 @@ #!/bin/bash -rm *checkpoint.pth.tar *.out *.err +rm *checkpoint.pth.tar *.out *.err *.csv -timeout="00:01:00" +timeout="01:01:00" for N in 1 2 4 8 do sbatch --job-name="DDP-imagenet-pure-n$N" --nodes=$N --output="job-Pddp-n$N.out" --error="job-Pddp-n$N.err" --time=$timeout ddp_slurm.sh diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py b/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py index e4f89eb0..79e741e9 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py @@ -16,6 +16,7 @@ from torchvision import datasets, transforms from itwinai.parser import ArgumentParser as ItAIArgumentParser +from itwinai.loggers import EpochTimeTracker def parsIni(): @@ -293,6 +294,7 @@ def main(): print('TIMER: broadcast:', time.time()-st, 's') print('\nDEBUG: start training') print('--------------------------------------------------------') + epoch_time_tracker = EpochTimeTracker(series_name="deepspeed-bl") et = time.time() for epoch in range(start_epoch, args.epochs + 1): @@ -314,6 +316,7 @@ def main(): if grank == 0: print('TIMER: epoch time:', time.time()-lt, 's') + epoch_time_tracker.add_epoch_time(epoch-1, time.time()-lt) # print('DEBUG: accuracy:', acc_test, '%') # finalise @@ -334,6 +337,9 @@ def main(): if grank == 0: print(f'TIMER: final time: {time.time()-st} s\n') + nnod = os.environ.get('SLURM_NNODES', 'unk') + epoch_time_tracker.save( + csv_file=f"epochtime_deepspeed-bl_{nnod}N.csv") print(f" - TRAINING FINISHED") diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/deepspeed/deepspeed_slurm.sh index 9ba733f0..e4cc784a 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed/deepspeed_slurm.sh @@ -15,7 +15,7 @@ #SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=4 #SBATCH --gpus-per-node=4 -# SBATCH --exclusive +#SBATCH --exclusive # gres options have to be disabled for deepv #SBATCH --gres=gpu:4 diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh index 9fff1316..2b34df6a 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh @@ -1,8 +1,8 @@ #!/bin/bash -rm *checkpoint.pth.tar *.out *.err +rm *checkpoint.pth.tar *.out *.err *.csv -timeout="00:01:00" +timeout="01:01:00" for N in 1 2 4 8 do sbatch --job-name="DS-imagenet-pure-n$N" 
--nodes=$N --output="job-Pds-n$N.out" --error="job-Pds-n$N.err" --time=$timeout deepspeed_slurm.sh diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py b/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py index 9ccfecbc..af6b0900 100755 --- a/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py @@ -2,6 +2,7 @@ Scaling test of Horovod on Imagenet using Resnet. """ import argparse +import os import sys from timeit import default_timer as timer @@ -14,6 +15,7 @@ from torchvision import datasets, transforms from itwinai.parser import ArgumentParser as ItAIArgumentParser +from itwinai.loggers import EpochTimeTracker def parsIni(): @@ -61,7 +63,6 @@ def train(epoch): model.train() # Horovod: set epoch to sampler for shuffling train_sampler.set_epoch(epoch) - print('Training:') for batch_idx, (data, target) in enumerate(train_loader): if args.cuda: data, target = data.cuda(), target.cuda() @@ -222,6 +223,7 @@ def metric_average(val, namegiv): if hvd.rank() == 0 and hvd.local_rank() == 0: print('TIMER: broadcast:', timer()-st, 's') + epoch_time_tracker = EpochTimeTracker(series_name="horovod-bl") et = timer() for epoch in range(1, args.epochs + 1): @@ -231,11 +233,18 @@ def metric_average(val, namegiv): print('TIMER: hvd.rank():', hvd.rank(), 'hvd.local_rank():', hvd.local_rank(), ', epoch time:', timer()-lt, 's') + + if hvd.rank() == 0 and hvd.local_rank() == 0: + epoch_time_tracker.add_epoch_time(epoch-1, timer()-lt) + print('TIMER: last epoch time:', timer()-lt, 's') print('TIMER: total epoch time:', timer()-et, 's') if hvd.rank() == 0 and hvd.local_rank() == 0: print('\n', torch.cuda.memory_summary(0), '\n') + nnod = os.environ.get('SLURM_NNODES', 'unk') + epoch_time_tracker.save( + csv_file=f"epochtime_horovod-bl_{nnod}N.csv") print('DEBUG: hvd.rank():', hvd.rank(), 'hvd.local_rank():', hvd.local_rank(), diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/hvd_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/horovod/hvd_slurm.sh index 206bc5af..e76532b1 100644 --- a/tutorials/distributed-ml/torch-scaling-test/horovod/hvd_slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/horovod/hvd_slurm.sh @@ -15,7 +15,7 @@ #SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=8 #SBATCH --gpus-per-node=4 -# SBATCH --exclusive +#SBATCH --exclusive # gres options have to be disabled for deepv #SBATCH --gres=gpu:4 diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh index 8ed39581..33f9ca37 100644 --- a/tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh +++ b/tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh @@ -1,8 +1,8 @@ #!/bin/bash -rm *checkpoint.pth.tar *.out *.err +rm *checkpoint.pth.tar *.out *.err *.csv -timeout="00:01:00" +timeout="01:01:00" for N in 1 2 4 8 do sbatch --job-name="HVD-imagenet-pure-n$N" --nodes=$N --output="job-Phvd-n$N.out" --error="job-Phvd-n$N.err" --time=$timeout hvd_slurm.sh diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh new file mode 100644 index 00000000..72245f1c --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Run all versions of distributed ML version + +for fold in ddp horovod deepspeed +do + cd $fold + rm *checkpoint.pth.tar *.out *.err *.csv + 
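+    # The Horovod launcher is named hvd_slurm.sh, so remap the variable
+    # before submitting; ddp and deepspeed follow the <folder>_slurm.sh pattern.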
if [ $fold == "horovod" ] + then + fold="hvd" + fi + echo $fold" training: $(sbatch $fold"_slurm.sh")" + cd .. +done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml index f101a083..56e4f6f7 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml @@ -1,5 +1,5 @@ # I/O -data_dir: /p/project/intertwin/datasets/ImageNet_uncompressed/train +data_dir: tmp_data #/p/project/intertwin/datasets/ImageNet_uncompressed/train restart_int: 10 verbose: True diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh index 3d5d4bb3..4e9749c2 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh @@ -15,7 +15,7 @@ #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=32 #SBATCH --gpus-per-node=4 -# SBATCH --exclusive +#SBATCH --exclusive # gres options have to be disabled for deepv #SBATCH --gres=gpu:4 diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh index 8e5f7881..8f1c2d2d 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh @@ -15,7 +15,7 @@ #SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=4 #SBATCH --gpus-per-node=4 -# SBATCH --exclusive +#SBATCH --exclusive # gres options have to be disabled for deepv #SBATCH --gres=gpu:4 diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh index 3774b6e1..69b9d51e 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh @@ -15,7 +15,7 @@ #SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=8 #SBATCH --gpus-per-node=4 -# SBATCH --exclusive +#SBATCH --exclusive # gres options have to be disabled for deepv #SBATCH --gres=gpu:4 diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh index 01b2e9e6..21c02a22 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh @@ -1,6 +1,6 @@ #!/bin/bash # Run all versions of distributed ML version -rm *checkpoint.pth.tar *.out *.err +rm *checkpoint.pth.tar *.out *.err *.csv echo "Torch DDP training: $(sbatch ddp_slurm.sh)" echo "DeepSpeed training: $(sbatch deepspeed_slurm.sh)" echo "Horovod training: $(sbatch hvd_slurm.sh)" \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh index beeb0576..275f7fb7 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh @@ -1,8 +1,8 @@ #!/bin/bash -rm *checkpoint.pth.tar *.out *.err +rm *checkpoint.pth.tar *.out *.err *.csv -timeout="00:01:00" +timeout="01:01:00" for N in 1 2 4 8 do sbatch --job-name="DDP-imagenet-n$N" --nodes=$N --output="job-ddp-n$N.out" --error="job-ddp-n$N.err" --time=$timeout ddp_slurm.sh diff --git 
a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py index d78dc1b3..2c51ae25 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py @@ -27,6 +27,7 @@ DSDistributedStrategy, ) from itwinai.parser import ArgumentParser as ItAIArgumentParser +from itwinai.loggers import EpochTimeTracker def parse_args() -> argparse.Namespace: @@ -401,6 +402,7 @@ def seed_worker(worker_id): print('TIMER: broadcast:', time.time()-st, 's') print('\nDEBUG: start training') print('--------------------------------------------------------') + epoch_time_tracker = EpochTimeTracker(series_name=args.strategy) et = time.time() for epoch in range(start_epoch, args.epochs + 1): @@ -435,6 +437,7 @@ def seed_worker(worker_id): if strategy.is_main_worker(): print('TIMER: epoch time:', time.time()-lt, 's') + epoch_time_tracker.add_epoch_time(epoch-1, time.time()-lt) # print('DEBUG: accuracy:', acc_test, '%') # save state if found a better state @@ -486,6 +489,9 @@ def seed_worker(worker_id): if strategy.is_main_worker(): print(f'TIMER: final time: {time.time()-st} s\n') + nnod = os.environ.get('SLURM_NNODES', 'unk') + epoch_time_tracker.save( + csv_file=f"epochtime_{args.strategy}_{nnod}N.csv") print(f" - TRAINING FINISHED") strategy.clean_up() From f6a0b23de11ade677190d5661ba3ef3f47ddc73b Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 23 Mar 2024 14:52:51 +0100 Subject: [PATCH 073/171] FIX deepspeed micro batchsize --- .../distributed-ml/torch-scaling-test/ddp/DDP_trainer.py | 2 ++ .../distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py | 4 +++- .../torch-scaling-test/horovod/horovod_trainer.py | 2 ++ tutorials/distributed-ml/torch-scaling-test/runall.sh | 1 + tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py | 2 +- 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py b/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py index e318474e..eacf27ef 100755 --- a/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py @@ -80,6 +80,8 @@ def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): if grank == 0: print("\n") for batch_idx, (data, target) in enumerate(train_loader): + # if grank == 0: + # print(f"BS == DATA: {data.shape}, TARGET: {target.shape}") t = time.perf_counter() data, target = data.to(device), target.to(device) optimizer.zero_grad() diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py b/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py index 79e741e9..f0f4ea8d 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py @@ -66,6 +66,8 @@ def train(args, model, train_loader, optimizer, epoch, grank, gwsize): if grank == 0: print("\n") for batch_idx, (data, target) in enumerate(train_loader): + # if grank == 0: + # print(f"BS == DATA: {data.shape}, TARGET: {target.shape}") t = time.perf_counter() data, target = data.to(device), target.to(device) optimizer.zero_grad() @@ -239,7 +241,7 @@ def main(): # 2) DeepSpeed optimizer # 3) Distributed data loader deepspeed_config = { - "train_batch_size": args.batch_size, + "train_micro_batch_size_per_gpu": args.batch_size, "optimizer": { "type": "SGD", "params": { diff --git 
a/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py b/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py index af6b0900..10cbdd08 100755 --- a/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py @@ -64,6 +64,8 @@ def train(epoch): # Horovod: set epoch to sampler for shuffling train_sampler.set_epoch(epoch) for batch_idx, (data, target) in enumerate(train_loader): + # if hvd.local_rank() == 0 and hvd.rank() == 0: + # print(f"BS == DATA: {data.shape}, TARGET: {target.shape}") if args.cuda: data, target = data.cuda(), target.cuda() optimizer.zero_grad() diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index 72245f1c..eeae5448 100644 --- a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -9,6 +9,7 @@ do then fold="hvd" fi + # echo $fold" training: $(sbatch --nodes=1 $fold"_slurm.sh")" echo $fold" training: $(sbatch $fold"_slurm.sh")" cd .. done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py index 2c51ae25..795ab612 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py @@ -344,7 +344,7 @@ def seed_worker(worker_id): optimizer = torch.optim.SGD( model.parameters(), lr=args.lr, momentum=args.momentum) - deepspeed_config = dict(train_batch_size=args.batch_size) + deepspeed_config = dict(train_micro_batch_size_per_gpu=args.batch_size) # 'config_params' key is ignored if strategy != DSDistributedStrategy distrib_model, optimizer, _ = strategy.distributed( model, optimizer, lr_scheduler=None, config_params=deepspeed_config From 3c0061e2ccc813cdcf53e82de7118faeda35356a Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 23 Mar 2024 14:56:26 +0100 Subject: [PATCH 074/171] UPDATE data path --- tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml | 2 +- .../distributed-ml/torch-scaling-test/deepspeed/config.yaml | 2 +- .../distributed-ml/torch-scaling-test/horovod/config.yaml | 4 ++-- .../distributed-ml/torch-tutorial-2-imagenet/config.yaml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml b/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml index 219ae32e..c43da3fb 100644 --- a/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml @@ -1,5 +1,5 @@ # I/O -data_dir: ../tmp_data #/p/project/intertwin/datasets/ImageNet_uncompressed/train +data_dir: /p/project/intertwin/datasets/ImageNet_uncompressed/train restart_int: 10 verbose: True diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml b/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml index 5711941c..05a1f0e8 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml @@ -1,5 +1,5 @@ # I/O -data_dir: ../tmp_data #/p/project/intertwin/datasets/ImageNet_uncompressed/train +data_dir: /p/project/intertwin/datasets/ImageNet_uncompressed/train restart_int: 10 verbose: True diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml 
b/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml index 28b53b97..12bbe32d 100644 --- a/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml @@ -1,7 +1,6 @@ # I/O -data_dir: ../tmp_data #/p/project/intertwin/datasets/ImageNet_uncompressed/train +data_dir: /p/project/intertwin/datasets/ImageNet_uncompressed/train verbose: True -nworker: 4 # num workers dataloader # Model batch_size: 64 @@ -15,6 +14,7 @@ seed: 10 log_interval: 10 # Distributed ML +nworker: 4 # num workers dataloader no_cuda: False fp16_allreduce: False gradient_predivide_factor: 1.0 \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml index 56e4f6f7..f101a083 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml @@ -1,5 +1,5 @@ # I/O -data_dir: tmp_data #/p/project/intertwin/datasets/ImageNet_uncompressed/train +data_dir: /p/project/intertwin/datasets/ImageNet_uncompressed/train restart_int: 10 verbose: True From 7e72afcf57fa500c97c13cf578ca1e8948b394b6 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sun, 24 Mar 2024 00:06:13 +0100 Subject: [PATCH 075/171] UPDATE checkpoint to avoid race conditions --- tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py | 3 ++- .../distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py | 3 ++- tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py b/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py index eacf27ef..acfc1059 100755 --- a/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py @@ -328,7 +328,8 @@ def main(): # resume state start_epoch = 1 best_acc = np.Inf - res_name = 'ddp-checkpoint.pth.tar' + nnod = os.environ.get('SLURM_NNODES', 'unk') + res_name = f'ddp-{nnod}N-checkpoint.pth.tar' if os.path.isfile(res_name) and not args.benchrun: try: if torch.cuda.is_available(): diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py b/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py index f0f4ea8d..3589278c 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py @@ -266,7 +266,8 @@ def main(): # resume state start_epoch = 1 best_acc = np.Inf - res_name = 'ds-checkpoint.pth.tar' + nnod = os.environ.get('SLURM_NNODES', 'unk') + res_name = f'ds-{nnod}N-checkpoint.pth.tar' if os.path.isfile(res_name): try: dist.barrier() diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py index 795ab612..6bd71214 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py @@ -353,7 +353,8 @@ def seed_worker(worker_id): # resume state start_epoch = 1 best_acc = np.Inf - res_name = f'{args.strategy}-checkpoint.pth.tar' + nnod = os.environ.get('SLURM_NNODES', 'unk') + res_name = f'{args.strategy}-{nnod}N-checkpoint.pth.tar' if os.path.isfile(res_name): try: if torch.cuda.is_available(): From a68bf66dc5c035dcc3c8295202759e8f69d65676 Mon Sep 17 00:00:00 2001 From: Matteo 
Bunino Date: Mon, 25 Mar 2024 15:11:23 +0100 Subject: [PATCH 076/171] UPDATE scalability report --- .gitignore | 2 ++ src/itwinai/cli.py | 41 +++++++++++++++++++++++++++-------------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index d0086ae1..187b261d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +*.png +*.pdf *_logs TODO /data diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py index d15a26ce..d52c7c40 100644 --- a/src/itwinai/cli.py +++ b/src/itwinai/cli.py @@ -31,7 +31,7 @@ def scalability_report( help=("Skip epoch ID.") )] = None, archive: Annotated[Optional[str], typer.Option( - help=("Archive path where to backup the data, WITHOUT EXTENSION.") + help=("Archive name to backup the data, without extension.") )] = None, ): """ @@ -40,7 +40,7 @@ def scalability_report( Example: >>> itwinai scalability-report --pattern="^epoch.+\.csv$" --skip-id 0 \ - >>> --plot-title "Some title" --archive folder/archive_name + >>> --plot-title "Some title" --archive archive_name """ # TODO: add max depth and path different from CWD import os @@ -108,29 +108,42 @@ def scalability_report( sp_up_ax.plot(df["NGPUs"].values, df["Speedup - ideal"].values, ls='dashed', lw=1.0, c='k', label="ideal") sp_up_ax.legend(ncol=1) + sp_up_ax.set_xticks(df["NGPUs"].values) - sp_up_ax.set_yticks(df["Speedup - ideal"].values) + sp_up_ax.set_yticks( + np.arange(1, np.max(df["Speedup - ideal"].values) + 2, 1)) + sp_up_ax.set_ylabel('Speedup') - sp_up_ax.set_xlim((0, np.amax(df["NGPUs"].values+1))) - sp_up_ax.set_ylim((0, np.amax(df["Speedup - ideal"].values+1))) + sp_up_ax.set_xlabel('NGPUs (4 per node)') sp_up_ax.grid() plot_png = f"scaling_plot_{plot_title}.png" - plt.savefig(plot_png) + plt.tight_layout() + plt.savefig(plot_png, bbox_inches='tight', format='png') + print("Saved scaling plot to: ", plot_png) if archive is not None: - tmp_d = archive - os.makedirs(tmp_d) + if '/' in archive: + raise ValueError("Archive name must NOT contain a path. " + f"Received: '{archive}'") + if '.' in archive: + raise ValueError("Archive name must NOT contain an extension. " + f"Received: '{archive}'") + if os.path.isdir(archive): + raise ValueError(f"Folder '{archive}' already exists. 
" + "Change archive name.") + os.makedirs(archive) for csvfile in csv_files: - shutil.copyfile(csvfile, os.path.join(tmp_d, + shutil.copyfile(csvfile, os.path.join(archive, os.path.basename(csvfile))) - shutil.copyfile(plot_png, os.path.join(tmp_d, plot_png)) - avg_times.to_csv(os.path.join(tmp_d, "avg_times.csv"), index=False) + shutil.copyfile(plot_png, os.path.join(archive, plot_png)) + avg_times.to_csv(os.path.join(archive, "avg_times.csv"), index=False) archive_name = shutil.make_archive( - base_name=archive, + base_name=archive, # archive file name format='gztar', - root_dir=os.path.dirname(archive), + # root_dir='.', + base_dir=archive # folder path inside archive ) - shutil.rmtree(tmp_d) + shutil.rmtree(archive) print("Archived logs and plot at: ", archive_name) From 123dbb61c110a7345b748766ca7b27af41ac6b32 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 25 Mar 2024 23:35:56 +0100 Subject: [PATCH 077/171] UPDATE dataset path --- tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml | 2 +- .../distributed-ml/torch-scaling-test/deepspeed/config.yaml | 2 +- tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml | 2 +- tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml b/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml index c43da3fb..8cada7be 100644 --- a/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml @@ -1,5 +1,5 @@ # I/O -data_dir: /p/project/intertwin/datasets/ImageNet_uncompressed/train +data_dir: /p/project/intertwin/datasets/Imagenet_sub/ImageNet_uncompressed/train/ #/p/project/intertwin/datasets/ImageNet_uncompressed/train restart_int: 10 verbose: True diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml b/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml index 05a1f0e8..879f94fb 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml @@ -1,5 +1,5 @@ # I/O -data_dir: /p/project/intertwin/datasets/ImageNet_uncompressed/train +data_dir: /p/project/intertwin/datasets/Imagenet_sub/ImageNet_uncompressed/train/ #/p/project/intertwin/datasets/ImageNet_uncompressed/train restart_int: 10 verbose: True diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml b/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml index 12bbe32d..50a408ad 100644 --- a/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml @@ -1,5 +1,5 @@ # I/O -data_dir: /p/project/intertwin/datasets/ImageNet_uncompressed/train +data_dir: /p/project/intertwin/datasets/Imagenet_sub/ImageNet_uncompressed/train/ #/p/project/intertwin/datasets/ImageNet_uncompressed/train verbose: True # Model diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml index f101a083..2473d346 100644 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml +++ b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml @@ -1,5 +1,5 @@ # I/O -data_dir: /p/project/intertwin/datasets/ImageNet_uncompressed/train +data_dir: /p/project/intertwin/datasets/Imagenet_sub/ImageNet_uncompressed/train/ #/p/project/intertwin/datasets/ImageNet_uncompressed/train 
restart_int: 10 verbose: True From ddc62c4b9367e9631c7172007ff04d64e913ef7f Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 9 Apr 2024 10:21:29 +0200 Subject: [PATCH 078/171] Update createEnvJSC.sh --- env-files/torch/createEnvJSC.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env-files/torch/createEnvJSC.sh b/env-files/torch/createEnvJSC.sh index 456022a1..450519ef 100644 --- a/env-files/torch/createEnvJSC.sh +++ b/env-files/torch/createEnvJSC.sh @@ -20,7 +20,7 @@ ml Python CMake HDF5 PnetCDF libaio mpi4py # ml # get python version -pver="$(python --version 2>&1 | awk {'print $2'} | cut -f1-2 -d.)" +pver="$(python --version 2>&1 | awk {'print $2;'} | cut -f1-2 -d.)" # use pyenv if exist if [ -d "$HOME/.pyenv" ];then From 7aa6b65f86481b25461ae8a6b0a18764b85cae26 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 9 Apr 2024 11:02:41 +0200 Subject: [PATCH 079/171] Update createEnvJSC.sh --- env-files/torch/createEnvJSC.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env-files/torch/createEnvJSC.sh b/env-files/torch/createEnvJSC.sh index 450519ef..68174dc4 100644 --- a/env-files/torch/createEnvJSC.sh +++ b/env-files/torch/createEnvJSC.sh @@ -20,7 +20,7 @@ ml Python CMake HDF5 PnetCDF libaio mpi4py # ml # get python version -pver="$(python --version 2>&1 | awk {'print $2;'} | cut -f1-2 -d.)" +pver="$(python --version 2>&1 | awk '{print $2}' | cut -f1-2 -d.)" # use pyenv if exist if [ -d "$HOME/.pyenv" ];then From fcc7d7b41070714ff218d1fc171452c1ac23527a Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 9 Apr 2024 11:18:50 +0200 Subject: [PATCH 080/171] Update createEnvJSC.sh --- env-files/torch/createEnvJSC.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env-files/torch/createEnvJSC.sh b/env-files/torch/createEnvJSC.sh index 68174dc4..aeb2410f 100644 --- a/env-files/torch/createEnvJSC.sh +++ b/env-files/torch/createEnvJSC.sh @@ -32,7 +32,7 @@ fi if [ -d "${cDir}/envAI_${sysN}" ];then echo 'env already exist' - source envAI_${sysN}/bin/activate + source envAI_$sysN/bin/activate else python3 -m venv envAI_${sysN} From 16562d3ba3826dc4030d4f8dfd1d3c95e5529fcb Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 9 Apr 2024 11:26:44 +0200 Subject: [PATCH 081/171] Update createEnvJSC.sh --- env-files/torch/createEnvJSC.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env-files/torch/createEnvJSC.sh b/env-files/torch/createEnvJSC.sh index aeb2410f..0c6357b3 100644 --- a/env-files/torch/createEnvJSC.sh +++ b/env-files/torch/createEnvJSC.sh @@ -32,7 +32,7 @@ fi if [ -d "${cDir}/envAI_${sysN}" ];then echo 'env already exist' - source envAI_$sysN/bin/activate + source ./envAI_${sysN}/bin/activate else python3 -m venv envAI_${sysN} From 3865a8f203291fe7f9696450c9a4411c8e343856 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 9 Apr 2024 11:36:48 +0200 Subject: [PATCH 082/171] Update createEnvJSC.sh --- env-files/torch/createEnvJSC.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env-files/torch/createEnvJSC.sh b/env-files/torch/createEnvJSC.sh index 0c6357b3..68174dc4 100644 --- a/env-files/torch/createEnvJSC.sh +++ b/env-files/torch/createEnvJSC.sh @@ -32,7 +32,7 @@ fi if [ -d "${cDir}/envAI_${sysN}" ];then echo 'env already exist' - source ./envAI_${sysN}/bin/activate + source 
envAI_${sysN}/bin/activate else python3 -m venv envAI_${sysN} From 2d7b672f79d5bbc8bfe1ea0e2fc13a48d645df07 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 9 Apr 2024 11:37:20 +0200 Subject: [PATCH 083/171] Update createEnvJSCTF.sh --- env-files/tensorflow/createEnvJSCTF.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env-files/tensorflow/createEnvJSCTF.sh b/env-files/tensorflow/createEnvJSCTF.sh index cc014cd3..df6799ec 100644 --- a/env-files/tensorflow/createEnvJSCTF.sh +++ b/env-files/tensorflow/createEnvJSCTF.sh @@ -36,7 +36,7 @@ echo "modules loaded" echo # get python version -pver="$(python --version 2>&1 | awk {'print $2'} | cut -f1-2 -d.)" +pver="$(python --version 2>&1 | awk '{print $2}' | cut -f1-2 -d.)" echo "python version is ${pver}" echo From c6edd77fa3c84c1bddd63cd217937de4ad4ec827 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 9 Apr 2024 11:42:52 +0200 Subject: [PATCH 084/171] Update README.md --- tutorials/distributed-ml/tf-tutorial-0-basics/README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/README.md b/tutorials/distributed-ml/tf-tutorial-0-basics/README.md index 983aee69..6b46437f 100644 --- a/tutorials/distributed-ml/tf-tutorial-0-basics/README.md +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/README.md @@ -1,6 +1,9 @@ # Tutorial: distributed strategies for Tensorflow -In this tutorial we show how to use Tensorflow `MultiWorkerMirroredStrategy`. Note that the environment is tested on the HDFML system at JSC. For other systems, the module versions might need change accordingly. Other strategies will be updated here. +In this tutorial we show how to use Tensorflow `MultiWorkerMirroredStrategy`. +Note that the environment is tested on the HDFML system at JSC. +For other systems, the module versions might need change accordingly. +Other strategies will be updated here. First, from the root of this repo, build the environment containing Tensorflow. You can *try* with: From b34f73bc1b209fe07e9df770db449b716d08ec30 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 9 Apr 2024 11:45:26 +0200 Subject: [PATCH 085/171] Update README.md --- tutorials/distributed-ml/tf-tutorial-0-basics/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/README.md b/tutorials/distributed-ml/tf-tutorial-0-basics/README.md index 6b46437f..90cfeb84 100644 --- a/tutorials/distributed-ml/tf-tutorial-0-basics/README.md +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/README.md @@ -5,7 +5,7 @@ Note that the environment is tested on the HDFML system at JSC. For other systems, the module versions might need change accordingly. Other strategies will be updated here. -First, from the root of this repo, build the environment containing +First, from the root of this repository, build the environment containing Tensorflow. 
You can *try* with: ```bash From 0c971897378ae9847b49286ff602cb10f247525a Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Tue, 9 Apr 2024 18:24:28 +0200 Subject: [PATCH 086/171] JUBE benchmarks --- .../torch-scaling-test/DDP_trainer.py | 479 ++++++++++++++++++ .../torch-scaling-test/general_jobsys.xml | 142 ++++++ .../torch-scaling-test/jube_ddp.sh | 61 +++ 3 files changed, 682 insertions(+) create mode 100644 tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py create mode 100644 tutorials/distributed-ml/torch-scaling-test/general_jobsys.xml create mode 100644 tutorials/distributed-ml/torch-scaling-test/jube_ddp.sh diff --git a/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py b/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py new file mode 100644 index 00000000..08fcbf98 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py @@ -0,0 +1,479 @@ +""" +Scaling test of torch Distributed Data Parallel on Imagenet using Resnet. +""" +import argparse +import sys +import os +import time +import random +import numpy as np +import logging + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +import torchvision +from torchvision import datasets, transforms + +import argparse + +#from itwinai.parser import ArgumentParser as ItAIArgumentParser +#from itwinai.loggers import EpochTimeTracker + + +def pars_ini(): + parser = argparse.ArgumentParser(description='itwinai - parsed arguments') + + # IO parsers + parser.add_argument('--data-dir', default='./', + help=('location of the training dataset in the ' + 'local filesystem')) + parser.add_argument('--restart-int', type=int, default=10, + help='restart interval per epoch (default: 10)') + parser.add_argument('--verbose', + action=argparse.BooleanOptionalAction, + help='Print parsed arguments') + + # model parsers + parser.add_argument('--batch-size', type=int, default=64, + help='input batch size for training (default: 64)') + parser.add_argument('--epochs', type=int, default=10, + help='number of epochs to train (default: 10)') + parser.add_argument('--lr', type=float, default=0.01, + help='learning rate (default: 0.01)') + parser.add_argument('--momentum', type=float, default=0.5, + help='momentum in SGD optimizer (default: 0.5)') + parser.add_argument('--shuff', action='store_true', default=False, + help='shuffle dataset (default: False)') + + # debug parsers + parser.add_argument('--testrun', action='store_true', default=False, + help='do a test run with seed (default: False)') + parser.add_argument('--nseed', type=int, default=0, + help='seed integer for reproducibility (default: 0)') + parser.add_argument('--log-int', type=int, default=10, + help='log interval per training') + parser.add_argument('--benchrun', + action='store_true', default=True) + + # parallel parsers + parser.add_argument('--backend', type=str, default='nccl', + help='backend for parrallelisation (default: nccl)') + parser.add_argument('--nworker', type=int, default=0, + help=('number of workers in DataLoader ' + '(default: 0 - only main)')) + parser.add_argument('--prefetch', type=int, default=2, + help='prefetch data in DataLoader (default: 2)') + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables GPGPUs') + + args = parser.parse_args() + + if args.verbose: + args_list = [f"{key}: {val}" for key, val in args.items()] + print("PARSED ARGS:\n", '\n'.join(args_list)) + return args + + +def train(model, 
device, train_loader, optimizer, epoch, grank, gwsize, args): + model.train() + lt_1 = time.perf_counter() + loss_acc = 0 + if grank == 0: + print("\n") + for batch_idx, (data, target) in enumerate(train_loader): + # if grank == 0: + # print(f"BS == DATA: {data.shape}, TARGET: {target.shape}") + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_int == 0 and grank == 0: + print( + f'Train epoch: {epoch} [{batch_idx * len(data)}/' + f'{len(train_loader.dataset)/gwsize} ' + f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\tLoss: ' + f'{loss.item():.6f}') + + loss_acc += loss.item() + if grank == 0: + logging.info('epoch time: {:.2f}'.format(time.perf_counter()-lt_1)+' s') + return loss_acc + + +def test(model, device, test_loader, grank, gwsize): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + # sum up batch loss + test_loss += F.nll_loss(output, target, reduction="sum").item() + # get the index of the max log-probability + pred = output.argmax(dim=1, keepdim=True) + correct += pred.eq(target.view_as(pred)).sum().item() + test_loss /= len(test_loader.dataset) + if grank == 0: + print( + f'Test set: average loss: {test_loss:.4f}\t' + f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') + acc_test = 100.0 * correct * gwsize / len(test_loader.dataset) + return acc_test + + +def save_state( + epoch, distrib_model, loss_acc, + optimizer, res_name, grank, gwsize, is_best +): + """Save training state.""" + rt = time.time() + # find if is_best happened in any worker + if torch.cuda.is_available(): + is_best_m = par_allgather_obj(is_best, gwsize) + + if torch.cuda.is_available(): + if any(is_best_m): + # find which rank is_best happened - select first rank if multiple + is_best_rank = np.where(np.array(is_best_m))[0][0] + + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} + + # write on worker with is_best + if grank == is_best_rank: + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} ' + f'in {time.time()-rt} s') + else: + # collect state + state = {'epoch': epoch + 1, + 'state_dict': distrib_model.state_dict(), + 'best_acc': loss_acc, + 'optimizer': optimizer.state_dict()} + + torch.save(state, './'+res_name) + print( + f'DEBUG: state in {grank} is saved on epoch:{epoch} ' + f'in {time.time()-rt} s') + + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + + +def par_allgather_obj(obj, gwsize): + """Gathers any object from the whole group in a list (to all workers)""" + res = [None]*gwsize + dist.all_gather_object(res, obj, group=None) + return res + + +def main(): + # get parse args + args = pars_ini() + + # check CUDA availibility + args.cuda = not args.no_cuda and torch.cuda.is_available() + + # get directory + program_dir = os.getcwd() + + # start the time.time for profiling + st = time.time() + + # initializes the distributed backend which will take care of synchronizing + # nodes/GPUs + if torch.cuda.is_available(): + dist.init_process_group(backend=args.backend) + + # deterministic testrun + if args.testrun: + torch.manual_seed(args.nseed) + g = torch.Generator() + 
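+        # This generator is passed to the DataLoader (together with
+        # seed_worker) further below, so that dataloader-worker shuffling
+        # stays reproducible in deterministic test runs.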
g.manual_seed(args.nseed) + + # get job rank info - rank==0 master gpu + if torch.cuda.is_available(): + # local world size - per node + lwsize = torch.cuda.device_count() if args.cuda else 0 + gwsize = dist.get_world_size() # global world size - per run + grank = dist.get_rank() # global rank - assign per run + lrank = dist.get_rank() % lwsize # local rank - assign per node + else: + gwsize = 1 + grank = 0 + + # some debug + if grank == 0: + print('TIMER: initialise:', time.time()-st, 's') + print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) + print('DEBUG: sys.version:', sys.version, '\n') + + print('DEBUG: IO parsers:') + print('DEBUG: args.data_dir:', args.data_dir) + print('DEBUG: args.restart_int:', args.restart_int, '\n') + + print('DEBUG: model parsers:') + print('DEBUG: args.batch_size:', args.batch_size) + print('DEBUG: args.epochs:', args.epochs) + print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.momentum:', args.momentum) + print('DEBUG: args.shuff:', args.shuff, '\n') + + print('DEBUG: debug parsers:') + print('DEBUG: args.testrun:', args.testrun) + print('DEBUG: args.nseed:', args.nseed) + print('DEBUG: args.log_int:', args.log_int, '\n') + + print('DEBUG: parallel parsers:') + print('DEBUG: args.backend:', args.backend) + print('DEBUG: args.nworker:', args.nworker) + print('DEBUG: args.prefetch:', args.prefetch) + print('DEBUG: args.cuda:', args.cuda) + print('DEBUG: args.benchrun:', args.benchrun, '\n') + + # encapsulate the model on the GPU assigned to the current process + device = torch.device( + 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) + if args.cuda: + torch.cuda.set_device(lrank) + # deterministic testrun + if args.testrun: + torch.cuda.manual_seed(args.nseed) + + # dataset + # Initialize transformations for data augmentation + transform = transforms.Compose([ + transforms.Resize(256), + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=45), + transforms.ColorJitter( + brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + + # Load the ImageNet Object Localization Challenge dataset + train_dataset = datasets.ImageFolder( + root=args.data_dir, + transform=transform + ) + # test_dataset = ... 
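+    # ImageFolder infers labels from the directory layout: it expects one
+    # sub-folder per class under args.data_dir
+    # (e.g. <data_dir>/<class>/<image>.JPEG).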
+ + # restricts data loading to a subset of the dataset exclusive to the + # current process + args.shuff = args.shuff and not args.testrun + if torch.cuda.is_available(): + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) + # test_sampler = torch.utils.data.distributed.DistributedSampler( + # test_dataset, num_replicas=gwsize, rank=grank, + # shuffle=args.shuff) + + # distribute dataset to workers + # persistent workers is not possible for nworker=0 + pers_w = True if args.nworker > 1 else False + + # deterministic testrun - the same dataset each run + kwargs = {'worker_init_fn': seed_worker, + 'generator': g} if args.testrun else {} + + if torch.cuda.is_available(): + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) + # test_loader = torch.utils.data.DataLoader( + # test_dataset, batch_size=args.batch_size, + # sampler=test_sampler, num_workers=args.nworker, pin_memory=True, + # persistent_workers=pers_w, prefetch_factor=args.prefetch, + # **kwargs) + else: + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size) + # test_loader = torch.utils.data.DataLoader( + # test_dataset, batch_size=args.batch_size) + + if grank == 0: + print('TIMER: read and concat data:', time.time()-st, 's') + + # create CNN model + model = torchvision.models.resnet152().to(device) + + # distribute model to workers + if torch.cuda.is_available(): + distrib_model = nn.parallel.DistributedDataParallel( + model, + device_ids=[device], + output_device=device) + else: + distrib_model = model + + # optimizer + # optimizer = torch.optim.Adam(distrib_model.parameters(), lr=args.lr) + optimizer = torch.optim.SGD( + distrib_model.parameters(), lr=args.lr, momentum=args.momentum) + + # resume state + start_epoch = 1 + best_acc = np.Inf + nnod = os.environ.get('SLURM_NNODES', 'unk') + res_name = f'ddp-{nnod}N-checkpoint.pth.tar' + if os.path.isfile(res_name) and not args.benchrun: + try: + if torch.cuda.is_available(): + dist.barrier() + # Map model to be loaded to specified single gpu. 
+ loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { + 'cpu:%d' % 0: 'cpu:%d' % lrank} + checkpoint = torch.load( + program_dir+'/'+res_name, map_location=loc) + else: + checkpoint = torch.load(program_dir+'/'+res_name) + start_epoch = checkpoint['epoch'] + best_acc = checkpoint['best_acc'] + distrib_model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if torch.cuda.is_available(): + if grank == 0: + print(f'WARNING: restarting from {start_epoch} epoch') + else: + print(f'WARNING: restarting from {start_epoch} epoch') + except Exception: + if torch.cuda.is_available(): + if grank == 0: + print('WARNING: restart file cannot ' + 'be loaded, restarting!') + else: + print('WARNING: restart file cannot be loaded, restarting!') + + if start_epoch >= args.epochs: + if torch.cuda.is_available(): + if grank == 0: + print('WARNING: given epochs are less than the one in the' + ' restart file!\n' + 'WARNING: SYS.EXIT is issued') + dist.barrier() + dist.destroy_process_group() + sys.exit() + else: + print('WARNING: given epochs are less than the one in the ' + 'restart file!\n' + 'WARNING: SYS.EXIT is issued') + sys.exit() + + # start trainin/testing loop + if grank == 0: + print('TIMER: broadcast:', time.time()-st, 's') + print('\nDEBUG: start training') + print('--------------------------------------------------------') + #epoch_time_tracker = EpochTimeTracker(series_name="ddp-bl") + + et = time.time() + for epoch in range(start_epoch, args.epochs + 1): + lt = time.time() + # training + if args.benchrun and epoch == args.epochs: + # profiling (done on last epoch - slower!) + with torch.autograd.profiler.profile(use_cuda=args.cuda, + profile_memory=True) as prof: + loss_acc = train(distrib_model, device, train_loader, + optimizer, epoch, grank, gwsize, args) + else: + loss_acc = train(distrib_model, device, train_loader, + optimizer, epoch, grank, gwsize, args) + + # # testing + # acc_test = test(distrib_model, device, + # test_loader, grank, gwsize, args) + + # save first epoch timer + if epoch == start_epoch: + first_ep_t = time.time()-lt + + # final epoch + if epoch + 1 == args.epochs: + train_loader.last_epoch = True + # test_loader.last_epoch = True + + if grank == 0: + print('TIMER: epoch time:', time.time()-lt, 's') + #epoch_time_tracker.add_epoch_time(epoch-1, time.time()-lt) + # print('DEBUG: accuracy:', acc_test, '%') + if args.benchrun and epoch == args.epochs: + print('\n----------------------------------------------------') + print('DEBUG: benchmark of last epoch:\n') + what1 = 'cuda' if args.cuda else 'cpu' + print(prof.key_averages().table( + sort_by='self_'+str(what1)+'_time_total')) + + # save state if found a better state + is_best = loss_acc < best_acc + if epoch % args.restart_int == 0 and not args.benchrun: + save_state(epoch, distrib_model, loss_acc, optimizer, + res_name, grank, gwsize, is_best) + # reset best_acc + best_acc = min(loss_acc, best_acc) + + # finalise + # save final state + if not args.benchrun: + save_state(epoch, distrib_model, loss_acc, + optimizer, res_name, grank, gwsize, True) + if torch.cuda.is_available(): + dist.barrier() + + # some debug + if grank==0: + print(f'\n--------------------------------------------------------') + logging.info('training results:') + logging.info('first epoch time: {:.2f}'.format(first_ep_t)+' s') + logging.info('last epoch time: {:.2f}'.format(time.time()-lt)+' s') + logging.info('total epoch time: {:.2f}'.format(time.time()-et)+' s') + logging.info('average epoch time: 
{:.2f}'.format((time.time()-et)/args.epochs)+' s')
+        if epoch>1:
+            logging.info('total epoch-1 time: {:.2f}'.format(time.time()-et-first_ep_t)+' s')
+            logging.info('average epoch-1 time: {:.2f}'.format((time.time()-et-first_ep_t)/(args.epochs-1))+' s')
+        if args.benchrun:
+            logging.info('total epoch-2 time: {:.2f}'.format(lt-first_ep_t)+' s')
+            logging.info('average epoch-2 time: {:.2f}'.format((lt-first_ep_t)/(args.epochs-2))+' s')
+        # memory on worker 0
+        if args.cuda:
+            logging.info('memory req: '+str(int(torch.cuda.max_memory_reserved(0)/1024/1024))+' MB')
+            logging.info('memory summary:\n'+str(torch.cuda.memory_summary(0)))
+
+    if grank == 0:
+        print(f'TIMER: final time: {time.time()-st} s\n')
+        nnod = os.environ.get('SLURM_NNODES', 'unk')
+        #epoch_time_tracker.save(
+        #    csv_file=f"epochtime_ddp-bl_{nnod}N.csv")
+
+    print(f" - TRAINING FINISHED")
+
+    # clean-up
+    if torch.cuda.is_available():
+        dist.barrier()
+        dist.destroy_process_group()
+
+
+if __name__ == "__main__":
+    main()
+    sys.exit()
diff --git a/tutorials/distributed-ml/torch-scaling-test/general_jobsys.xml b/tutorials/distributed-ml/torch-scaling-test/general_jobsys.xml
new file mode 100644
index 00000000..273b2224
--- /dev/null
+++ b/tutorials/distributed-ml/torch-scaling-test/general_jobsys.xml
@@ -0,0 +1,142 @@
+
+
+    General benchmark script
+
+
+
+
+    1,2,4,8
+
+    8
+
+    DDP_trainer.py
+
+
+
+
+    if [ -f /etc/FZJ/systemname ]; then cat /etc/FZJ/systemname | tr -d "\n"; else uname -n | head -c 3; fi
+    sbatch
+    $iterNO
+    $iterNW
+    ready
+    jube_ddp.sh
+
+    { "hdfml": 4,
+      }["${systemname}"]
+
+    intertwin
+
+    00:30:00
+
+    { "hdfml": "batch",
+      }["${systemname}"]
+
+
+    00:10:00
+
+    { "hdfml": "batch",
+      }["${systemname}"]
+
+
+
+
+    {
+    "hdfml": "ml Stages/2024  GCC  OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py",
+    }["${systemname}"]
+
+    source /p/project/intertwin/rakesh/T6.5-AI-and-ML/bench/../envAI_hdfml/bin/activate
+    {
+    "hdfml": "export CUDA_VISIBLE_DEVICES=0,1,2,3"
+    }["${systemname}"]
+
+
+
+
+
+    $job_file
+    $script
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    paramset
+    executeset
+    envirset
+    files,sub_job
+    echo "nID: $jube_wp_id"
+
+    $submit_cmd $job_file
+
+
+
+
+
+    ${jube_wp_id}
+    ${nodes}
+    ${nnw}
+    \s*INFO: total epoch-2 time:\s+$jube_pat_wrd\s*
+    \s*INFO: average epoch-2 time:\s+$jube_pat_wrd\s*
+    ${avgEpochT}/${nodes}
+    \s*INFO: memory req:\s+$jube_pat_wrd\s*
+    ${memory}/1024
+
+
+
+
+    pattern
+
+    stdout
+    job.out
+
+
+
+
+
+    analyse
+
+    ID
+    Nnodes
+    Nworkers
+    calcTime
+    avgEpochT
+    Naet
+    memoryGPU
+
+ + + + analyse + + ID + Nnodes + Nworkers + calcTime + avgEpochT + Naet + memoryGPU +
+
+ +
+
+ + + diff --git a/tutorials/distributed-ml/torch-scaling-test/jube_ddp.sh b/tutorials/distributed-ml/torch-scaling-test/jube_ddp.sh new file mode 100644 index 00000000..2c6b2446 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/jube_ddp.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=JUBE_DDP +#SBATCH --account=#ACC# +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=#TIMELIM# + +# configure node and process count on the CM +#SBATCH --partition=#QUEUE# +#SBATCH --nodes=#NODES# +#SBATCH --ntasks-per-node=#NGPU# +#SBATCH --cpus-per-task=#NW# +#SBATCH --gpus-per-node=#NGPU# +#SBATCH --exclusive + +# set modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# set env +source ../dist_trainer_v2/envAI_hdfml/bin/activate + +# job info +debug=false +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$debug" = true ] ; then + export NCCL_DEBUG=INFO +fi +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +# launch training +TRAINING_CMD="#SCRIPT#" + +srun --cpu-bind=none bash -c "torchrun \ + --log_dir='logs' \ + --nnodes=$SLURM_NNODES \ + --nproc_per_node=$SLURM_GPUS_PER_NODE \ + --rdzv_id=$SLURM_JOB_ID \ + --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ + --rdzv_backend=c10d \ + $TRAINING_CMD" From e5734d2545b08b3a8cb1b4edb69db68492bb82d4 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:15:25 +0200 Subject: [PATCH 087/171] Update createEnvJSC.sh --- env-files/torch/createEnvJSC.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/env-files/torch/createEnvJSC.sh b/env-files/torch/createEnvJSC.sh index 68174dc4..6b0fa226 100644 --- a/env-files/torch/createEnvJSC.sh +++ b/env-files/torch/createEnvJSC.sh @@ -170,6 +170,13 @@ if __name__ == '__main__': """ >> ${cDir}/envAI_${sysN}/bin/torchrun fi +# JUBE benchmarking environment +if [ -f "${cDir}/envAI_${sysN}/bin/jube" ]; then + echo 'JUBE already installed' +else + pip3 install --no-cache-dir http://apps.fz-juelich.de/jsc/jube/jube2/download.php?version=latest +fi + # some tests echo "unit tests:" for item in 'torch' 'deepspeed' 'horovod';do From f8d3cd94fee558cc8031f3797f22328fabd7e361 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:15:58 +0200 Subject: [PATCH 088/171] Update createEnvJSCTF.sh --- env-files/tensorflow/createEnvJSCTF.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/env-files/tensorflow/createEnvJSCTF.sh b/env-files/tensorflow/createEnvJSCTF.sh index df6799ec..8838347c 100644 --- a/env-files/tensorflow/createEnvJSCTF.sh +++ b/env-files/tensorflow/createEnvJSCTF.sh @@ -92,6 +92,13 @@ else pip3 install --no-cache-dir horovod --ignore-installed fi +# JUBE benchmarking environment +if [ -f "${cDir}/envAI_${sysN}/bin/jube" ]; then + echo 
'JUBE already installed' +else + pip3 install --no-cache-dir http://apps.fz-juelich.de/jsc/jube/jube2/download.php?version=latest +fi + # get rest of the libraries$ if [ "$cont1" = true ] ; then pip3 install -r reqs_TF.txt --ignore-installed From 1e1d32d7b32ef28597a6f2a688c1f6820c519f7b Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 11 Apr 2024 10:06:20 +0200 Subject: [PATCH 089/171] ADD logy scale option --- src/itwinai/cli.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py index d52c7c40..437188d6 100644 --- a/src/itwinai/cli.py +++ b/src/itwinai/cli.py @@ -27,6 +27,9 @@ def scalability_report( plot_title: Annotated[Optional[str], typer.Option( help=("Plot name.") )] = None, + logy: Annotated[bool, typer.Option( + help=("Log scale on y axis.") + )] = False, skip_id: Annotated[Optional[int], typer.Option( help=("Skip epoch ID.") )] = None, @@ -40,7 +43,7 @@ def scalability_report( Example: >>> itwinai scalability-report --pattern="^epoch.+\.csv$" --skip-id 0 \ - >>> --plot-title "Some title" --archive archive_name + >>> --plot-title "Some title" --logy --archive archive_name """ # TODO: add max depth and path different from CWD import os @@ -102,11 +105,22 @@ def scalability_report( df["Threadscaled Sim. Time / s"] # Plot - sp_up_ax.plot( - df["NGPUs"].values, df["Speedup"].values, - marker='*', lw=1.0, label=name) - sp_up_ax.plot(df["NGPUs"].values, df["Speedup - ideal"].values, - ls='dashed', lw=1.0, c='k', label="ideal") + # when lines are very close to each other + if logy: + sp_up_ax.semilogy( + df["NGPUs"].values, df["Speedup"].values, + marker='*', lw=1.0, label=name) + else: + sp_up_ax.plot( + df["NGPUs"].values, df["Speedup"].values, + marker='*', lw=1.0, label=name) + + if logy: + sp_up_ax.semilogy(df["NGPUs"].values, df["Speedup - ideal"].values, + ls='dashed', lw=1.0, c='k', label="ideal") + else: + sp_up_ax.plot(df["NGPUs"].values, df["Speedup - ideal"].values, + ls='dashed', lw=1.0, c='k', label="ideal") sp_up_ax.legend(ncol=1) sp_up_ax.set_xticks(df["NGPUs"].values) From 31ec4ffead8a69a2917329eaef5860295c4a39cd Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 11 Apr 2024 10:13:06 +0200 Subject: [PATCH 090/171] Extract JUBE tutorial --- .../{torch-scaling-test => jube-tutorial}/DDP_trainer.py | 0 tutorials/distributed-ml/jube-tutorial/README.md | 3 +++ .../{torch-scaling-test => jube-tutorial}/general_jobsys.xml | 0 .../{torch-scaling-test => jube-tutorial}/jube_ddp.sh | 0 4 files changed, 3 insertions(+) rename tutorials/distributed-ml/{torch-scaling-test => jube-tutorial}/DDP_trainer.py (100%) create mode 100644 tutorials/distributed-ml/jube-tutorial/README.md rename tutorials/distributed-ml/{torch-scaling-test => jube-tutorial}/general_jobsys.xml (100%) rename tutorials/distributed-ml/{torch-scaling-test => jube-tutorial}/jube_ddp.sh (100%) diff --git a/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py b/tutorials/distributed-ml/jube-tutorial/DDP_trainer.py similarity index 100% rename from tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py rename to tutorials/distributed-ml/jube-tutorial/DDP_trainer.py diff --git a/tutorials/distributed-ml/jube-tutorial/README.md b/tutorials/distributed-ml/jube-tutorial/README.md new file mode 100644 index 00000000..0f564c99 --- /dev/null +++ b/tutorials/distributed-ml/jube-tutorial/README.md @@ -0,0 +1,3 @@ +# Benchmarking tutorial using JUBE + +TODO: complete \ No newline at end of file diff --git 
a/tutorials/distributed-ml/torch-scaling-test/general_jobsys.xml b/tutorials/distributed-ml/jube-tutorial/general_jobsys.xml similarity index 100% rename from tutorials/distributed-ml/torch-scaling-test/general_jobsys.xml rename to tutorials/distributed-ml/jube-tutorial/general_jobsys.xml diff --git a/tutorials/distributed-ml/torch-scaling-test/jube_ddp.sh b/tutorials/distributed-ml/jube-tutorial/jube_ddp.sh similarity index 100% rename from tutorials/distributed-ml/torch-scaling-test/jube_ddp.sh rename to tutorials/distributed-ml/jube-tutorial/jube_ddp.sh From c2f1f936e1f7e8b5586def388f54144e836fb24b Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 11 Apr 2024 18:46:09 +0200 Subject: [PATCH 091/171] CLEANUP baselines --- .vscode/settings.json | 1 + .../torch-scaling-test/DDP_trainer.py | 258 ++++++++++ .../torch-scaling-test/DS_trainer.py | 243 +++++++++ .../torch-scaling-test/README.md | 2 +- .../torch-scaling-test/ddp-config.yaml | 20 + .../torch-scaling-test/ddp/DDP_trainer.py | 477 ------------------ .../torch-scaling-test/ddp/config.yaml | 23 - .../torch-scaling-test/ddp/scaling-test.sh | 9 - .../torch-scaling-test/{ddp => }/ddp_slurm.sh | 4 +- .../torch-scaling-test/deepspeed-config.yaml | 18 + .../deepspeed/DS_trainer.py | 355 ------------- .../torch-scaling-test/deepspeed/config.yaml | 17 - .../deepspeed/scaling-test.sh | 9 - .../{deepspeed => }/deepspeed_slurm.sh | 4 +- .../config.yaml => horovod-config.yaml} | 12 +- .../horovod/horovod_trainer.py | 261 ---------- .../horovod/scaling-test.sh | 9 - .../torch-scaling-test/horovod_trainer.py | 309 ++++++++++++ .../{horovod => }/hvd_slurm.sh | 4 +- .../torch-scaling-test/runall.sh | 10 +- .../torch-scaling-test/scaling-test.sh | 11 + .../torch-scaling-test/utils.py | 52 ++ 22 files changed, 928 insertions(+), 1180 deletions(-) create mode 100755 tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py create mode 100644 tutorials/distributed-ml/torch-scaling-test/DS_trainer.py create mode 100644 tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml delete mode 100755 tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py delete mode 100644 tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml delete mode 100644 tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh rename tutorials/distributed-ml/torch-scaling-test/{ddp => }/ddp_slurm.sh (95%) create mode 100644 tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml delete mode 100644 tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py delete mode 100644 tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml delete mode 100644 tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh rename tutorials/distributed-ml/torch-scaling-test/{deepspeed => }/deepspeed_slurm.sh (95%) rename tutorials/distributed-ml/torch-scaling-test/{horovod/config.yaml => horovod-config.yaml} (57%) delete mode 100755 tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py delete mode 100644 tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh create mode 100755 tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py rename tutorials/distributed-ml/torch-scaling-test/{horovod => }/hvd_slurm.sh (93%) create mode 100644 tutorials/distributed-ml/torch-scaling-test/scaling-test.sh create mode 100644 tutorials/distributed-ml/torch-scaling-test/utils.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 896f98c2..10679610 100644 --- a/.vscode/settings.json +++ 
b/.vscode/settings.json @@ -14,6 +14,7 @@ "fromlist", "hyperparameters", "hyperparams", + "imagenet", "ipython", "itwinai", "Lockfiles", diff --git a/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py b/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py new file mode 100755 index 00000000..8afc6a4e --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py @@ -0,0 +1,258 @@ +""" +Scaling test of torch Distributed Data Parallel on Imagenet using Resnet. +""" +from typing import Optional +import argparse +import sys +import os +from timeit import default_timer as timer + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +import torchvision + +from itwinai.parser import ArgumentParser as ItAIArgumentParser +from itwinai.loggers import EpochTimeTracker + +from utils import seed_worker, imagenet_dataset, set_seed + + +def parse_params(): + parser = ItAIArgumentParser(description='PyTorch Imagenet scaling test') + + # Data and logging + parser.add_argument('--data-dir', default='./', + help=('location of the training dataset in the ' + 'local filesystem')) + parser.add_argument('--log-int', type=int, default=10, + help='log interval per training. Disabled if < 0.') + parser.add_argument('--verbose', + action=argparse.BooleanOptionalAction, + help='Print parsed arguments') + parser.add_argument('--nworker', type=int, default=0, + help=('number of workers in DataLoader ' + '(default: 0 - only main)')) + parser.add_argument('--prefetch', type=int, default=2, + help='prefetch data in DataLoader (default: 2)') + + # Model + parser.add_argument('--batch-size', type=int, default=64, + help='input batch size for training (default: 64)') + parser.add_argument('--epochs', type=int, default=10, + help='number of epochs to train (default: 10)') + parser.add_argument('--lr', type=float, default=0.01, + help='learning rate (default: 0.01)') + parser.add_argument('--momentum', type=float, default=0.5, + help='momentum in SGD optimizer (default: 0.5)') + parser.add_argument('--shuff', action='store_true', default=False, + help='shuffle dataset (default: False)') + + # Reproducibility + parser.add_argument('--rnd-seed', type=Optional[int], default=None, + help='seed integer for reproducibility (default: 0)') + + # Distributed ML + parser.add_argument('--backend', type=str, default='nccl', + help='backend for parrallelisation (default: nccl)') + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables GPGPUs') + + args = parser.parse_args() + + if args.verbose: + args_list = [f"{key}: {val}" for key, val in args.items()] + print("PARSED ARGS:\n", '\n'.join(args_list)) + return args + + +def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): + model.train() + t_list = [] + loss_acc = 0 + if grank == 0: + print("\n") + for batch_idx, (data, target) in enumerate(train_loader): + # if grank == 0: + # print(f"BS == DATA: {data.shape}, TARGET: {target.shape}") + t = timer() + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if args.log_int > 0 and batch_idx % args.log_int == 0 and grank == 0: + print( + f'Train epoch: {epoch} [{batch_idx * len(data)}/' + f'{len(train_loader.dataset)/gwsize} ' + f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\tLoss: ' + f'{loss.item():.6f}') + t_list.append(timer() - t) + loss_acc += loss.item() + if grank == 0: + 
print('TIMER: train time', sum(t_list) / len(t_list), 's') + return loss_acc + + +def main(): + # Parse CLI args + args = parse_params() + + # Check resources availability + use_cuda = not args.no_cuda and torch.cuda.is_available() + is_distributed = False + if use_cuda and torch.cuda.device_count() > 0: + is_distributed = True + + # Start the timer for profiling + st = timer() + + if is_distributed: + # Initializes the distributed backend which will + # take care of synchronizing the workers (nodes/GPUs) + dist.init_process_group(backend=args.backend) + + # Set random seed for reproducibility + torch_prng = set_seed(args.rnd_seed, use_cuda) + + if is_distributed: + # get job rank info - rank==0 master gpu + lwsize = torch.cuda.device_count() # local world size - per run + gwsize = dist.get_world_size() # global world size - per run + grank = dist.get_rank() # global rank - assign per run + lrank = dist.get_rank() % lwsize # local rank - assign per node + else: + # Use a single worker (either on GPU or CPU) + lwsize = 1 + gwsize = 1 + grank = 0 + lrank = 0 + + if grank == 0: + print('TIMER: initialise:', timer()-st, 's') + print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) + print('DEBUG: sys.version:', sys.version) + print('DEBUG: args.data_dir:', args.data_dir) + print('DEBUG: args.log_int:', args.log_int) + print('DEBUG: args.nworker:', args.nworker) + print('DEBUG: args.prefetch:', args.prefetch) + print('DEBUG: args.batch_size:', args.batch_size) + print('DEBUG: args.epochs:', args.epochs) + print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.momentum:', args.momentum) + print('DEBUG: args.shuff:', args.shuff) + print('DEBUG: args.rnd_seed:', args.rnd_seed) + print('DEBUG: args.backend:', args.backend) + print('DEBUG: args.no_cuda:', args.no_cuda, '\n') + + # Encapsulate the model on the GPU assigned to the current process + device = torch.device('cuda' if use_cuda else 'cpu', lrank) + if use_cuda: + torch.cuda.set_device(lrank) + + # Dataset + train_dataset = imagenet_dataset(args.data_dir) + + if is_distributed: + # Distributed sampler restricts data loading to a subset of the dataset + # exclusive to the current process + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset, num_replicas=gwsize, rank=grank, + shuffle=(args.shuff and args.rnd_seed is None) + ) + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=(args.nworker > 1), + prefetch_factor=args.prefetch, generator=torch_prng, + worker_init_fn=seed_worker + ) + else: + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, generator=torch_prng, + worker_init_fn=seed_worker + ) + + # Create CNN model + model = torchvision.models.resnet152().to(device) + + # Distribute model to workers + if is_distributed: + model = nn.parallel.DistributedDataParallel( + model, + device_ids=[device], + output_device=device) + + # Optimizer + optimizer = torch.optim.SGD( + model.parameters(), lr=args.lr, momentum=args.momentum) + + # Start training loop + if grank == 0: + print('TIMER: broadcast:', timer()-st, 's') + print('\nDEBUG: start training') + print('--------------------------------------------------------') + epoch_time_tracker = EpochTimeTracker(series_name="ddp-bl") + + et = timer() + start_epoch = 1 + for epoch in range(start_epoch, args.epochs + 1): + lt = timer() + if is_distributed: + # Inform the sampler that a new epoch 
started: shuffle + # may be needed + train_sampler.set_epoch(epoch) + train(model, device, train_loader, + optimizer, epoch, grank, gwsize, args) + # Save first epoch timer + if epoch == start_epoch: + first_ep_t = timer()-lt + + # Final epoch + if epoch + 1 == args.epochs: + train_loader.last_epoch = True + + if grank == 0: + print('TIMER: epoch time:', timer()-lt, 's') + epoch_time_tracker.add_epoch_time(epoch-1, timer()-lt) + + if is_distributed: + dist.barrier() + + if grank == 0: + print('\n--------------------------------------------------------') + print('DEBUG: training results:\n') + print('TIMER: first epoch time:', first_ep_t, ' s') + print('TIMER: last epoch time:', timer()-lt, ' s') + print('TIMER: average epoch time:', (timer()-et)/args.epochs, ' s') + print('TIMER: total epoch time:', timer()-et, ' s') + if epoch > 1: + print('TIMER: total epoch-1 time:', + timer()-et-first_ep_t, ' s') + print('TIMER: average epoch-1 time:', + (timer()-et-first_ep_t)/(args.epochs-1), ' s') + if use_cuda: + print('DEBUG: memory req:', + int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') + print('DEBUG: memory summary:\n\n', + torch.cuda.memory_summary(0)) + print(f'TIMER: final time: {timer()-st} s\n') + + nnod = os.environ.get('SLURM_NNODES', 'unk') + epoch_time_tracker.save( + csv_file=f"epochtime_ddp-bl_{nnod}N.csv") + + print(f" - TRAINING FINISHED") + + # Clean-up + if is_distributed: + dist.barrier() + dist.destroy_process_group() + + +if __name__ == "__main__": + main() + sys.exit() diff --git a/tutorials/distributed-ml/torch-scaling-test/DS_trainer.py b/tutorials/distributed-ml/torch-scaling-test/DS_trainer.py new file mode 100644 index 00000000..fdeac4eb --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/DS_trainer.py @@ -0,0 +1,243 @@ +""" +Scaling test of Microsoft Deepspeed on Imagenet using Resnet. +""" +from typing import Optional +import argparse +import sys +import os +from timeit import default_timer as timer +import deepspeed + +import torch +import torch.distributed as dist +import torch.nn.functional as F +import torchvision + +from itwinai.parser import ArgumentParser as ItAIArgumentParser +from itwinai.loggers import EpochTimeTracker + +from utils import imagenet_dataset + + +def parse_params(): + parser = ItAIArgumentParser(description='PyTorch Imagenet scaling test') + + # Data and logging + parser.add_argument('--data-dir', default='./', + help=('location of the training dataset in the ' + 'local filesystem')) + parser.add_argument('--log-int', type=int, default=10, + help='log interval per training. 
Disabled if < 0.')
+    parser.add_argument('--verbose',
+                        action=argparse.BooleanOptionalAction,
+                        help='Print parsed arguments')
+    parser.add_argument('--nworker', type=int, default=0,
+                        help=('number of workers in DataLoader '
+                              '(default: 0 - only main)'))
+
+    # Model
+    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
+                        help='input batch size for training (default: 64)')
+    parser.add_argument('--epochs', type=int, default=10, metavar='N',
+                        help='number of epochs to train (default: 10)')
+    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
+                        help='learning rate (default: 0.01)')
+
+    # Reproducibility
+    parser.add_argument('--rnd-seed', type=Optional[int], default=None,
+                        help='seed integer for reproducibility (default: 0)')
+
+    # Distributed ML
+    parser.add_argument('--backend', type=str, default='nccl', metavar='N',
+                        help='backend for parallelization (default: nccl)')
+    parser.add_argument('--no-cuda', action='store_true', default=False,
+                        help='disables GPGPUs')
+    parser.add_argument('--local_rank', type=int, default=-1,
+                        help='local rank passed from distributed launcher')
+
+    # parse to deepspeed
+    parser = deepspeed.add_config_arguments(parser)
+    args = parser.parse_args()
+    if args.verbose:
+        args_list = [f"{key}: {val}" for key, val in args.items()]
+        print("PARSED ARGS:\n", '\n'.join(args_list))
+
+    return args
+
+
+def train(args, model, train_loader, optimizer, epoch, grank, gwsize):
+    device = model.local_rank
+    t_list = []
+    loss_acc = 0
+    if grank == 0:
+        print("\n")
+    for batch_idx, (data, target) in enumerate(train_loader):
+        # if grank == 0:
+        #     print(f"BS == DATA: {data.shape}, TARGET: {target.shape}")
+        t = timer()
+        data, target = data.to(device), target.to(device)
+        optimizer.zero_grad()
+        output = model(data)
+        loss = F.nll_loss(output, target)
+        loss.backward()
+        optimizer.step()
+        if args.log_int > 0 and batch_idx % args.log_int == 0 and grank == 0:
+            print(
+                f'Train epoch: {epoch} [{batch_idx * len(data)}/'
+                f'{len(train_loader.dataset)/gwsize} '
+                f'({100.0 * batch_idx / len(train_loader):.0f}%)]'
+                f'\t\tLoss: {loss.item():.6f}')
+        t_list.append(timer() - t)
+        loss_acc += loss.item()
+    if grank == 0:
+        print('TIMER: train time', sum(t_list) / len(t_list), 's')
+    return loss_acc
+
+
+def main():
+    # Parse CLI args
+    args = parse_params()
+
+    # Check resources availability
+    use_cuda = not args.no_cuda and torch.cuda.is_available()
+    is_distributed = False
+    if use_cuda and torch.cuda.device_count() > 0:
+        is_distributed = True
+
+    # Limit # of CPU threads to be used per worker
+    # torch.set_num_threads(1)
+
+    # Start the timer for profiling
+    st = timer()
+
+    # Initializes the distributed backend
+    if is_distributed:
+        deepspeed.init_distributed(dist_backend=args.backend)
+
+    if args.rnd_seed is not None:
+        # Deterministic execution
+        torch.manual_seed(args.rnd_seed)
+
+    if is_distributed:
+        # Get job rank info - rank==0 master gpu
+        gwsize = dist.get_world_size()      # global world size - per run
+        lwsize = torch.cuda.device_count()  # local world size - per node
+        grank = dist.get_rank()             # global rank - assign per run
+        lrank = dist.get_rank() % lwsize    # local rank - assign per node
+    else:
+        # Use a single worker (either on GPU or CPU)
+        lwsize = 1
+        gwsize = 1
+        grank = 0
+        lrank = 0
+
+    # some debug
+    if grank == 0:
+        print('TIMER: initialise:', timer()-st, 's')
+        print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize)
+        print('DEBUG: sys.version:', sys.version)
+        print('DEBUG: args.data_dir:',
args.data_dir) + print('DEBUG: args.log_int:', args.log_int) + print('DEBUG: args.nworker:', args.nworker) + print('DEBUG: args.batch_size:', args.batch_size) + print('DEBUG: args.epochs:', args.epochs) + print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.rnd_seed:', args.rnd_seed) + print('DEBUG: args.backend:', args.backend) + print('DEBUG: args.local_rank:', args.local_rank) + print('DEBUG: args.no_cuda:', args.no_cuda, '\n') + + # Encapsulate the model on the GPU assigned to the current process + if use_cuda: + torch.cuda.set_device(lrank) + + # Read training dataset + train_dataset = imagenet_dataset(args.data_dir) + + # Create CNN model + model = torchvision.models.resnet152() + + # Initialize DeepSpeed to use the following features + # 1) Distributed model + # 2) DeepSpeed optimizer + # 3) Distributed data loader + deepspeed_config = { + "train_micro_batch_size_per_gpu": args.batch_size, + "optimizer": { + "type": "SGD", + "params": { + "lr": args.lr, + "momentum": 0.5 + } + }, + "fp16": { + "enabled": False + }, + "zero_optimization": False + } + distrib_model, optimizer, train_loader, _ = deepspeed.initialize( + args=args, model=model, model_parameters=model.parameters(), + training_data=train_dataset, config_params=deepspeed_config) + + # Start training loop + if grank == 0: + print('TIMER: broadcast:', timer()-st, 's') + print('\nDEBUG: start training') + print('--------------------------------------------------------') + epoch_time_tracker = EpochTimeTracker(series_name="deepspeed-bl") + + et = timer() + start_epoch = 1 + for epoch in range(start_epoch, args.epochs + 1): + lt = timer() + # Training + train(args, distrib_model, train_loader, + optimizer, epoch, grank, gwsize) + + # Save first epoch timer + if epoch == start_epoch: + first_ep_t = timer()-lt + + # Final epoch + if epoch + 1 == args.epochs: + train_loader.last_epoch = True + + if grank == 0: + print('TIMER: epoch time:', timer()-lt, 's') + epoch_time_tracker.add_epoch_time(epoch-1, timer()-lt) + + if torch.cuda.is_available(): + dist.barrier() + + if grank == 0: + print('\n--------------------------------------------------------') + print('DEBUG: results:\n') + print('TIMER: first epoch time:', first_ep_t, ' s') + print('TIMER: last epoch time:', timer()-lt, ' s') + print('TIMER: average epoch time:', (timer()-et)/args.epochs, ' s') + print('TIMER: total epoch time:', timer()-et, ' s') + if epoch > 1: + print('TIMER: total epoch-1 time:', + timer()-et-first_ep_t, ' s') + print('TIMER: average epoch-1 time:', + (timer()-et-first_ep_t)/(args.epochs-1), ' s') + if use_cuda: + print('DEBUG: memory req:', + int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') + print('DEBUG: memory summary:\n\n', + torch.cuda.memory_summary(0)) + print(f'TIMER: final time: {timer()-st} s\n') + nnod = os.environ.get('SLURM_NNODES', 'unk') + epoch_time_tracker.save( + csv_file=f"epochtime_deepspeed-bl_{nnod}N.csv") + + print(f" - TRAINING FINISHED") + + # Clean-up + if is_distributed: + deepspeed.sys.exit() + + +if __name__ == "__main__": + main() + sys.exit() diff --git a/tutorials/distributed-ml/torch-scaling-test/README.md b/tutorials/distributed-ml/torch-scaling-test/README.md index dcc5233a..7e7c6704 100644 --- a/tutorials/distributed-ml/torch-scaling-test/README.md +++ b/tutorials/distributed-ml/torch-scaling-test/README.md @@ -1,4 +1,4 @@ # Scaling tests for PyTorch Examples of scaling tests which can be used as baselines for `itwinai` distributed. -Work in progress. \ No newline at end of file +Work in progress. 
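The trainers above log per-epoch wall-clock times through EpochTimeTracker
(e.g. epochtime_ddp-bl_2N.csv), and the `itwinai scalability-report` command shown
earlier turns those CSV files into a speedup plot. A minimal workflow sketch, assuming
the SLURM submission loop of the former ddp/scaling-test.sh; job name, walltime and
plot title below are purely illustrative:

    # Submit the DDP baseline on 1, 2, 4 and 8 nodes (names/walltime are placeholders).
    for N in 1 2 4 8; do
        sbatch --job-name="DDP-imagenet-n$N" --nodes=$N \
            --output="job-ddp-n$N.out" --error="job-ddp-n$N.err" \
            --time="01:01:00" ddp_slurm.sh
    done

    # After the jobs finish, aggregate the epoch-time CSVs into a report:
    itwinai scalability-report --pattern="^epoch.+\.csv$" \
        --plot-title "ResNet152 ImageNet scaling" --logy
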
diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml b/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml new file mode 100644 index 00000000..583a5132 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml @@ -0,0 +1,20 @@ +# Data and logging +data_dir: tmp_data/ +log_int: 10 +verbose: True +nworker: 4 # num workers dataloader +prefetch: 2 + +# Model +batch_size: 64 +epochs: 3 +lr: 0.001 +momentum: 0.5 +shuff: False + +# Reproducibility +rnd_seed: 10 + +# Distributed ML +backend: nccl +no_cuda: False diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py b/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py deleted file mode 100755 index acfc1059..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/ddp/DDP_trainer.py +++ /dev/null @@ -1,477 +0,0 @@ -""" -Scaling test of torch Distributed Data Parallel on Imagenet using Resnet. -""" -import argparse -import sys -import os -import time -import random -import numpy as np - -import torch -import torch.distributed as dist -import torch.nn as nn -import torch.nn.functional as F -import torchvision -from torchvision import datasets, transforms - -from itwinai.parser import ArgumentParser as ItAIArgumentParser -from itwinai.loggers import EpochTimeTracker - - -def pars_ini(): - parser = ItAIArgumentParser(description='PyTorch Imagenet scaling test') - - # IO parsers - parser.add_argument('--data-dir', default='./', - help=('location of the training dataset in the ' - 'local filesystem')) - parser.add_argument('--restart-int', type=int, default=10, - help='restart interval per epoch (default: 10)') - parser.add_argument('--verbose', - action=argparse.BooleanOptionalAction, - help='Print parsed arguments') - - # model parsers - parser.add_argument('--batch-size', type=int, default=64, - help='input batch size for training (default: 64)') - parser.add_argument('--epochs', type=int, default=10, - help='number of epochs to train (default: 10)') - parser.add_argument('--lr', type=float, default=0.01, - help='learning rate (default: 0.01)') - parser.add_argument('--momentum', type=float, default=0.5, - help='momentum in SGD optimizer (default: 0.5)') - parser.add_argument('--shuff', action='store_true', default=False, - help='shuffle dataset (default: False)') - - # debug parsers - parser.add_argument('--testrun', action='store_true', default=False, - help='do a test run with seed (default: False)') - parser.add_argument('--nseed', type=int, default=0, - help='seed integer for reproducibility (default: 0)') - parser.add_argument('--log-int', type=int, default=10, - help='log interval per training') - parser.add_argument('--benchrun', - action=argparse.BooleanOptionalAction) - - # parallel parsers - parser.add_argument('--backend', type=str, default='nccl', - help='backend for parrallelisation (default: nccl)') - parser.add_argument('--nworker', type=int, default=0, - help=('number of workers in DataLoader ' - '(default: 0 - only main)')) - parser.add_argument('--prefetch', type=int, default=2, - help='prefetch data in DataLoader (default: 2)') - parser.add_argument('--no-cuda', action='store_true', default=False, - help='disables GPGPUs') - - args = parser.parse_args() - - if args.verbose: - args_list = [f"{key}: {val}" for key, val in args.items()] - print("PARSED ARGS:\n", '\n'.join(args_list)) - return args - - -def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): - model.train() - t_list = [] - loss_acc = 0 - if grank == 0: - 
print("\n") - for batch_idx, (data, target) in enumerate(train_loader): - # if grank == 0: - # print(f"BS == DATA: {data.shape}, TARGET: {target.shape}") - t = time.perf_counter() - data, target = data.to(device), target.to(device) - optimizer.zero_grad() - output = model(data) - loss = F.nll_loss(output, target) - loss.backward() - optimizer.step() - if batch_idx % args.log_int == 0 and grank == 0: - print( - f'Train epoch: {epoch} [{batch_idx * len(data)}/' - f'{len(train_loader.dataset)/gwsize} ' - f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\tLoss: ' - f'{loss.item():.6f}') - t_list.append(time.perf_counter() - t) - loss_acc += loss.item() - if grank == 0: - print('TIMER: train time', sum(t_list) / len(t_list), 's') - return loss_acc - - -def test(model, device, test_loader, grank, gwsize): - model.eval() - test_loss = 0 - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - data, target = data.to(device), target.to(device) - output = model(data) - # sum up batch loss - test_loss += F.nll_loss(output, target, reduction="sum").item() - # get the index of the max log-probability - pred = output.argmax(dim=1, keepdim=True) - correct += pred.eq(target.view_as(pred)).sum().item() - test_loss /= len(test_loader.dataset) - if grank == 0: - print( - f'Test set: average loss: {test_loss:.4f}\t' - f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') - acc_test = 100.0 * correct * gwsize / len(test_loader.dataset) - return acc_test - - -def save_state( - epoch, distrib_model, loss_acc, - optimizer, res_name, grank, gwsize, is_best -): - """Save training state.""" - rt = time.time() - # find if is_best happened in any worker - if torch.cuda.is_available(): - is_best_m = par_allgather_obj(is_best, gwsize) - - if torch.cuda.is_available(): - if any(is_best_m): - # find which rank is_best happened - select first rank if multiple - is_best_rank = np.where(np.array(is_best_m))[0][0] - - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - # write on worker with is_best - if grank == is_best_rank: - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} ' - f'in {time.time()-rt} s') - else: - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} ' - f'in {time.time()-rt} s') - - -def seed_worker(worker_id): - worker_seed = torch.initial_seed() % 2**32 - np.random.seed(worker_seed) - random.seed(worker_seed) - - -def par_allgather_obj(obj, gwsize): - """Gathers any object from the whole group in a list (to all workers)""" - res = [None]*gwsize - dist.all_gather_object(res, obj, group=None) - return res - - -def main(): - # get parse args - args = pars_ini() - - # check CUDA availibility - args.cuda = not args.no_cuda and torch.cuda.is_available() - - # get directory - program_dir = os.getcwd() - - # start the time.time for profiling - st = time.time() - - # initializes the distributed backend which will take care of synchronizing - # nodes/GPUs - if torch.cuda.is_available(): - dist.init_process_group(backend=args.backend) - - # deterministic testrun - if args.testrun: - torch.manual_seed(args.nseed) - g = torch.Generator() - g.manual_seed(args.nseed) - - # get job rank info - rank==0 master gpu - if 
torch.cuda.is_available(): - # local world size - per node - lwsize = torch.cuda.device_count() if args.cuda else 0 - gwsize = dist.get_world_size() # global world size - per run - grank = dist.get_rank() # global rank - assign per run - lrank = dist.get_rank() % lwsize # local rank - assign per node - else: - gwsize = 1 - grank = 0 - - # some debug - if grank == 0: - print('TIMER: initialise:', time.time()-st, 's') - print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) - print('DEBUG: sys.version:', sys.version, '\n') - - print('DEBUG: IO parsers:') - print('DEBUG: args.data_dir:', args.data_dir) - print('DEBUG: args.restart_int:', args.restart_int, '\n') - - print('DEBUG: model parsers:') - print('DEBUG: args.batch_size:', args.batch_size) - print('DEBUG: args.epochs:', args.epochs) - print('DEBUG: args.lr:', args.lr) - print('DEBUG: args.momentum:', args.momentum) - print('DEBUG: args.shuff:', args.shuff, '\n') - - print('DEBUG: debug parsers:') - print('DEBUG: args.testrun:', args.testrun) - print('DEBUG: args.nseed:', args.nseed) - print('DEBUG: args.log_int:', args.log_int, '\n') - - print('DEBUG: parallel parsers:') - print('DEBUG: args.backend:', args.backend) - print('DEBUG: args.nworker:', args.nworker) - print('DEBUG: args.prefetch:', args.prefetch) - print('DEBUG: args.cuda:', args.cuda) - print('DEBUG: args.benchrun:', args.benchrun, '\n') - - # encapsulate the model on the GPU assigned to the current process - device = torch.device( - 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) - if args.cuda: - torch.cuda.set_device(lrank) - # deterministic testrun - if args.testrun: - torch.cuda.manual_seed(args.nseed) - - # dataset - # Initialize transformations for data augmentation - transform = transforms.Compose([ - transforms.Resize(256), - transforms.RandomHorizontalFlip(), - transforms.RandomVerticalFlip(), - transforms.RandomRotation(degrees=45), - transforms.ColorJitter( - brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) - ]) - - # Load the ImageNet Object Localization Challenge dataset - train_dataset = datasets.ImageFolder( - root=args.data_dir, - transform=transform - ) - # test_dataset = ... 
- - # restricts data loading to a subset of the dataset exclusive to the - # current process - args.shuff = args.shuff and not args.testrun - if torch.cuda.is_available(): - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) - # test_sampler = torch.utils.data.distributed.DistributedSampler( - # test_dataset, num_replicas=gwsize, rank=grank, - # shuffle=args.shuff) - - # distribute dataset to workers - # persistent workers is not possible for nworker=0 - pers_w = True if args.nworker > 1 else False - - # deterministic testrun - the same dataset each run - kwargs = {'worker_init_fn': seed_worker, - 'generator': g} if args.testrun else {} - - if torch.cuda.is_available(): - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) - # test_loader = torch.utils.data.DataLoader( - # test_dataset, batch_size=args.batch_size, - # sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - # persistent_workers=pers_w, prefetch_factor=args.prefetch, - # **kwargs) - else: - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size) - # test_loader = torch.utils.data.DataLoader( - # test_dataset, batch_size=args.batch_size) - - if grank == 0: - print('TIMER: read and concat data:', time.time()-st, 's') - - # create CNN model - model = torchvision.models.resnet152().to(device) - - # distribute model to workers - if torch.cuda.is_available(): - distrib_model = nn.parallel.DistributedDataParallel( - model, - device_ids=[device], - output_device=device) - else: - distrib_model = model - - # optimizer - # optimizer = torch.optim.Adam(distrib_model.parameters(), lr=args.lr) - optimizer = torch.optim.SGD( - distrib_model.parameters(), lr=args.lr, momentum=args.momentum) - - # resume state - start_epoch = 1 - best_acc = np.Inf - nnod = os.environ.get('SLURM_NNODES', 'unk') - res_name = f'ddp-{nnod}N-checkpoint.pth.tar' - if os.path.isfile(res_name) and not args.benchrun: - try: - if torch.cuda.is_available(): - dist.barrier() - # Map model to be loaded to specified single gpu. 
- loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { - 'cpu:%d' % 0: 'cpu:%d' % lrank} - checkpoint = torch.load( - program_dir+'/'+res_name, map_location=loc) - else: - checkpoint = torch.load(program_dir+'/'+res_name) - start_epoch = checkpoint['epoch'] - best_acc = checkpoint['best_acc'] - distrib_model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - if torch.cuda.is_available(): - if grank == 0: - print(f'WARNING: restarting from {start_epoch} epoch') - else: - print(f'WARNING: restarting from {start_epoch} epoch') - except Exception: - if torch.cuda.is_available(): - if grank == 0: - print('WARNING: restart file cannot ' - 'be loaded, restarting!') - else: - print('WARNING: restart file cannot be loaded, restarting!') - - if start_epoch >= args.epochs: - if torch.cuda.is_available(): - if grank == 0: - print('WARNING: given epochs are less than the one in the' - ' restart file!\n' - 'WARNING: SYS.EXIT is issued') - dist.barrier() - dist.destroy_process_group() - sys.exit() - else: - print('WARNING: given epochs are less than the one in the ' - 'restart file!\n' - 'WARNING: SYS.EXIT is issued') - sys.exit() - - # start trainin/testing loop - if grank == 0: - print('TIMER: broadcast:', time.time()-st, 's') - print('\nDEBUG: start training') - print('--------------------------------------------------------') - epoch_time_tracker = EpochTimeTracker(series_name="ddp-bl") - - et = time.time() - for epoch in range(start_epoch, args.epochs + 1): - lt = time.time() - # training - if args.benchrun and epoch == args.epochs: - # profiling (done on last epoch - slower!) - with torch.autograd.profiler.profile(use_cuda=args.cuda, - profile_memory=True) as prof: - loss_acc = train(distrib_model, device, train_loader, - optimizer, epoch, grank, gwsize, args) - else: - loss_acc = train(distrib_model, device, train_loader, - optimizer, epoch, grank, gwsize, args) - - # # testing - # acc_test = test(distrib_model, device, - # test_loader, grank, gwsize, args) - - # save first epoch timer - if epoch == start_epoch: - first_ep_t = time.time()-lt - - # final epoch - if epoch + 1 == args.epochs: - train_loader.last_epoch = True - # test_loader.last_epoch = True - - if grank == 0: - print('TIMER: epoch time:', time.time()-lt, 's') - epoch_time_tracker.add_epoch_time(epoch-1, time.time()-lt) - # print('DEBUG: accuracy:', acc_test, '%') - if args.benchrun and epoch == args.epochs: - print('\n----------------------------------------------------') - print('DEBUG: benchmark of last epoch:\n') - what1 = 'cuda' if args.cuda else 'cpu' - print(prof.key_averages().table( - sort_by='self_'+str(what1)+'_time_total')) - - # save state if found a better state - is_best = loss_acc < best_acc - if epoch % args.restart_int == 0 and not args.benchrun: - save_state(epoch, distrib_model, loss_acc, optimizer, - res_name, grank, gwsize, is_best) - # reset best_acc - best_acc = min(loss_acc, best_acc) - - # finalise - # save final state - if not args.benchrun: - save_state(epoch, distrib_model, loss_acc, - optimizer, res_name, grank, gwsize, True) - if torch.cuda.is_available(): - dist.barrier() - - # some debug - if grank == 0: - print('\n--------------------------------------------------------') - print('DEBUG: training results:\n') - print('TIMER: first epoch time:', first_ep_t, ' s') - print('TIMER: last epoch time:', time.time()-lt, ' s') - print('TIMER: average epoch time:', (time.time()-et)/args.epochs, ' s') - print('TIMER: total epoch time:', time.time()-et, ' s') 
- if epoch > 1: - print('TIMER: total epoch-1 time:', - time.time()-et-first_ep_t, ' s') - print('TIMER: average epoch-1 time:', - (time.time()-et-first_ep_t)/(args.epochs-1), ' s') - if args.benchrun: - print('TIMER: total epoch-2 time:', lt-first_ep_t, ' s') - print('TIMER: average epoch-2 time:', - (lt-first_ep_t)/(args.epochs-2), ' s') - # print('DEBUG: last accuracy:', acc_test, '%') - print('DEBUG: memory req:', - int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') \ - if args.cuda else 'DEBUG: memory req: - MB' - print('DEBUG: memory summary:\n\n', - torch.cuda.memory_summary(0)) if args.cuda else '' - - if grank == 0: - print(f'TIMER: final time: {time.time()-st} s\n') - nnod = os.environ.get('SLURM_NNODES', 'unk') - epoch_time_tracker.save( - csv_file=f"epochtime_ddp-bl_{nnod}N.csv") - - print(f" - TRAINING FINISHED") - - # clean-up - if torch.cuda.is_available(): - dist.barrier() - dist.destroy_process_group() - - -if __name__ == "__main__": - main() - sys.exit() diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml b/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml deleted file mode 100644 index 8cada7be..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/ddp/config.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# I/O -data_dir: /p/project/intertwin/datasets/Imagenet_sub/ImageNet_uncompressed/train/ #/p/project/intertwin/datasets/ImageNet_uncompressed/train -restart_int: 10 -verbose: True - -# Model -batch_size: 64 -epochs: 3 -lr: 0.001 -momentum: 0.5 -shuff: False - -# Debugging -benchrun: False -testrun: False -nseed: 10 -log_int: 10 - -# Distributed ML -backend: nccl -nworker: 4 # num workers dataloader -prefetch: 2 -no_cuda: False diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh deleted file mode 100644 index 44522766..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/ddp/scaling-test.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -rm *checkpoint.pth.tar *.out *.err *.csv - -timeout="01:01:00" -for N in 1 2 4 8 -do - sbatch --job-name="DDP-imagenet-pure-n$N" --nodes=$N --output="job-Pddp-n$N.out" --error="job-Pddp-n$N.err" --time=$timeout ddp_slurm.sh -done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp/ddp_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/ddp_slurm.sh similarity index 95% rename from tutorials/distributed-ml/torch-scaling-test/ddp/ddp_slurm.sh rename to tutorials/distributed-ml/torch-scaling-test/ddp_slurm.sh index e85b9f1e..1bef78df 100644 --- a/tutorials/distributed-ml/torch-scaling-test/ddp/ddp_slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/ddp_slurm.sh @@ -24,7 +24,7 @@ ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py # set env -source ../../../../envAI_hdfml/bin/activate +source ../../../envAI_hdfml/bin/activate # job info debug=false @@ -52,7 +52,7 @@ if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then fi # launch training -TRAINING_CMD="DDP_trainer.py -c config.yaml" +TRAINING_CMD="DDP_trainer.py -c ddp-config.yaml" srun --cpu-bind=none bash -c "torchrun \ --log_dir='logs' \ diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml b/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml new file mode 100644 index 00000000..a3529f95 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml @@ -0,0 +1,18 @@ +# Data and logging +data_dir: tmp_data/ +log_int: 10 +verbose: 
True +nworker: 4 # num workers dataloader + +# Model +batch_size: 64 +epochs: 3 +lr: 0.001 + +# Reproducibility +rnd_seed: 10 + +# Distributed ML +backend: nccl +no_cuda: False + diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py b/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py deleted file mode 100644 index 3589278c..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed/DS_trainer.py +++ /dev/null @@ -1,355 +0,0 @@ -""" -Scaling test of Microsoft Deepspeed on Imagenet using Resnet. -""" -import argparse -import sys -import os -import time -import random -import numpy as np -import deepspeed - -import torch -import torch.distributed as dist -import torch.nn.functional as F -import torchvision -from torchvision import datasets, transforms - -from itwinai.parser import ArgumentParser as ItAIArgumentParser -from itwinai.loggers import EpochTimeTracker - - -def parsIni(): - parser = ItAIArgumentParser( - description='PyTorch Imagenet scaling test') - parser.add_argument('--batch-size', type=int, default=64, metavar='N', - help='input batch size for training (default: 64)') - parser.add_argument('--epochs', type=int, default=10, metavar='N', - help='number of epochs to train (default: 10)') - parser.add_argument('--lr', type=float, default=0.01, metavar='LR', - help='learning rate (default: 0.01)') - parser.add_argument('--log-int', type=int, default=100, metavar='N', - help=( - 'how many batches to wait before logging ' - 'training status')) - parser.add_argument('--data-dir', default='./', - help=('location of the training dataset in the local ' - 'filesystem')) - parser.add_argument('--backend', type=str, default='nccl', metavar='N', - help='backend for parrallelisation (default: nccl)') - parser.add_argument('--restart-int', type=int, default=10, metavar='N', - help='restart int per epoch (default: 10)') - parser.add_argument('--testrun', action='store_true', default=False, - help='do a test run (default: False)') - parser.add_argument('--local_rank', type=int, default=-1, - help='local rank passed from distributed launcher') - parser.add_argument('--nworker', type=int, default=0, - help=('number of workers in DataLoader ' - '(default: 0 - only main)')) - parser.add_argument('--verbose', - action=argparse.BooleanOptionalAction, - help='Print parsed arguments') - # parse to deepspeed - parser = deepspeed.add_config_arguments(parser) - args = parser.parse_args() - if args.verbose: - args_list = [f"{key}: {val}" for key, val in args.items()] - print("PARSED ARGS:\n", '\n'.join(args_list)) - - return args - - -def train(args, model, train_loader, optimizer, epoch, grank, gwsize): - device = model.local_rank - t_list = [] - loss_acc = 0 - if grank == 0: - print("\n") - for batch_idx, (data, target) in enumerate(train_loader): - # if grank == 0: - # print(f"BS == DATA: {data.shape}, TARGET: {target.shape}") - t = time.perf_counter() - data, target = data.to(device), target.to(device) - optimizer.zero_grad() - output = model(data) - loss = F.nll_loss(output, target) - loss.backward() - optimizer.step() - if batch_idx % args.log_int == 0 and grank == 0: - print( - f'Train epoch: {epoch} [{batch_idx * len(data)}/' - f'{len(train_loader.dataset)/gwsize} ' - f'({100.0 * batch_idx *len(data) / len(train_loader):.0f}%)]' - '\t\tLoss: {loss.item():.6f}') - t_list.append(time.perf_counter() - t) - loss_acc += loss.item() - if grank == 0: - print('TIMER: train time', sum(t_list) / len(t_list), 's') - return loss_acc - - -def test(model, 
test_loader, grank, gwsize): - device = model.local_rank - test_loss = 0 - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - data, target = data.to(device), target.to(device) - output = model(data) - # sum up batch loss - test_loss += F.nll_loss(output, target, reduction="sum").item() - # get the index of the max log-probability - pred = output.argmax(dim=1, keepdim=True) - correct += pred.eq(target.view_as(pred)).sum().item() - test_loss /= len(test_loader.dataset) - if grank == 0: - print( - f'Test set: average loss: {test_loss:.4f}\t' - f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') - acc_test = 100.0 * correct * gwsize / len(test_loader.dataset) - return acc_test - - -def save_state( - epoch, distrib_model, loss_acc, - optimizer, res_name, grank, gwsize, is_best -): - """Save training state.""" - rt = time.time() - # find if is_best happened in any worker - if torch.cuda.is_available(): - is_best_m = par_allgather_obj(is_best, gwsize) - - if torch.cuda.is_available(): - if any(is_best_m): - # find which rank is_best happened - select first rank if multiple - is_best_rank = np.where(np.array(is_best_m))[0][0] - - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - # write on worker with is_best - if grank == is_best_rank: - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} ' - f'in {time.time()-rt} s') - else: - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} ' - f'in {time.time()-rt} s') - - -def seed_worker(worker_id): - worker_seed = torch.initial_seed() % 2**32 - np.random.seed(worker_seed) - random.seed(worker_seed) - - -def par_allgather_obj(obj, gwsize): - """Gathers any object from the whole group in a list (to all workers)""" - res = [None]*gwsize - dist.all_gather_object(res, obj, group=None) - return res - - -def main(): - # get parse args - args = parsIni() - - # limit # of CPU threads to be used per worker - torch.set_num_threads(1) - - # get directory - program_dir = os.getcwd() - - # start the time.time for profiling - st = time.time() - - # initializes the distributed backend - deepspeed.init_distributed(dist_backend=args.backend) - - # get job rank info - rank==0 master gpu - gwsize = dist.get_world_size() # global world size - per run - lwsize = torch.cuda.device_count() # local world size - per node - grank = dist.get_rank() # global rank - assign per run - lrank = dist.get_rank() % lwsize # local rank - assign per node - - # some debug - if grank == 0: - print('TIMER: initialise:', time.time()-st, 's') - print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) - print('DEBUG: sys.version:', sys.version) - print('DEBUG: args.data_dir:', args.data_dir) - print('DEBUG: args.batch_size:', args.batch_size) - print('DEBUG: args.epochs:', args.epochs) - print('DEBUG: args.lr:', args.lr) - print('DEBUG: args.backend:', args.backend) - print('DEBUG: args.log_int:', args.log_int) - print('DEBUG: args.restart_int:', args.restart_int) - print('DEBUG: args.testrun:', args.testrun, '\n') - - # encapsulate the model on the GPU assigned to the current process - torch.cuda.set_device(lrank) - - # read training dataset - # Initialize transformations for data augmentation - transform = 
transforms.Compose([ - transforms.Resize(256), - transforms.RandomHorizontalFlip(), - transforms.RandomVerticalFlip(), - transforms.RandomRotation(degrees=45), - transforms.ColorJitter( - brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) - ]) - - # Load the ImageNet Object Localization Challenge dataset - train_dataset = datasets.ImageFolder( - root=args.data_dir, - transform=transform - ) - # test_dataset = ... - - # # distribute test dataset - # test_sampler = torch.utils.data.distributed.DistributedSampler( - # test_dataset, num_replicas=gwsize, rank=grank) - # test_loader = torch.utils.data.DataLoader( - # test_dataset, batch_size=args.batch_size, - # sampler=test_sampler, num_workers=0, pin_memory=True, shuffle=False) - - if grank == 0: - print('TIMER: read and concat data:', time.time()-st, 's') - - # create CNN model - model = torchvision.models.resnet152() - - # Initialize DeepSpeed to use the following features - # 1) Distributed model - # 2) DeepSpeed optimizer - # 3) Distributed data loader - deepspeed_config = { - "train_micro_batch_size_per_gpu": args.batch_size, - "optimizer": { - "type": "SGD", - "params": { - "lr": args.lr, - "momentum": 0.5 - } - }, - "fp16": { - "enabled": False - }, - "zero_optimization": False - } - distrib_model, optimizer, train_loader, _ = deepspeed.initialize( - args=args, model=model, model_parameters=model.parameters(), - training_data=train_dataset, config_params=deepspeed_config) - - # optimizer - # optimizer = torch.optim.Adam(distrib_model.parameters(), lr=args.lr) - # optimizer = torch.optim.SGD( - # distrib_model.parameters(), lr=args.lr, momentum=0.5) - - # resume state - start_epoch = 1 - best_acc = np.Inf - nnod = os.environ.get('SLURM_NNODES', 'unk') - res_name = f'ds-{nnod}N-checkpoint.pth.tar' - if os.path.isfile(res_name): - try: - dist.barrier() - # Map model to be loaded to specified single gpu. 
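# (Illustrative gloss on the remapping in the next lines, assuming the checkpoint
# was written from GPU 0: torch.load(path, map_location={'cuda:0': f'cuda:{lrank}'})
# moves every saved tensor onto this worker's own GPU before the model and
# optimizer state dicts are restored.)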
- loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} - checkpoint = torch.load(program_dir+'/'+res_name, map_location=loc) - start_epoch = checkpoint['epoch'] - best_acc = checkpoint['best_acc'] - distrib_model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - if grank == 0: - print(f'WARNING: restarting from {start_epoch} epoch') - except Exception: - if grank == 0: - print('WARNING: restart file cannot be loaded, restarting!') - - if start_epoch >= args.epochs+1: - if grank == 0: - print('WARNING: given epochs are less than the ' - 'one in the restart file!\n' - 'WARNING: SYS.EXIT is issued') - deepspeed.sys.exit() - sys.exit() - - # start trainin/testing loop - if grank == 0: - print('TIMER: broadcast:', time.time()-st, 's') - print('\nDEBUG: start training') - print('--------------------------------------------------------') - epoch_time_tracker = EpochTimeTracker(series_name="deepspeed-bl") - - et = time.time() - for epoch in range(start_epoch, args.epochs + 1): - lt = time.time() - # training - loss_acc = train(args, distrib_model, train_loader, - optimizer, epoch, grank, gwsize) - - # testing - # acc_test = test(distrib_model, test_loader, grank, gwsize) - - # save state if found a better state - is_best = loss_acc < best_acc - if epoch % args.restart_int == 0: - save_state(epoch, distrib_model, loss_acc, optimizer, - res_name, grank, gwsize, is_best) - # reset best_acc - best_acc = min(loss_acc, best_acc) - - if grank == 0: - print('TIMER: epoch time:', time.time()-lt, 's') - epoch_time_tracker.add_epoch_time(epoch-1, time.time()-lt) - # print('DEBUG: accuracy:', acc_test, '%') - - # finalise - # save final state - save_state(epoch, distrib_model, loss_acc, - optimizer, res_name, grank, gwsize, True) - dist.barrier() - - # some debug - if grank == 0: - print('\n--------------------------------------------------------') - print('DEBUG: results:\n') - print('TIMER: last epoch time:', time.time()-lt, 's') - print('TIMER: total epoch time:', time.time()-et, 's') - # print('DEBUG: last accuracy:', acc_test, '%') - print('DEBUG: memory req:', int( - torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') - - if grank == 0: - print(f'TIMER: final time: {time.time()-st} s\n') - nnod = os.environ.get('SLURM_NNODES', 'unk') - epoch_time_tracker.save( - csv_file=f"epochtime_deepspeed-bl_{nnod}N.csv") - - print(f" - TRAINING FINISHED") - - # clean-up - deepspeed.sys.exit() - - -if __name__ == "__main__": - main() - sys.exit() diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml b/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml deleted file mode 100644 index 879f94fb..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed/config.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# I/O -data_dir: /p/project/intertwin/datasets/Imagenet_sub/ImageNet_uncompressed/train/ #/p/project/intertwin/datasets/ImageNet_uncompressed/train -restart_int: 10 -verbose: True - -# Model -batch_size: 64 -epochs: 3 -lr: 0.001 - -# Debugging -testrun: False -log_int: 10 - -# Distributed ML -backend: nccl -nworker: 4 # num workers dataloader diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh deleted file mode 100644 index 2b34df6a..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed/scaling-test.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -rm *checkpoint.pth.tar *.out *.err *.csv - -timeout="01:01:00" -for N in 1 2 4 8 
-do - sbatch --job-name="DS-imagenet-pure-n$N" --nodes=$N --output="job-Pds-n$N.out" --error="job-Pds-n$N.err" --time=$timeout deepspeed_slurm.sh -done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/deepspeed_slurm.sh similarity index 95% rename from tutorials/distributed-ml/torch-scaling-test/deepspeed/deepspeed_slurm.sh rename to tutorials/distributed-ml/torch-scaling-test/deepspeed_slurm.sh index e4cc784a..ce5f04b3 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed_slurm.sh @@ -24,7 +24,7 @@ ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py # set env -source ../../../../envAI_hdfml/bin/activate +source ../../../envAI_hdfml/bin/activate # job info debug=false @@ -57,7 +57,7 @@ MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i export MASTER_ADDR export MASTER_PORT=29500 -TRAINING_CMD="DS_trainer.py -c config.yaml" +TRAINING_CMD="DS_trainer.py -c deepspeed-config.yaml" # Run without launcher: set --ntasks-per-node=NUM_GPUS srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml b/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml similarity index 57% rename from tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml rename to tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml index 50a408ad..eaddc9d2 100644 --- a/tutorials/distributed-ml/torch-scaling-test/horovod/config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml @@ -1,20 +1,22 @@ # I/O -data_dir: /p/project/intertwin/datasets/Imagenet_sub/ImageNet_uncompressed/train/ #/p/project/intertwin/datasets/ImageNet_uncompressed/train +data_dir: tmp_data/ +log_int: 10 verbose: True +nworker: 4 # num workers dataloader +prefetch: 2 # Model batch_size: 64 epochs: 3 lr: 0.001 momentum: 0.5 -use_adasum: False +shuff: False # Debugging -seed: 10 -log_interval: 10 +rnd_seed: 10 # Distributed ML -nworker: 4 # num workers dataloader no_cuda: False fp16_allreduce: False +use_adasum: False gradient_predivide_factor: 1.0 \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py b/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py deleted file mode 100755 index 10cbdd08..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/horovod/horovod_trainer.py +++ /dev/null @@ -1,261 +0,0 @@ -""" -Scaling test of Horovod on Imagenet using Resnet. 
-""" -import argparse -import os -import sys -from timeit import default_timer as timer - -import torch.multiprocessing as mp -import torch.nn.functional as F -import torch.optim as optim -import torch.utils.data.distributed -import horovod.torch as hvd -import torchvision -from torchvision import datasets, transforms - -from itwinai.parser import ArgumentParser as ItAIArgumentParser -from itwinai.loggers import EpochTimeTracker - - -def parsIni(): - parser = ItAIArgumentParser(description='PyTorch Imagenet Example') - parser.add_argument('--batch-size', type=int, default=64, metavar='N', - help='input batch size for training (default: 64)') - parser.add_argument('--epochs', type=int, default=10, metavar='N', - help='number of epochs to train (default: 10)') - parser.add_argument('--lr', type=float, default=0.01, metavar='LR', - help='learning rate (default: 0.01)') - parser.add_argument('--momentum', type=float, default=0.5, metavar='M', - help='SGD momentum (default: 0.5)') - parser.add_argument('--no-cuda', action='store_true', default=False, - help='disables CUDA training') - parser.add_argument('--seed', type=int, default=42, metavar='S', - help='random seed (default: 42)') - parser.add_argument('--log-interval', type=int, default=100, metavar='N', - help='#batches to wait before logging training status') - parser.add_argument('--fp16-allreduce', action='store_true', default=False, - help='use fp16 compression during allreduce') - parser.add_argument('--use-adasum', action='store_true', default=False, - help='use adasum algorithm to do reduction') - parser.add_argument('--gradient-predivide-factor', type=float, default=1.0, - help=('apply gradient predivide factor in optimizer ' - '(default: 1.0)')) - parser.add_argument('--data-dir', default='./', - help=('location of the training dataset in the ' - 'local filesystem')) - parser.add_argument('--verbose', - action=argparse.BooleanOptionalAction, - help='Print parsed arguments') - parser.add_argument('--nworker', type=int, default=0, - help=('number of workers in DataLoader ' - '(default: 0 - only main)')) - - args = parser.parse_args() - if args.verbose: - args_list = [f"{key}: {val}" for key, val in args.items()] - print("PARSED ARGS:\n", '\n'.join(args_list)) - - return args - - -def train(epoch): - model.train() - # Horovod: set epoch to sampler for shuffling - train_sampler.set_epoch(epoch) - for batch_idx, (data, target) in enumerate(train_loader): - # if hvd.local_rank() == 0 and hvd.rank() == 0: - # print(f"BS == DATA: {data.shape}, TARGET: {target.shape}") - if args.cuda: - data, target = data.cuda(), target.cuda() - optimizer.zero_grad() - output = model(data) - loss = F.nll_loss(output, target) - loss.backward() - optimizer.step() - if batch_idx % args.log_interval == 0: - # Horovod: use train_sampler to determine the number of examples in - # this worker's partition - print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( - epoch, batch_idx * len(data), len(train_sampler), - 100. * batch_idx / len(train_loader), loss.item())) - - -def metric_average(val, namegiv): - tensor = torch.tensor(val) - avg_tensor = hvd.allreduce(tensor, name=namegiv) - return avg_tensor.item() - - -# def test(): -# model.eval() -# test_loss = 0. -# test_accuracy = 0. 
-# for data, target in test_loader: -# if args.cuda: -# data, target = data.cuda(), target.cuda() -# output = model(data) -# # sum up batch loss -# test_loss += F.nll_loss(output, target, size_average=False).item() -# # get the index of the max log-probability -# pred = output.data.max(1, keepdim=True)[1] -# test_accuracy += \ -# pred.eq(target.data.view_as(pred)).cpu().float().sum() - -# # Horovod: use test_sampler to determine the number of examples in -# # this worker's partition -# test_loss /= len(test_sampler) -# test_accuracy /= len(test_sampler) - -# # Horovod: average metric values across workers -# test_loss = metric_average(test_loss, 'avg_loss') -# test_accuracy = metric_average(test_accuracy, 'avg_accuracy') - -# # Horovod: print output only on first rank -# if hvd.rank() == 0: -# print('\nTest set: Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format( -# test_loss, 100. * test_accuracy)) - - -if __name__ == '__main__': - # get parse args - args = parsIni() - args.cuda = not args.no_cuda and torch.cuda.is_available() - - # Horovod: init - st = timer() - hvd.init() - torch.manual_seed(args.seed) - - # some debug - if hvd.rank() == 0 and hvd.local_rank() == 0: - print('DEBUG: sys.version:', sys.version) - print('DEBUG: torch.cuda.is_available():', torch.cuda.is_available()) - print('DEBUG: torch.cuda.current_device():', - torch.cuda.current_device()) - print('DEBUG: torch.cuda.device_count():', torch.cuda.device_count()) - print('DEBUG: torch.cuda.get_device_properties(hvd.local_rank()):', - torch.cuda.get_device_properties(hvd.local_rank())) - print('DEBUG: args.data_dir:', args.data_dir) - print('DEBUG: args.batch_size:', args.batch_size) - print('DEBUG: args.epochs:', args.epochs) - - if hvd.rank() == 0 and hvd.local_rank() == 0: - print('TIMER: initialise:', timer()-st, 's') - - if args.cuda: - # Horovod: pin GPU to local rank - torch.cuda.set_device(hvd.local_rank()) - torch.cuda.manual_seed(args.seed) - - # Horovod: limit # of CPU threads to be used per worker - torch.set_num_threads(1) - - # kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} - kwargs = {'num_workers': args.nworker, - 'pin_memory': True} if args.cuda else {} - # When supported, use 'forkserver' to spawn dataloader workers instead... - # issues with Infiniband implementations that are not fork-safe - if (kwargs.get('num_workers', 0) > 0 and hasattr(mp, '_supports_context') - and - mp._supports_context and - 'forkserver' in mp.get_all_start_methods()): - kwargs['multiprocessing_context'] = 'forkserver' - - # Initialize transformations for data augmentation - transform = transforms.Compose([ - transforms.Resize(256), - transforms.RandomHorizontalFlip(), - transforms.RandomVerticalFlip(), - transforms.RandomRotation(degrees=45), - transforms.ColorJitter( - brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) - ]) - - # Load the ImageNet Object Localization Challenge dataset - train_dataset = datasets.ImageFolder( - root=args.data_dir, - transform=transform - ) - # test_dataset = ... 
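# (Gloss, mirroring the note carried by the new horovod_trainer.py later in this
# series: Horovod does not initialise torch.distributed, so DistributedSampler
# cannot infer the group size on its own; num_replicas=hvd.size() and
# rank=hvd.rank() have to be passed explicitly, as done in the lines below.)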
- - # Horovod: use DistributedSampler to partition the training data - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset, num_replicas=hvd.size(), rank=hvd.rank()) - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, - sampler=train_sampler, **kwargs) - - # create CNN model - model = torchvision.models.resnet152() - - # by default, Adasum doesn't need scaling up learning rate - lr_scaler = hvd.size() if not args.use_adasum else 1 - - if args.cuda: - # move model to GPU. - model.cuda() - # if using GPU Adasum allreduce, scale learning rate by local_size - if args.use_adasum and hvd.nccl_built(): - lr_scaler = hvd.local_size() - - # Horovod: scale learning rate by lr_scaler - optimizer = optim.SGD(model.parameters(), lr=args.lr * lr_scaler, - momentum=args.momentum) - - # Horovod: broadcast parameters & optimizer state - hvd.broadcast_parameters(model.state_dict(), root_rank=0) - hvd.broadcast_optimizer_state(optimizer, root_rank=0) - - # Horovod: (optional) compression algorithm - compression = ( - hvd.Compression.fp16 if args.fp16_allreduce else hvd.Compression.none - ) - - # Horovod: wrap optimizer with DistributedOptimizer - optimizer = hvd.DistributedOptimizer( - optimizer, - named_parameters=model.named_parameters(), - compression=compression, - op=hvd.Adasum if args.use_adasum else hvd.Average, - gradient_predivide_factor=args.gradient_predivide_factor) - - if hvd.rank() == 0 and hvd.local_rank() == 0: - print('TIMER: broadcast:', timer()-st, 's') - epoch_time_tracker = EpochTimeTracker(series_name="horovod-bl") - - et = timer() - for epoch in range(1, args.epochs + 1): - lt = timer() - train(epoch) - # test() - print('TIMER: hvd.rank():', hvd.rank(), - 'hvd.local_rank():', hvd.local_rank(), - ', epoch time:', timer()-lt, 's') - - if hvd.rank() == 0 and hvd.local_rank() == 0: - epoch_time_tracker.add_epoch_time(epoch-1, timer()-lt) - - print('TIMER: last epoch time:', timer()-lt, 's') - print('TIMER: total epoch time:', timer()-et, 's') - - if hvd.rank() == 0 and hvd.local_rank() == 0: - print('\n', torch.cuda.memory_summary(0), '\n') - nnod = os.environ.get('SLURM_NNODES', 'unk') - epoch_time_tracker.save( - csv_file=f"epochtime_horovod-bl_{nnod}N.csv") - - print('DEBUG: hvd.rank():', hvd.rank(), - 'hvd.local_rank():', hvd.local_rank(), - ', torch.cuda.memory_reserved():', - int(torch.cuda.memory_reserved(hvd.local_rank())/1024/1024), 'MB') - - if hvd.rank() == 0 and hvd.local_rank() == 0: - print('DEBUG: memory req:', - int(torch.cuda.memory_reserved(hvd.local_rank())/1024/1024), - 'MB') - - print(f" - TRAINING FINISHED") diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh deleted file mode 100644 index 33f9ca37..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/horovod/scaling-test.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -rm *checkpoint.pth.tar *.out *.err *.csv - -timeout="01:01:00" -for N in 1 2 4 8 -do - sbatch --job-name="HVD-imagenet-pure-n$N" --nodes=$N --output="job-Phvd-n$N.out" --error="job-Phvd-n$N.err" --time=$timeout hvd_slurm.sh -done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py b/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py new file mode 100755 index 00000000..64b831b2 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py @@ -0,0 +1,309 @@ +""" +Scaling test of Horovod on Imagenet 
using Resnet. +""" +from typing import Optional +import argparse +import os +import sys +from timeit import default_timer as timer + +import torch.multiprocessing as mp +import torch.nn.functional as F +import torch.optim as optim +import torch.utils.data.distributed +import horovod.torch as hvd +import torchvision + +from itwinai.parser import ArgumentParser as ItAIArgumentParser +from itwinai.loggers import EpochTimeTracker + +from utils import imagenet_dataset, seed_worker, set_seed + + +def parse_params(): + parser = ItAIArgumentParser(description='PyTorch Imagenet Example') + + # Data and logging + parser.add_argument('--data-dir', default='./', + help=('location of the training dataset in the ' + 'local filesystem')) + parser.add_argument('--log-int', type=int, default=100, + help=('#batches to wait before logging training ' + 'status. Disabled if < 0.')) + parser.add_argument('--verbose', + action=argparse.BooleanOptionalAction, + help='Print parsed arguments') + parser.add_argument('--nworker', type=int, default=0, + help=('number of workers in DataLoader ' + '(default: 0 - only main)')) + parser.add_argument('--prefetch', type=int, default=2, + help='prefetch data in DataLoader (default: 2)') + + # Model + parser.add_argument('--batch-size', type=int, default=64, + help='input batch size for training (default: 64)') + parser.add_argument('--epochs', type=int, default=10, + help='number of epochs to train (default: 10)') + parser.add_argument('--lr', type=float, default=0.01, + help='learning rate (default: 0.01)') + parser.add_argument('--momentum', type=float, default=0.5, + help='SGD momentum (default: 0.5)') + parser.add_argument('--shuff', action='store_true', default=False, + help='shuffle dataset (default: False)') + + # Reproducibility + parser.add_argument('--rnd-seed', type=Optional[int], default=None, + help='seed integer for reproducibility (default: 0)') + + # Distributed ML + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables CUDA training') + parser.add_argument('--fp16-allreduce', action='store_true', default=False, + help='use fp16 compression during allreduce') + parser.add_argument('--use-adasum', action='store_true', default=False, + help='use adasum algorithm to do reduction') + parser.add_argument('--gradient-predivide-factor', type=float, default=1.0, + help=('apply gradient pre-divide factor in optimizer ' + '(default: 1.0)')) + + args = parser.parse_args() + if args.verbose: + args_list = [f"{key}: {val}" for key, val in args.items()] + print("PARSED ARGS:\n", '\n'.join(args_list)) + + return args + + +def train( + model, optimizer, train_sampler, train_loader, args, use_cuda, epoch +): + model.train() + is_main_worker = hvd.local_rank() == 0 and hvd.rank() == 0 + t_list = [] + loss_acc = 0 + if is_main_worker: + print("\n") + for batch_idx, (data, target) in enumerate(train_loader): + # if hvd.local_rank() == 0 and hvd.rank() == 0: + # print(f"BS == DATA: {data.shape}, TARGET: {target.shape}") + t = timer() + if use_cuda: + data, target = data.cuda(), target.cuda() + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if (args.log_int > 0 and batch_idx % args.log_int == 0 + and is_main_worker): + # Use train_sampler to determine the number of examples in + # this worker's partition + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_sampler), + 100. 
* batch_idx / len(train_loader), loss.item())) + t_list.append(timer() - t) + loss_acc += loss.item() + if is_main_worker: + print('TIMER: train time', sum(t_list) / len(t_list), 's') + return loss_acc + + +def main(): + # Parse CLI args + args = parse_params() + + # Check resources availability + use_cuda = not args.no_cuda and torch.cuda.is_available() + is_distributed = False + if use_cuda and torch.cuda.device_count() > 0: + is_distributed = True + + # Start the time.time for profiling + st = timer() + + if is_distributed: + # Initializes the distributed backend which will + # take care of synchronizing the workers (nodes/GPUs) + hvd.init() + + # Set random seed for reproducibility + torch_prng = set_seed(args.rnd_seed, use_cuda) + + is_main_worker = True + if is_distributed and (hvd.rank() != 0 or hvd.local_rank() != 0): + is_main_worker = False + + # Get local rank + if is_distributed: + lrank = hvd.local_rank() + grank = hvd.rank() + gwsize = hvd.size() + lwsize = torch.cuda.device_count() + else: + # Use a single worker (either on GPU or CPU) + lrank = 0 + grank = 0 + gwsize = 1 + lwsize = 1 + + if is_main_worker: + print('TIMER: initialise:', timer()-st, 's') + print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) + print('DEBUG: sys.version:', sys.version) + print('DEBUG: args.data_dir:', args.data_dir) + print('DEBUG: args.log_int:', args.log_int) + print('DEBUG: args.nworker:', args.nworker) + print('DEBUG: args.prefetch:', args.prefetch) + print('DEBUG: args.batch_size:', args.batch_size) + print('DEBUG: args.epochs:', args.epochs) + print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.momentum:', args.momentum) + print('DEBUG: args.shuff:', args.shuff) + print('DEBUG: args.rnd_seed:', args.rnd_seed) + print('DEBUG: args.no_cuda:', args.no_cuda) + print('DEBUG: args.fp16_allreduce:', args.fp16_allreduce) + print('DEBUG: args.use_adasum:', args.use_adasum) + print('DEBUG: args.gradient_predivide_factor:', + args.gradient_predivide_factor) + if use_cuda: + print('DEBUG: torch.cuda.is_available():', + torch.cuda.is_available()) + print('DEBUG: torch.cuda.current_device():', + torch.cuda.current_device()) + print('DEBUG: torch.cuda.device_count():', + torch.cuda.device_count()) + print('DEBUG: torch.cuda.get_device_properties(hvd.local_rank()):', + torch.cuda.get_device_properties(hvd.local_rank())) + + if use_cuda: + # Pin GPU to local rank + torch.cuda.set_device(lrank) + + # Limit # of CPU threads to be used per worker + # torch.set_num_threads(1) + + # Dataset + train_dataset = imagenet_dataset(args.data_dir) + + kwargs = {} + # When supported, use 'forkserver' to spawn dataloader workers instead... 
+ # issues with Infiniband implementations that are not fork-safe + if (args.nworker > 0 and hasattr(mp, '_supports_context') + and + mp._supports_context and + 'forkserver' in mp.get_all_start_methods()): + kwargs['multiprocessing_context'] = 'forkserver' + + if is_distributed: + # Use DistributedSampler to partition the training data + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset, num_replicas=gwsize, rank=grank, + shuffle=(args.shuff and args.rnd_seed is None) + ) + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=(args.nworker > 1), + prefetch_factor=args.prefetch, generator=torch_prng, + worker_init_fn=seed_worker, **kwargs) + else: + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, generator=torch_prng, + worker_init_fn=seed_worker + ) + + # Create CNN model + model = torchvision.models.resnet152() + + if use_cuda: + model.cuda() + + if is_distributed: + # By default, Adasum doesn't need scaling up learning rate + lr_scaler = hvd.size() if not args.use_adasum else 1 + # If using GPU Adasum allreduce, scale learning rate by local_size + if args.use_adasum and hvd.nccl_built(): + lr_scaler = hvd.local_size() + # Scale learning rate by lr_scaler + args.lr *= lr_scaler + + optimizer = optim.SGD(model.parameters(), lr=args.lr, + momentum=args.momentum) + + if is_distributed: + # Broadcast parameters & optimizer state + hvd.broadcast_parameters(model.state_dict(), root_rank=0) + hvd.broadcast_optimizer_state(optimizer, root_rank=0) + + # Compression algorithm + compression = ( + hvd.Compression.fp16 if args.fp16_allreduce + else hvd.Compression.none + ) + + # Wrap optimizer with DistributedOptimizer + optimizer = hvd.DistributedOptimizer( + optimizer, + named_parameters=model.named_parameters(), + compression=compression, + op=hvd.Adasum if args.use_adasum else hvd.Average, + gradient_predivide_factor=args.gradient_predivide_factor) + + if is_main_worker: + print('TIMER: broadcast:', timer()-st, 's') + print('\nDEBUG: start training') + print('--------------------------------------------------------') + epoch_time_tracker = EpochTimeTracker(series_name="horovod-bl") + + et = timer() + start_epoch = 1 + for epoch in range(start_epoch, args.epochs + 1): + lt = timer() + if is_distributed: + # Inform the sampler that a new epoch started: shuffle + # may be needed + train_sampler.set_epoch(epoch) + train(model, optimizer, train_sampler, + train_loader, args, use_cuda, epoch) + + # Save first epoch timer + if epoch == start_epoch: + first_ep_t = timer()-lt + + # Final epoch + if epoch + 1 == args.epochs: + train_loader.last_epoch = True + + if is_main_worker: + print('TIMER: epoch time:', timer()-lt, 's') + epoch_time_tracker.add_epoch_time(epoch-1, timer()-lt) + + if is_main_worker: + print('\n--------------------------------------------------------') + print('DEBUG: training results:\n') + print('TIMER: first epoch time:', first_ep_t, ' s') + print('TIMER: last epoch time:', timer()-lt, 's') + print('TIMER: average epoch time:', (timer()-et)/args.epochs, ' s') + print('TIMER: total epoch time:', timer()-et, ' s') + if epoch > 1: + print('TIMER: total epoch-1 time:', + timer()-et-first_ep_t, ' s') + print('TIMER: average epoch-1 time:', + (timer()-et-first_ep_t)/(args.epochs-1), ' s') + if use_cuda: + print('DEBUG: memory req:', + int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') + 
print('DEBUG: memory summary:\n\n', + torch.cuda.memory_summary(0)) + print(f'TIMER: final time: {timer()-st} s\n') + + nnod = os.environ.get('SLURM_NNODES', 'unk') + epoch_time_tracker.save( + csv_file=f"epochtime_horovod-bl_{nnod}N.csv") + + print(f" - TRAINING FINISHED") + + +if __name__ == "__main__": + main() + sys.exit() diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod/hvd_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/hvd_slurm.sh similarity index 93% rename from tutorials/distributed-ml/torch-scaling-test/horovod/hvd_slurm.sh rename to tutorials/distributed-ml/torch-scaling-test/hvd_slurm.sh index e76532b1..02a7280a 100644 --- a/tutorials/distributed-ml/torch-scaling-test/horovod/hvd_slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/hvd_slurm.sh @@ -24,7 +24,7 @@ ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py # set env -source ../../../../envAI_hdfml/bin/activate +source ../../../envAI_hdfml/bin/activate # job info debug=false @@ -54,7 +54,7 @@ fi export CUDA_VISIBLE_DEVICES="0,1,2,3" # launch training -TRAINING_CMD="horovod_trainer.py -c config.yaml" +TRAINING_CMD="horovod_trainer.py -c horovod-config.yaml" srun --cpu-bind=none python -u $TRAINING_CMD diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index eeae5448..82c60bc9 100644 --- a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -1,15 +1,9 @@ #!/bin/bash # Run all versions of distributed ML version -for fold in ddp horovod deepspeed +for name in ddp hvd deepspeed do - cd $fold rm *checkpoint.pth.tar *.out *.err *.csv - if [ $fold == "horovod" ] - then - fold="hvd" - fi # echo $fold" training: $(sbatch --nodes=1 $fold"_slurm.sh")" - echo $fold" training: $(sbatch $fold"_slurm.sh")" - cd .. + echo $name" training: $(sbatch $name"_slurm.sh")" done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh new file mode 100644 index 00000000..0964bc9e --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +rm *checkpoint.pth.tar *.out *.err *.csv + +timeout="01:01:00" +for N in 1 2 4 8 +do + sbatch --job-name="DDP-imagenet-pure-n$N" --nodes=$N --output="job-Pddp-n$N.out" --error="job-Pddp-n$N.err" --time=$timeout ddp_slurm.sh + sbatch --job-name="HVD-imagenet-pure-n$N" --nodes=$N --output="job-Phvd-n$N.out" --error="job-Phvd-n$N.err" --time=$timeout hvd_slurm.sh + sbatch --job-name="DS-imagenet-pure-n$N" --nodes=$N --output="job-Pds-n$N.out" --error="job-Pds-n$N.err" --time=$timeout deepspeed_slurm.sh +done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/utils.py b/tutorials/distributed-ml/torch-scaling-test/utils.py new file mode 100644 index 00000000..fb6b55de --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/utils.py @@ -0,0 +1,52 @@ +from typing import Optional +import numpy as np +import random + +import torch +from torchvision import datasets, transforms + + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + + +def set_seed(rnd_seed: Optional[int], use_cuda: bool) -> torch.Generator: + """Set torch random seed and return a PRNG object. + + Args: + rnd_seed (Optional[int]): random seed. If None, the seed is not set. 
+ use_cuda (bool): whether GPU is available. + + Returns: + torch.Generator: PRNG object. + """ + g = torch.Generator() + if rnd_seed is not None: + # Deterministic execution + torch.manual_seed(rnd_seed) + g.manual_seed(rnd_seed) + if use_cuda: + torch.cuda.manual_seed(rnd_seed) + return g + + +def imagenet_dataset(data_root: str): + """Create a torch dataset object for Imagenet.""" + transform = transforms.Compose([ + transforms.Resize(256), + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=45), + transforms.ColorJitter( + brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + imagenet = datasets.ImageFolder( + root=data_root, + transform=transform + ) + return imagenet From 01013a6bc0638ceacb3a2382038824e9bfdb6b14 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Thu, 11 Apr 2024 19:13:59 +0200 Subject: [PATCH 092/171] Log epoch time in real-time --- src/itwinai/loggers.py | 16 +++++++++++++--- .../torch-scaling-test/DDP_trainer.py | 12 +++++++----- .../torch-scaling-test/DS_trainer.py | 11 +++++++---- .../torch-scaling-test/horovod_trainer.py | 12 +++++++----- 4 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/itwinai/loggers.py b/src/itwinai/loggers.py index e553a1b0..d5ed0008 100644 --- a/src/itwinai/loggers.py +++ b/src/itwinai/loggers.py @@ -452,15 +452,25 @@ def log( class EpochTimeTracker: - def __init__(self, series_name: str) -> None: + def __init__(self, series_name: str, csv_file: str) -> None: self.series_name = series_name self._data = [] + self.csv_file = csv_file + with open(csv_file, 'w') as csvfile: + csvwriter = csv.writer(csvfile) + csvwriter.writerow(['name', 'nodes', 'epoch_id', 'time']) def add_epoch_time(self, epoch_idx, time): n_nodes = os.environ.get('SLURM_NNODES', -1) - self._data.append((self.series_name, n_nodes, epoch_idx, time)) + fields = (self.series_name, n_nodes, epoch_idx, time) + self._data.append(fields) + with open(self.csv_file, 'a') as csvfile: + csvwriter = csv.writer(csvfile) + csvwriter.writerow(fields) - def save(self, csv_file: str): + def save(self, csv_file: Optional[str] = None): + if not csv_file: + csv_file = self.csv_file with open(csv_file, 'w') as csvfile: csvwriter = csv.writer(csvfile) csvwriter.writerow(['name', 'nodes', 'epoch_id', 'time']) diff --git a/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py b/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py index 8afc6a4e..70fcecfc 100755 --- a/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py @@ -6,6 +6,7 @@ import sys import os from timeit import default_timer as timer +import time import torch import torch.distributed as dist @@ -195,7 +196,11 @@ def main(): print('TIMER: broadcast:', timer()-st, 's') print('\nDEBUG: start training') print('--------------------------------------------------------') - epoch_time_tracker = EpochTimeTracker(series_name="ddp-bl") + nnod = os.environ.get('SLURM_NNODES', 'unk') + epoch_time_tracker = EpochTimeTracker( + series_name="ddp-bl", + csv_file=f"epochtime_ddp-bl_{nnod}N.csv" + ) et = timer() start_epoch = 1 @@ -241,10 +246,7 @@ def main(): torch.cuda.memory_summary(0)) print(f'TIMER: final time: {timer()-st} s\n') - nnod = os.environ.get('SLURM_NNODES', 'unk') - epoch_time_tracker.save( - csv_file=f"epochtime_ddp-bl_{nnod}N.csv") - + time.sleep(1) 
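# (Usage sketch for the updated EpochTimeTracker defined in src/itwinai/loggers.py
# above; the CSV file name here is only an example. Each call now appends a row
# as soon as the epoch finishes, so the trailing save() call becomes optional.)
# tracker = EpochTimeTracker(series_name="ddp-bl",
#                            csv_file="epochtime_ddp-bl_1N.csv")
# tracker.add_epoch_time(epoch - 1, timer() - lt)  # row is written immediately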
print(f" - TRAINING FINISHED") # Clean-up diff --git a/tutorials/distributed-ml/torch-scaling-test/DS_trainer.py b/tutorials/distributed-ml/torch-scaling-test/DS_trainer.py index fdeac4eb..987f6488 100644 --- a/tutorials/distributed-ml/torch-scaling-test/DS_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/DS_trainer.py @@ -6,6 +6,7 @@ import sys import os from timeit import default_timer as timer +import time import deepspeed import torch @@ -184,7 +185,11 @@ def main(): print('TIMER: broadcast:', timer()-st, 's') print('\nDEBUG: start training') print('--------------------------------------------------------') - epoch_time_tracker = EpochTimeTracker(series_name="deepspeed-bl") + nnod = os.environ.get('SLURM_NNODES', 'unk') + epoch_time_tracker = EpochTimeTracker( + series_name="deepspeed-bl", + csv_file=f"epochtime_deepspeed-bl_{nnod}N.csv" + ) et = timer() start_epoch = 1 @@ -227,10 +232,8 @@ def main(): print('DEBUG: memory summary:\n\n', torch.cuda.memory_summary(0)) print(f'TIMER: final time: {timer()-st} s\n') - nnod = os.environ.get('SLURM_NNODES', 'unk') - epoch_time_tracker.save( - csv_file=f"epochtime_deepspeed-bl_{nnod}N.csv") + time.sleep(1) print(f" - TRAINING FINISHED") # Clean-up diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py b/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py index 64b831b2..42f39442 100755 --- a/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py @@ -6,6 +6,7 @@ import os import sys from timeit import default_timer as timer +import time import torch.multiprocessing as mp import torch.nn.functional as F @@ -253,7 +254,11 @@ def main(): print('TIMER: broadcast:', timer()-st, 's') print('\nDEBUG: start training') print('--------------------------------------------------------') - epoch_time_tracker = EpochTimeTracker(series_name="horovod-bl") + nnod = os.environ.get('SLURM_NNODES', 'unk') + epoch_time_tracker = EpochTimeTracker( + series_name="horovod-bl", + csv_file=f"epochtime_horovod-bl_{nnod}N.csv" + ) et = timer() start_epoch = 1 @@ -297,10 +302,7 @@ def main(): torch.cuda.memory_summary(0)) print(f'TIMER: final time: {timer()-st} s\n') - nnod = os.environ.get('SLURM_NNODES', 'unk') - epoch_time_tracker.save( - csv_file=f"epochtime_horovod-bl_{nnod}N.csv") - + time.sleep(1) print(f" - TRAINING FINISHED") From 71f9f9cca551e537a0dabbfb6a4cc9273785e05c Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 12 Apr 2024 10:27:56 +0200 Subject: [PATCH 093/171] FIX deepspeed dataloader for potential performances improvement --- .../torch-scaling-test/ddp-config.yaml | 4 +- .../torch-scaling-test/ddp_slurm.sh | 2 +- .../{DDP_trainer.py => ddp_trainer.py} | 14 ++-- .../torch-scaling-test/deepspeed-config.yaml | 5 +- .../torch-scaling-test/deepspeed_slurm.sh | 6 +- .../{DS_trainer.py => deepspeed_trainer.py} | 42 ++++++++++-- .../torch-scaling-test/horovod-config.yaml | 4 +- .../{hvd_slurm.sh => horovod_slurm.sh} | 4 +- .../torch-scaling-test/horovod_trainer.py | 64 ++++++++++--------- .../torch-scaling-test/runall.sh | 5 +- .../torch-scaling-test/scaling-test.sh | 4 +- .../torch-scaling-test/utils.py | 5 +- 12 files changed, 101 insertions(+), 58 deletions(-) rename tutorials/distributed-ml/torch-scaling-test/{DDP_trainer.py => ddp_trainer.py} (96%) rename tutorials/distributed-ml/torch-scaling-test/{DS_trainer.py => deepspeed_trainer.py} (84%) rename tutorials/distributed-ml/torch-scaling-test/{hvd_slurm.sh => horovod_slurm.sh} 
(95%) diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml b/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml index 583a5132..35d46333 100644 --- a/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml @@ -1,12 +1,12 @@ # Data and logging -data_dir: tmp_data/ +data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ log_int: 10 verbose: True nworker: 4 # num workers dataloader prefetch: 2 # Model -batch_size: 64 +batch_size: 64 # micro batch size epochs: 3 lr: 0.001 momentum: 0.5 diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/ddp_slurm.sh index 1bef78df..327a1493 100644 --- a/tutorials/distributed-ml/torch-scaling-test/ddp_slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/ddp_slurm.sh @@ -52,7 +52,7 @@ if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then fi # launch training -TRAINING_CMD="DDP_trainer.py -c ddp-config.yaml" +TRAINING_CMD="ddp_trainer.py -c ddp-config.yaml" srun --cpu-bind=none bash -c "torchrun \ --log_dir='logs' \ diff --git a/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py b/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py similarity index 96% rename from tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py rename to tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py index 70fcecfc..30ffdab5 100755 --- a/tutorials/distributed-ml/torch-scaling-test/DDP_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py @@ -12,6 +12,8 @@ import torch.distributed as dist import torch.nn as nn import torch.nn.functional as F +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler import torchvision from itwinai.parser import ArgumentParser as ItAIArgumentParser @@ -84,7 +86,7 @@ def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): loss = F.nll_loss(output, target) loss.backward() optimizer.step() - if args.log_int > 0 and batch_idx % args.log_int == 0 and grank == 0: + if grank == 0 and args.log_int > 0 and batch_idx % args.log_int == 0: print( f'Train epoch: {epoch} [{batch_idx * len(data)}/' f'{len(train_loader.dataset)/gwsize} ' @@ -159,12 +161,12 @@ def main(): if is_distributed: # Distributed sampler restricts data loading to a subset of the dataset # exclusive to the current process - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, + train_sampler = DistributedSampler( + train_dataset, # num_replicas=gwsize, rank=grank, shuffle=(args.shuff and args.rnd_seed is None) ) - train_loader = torch.utils.data.DataLoader( + train_loader = DataLoader( train_dataset, batch_size=args.batch_size, sampler=train_sampler, num_workers=args.nworker, pin_memory=True, persistent_workers=(args.nworker > 1), @@ -172,7 +174,7 @@ def main(): worker_init_fn=seed_worker ) else: - train_loader = torch.utils.data.DataLoader( + train_loader = DataLoader( train_dataset, batch_size=args.batch_size, generator=torch_prng, worker_init_fn=seed_worker ) @@ -210,6 +212,8 @@ def main(): # Inform the sampler that a new epoch started: shuffle # may be needed train_sampler.set_epoch(epoch) + + # Training train(model, device, train_loader, optimizer, epoch, grank, gwsize, args) # Save first epoch timer diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml 
b/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml index a3529f95..21e389c1 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml @@ -1,13 +1,14 @@ # Data and logging -data_dir: tmp_data/ +data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ log_int: 10 verbose: True nworker: 4 # num workers dataloader # Model -batch_size: 64 +batch_size: 64 # micro batch size epochs: 3 lr: 0.001 +shuff: False # Reproducibility rnd_seed: 10 diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/deepspeed_slurm.sh index ce5f04b3..7e35cac0 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed_slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed_slurm.sh @@ -5,8 +5,8 @@ #SBATCH --account=intertwin #SBATCH --mail-user= #SBATCH --mail-type=ALL -#SBATCH --output=job-ds.out -#SBATCH --error=job-ds.err +#SBATCH --output=job-deepspeed.out +#SBATCH --error=job-deepspeed.err #SBATCH --time=00:30:00 # configure node and process count on the CM @@ -57,7 +57,7 @@ MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i export MASTER_ADDR export MASTER_PORT=29500 -TRAINING_CMD="DS_trainer.py -c deepspeed-config.yaml" +TRAINING_CMD="deepspeed_trainer.py -c deepspeed-config.yaml" # Run without launcher: set --ntasks-per-node=NUM_GPUS srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed diff --git a/tutorials/distributed-ml/torch-scaling-test/DS_trainer.py b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py similarity index 84% rename from tutorials/distributed-ml/torch-scaling-test/DS_trainer.py rename to tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py index 987f6488..1e0a76da 100644 --- a/tutorials/distributed-ml/torch-scaling-test/DS_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py @@ -12,12 +12,14 @@ import torch import torch.distributed as dist import torch.nn.functional as F +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler import torchvision from itwinai.parser import ArgumentParser as ItAIArgumentParser from itwinai.loggers import EpochTimeTracker -from utils import imagenet_dataset +from utils import seed_worker, set_seed, imagenet_dataset def parse_params(): @@ -43,6 +45,8 @@ def parse_params(): help='number of epochs to train (default: 10)') parser.add_argument('--lr', type=float, default=0.01, metavar='LR', help='learning rate (default: 0.01)') + parser.add_argument('--shuff', action='store_true', default=False, + help='shuffle dataset (default: False)') # Reproducibility parser.add_argument('--rnd-seed', type=Optional[int], default=None, @@ -115,9 +119,8 @@ def main(): if is_distributed: deepspeed.init_distributed(dist_backend=args.backend) - if args.rnd_seed is not None: - # Deterministic execution - torch.manual_seed(args.rnd_seed) + # Set random seed for reproducibility + torch_prng = set_seed(args.rnd_seed, use_cuda) if is_distributed: # Get job rank info - rank==0 master gpu @@ -143,6 +146,7 @@ def main(): print('DEBUG: args.batch_size:', args.batch_size) print('DEBUG: args.epochs:', args.epochs) print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.shuff:', args.shuff) print('DEBUG: args.rnd_seed:', args.rnd_seed) print('DEBUG: args.backend:', args.backend) print('DEBUG: args.local_rank:', args.local_rank) @@ -155,6 +159,27 
@@ def main(): # Read training dataset train_dataset = imagenet_dataset(args.data_dir) + if is_distributed: + # Distributed sampler restricts data loading to a subset of the dataset + # exclusive to the current process + train_sampler = DistributedSampler( + train_dataset, # num_replicas=gwsize, rank=grank, + shuffle=(args.shuff and args.rnd_seed is None) + ) + + train_loader = DataLoader( + train_dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=(args.nworker > 1), + prefetch_factor=args.prefetch, generator=torch_prng, + worker_init_fn=seed_worker + ) + else: + train_loader = DataLoader( + train_dataset, batch_size=args.batch_size, generator=torch_prng, + worker_init_fn=seed_worker + ) + # Create CNN model model = torchvision.models.resnet152() @@ -163,7 +188,7 @@ def main(): # 2) DeepSpeed optimizer # 3) Distributed data loader deepspeed_config = { - "train_micro_batch_size_per_gpu": args.batch_size, + # "train_micro_batch_size_per_gpu": args.batch_size, "optimizer": { "type": "SGD", "params": { @@ -176,7 +201,7 @@ def main(): }, "zero_optimization": False } - distrib_model, optimizer, train_loader, _ = deepspeed.initialize( + distrib_model, optimizer, deepspeed_train_loader, _ = deepspeed.initialize( args=args, model=model, model_parameters=model.parameters(), training_data=train_dataset, config_params=deepspeed_config) @@ -195,6 +220,11 @@ def main(): start_epoch = 1 for epoch in range(start_epoch, args.epochs + 1): lt = timer() + if is_distributed: + # Inform the sampler that a new epoch started: shuffle + # may be needed + train_sampler.set_epoch(epoch) + # Training train(args, distrib_model, train_loader, optimizer, epoch, grank, gwsize) diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml b/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml index eaddc9d2..135007b2 100644 --- a/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml @@ -1,12 +1,12 @@ # I/O -data_dir: tmp_data/ +data_dir: tmp_data/ # /p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ log_int: 10 verbose: True nworker: 4 # num workers dataloader prefetch: 2 # Model -batch_size: 64 +batch_size: 64 # micro batch size epochs: 3 lr: 0.001 momentum: 0.5 diff --git a/tutorials/distributed-ml/torch-scaling-test/hvd_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/horovod_slurm.sh similarity index 95% rename from tutorials/distributed-ml/torch-scaling-test/hvd_slurm.sh rename to tutorials/distributed-ml/torch-scaling-test/horovod_slurm.sh index 02a7280a..9ac1760d 100644 --- a/tutorials/distributed-ml/torch-scaling-test/hvd_slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/horovod_slurm.sh @@ -5,8 +5,8 @@ #SBATCH --account=intertwin #SBATCH --mail-user= #SBATCH --mail-type=ALL -#SBATCH --output=job-hvd.out -#SBATCH --error=job-hvd.err +#SBATCH --output=job-horovod.out +#SBATCH --error=job-horovod.err #SBATCH --time=00:30:00 # configure node and process count on the CM diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py b/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py index 42f39442..009fcc9e 100755 --- a/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py @@ -8,10 +8,12 @@ from timeit import default_timer as timer import time -import torch.multiprocessing as mp +import torch +# import 
torch.multiprocessing as mp import torch.nn.functional as F import torch.optim as optim -import torch.utils.data.distributed +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler import horovod.torch as hvd import torchvision @@ -76,13 +78,13 @@ def parse_params(): def train( - model, optimizer, train_sampler, train_loader, args, use_cuda, epoch + model, optimizer, train_sampler, train_loader, + args, use_cuda, epoch, grank ): model.train() - is_main_worker = hvd.local_rank() == 0 and hvd.rank() == 0 t_list = [] loss_acc = 0 - if is_main_worker: + if grank == 0: print("\n") for batch_idx, (data, target) in enumerate(train_loader): # if hvd.local_rank() == 0 and hvd.rank() == 0: @@ -95,8 +97,7 @@ def train( loss = F.nll_loss(output, target) loss.backward() optimizer.step() - if (args.log_int > 0 and batch_idx % args.log_int == 0 - and is_main_worker): + if grank == 0 and args.log_int > 0 and batch_idx % args.log_int == 0: # Use train_sampler to determine the number of examples in # this worker's partition print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( @@ -104,7 +105,7 @@ def train( 100. * batch_idx / len(train_loader), loss.item())) t_list.append(timer() - t) loss_acc += loss.item() - if is_main_worker: + if grank == 0: print('TIMER: train time', sum(t_list) / len(t_list), 's') return loss_acc @@ -130,9 +131,9 @@ def main(): # Set random seed for reproducibility torch_prng = set_seed(args.rnd_seed, use_cuda) - is_main_worker = True - if is_distributed and (hvd.rank() != 0 or hvd.local_rank() != 0): - is_main_worker = False + # is_main_worker = True + # if is_distributed and (hvd.rank() != 0 or hvd.local_rank() != 0): + # is_main_worker = False # Get local rank if is_distributed: @@ -147,7 +148,7 @@ def main(): gwsize = 1 lwsize = 1 - if is_main_worker: + if grank == 0: print('TIMER: initialise:', timer()-st, 's') print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) print('DEBUG: sys.version:', sys.version) @@ -186,32 +187,33 @@ def main(): # Dataset train_dataset = imagenet_dataset(args.data_dir) - kwargs = {} - # When supported, use 'forkserver' to spawn dataloader workers instead... - # issues with Infiniband implementations that are not fork-safe - if (args.nworker > 0 and hasattr(mp, '_supports_context') - and - mp._supports_context and - 'forkserver' in mp.get_all_start_methods()): - kwargs['multiprocessing_context'] = 'forkserver' + # kwargs = {} + # # When supported, use 'forkserver' to spawn dataloader workers instead... 
+ # # issues with Infiniband implementations that are not fork-safe + # if (args.nworker > 0 and hasattr(mp, '_supports_context') + # and + # mp._supports_context and + # 'forkserver' in mp.get_all_start_methods()): + # kwargs['multiprocessing_context'] = 'forkserver' if is_distributed: # Use DistributedSampler to partition the training data - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, + train_sampler = DistributedSampler( + train_dataset, # num_replicas=gwsize, rank=grank, shuffle=(args.shuff and args.rnd_seed is None) ) - train_loader = torch.utils.data.DataLoader( + train_loader = DataLoader( train_dataset, batch_size=args.batch_size, sampler=train_sampler, num_workers=args.nworker, pin_memory=True, persistent_workers=(args.nworker > 1), prefetch_factor=args.prefetch, generator=torch_prng, - worker_init_fn=seed_worker, **kwargs) + worker_init_fn=seed_worker + ) # , **kwargs) else: - train_loader = torch.utils.data.DataLoader( + train_loader = DataLoader( train_dataset, batch_size=args.batch_size, generator=torch_prng, worker_init_fn=seed_worker - ) + ) # , **kwargs) # Create CNN model model = torchvision.models.resnet152() @@ -250,7 +252,7 @@ def main(): op=hvd.Adasum if args.use_adasum else hvd.Average, gradient_predivide_factor=args.gradient_predivide_factor) - if is_main_worker: + if grank == 0: print('TIMER: broadcast:', timer()-st, 's') print('\nDEBUG: start training') print('--------------------------------------------------------') @@ -268,8 +270,10 @@ def main(): # Inform the sampler that a new epoch started: shuffle # may be needed train_sampler.set_epoch(epoch) + + # Training train(model, optimizer, train_sampler, - train_loader, args, use_cuda, epoch) + train_loader, args, use_cuda, epoch, grank) # Save first epoch timer if epoch == start_epoch: @@ -279,11 +283,11 @@ def main(): if epoch + 1 == args.epochs: train_loader.last_epoch = True - if is_main_worker: + if grank == 0: print('TIMER: epoch time:', timer()-lt, 's') epoch_time_tracker.add_epoch_time(epoch-1, timer()-lt) - if is_main_worker: + if grank == 0: print('\n--------------------------------------------------------') print('DEBUG: training results:\n') print('TIMER: first epoch time:', first_ep_t, ' s') diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index 82c60bc9..ca58155e 100644 --- a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -1,9 +1,10 @@ #!/bin/bash # Run all versions of distributed ML version -for name in ddp hvd deepspeed +rm *checkpoint.pth.tar *.out *.err *.csv + +for name in ddp horovod deepspeed do - rm *checkpoint.pth.tar *.out *.err *.csv # echo $fold" training: $(sbatch --nodes=1 $fold"_slurm.sh")" echo $name" training: $(sbatch $name"_slurm.sh")" done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh index 0964bc9e..b624d2a3 100644 --- a/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh +++ b/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh @@ -2,10 +2,10 @@ rm *checkpoint.pth.tar *.out *.err *.csv -timeout="01:01:00" +timeout="02:30:00" for N in 1 2 4 8 do sbatch --job-name="DDP-imagenet-pure-n$N" --nodes=$N --output="job-Pddp-n$N.out" --error="job-Pddp-n$N.err" --time=$timeout ddp_slurm.sh - sbatch --job-name="HVD-imagenet-pure-n$N" --nodes=$N 
--output="job-Phvd-n$N.out" --error="job-Phvd-n$N.err" --time=$timeout hvd_slurm.sh + sbatch --job-name="HVD-imagenet-pure-n$N" --nodes=$N --output="job-Phvd-n$N.out" --error="job-Phvd-n$N.err" --time=$timeout horovod_slurm.sh sbatch --job-name="DS-imagenet-pure-n$N" --nodes=$N --output="job-Pds-n$N.out" --error="job-Pds-n$N.err" --time=$timeout deepspeed_slurm.sh done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/utils.py b/tutorials/distributed-ml/torch-scaling-test/utils.py index fb6b55de..cbd6aace 100644 --- a/tutorials/distributed-ml/torch-scaling-test/utils.py +++ b/tutorials/distributed-ml/torch-scaling-test/utils.py @@ -17,7 +17,7 @@ def set_seed(rnd_seed: Optional[int], use_cuda: bool) -> torch.Generator: Args: rnd_seed (Optional[int]): random seed. If None, the seed is not set. - use_cuda (bool): whether GPU is available. + use_cuda (bool): whether GPU is available. Returns: torch.Generator: PRNG object. @@ -25,10 +25,13 @@ def set_seed(rnd_seed: Optional[int], use_cuda: bool) -> torch.Generator: g = torch.Generator() if rnd_seed is not None: # Deterministic execution + np.random.seed(rnd_seed) + random.seed(rnd_seed) torch.manual_seed(rnd_seed) g.manual_seed(rnd_seed) if use_cuda: torch.cuda.manual_seed(rnd_seed) + torch.cuda.manual_seed_all(rnd_seed) return g From 52be1b3945277546277040f2cce17537b416a8bf Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 12 Apr 2024 10:29:28 +0200 Subject: [PATCH 094/171] UPDATE SC bash severity --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 1d491f76..e62d3d7e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -50,4 +50,4 @@ jobs: DISABLE_ERRORS: false # Skip linting of docs FILTER_REGEX_EXCLUDE: .*docs/index.md|.*docs/docs/.*|.*ISSUE_TEMPLATE/.*|use-cases/.*|experimental/.* - BASH_SEVERITY: warning + BASH_SEVERITY: error From 6ae2acd1c79e7edcd3c5c74e48a84cc9dc938a77 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 12 Apr 2024 10:46:41 +0200 Subject: [PATCH 095/171] FIX deepspeed and horovod trainers --- .../torch-scaling-test/deepspeed-config.yaml | 1 + .../torch-scaling-test/deepspeed_trainer.py | 8 +++++--- .../distributed-ml/torch-scaling-test/horovod_trainer.py | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml b/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml index 21e389c1..afcc1a88 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml @@ -3,6 +3,7 @@ data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # log_int: 10 verbose: True nworker: 4 # num workers dataloader +prefetch: 2 # Model batch_size: 64 # micro batch size diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py index 1e0a76da..fdbaf73f 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py @@ -37,6 +37,8 @@ def parse_params(): parser.add_argument('--nworker', type=int, default=0, help=('number of workers in DataLoader ' '(default: 0 - only main)')) + parser.add_argument('--prefetch', type=int, default=2, + help='prefetch data in DataLoader (default: 2)') # Model 
parser.add_argument('--batch-size', type=int, default=64, metavar='N', @@ -135,7 +137,6 @@ def main(): grank = 0 lrank = 0 - # some debug if grank == 0: print('TIMER: initialise:', timer()-st, 's') print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) @@ -143,6 +144,7 @@ def main(): print('DEBUG: args.data_dir:', args.data_dir) print('DEBUG: args.log_int:', args.log_int) print('DEBUG: args.nworker:', args.nworker) + print('DEBUG: args.prefetch:', args.prefetch) print('DEBUG: args.batch_size:', args.batch_size) print('DEBUG: args.epochs:', args.epochs) print('DEBUG: args.lr:', args.lr) @@ -183,12 +185,12 @@ def main(): # Create CNN model model = torchvision.models.resnet152() - # Initialize DeepSpeed to use the following features + # Initialize DeepSpeed and get: # 1) Distributed model # 2) DeepSpeed optimizer # 3) Distributed data loader deepspeed_config = { - # "train_micro_batch_size_per_gpu": args.batch_size, + "train_micro_batch_size_per_gpu": args.batch_size, # redundant "optimizer": { "type": "SGD", "params": { diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py b/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py index 009fcc9e..501b545c 100755 --- a/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py @@ -198,8 +198,11 @@ def main(): if is_distributed: # Use DistributedSampler to partition the training data + # Since Horovod is not based on torch.distributed, + # `num_replicas` and `rank` cannot be retrieved from the + # current distributed group, thus they need to be provided explicitly. train_sampler = DistributedSampler( - train_dataset, # num_replicas=gwsize, rank=grank, + train_dataset, num_replicas=gwsize, rank=grank, shuffle=(args.shuff and args.rnd_seed is None) ) train_loader = DataLoader( From 0b62fc7fff2695da591ed9643dd5dad0ec991ca0 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 12 Apr 2024 10:56:36 +0200 Subject: [PATCH 096/171] FIX some code checks --- .github/workflows/lint.yml | 1 + .vscode/settings.json | 6 +++--- README.md | 11 ++++------ .../tf-tutorial-0-basics/README.md | 6 +++--- .../torch-scaling-test/ddp_trainer.py | 20 +++++++++---------- .../torch-scaling-test/deepspeed_trainer.py | 2 +- 6 files changed, 22 insertions(+), 24 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e62d3d7e..379229a3 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -43,6 +43,7 @@ jobs: VALIDATE_PYTHON_PYLINT: false VALIDATE_HTML: false VALIDATE_GITLEAKS: false + VALIDATE_BASH_EXEC: false # Only check new or edited files VALIDATE_ALL_CODEBASE: false diff --git a/.vscode/settings.json b/.vscode/settings.json index 10679610..58e902ec 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -54,9 +54,9 @@ "[python]": { "editor.defaultFormatter": "ms-python.autopep8" }, - "[markdown]": { - "editor.formatOnSave": false - }, + // "[markdown]": { + // "editor.formatOnSave": false + // }, "python.testing.pytestArgs": [ "tests" ], diff --git a/README.md b/README.md index b0f1d66d..dc9a60dc 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,9 @@ [![GitHub Super-Linter](https://github.com/interTwin-eu/T6.5-AI-and-ML/actions/workflows/check-links.yml/badge.svg)](https://github.com/marketplace/actions/markdown-link-check) [![SQAaaS source 
code](https://github.com/EOSC-synergy/itwinai.assess.sqaaas/raw/dev/.badge/status_shields.svg)](https://sqaaas.eosc-synergy.eu/#/full-assessment/report/https://raw.githubusercontent.com/eosc-synergy/itwinai.assess.sqaaas/dev/.report/assessment_output.json) -See the latest version of our [docs](https://intertwin-eu.github.io/T6.5-AI-and-ML/) +See the latest version of our [docs](https://intertwin-eu.github.io/itwinai/) for a quick overview of this platform for advanced AI/ML workflows in digital twin applications. -If you want to integrate a new use case, you can follow this -[step-by-step guide](https://intertwin-eu.github.io/T6.5-AI-and-ML/docs/How-to-use-this-software.html). - ## Installation Requirements: @@ -21,7 +18,7 @@ Requirements: To manage Conda environments we use micromamba, a light weight version of conda. It is suggested to refer to the -[Manual installation guide](https://mamba.readthedocs.io/en/latest/micromamba-installation.html#umamba-install). +[Manual installation guide](https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html#manual-installation). Consider that Micromamba can eat a lot of space when building environments because packages are cached on the local filesystem after being downloaded. To clear cache you can use `micromamba clean -a`. @@ -44,12 +41,12 @@ MAMBA_ROOT_PREFIX='my-mamba-root' echo 'PATH="$(dirname $MAMBA_EXE):$PATH"' >> ~/.bashrc ``` -**Reference**: [Micromamba installation guide](https://mamba.readthedocs.io/en/latest/installation.html#micromamba). +**Reference**: [Micromamba installation guide](https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html). ### Documentation folder Documentation for this repository is maintained under `./docs` location. -If you are using code from a previous release, you can build the docs webpage +If you are using code from a previous release, you can build the docs web page locally using [these instructions](docs/README#building-and-previewing-your-site-locally). ## Environment setup diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/README.md b/tutorials/distributed-ml/tf-tutorial-0-basics/README.md index 90cfeb84..c2c49595 100644 --- a/tutorials/distributed-ml/tf-tutorial-0-basics/README.md +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/README.md @@ -1,8 +1,8 @@ # Tutorial: distributed strategies for Tensorflow -In this tutorial we show how to use Tensorflow `MultiWorkerMirroredStrategy`. -Note that the environment is tested on the HDFML system at JSC. -For other systems, the module versions might need change accordingly. +In this tutorial we show how to use Tensorflow `MultiWorkerMirroredStrategy`. +Note that the environment is tested on the HDFML system at JSC. +For other systems, the module versions might need change accordingly. Other strategies will be updated here. 
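The core usage pattern is small: create the strategy, build and compile the model inside its scope, and scale the global batch size by the number of replicas. The snippet below is only a minimal, illustrative sketch (placeholder layer sizes, random data, and default `TF_CONFIG`-based worker discovery are assumptions, not part of this tutorial's scripts):

```python
# Minimal sketch of MultiWorkerMirroredStrategy usage (illustrative only).
# In a real multi-worker run, TF_CONFIG (or a cluster resolver, e.g. for SLURM)
# typically describes the participating workers.
import tensorflow as tf

strategy = tf.distribute.MultiWorkerMirroredStrategy()

with strategy.scope():
    # Variables created inside the scope are mirrored and synced across workers
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation="relu", input_shape=(32,)),
        tf.keras.layers.Dense(10),
    ])
    model.compile(
        optimizer="adam",
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    )

# Scale the global batch size by the number of replicas
global_batch_size = 64 * strategy.num_replicas_in_sync
x = tf.random.uniform((global_batch_size, 32))
y = tf.random.uniform((global_batch_size,), maxval=10, dtype=tf.int32)
model.fit(x, y, epochs=1, batch_size=global_batch_size)
```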
First, from the root of this repository, build the environment containing diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py b/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py index 30ffdab5..472e7a5a 100755 --- a/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py @@ -89,7 +89,7 @@ def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): if grank == 0 and args.log_int > 0 and batch_idx % args.log_int == 0: print( f'Train epoch: {epoch} [{batch_idx * len(data)}/' - f'{len(train_loader.dataset)/gwsize} ' + f'{len(train_loader.dataset) / gwsize} ' f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\tLoss: ' f'{loss.item():.6f}') t_list.append(timer() - t) @@ -225,8 +225,8 @@ def main(): train_loader.last_epoch = True if grank == 0: - print('TIMER: epoch time:', timer()-lt, 's') - epoch_time_tracker.add_epoch_time(epoch-1, timer()-lt) + print('TIMER: epoch time:', timer() - lt, 's') + epoch_time_tracker.add_epoch_time(epoch-1, timer() - lt) if is_distributed: dist.barrier() @@ -235,20 +235,20 @@ def main(): print('\n--------------------------------------------------------') print('DEBUG: training results:\n') print('TIMER: first epoch time:', first_ep_t, ' s') - print('TIMER: last epoch time:', timer()-lt, ' s') - print('TIMER: average epoch time:', (timer()-et)/args.epochs, ' s') - print('TIMER: total epoch time:', timer()-et, ' s') + print('TIMER: last epoch time:', timer() - lt, ' s') + print('TIMER: average epoch time:', (timer() - et)/args.epochs, ' s') + print('TIMER: total epoch time:', timer() - et, ' s') if epoch > 1: print('TIMER: total epoch-1 time:', - timer()-et-first_ep_t, ' s') + timer() - et - first_ep_t, ' s') print('TIMER: average epoch-1 time:', - (timer()-et-first_ep_t)/(args.epochs-1), ' s') + (timer() - et - first_ep_t) / (args.epochs - 1), ' s') if use_cuda: print('DEBUG: memory req:', - int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') + int(torch.cuda.memory_reserved(lrank) / 1024 / 1024), 'MB') print('DEBUG: memory summary:\n\n', torch.cuda.memory_summary(0)) - print(f'TIMER: final time: {timer()-st} s\n') + print(f'TIMER: final time: {timer() - st} s\n') time.sleep(1) print(f" - TRAINING FINISHED") diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py index fdbaf73f..2fa811c3 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py @@ -91,7 +91,7 @@ def train(args, model, train_loader, optimizer, epoch, grank, gwsize): if args.log_int > 0 and batch_idx % args.log_int == 0 and grank == 0: print( f'Train epoch: {epoch} [{batch_idx * len(data)}/' - f'{len(train_loader.dataset)/gwsize} ' + f'{len(train_loader.dataset) / gwsize} ' f'({100.0 * batch_idx *len(data) / len(train_loader):.0f}%)]' f'\t\tLoss: {loss.item():.6f}') t_list.append(timer() - t) From a0f5117d0f13a7f632a215a38ec50c6c6c4c06ae Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 12 Apr 2024 16:55:34 +0200 Subject: [PATCH 097/171] Unify redundant SLURM job scripts and configuration files --- src/itwinai/parser.py | 7 +- src/itwinai/torch/distributed.py | 29 +- .../torch-scaling-test/base-config.yaml | 20 ++ .../torch-scaling-test/ddp-config.yaml | 35 +- .../torch-scaling-test/ddp_slurm.sh | 66 ---- .../torch-scaling-test/ddp_trainer.py | 13 +- .../torch-scaling-test/deepspeed-config.yaml | 34 
+- .../torch-scaling-test/deepspeed_slurm.sh | 74 ---- .../torch-scaling-test/deepspeed_trainer.py | 9 +- .../torch-scaling-test/horovod-config.yaml | 32 +- .../torch-scaling-test/horovod_slurm.sh | 60 ---- .../torch-scaling-test/itwinai-config.yaml | 22 ++ .../torch-scaling-test/itwinai_trainer.py | 339 ++++++++++++++++++ .../torch-scaling-test/runall.sh | 57 ++- .../torch-scaling-test/scaling-test.sh | 7 +- .../torch-scaling-test/slurm.sh | 115 ++++++ 16 files changed, 643 insertions(+), 276 deletions(-) create mode 100644 tutorials/distributed-ml/torch-scaling-test/base-config.yaml delete mode 100644 tutorials/distributed-ml/torch-scaling-test/ddp_slurm.sh delete mode 100644 tutorials/distributed-ml/torch-scaling-test/deepspeed_slurm.sh delete mode 100644 tutorials/distributed-ml/torch-scaling-test/horovod_slurm.sh create mode 100644 tutorials/distributed-ml/torch-scaling-test/itwinai-config.yaml create mode 100644 tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py create mode 100644 tutorials/distributed-ml/torch-scaling-test/slurm.sh diff --git a/src/itwinai/parser.py b/src/itwinai/parser.py index b74613a0..24c521cd 100644 --- a/src/itwinai/parser.py +++ b/src/itwinai/parser.py @@ -10,6 +10,7 @@ from jsonargparse import ActionConfigFile from jsonargparse._formatters import DefaultHelpFormatter + class ArgumentParser(JAPArgumentParser): def __init__( self, @@ -27,7 +28,11 @@ def __init__( default_meta: bool = True, **kwargs, ) -> None: - """Initializer for ArgumentParser instance. + """Initializer for ArgumentParser instance. It can parse arguments from + a series of configuration files. Example: + + >>> python main.py --config base-conf.yaml --config other-conf.yaml \\ + >>> --param OVERRIDE_VAL All the arguments from the initializer of `argparse.ArgumentParser `_ diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index 527a77e4..34174346 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -244,14 +244,14 @@ def __init__( super().__init__() self.backend = backend - def _load_config(self, ds_config): + def _load_config(self, ds_config) -> None: if isinstance(ds_config, (str, Path)): with open(ds_config) as fp: self.config = json.load(fp) elif isinstance(ds_config, dict): self.config = ds_config else: - raise ValueError("ds_config is not a dictionary not a path.") + raise ValueError("ds_config is neither a dictionary not a path.") def init(self) -> None: """Initializes the distributed process group and the distributed @@ -269,11 +269,11 @@ def distributed( self, model: nn.Module, optimizer: Optional[Optimizer] = None, lr_scheduler: Optional[LRScheduler] = None, model_parameters: Optional[Any] = None, - **kwargs + **init_kwargs ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: """Setup model, optimizer and scheduler for distributed.""" - if kwargs.get("config"): - kwargs["config"] = self._load_config(kwargs.get("config")) + if init_kwargs.get("config"): + self._load_config(init_kwargs.get("config")) # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization # To prioritize optim in the config, you need to pass optim=None distrib_model, optimizer, _, lr_scheduler = deepspeed.initialize( @@ -282,7 +282,7 @@ def distributed( optimizer=optimizer, lr_scheduler=lr_scheduler, dist_init_required=True, - **kwargs + **init_kwargs ) return distrib_model, optimizer, lr_scheduler @@ -350,19 +350,28 @@ def init(self) -> None: def distributed( self, model: nn.Module, optimizer: 
Optional[Optimizer] = None, lr_scheduler: Optional[LRScheduler] = None, - **kwargs + **optim_kwargs ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: """Setup model, optimizer and scheduler for distributed.""" model.to(self.dist_device()) - self._broadcast_params(model, optimizer) - # TODO: here you may need to scale the lr + # Scale learning rate + # https://github.com/horovod/horovod/issues/1653#issuecomment-574764452 + lr_scaler = 1 + if optim_kwargs.get('op') == hvd.Adasum: + lr_scaler = hvd.local_size() + elif optim_kwargs.get('op') == hvd.Average: + lr_scaler = hvd.size() + for g in optimizer.param_groups: + g['lr'] *= lr_scaler + + self._broadcast_params(model, optimizer) distOptimizer = hvd.DistributedOptimizer( optimizer, named_parameters=model.named_parameters(), - op=hvd.Average + **optim_kwargs ) return model, distOptimizer, lr_scheduler diff --git a/tutorials/distributed-ml/torch-scaling-test/base-config.yaml b/tutorials/distributed-ml/torch-scaling-test/base-config.yaml new file mode 100644 index 00000000..0f4bf365 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/base-config.yaml @@ -0,0 +1,20 @@ +# Data and logging +data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ +log_int: 10 +verbose: True +nworker: 4 # num workers dataloader +prefetch: 2 + +# Model +batch_size: 64 # micro batch size +epochs: 3 +lr: 0.001 +momentum: 0.5 +shuff: False + +# Reproducibility +rnd_seed: 10 + +# Distributed ML +no_cuda: False + diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml b/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml index 35d46333..c9c5e369 100644 --- a/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml @@ -1,20 +1,21 @@ -# Data and logging -data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ -log_int: 10 -verbose: True -nworker: 4 # num workers dataloader -prefetch: 2 +# # Data and logging +# data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ +# log_int: 10 +# verbose: True +# nworker: 4 # num workers dataloader +# prefetch: 2 -# Model -batch_size: 64 # micro batch size -epochs: 3 -lr: 0.001 -momentum: 0.5 -shuff: False +# # Model +# batch_size: 64 # micro batch size +# epochs: 3 +# lr: 0.001 +# momentum: 0.5 +# shuff: False -# Reproducibility -rnd_seed: 10 +# # Reproducibility +# rnd_seed: 10 -# Distributed ML -backend: nccl -no_cuda: False +# # Distributed ML +# backend: nccl +# no_cuda: False +backend: nccl \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/ddp_slurm.sh deleted file mode 100644 index 327a1493..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/ddp_slurm.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_DDP_tutorial-1 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job-ddp.out -#SBATCH --error=job-ddp.err -#SBATCH --time=00:30:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=1 -#SBATCH --cpus-per-task=32 -#SBATCH --gpus-per-node=4 -#SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source 
../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set comm -export CUDA_VISIBLE_DEVICES="0,1,2,3" -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi - -# launch training -TRAINING_CMD="ddp_trainer.py -c ddp-config.yaml" - -srun --cpu-bind=none bash -c "torchrun \ - --log_dir='logs' \ - --nnodes=$SLURM_NNODES \ - --nproc_per_node=$SLURM_GPUS_PER_NODE \ - --rdzv_id=$SLURM_JOB_ID \ - --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ - --rdzv_backend=c10d \ - --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ - $TRAINING_CMD" - diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py b/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py index 472e7a5a..54f64fef 100755 --- a/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py @@ -109,6 +109,9 @@ def main(): if use_cuda and torch.cuda.device_count() > 0: is_distributed = True + # Limit # of CPU threads to be used per worker + # torch.set_num_threads(1) + # Start the timer for profiling st = timer() @@ -123,9 +126,9 @@ def main(): if is_distributed: # get job rank info - rank==0 master gpu lwsize = torch.cuda.device_count() # local world size - per run - gwsize = dist.get_world_size() # global world size - per run - grank = dist.get_rank() # global rank - assign per run - lrank = dist.get_rank() % lwsize # local rank - assign per node + gwsize = dist.get_world_size() # global world size - per run + grank = dist.get_rank() # global rank - assign per run + lrank = dist.get_rank() % lwsize # local rank - assign per node else: # Use a single worker (either on GPU or CPU) lwsize = 1 @@ -160,7 +163,9 @@ def main(): if is_distributed: # Distributed sampler restricts data loading to a subset of the dataset - # exclusive to the current process + # exclusive to the current process. + # `mun_replicas` and `rank` are automatically retrieved from + # the current distributed group. 
train_sampler = DistributedSampler( train_dataset, # num_replicas=gwsize, rank=grank, shuffle=(args.shuff and args.rnd_seed is None) diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml b/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml index afcc1a88..d66ffe21 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml @@ -1,20 +1,22 @@ -# Data and logging -data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ -log_int: 10 -verbose: True -nworker: 4 # num workers dataloader -prefetch: 2 +# # Data and logging +# data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ +# log_int: 10 +# verbose: True +# nworker: 4 # num workers dataloader +# prefetch: 2 -# Model -batch_size: 64 # micro batch size -epochs: 3 -lr: 0.001 -shuff: False +# # Model +# batch_size: 64 # micro batch size +# epochs: 3 +# lr: 0.001 +# momentum: 0.5 +# shuff: False -# Reproducibility -rnd_seed: 10 +# # Reproducibility +# rnd_seed: 10 -# Distributed ML -backend: nccl -no_cuda: False +# # Distributed ML +# backend: nccl +# no_cuda: False +backend: nccl \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/deepspeed_slurm.sh deleted file mode 100644 index 7e35cac0..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed_slurm.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_DeepSpeed_tutorial-1 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job-deepspeed.out -#SBATCH --error=job-deepspeed.err -#SBATCH --time=00:30:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=4 -#SBATCH --cpus-per-task=4 -#SBATCH --gpus-per-node=4 -#SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source ../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set env vars -export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi -export CUDA_VISIBLE_DEVICES="0,1,2,3" - -# launch training -MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i -export MASTER_ADDR -export MASTER_PORT=29500 - -TRAINING_CMD="deepspeed_trainer.py -c deepspeed-config.yaml" - -# Run without launcher: set --ntasks-per-node=NUM_GPUS -srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed - -# # Run with deepspeed launcher: set --ntasks-per-node=1 -# # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables 
-# export NCCL_IB_DISABLE=1 -# export NCCL_SOCKET_IFNAME=eth0 -# nodelist=$(scontrol show hostname $SLURM_NODELIST) -# echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile -# # Requires passwordless SSH access among compute node -# srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed -# rm .hostfile - diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py index 2fa811c3..ba353025 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py @@ -47,6 +47,8 @@ def parse_params(): help='number of epochs to train (default: 10)') parser.add_argument('--lr', type=float, default=0.01, metavar='LR', help='learning rate (default: 0.01)') + parser.add_argument('--momentum', type=float, default=0.5, + help='momentum in SGD optimizer (default: 0.5)') parser.add_argument('--shuff', action='store_true', default=False, help='shuffle dataset (default: False)') @@ -148,6 +150,7 @@ def main(): print('DEBUG: args.batch_size:', args.batch_size) print('DEBUG: args.epochs:', args.epochs) print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.momentum:', args.momentum) print('DEBUG: args.shuff:', args.shuff) print('DEBUG: args.rnd_seed:', args.rnd_seed) print('DEBUG: args.backend:', args.backend) @@ -163,7 +166,9 @@ def main(): if is_distributed: # Distributed sampler restricts data loading to a subset of the dataset - # exclusive to the current process + # exclusive to the current process. + # `mun_replicas` and `rank` are automatically retrieved from + # the current distributed group. train_sampler = DistributedSampler( train_dataset, # num_replicas=gwsize, rank=grank, shuffle=(args.shuff and args.rnd_seed is None) @@ -195,7 +200,7 @@ def main(): "type": "SGD", "params": { "lr": args.lr, - "momentum": 0.5 + "momentum": args.momentum } }, "fp16": { diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml b/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml index 135007b2..40cadbf3 100644 --- a/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml @@ -1,22 +1,22 @@ -# I/O -data_dir: tmp_data/ # /p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ -log_int: 10 -verbose: True -nworker: 4 # num workers dataloader -prefetch: 2 +# # I/O +# data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ +# log_int: 10 +# verbose: True +# nworker: 4 # num workers dataloader +# prefetch: 2 -# Model -batch_size: 64 # micro batch size -epochs: 3 -lr: 0.001 -momentum: 0.5 -shuff: False +# # Model +# batch_size: 64 # micro batch size +# epochs: 3 +# lr: 0.001 +# momentum: 0.5 +# shuff: False -# Debugging -rnd_seed: 10 +# # Debugging +# rnd_seed: 10 -# Distributed ML -no_cuda: False +# # Distributed ML +# no_cuda: False fp16_allreduce: False use_adasum: False gradient_predivide_factor: 1.0 \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod_slurm.sh b/tutorials/distributed-ml/torch-scaling-test/horovod_slurm.sh deleted file mode 100644 index 9ac1760d..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/horovod_slurm.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_HVD_tutorial-1 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH 
--output=job-horovod.out -#SBATCH --error=job-horovod.err -#SBATCH --time=00:30:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=4 -#SBATCH --cpus-per-task=8 -#SBATCH --gpus-per-node=4 -#SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source ../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set vars -# export NCCL_DEBUG=INFO -export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi -export CUDA_VISIBLE_DEVICES="0,1,2,3" - -# launch training -TRAINING_CMD="horovod_trainer.py -c horovod-config.yaml" - -srun --cpu-bind=none python -u $TRAINING_CMD - diff --git a/tutorials/distributed-ml/torch-scaling-test/itwinai-config.yaml b/tutorials/distributed-ml/torch-scaling-test/itwinai-config.yaml new file mode 100644 index 00000000..b3f37492 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/itwinai-config.yaml @@ -0,0 +1,22 @@ +# Data and logging +data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ +log_int: 10 +verbose: True +nworker: 4 # num workers dataloader +prefetch: 2 + +# Model +batch_size: 64 +epochs: 3 +lr: 0.001 +momentum: 0.5 +shuff: False + +# Reproducibility +rnd_seed: 10 + +# Distributed ML +backend: nccl +no_cuda: False + + diff --git a/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py b/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py new file mode 100644 index 00000000..a1eacc20 --- /dev/null +++ b/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py @@ -0,0 +1,339 @@ +""" +Show how to use DDP, Horovod and DeepSpeed strategies interchangeably +with a large neural network trained on Imagenet dataset, showing how +to use checkpoints. 
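+
+The same script is used for every strategy: the framework is selected with
+the -s flag, while the remaining settings are read from the configuration
+files passed via -c, e.g.
+
+    itwinai_trainer.py -c base-config.yaml -c ddp-config.yaml -s ddp
+
+(see runall.sh and slurm.sh for the full torchrun/srun launch commands).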
+""" +from typing import Optional +import os +import argparse +import sys +from timeit import default_timer as timer +import time + +import torch +import torch.nn.functional as F +import torchvision +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler + +import deepspeed +import horovod.torch as hvd + +from itwinai.torch.distributed import ( + TorchDistributedStrategy, + DDPDistributedStrategy, + HVDDistributedStrategy, + DSDistributedStrategy, +) +from itwinai.parser import ArgumentParser as ItAIArgumentParser +from itwinai.loggers import EpochTimeTracker + +from utils import seed_worker, imagenet_dataset, set_seed + + +def parse_params() -> argparse.Namespace: + """ + Parse CLI args, which can also be loaded from a configuration file + using the --config flag: + + >>> train.py --strategy ddp --config base-config.yaml --config foo.yaml + """ + parser = ItAIArgumentParser(description='PyTorch Imagenet Example') + + # Distributed ML strategy + parser.add_argument( + "--strategy", "-s", type=str, + choices=['ddp', 'horovod', 'deepspeed'], + default='ddp' + ) + + # Data and logging + parser.add_argument('--data-dir', default='./', + help=('location of the training dataset in the local ' + 'filesystem')) + parser.add_argument('--log-int', type=int, default=10, + help='log interval per training') + parser.add_argument('--verbose', + action=argparse.BooleanOptionalAction, + help='Print parsed arguments') + parser.add_argument('--nworker', type=int, default=0, + help=('number of workers in DataLoader (default: 0 -' + ' only main)')) + parser.add_argument('--prefetch', type=int, default=2, + help='prefetch data in DataLoader (default: 2)') + + # Model + parser.add_argument('--batch-size', type=int, default=64, + help='input batch size for training (default: 64)') + parser.add_argument('--epochs', type=int, default=10, + help='number of epochs to train (default: 10)') + parser.add_argument('--lr', type=float, default=0.01, + help='learning rate (default: 0.01)') + parser.add_argument('--momentum', type=float, default=0.5, + help='momentum in SGD optimizer (default: 0.5)') + parser.add_argument('--shuff', action='store_true', default=False, + help='shuffle dataset (default: False)') + + # Reproducibility + parser.add_argument('--rnd-seed', type=Optional[int], default=None, + help='seed integer for reproducibility (default: 0)') + + # Distributed ML + parser.add_argument('--backend', type=str, default='nccl', + help='backend for parrallelisation (default: nccl)') + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables GPGPUs') + parser.add_argument('--local_rank', type=int, default=-1, + help='local rank passed from distributed launcher') + + # Horovod + parser.add_argument('--fp16-allreduce', action='store_true', default=False, + help='use fp16 compression during allreduce') + parser.add_argument('--use-adasum', action='store_true', default=False, + help='use adasum algorithm to do reduction') + parser.add_argument('--gradient-predivide-factor', type=float, default=1.0, + help=('apply gradient pre-divide factor in optimizer ' + '(default: 1.0)')) + + # DeepSpeed + parser = deepspeed.add_config_arguments(parser) + args = parser.parse_args() + + if args.verbose: + args_list = [f"{key}: {val}" for key, val in args.items()] + print("PARSED ARGS:\n", '\n'.join(args_list)) + + return args + + +def train( + model, device, train_loader, optimizer, epoch, + strategy: TorchDistributedStrategy, args +): + """ + Training function, 
representing an epoch. + """ + model.train() + t_list = [] + loss_acc = 0 + gwsize = strategy.dist_gwsize() + if strategy.is_main_worker(): + print("\n") + for batch_idx, (data, target) in enumerate(train_loader): + t = timer() + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if (strategy.is_main_worker() and args.log_int > 0 + and batch_idx % args.log_int == 0): + print( + f'Train epoch: {epoch} ' + f'[{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' + f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\t' + f'Loss: {loss.item():.6f}') + t_list.append(timer() - t) + loss_acc += loss.item() + if strategy.is_main_worker(): + print('TIMER: train time', sum(t_list) / len(t_list), 's') + return loss_acc + + +def main(): + # Parse CLI args + args = parse_params() + + # Instantiate Strategy + if args.strategy == 'ddp': + if (not torch.cuda.is_available() + or not torch.cuda.device_count() > 1): + raise RuntimeError('Resources unavailable') + + strategy = DDPDistributedStrategy(backend=args.backend) + distribute_kwargs = {} + elif args.strategy == 'horovod': + strategy = HVDDistributedStrategy() + distribute_kwargs = dict( + compression=( + hvd.Compression.fp16 if args.fp16_allreduce + else hvd.Compression.none + ), + op=hvd.Adasum if args.use_adasum else hvd.Average, + gradient_predivide_factor=args.gradient_predivide_factor + ) + elif args.strategy == 'deepspeed': + strategy = DSDistributedStrategy(backend=args.backend) + distribute_kwargs = dict( + config_params=dict(train_micro_batch_size_per_gpu=args.batch_size) + ) + else: + raise NotImplementedError( + f"Strategy {args.strategy} is not recognized/implemented.") + strategy.init() + + # Check resources availability + use_cuda = not args.no_cuda and torch.cuda.is_available() + is_distributed = False + if use_cuda and torch.cuda.device_count() > 0: + is_distributed = True + + # Limit # of CPU threads to be used per worker + # torch.set_num_threads(1) + + # start the timer for profiling + st = timer() + + # Set random seed for reproducibility + torch_prng = set_seed(args.rnd_seed, use_cuda) + + # get job rank info - rank==0 master gpu + if is_distributed: + # local world size - per node + lwsize = strategy.dist_lwsize() # local world size - per run + gwsize = strategy.dist_gwsize() # global world size - per run + grank = strategy.dist_grank() # global rank - assign per run + lrank = strategy.dist_lrank() # local rank - assign per node + else: + # Use a single worker (either on GPU or CPU) + lwsize = 1 + gwsize = 1 + grank = 0 + lrank = 0 + + if strategy.is_main_worker(): + print('TIMER: initialise:', timer()-st, 's') + print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) + print('DEBUG: sys.version:', sys.version) + print('DEBUG: args.data_dir:', args.data_dir) + print('DEBUG: args.log_int:', args.log_int) + print('DEBUG: args.nworker:', args.nworker) + print('DEBUG: args.prefetch:', args.prefetch) + print('DEBUG: args.batch_size:', args.batch_size) + print('DEBUG: args.epochs:', args.epochs) + print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.momentum:', args.momentum) + print('DEBUG: args.shuff:', args.shuff) + print('DEBUG: args.rnd_seed:', args.rnd_seed) + print('DEBUG: args.backend:', args.backend) + print('DEBUG: args.no_cuda:', args.no_cuda, '\n') + + # Encapsulate the model on the GPU assigned to the current process + device = torch.device( + strategy.dist_device() if use_cuda and 
torch.cuda.is_available() + else 'cpu') + if use_cuda: + torch.cuda.set_device(lrank) + + # Dataset + train_dataset = imagenet_dataset(args.data_dir) + + if is_distributed: + # Distributed sampler restricts data loading to a subset of the dataset + # exclusive to the current process. + train_sampler = DistributedSampler( + train_dataset, num_replicas=gwsize, rank=grank, + shuffle=(args.shuff and args.rnd_seed is None) + ) + + train_loader = DataLoader( + train_dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.nworker, pin_memory=True, + persistent_workers=(args.nworker > 1), + prefetch_factor=args.prefetch, generator=torch_prng, + worker_init_fn=seed_worker + ) + else: + train_loader = DataLoader( + train_dataset, batch_size=args.batch_size, generator=torch_prng, + worker_init_fn=seed_worker + ) + + # Create CNN model: resnet 50, resnet101, resnet152 + model = torchvision.models.resnet152() + + # Optimizer + optimizer = torch.optim.SGD( + model.parameters(), lr=args.lr, momentum=args.momentum) + + if is_distributed: + distrib_model, optimizer, _ = strategy.distributed( + model, optimizer, lr_scheduler=None, **distribute_kwargs + ) + + # Start training loop + if strategy.is_main_worker(): + print('TIMER: broadcast:', timer()-st, 's') + print('\nDEBUG: start training') + print('--------------------------------------------------------') + nnod = os.environ.get('SLURM_NNODES', 'unk') + s_name = f"{args.strategy}-it" + epoch_time_tracker = EpochTimeTracker( + series_name=s_name, + csv_file=f"epochtime_{s_name}_{nnod}N.csv" + ) + + et = timer() + start_epoch = 1 + for epoch in range(start_epoch, args.epochs + 1): + lt = timer() + if is_distributed: + # Inform the sampler that a new epoch started: shuffle + # may be needed + train_sampler.set_epoch(epoch) + + # Training + train( + model=distrib_model, + device=device, + train_loader=train_loader, + optimizer=optimizer, + epoch=epoch, + strategy=strategy, + args=args + ) + + # Save first epoch timer + if epoch == start_epoch: + first_ep_t = timer()-lt + + # Final epoch + if epoch + 1 == args.epochs: + train_loader.last_epoch = True + + if strategy.is_main_worker(): + print('TIMER: epoch time:', timer()-lt, 's') + epoch_time_tracker.add_epoch_time(epoch-1, timer()-lt) + + if strategy.is_main_worker(): + print('\n--------------------------------------------------------') + print('DEBUG: training results:\n') + print('TIMER: first epoch time:', first_ep_t, ' s') + print('TIMER: last epoch time:', timer()-lt, ' s') + print('TIMER: average epoch time:', (timer()-et)/args.epochs, ' s') + print('TIMER: total epoch time:', timer()-et, ' s') + if epoch > 1: + print('TIMER: total epoch-1 time:', + timer()-et-first_ep_t, ' s') + print('TIMER: average epoch-1 time:', + (timer()-et-first_ep_t)/(args.epochs-1), ' s') + if use_cuda: + print('DEBUG: memory req:', + int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') + print('DEBUG: memory summary:\n\n', + torch.cuda.memory_summary(0)) + + print(f'TIMER: final time: {timer()-st} s\n') + + time.sleep(1) + print(f" - TRAINING FINISHED") + + # Clean-up + if is_distributed: + strategy.clean_up() + + +if __name__ == "__main__": + main() + sys.exit() diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index ca58155e..f52a34ce 100644 --- a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -1,10 +1,55 @@ #!/bin/bash # Run all versions of distributed ML 
version +# $1 (Optional[int]): number of nodes. Default: 2 +# $2 (Optional[str]): timeout. Default: "00:30:00" -rm *checkpoint.pth.tar *.out *.err *.csv +if [ -z "$1" ] ; then + N=2 +else + N=$1 +fi +if [ -z "$2" ] ; then + T="00:30:00" +else + T=$2 +fi -for name in ddp horovod deepspeed -do - # echo $fold" training: $(sbatch --nodes=1 $fold"_slurm.sh")" - echo $name" training: $(sbatch $name"_slurm.sh")" -done \ No newline at end of file +echo "Distributing training over $N nodes. Timeout set to: $T" + +rm *.out *.err *.csv #*checkpoint.pth.tar + +# DDP baseline +DIST_MODE="ddp" +RUN_NAME="ddp-bl-imagenent" +TRAINING_CMD="ddp_trainer.py -c base-config.yaml -c ddp-config.yaml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh + +# DeepSpeed baseline +DIST_MODE="deepspeed" +RUN_NAME="deepspeed-bl-imagenent" +TRAINING_CMD="deepspeed_trainer.py -c base-config.yaml -c deepspeed-config.yaml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh + +# Horovod baseline +DIST_MODE="horovod" +RUN_NAME="horovod-bl-imagenent" +TRAINING_CMD="horovod_trainer.py -c base-config.yaml -c horovod-config.yaml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh + +# DDP itwinai +DIST_MODE="ddp" +RUN_NAME="ddp-itwinai-imagenent" +TRAINING_CMD="itwinai_trainer.py -c base-config.yaml -c ddp-config.yaml -s ddp" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh + +# DeepSpeed itwinai +DIST_MODE="deepspeed" +RUN_NAME="deepspeed-itwinai-imagenent" +TRAINING_CMD="itwinai_trainer.py -c base-config.yaml -c deepspeed-config.yaml -s deepspeed" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh + +# Horovod itwinai +DIST_MODE="horovod" +RUN_NAME="horovod-itwinai-imagenent" +TRAINING_CMD="itwinai_trainer.py -c base-config.yaml -c horovod-config.yaml -s horovod" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh index b624d2a3..dbec90e1 100644 --- a/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh +++ b/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh @@ -2,10 +2,9 @@ rm *checkpoint.pth.tar *.out *.err *.csv -timeout="02:30:00" +timeout="03:30:00" for N in 1 2 4 8 do - sbatch --job-name="DDP-imagenet-pure-n$N" --nodes=$N --output="job-Pddp-n$N.out" --error="job-Pddp-n$N.err" --time=$timeout ddp_slurm.sh - sbatch --job-name="HVD-imagenet-pure-n$N" --nodes=$N --output="job-Phvd-n$N.out" --error="job-Phvd-n$N.err" --time=$timeout horovod_slurm.sh - sbatch --job-name="DS-imagenet-pure-n$N" --nodes=$N --output="job-Pds-n$N.out" --error="job-Pds-n$N.err" --time=$timeout deepspeed_slurm.sh + bash runall.sh $N $timeout + echo done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/slurm.sh b/tutorials/distributed-ml/torch-scaling-test/slurm.sh new file mode 100644 index 00000000..d3391788 --- /dev/null +++ 
b/tutorials/distributed-ml/torch-scaling-test/slurm.sh @@ -0,0 +1,115 @@ +#!/bin/bash + +# Job configuration +#SBATCH --job-name=distributed_training +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:30:00 + +# Resources allocation +#SBATCH --partition=batch +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=4 +#SBATCH --cpus-per-gpu=8 +#SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# Load environment modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# Activate Python env +source ../../../envAI_hdfml/bin/activate + +# Job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$DEBUG" = true ] ; then + echo "DEBUG: NCCL_DEBUG=INFO" + export NCCL_DEBUG=INFO +fi +echo + +# Setup env for distributed ML +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_GPU" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_GPU +fi + +# Env vairables check +if [ -z "$DIST_MODE" ]; then + >&2 echo "ERROR: \$DIST_MODE env variable is not set. Allowed values are 'horovod', 'ddp' or 'deepspeed'" + exit 1 +fi +if [ -z "$RUN_NAME" ]; then + >&2 echo "WARNING: \$RUN_NAME env variable is not set. It's a way to identify some specific run of an experiment." + RUN_NAME=$DIST_MODE +fi +if [ -z "$TRAINING_CMD" ]; then + >&2 echo "ERROR: \$TRAINING_CMD env variable is not set. It's the python command to execute." 
+ exit 1 +fi + +# Launch training +if [ "$DIST_MODE" == "ddp" ] ; then + echo "DDP training: $TRAINING_CMD" + srun --cpu-bind=none --ntasks-per-node=1 \ + --job-name="$RUN_NAME-n$SLURM_NNODES" \ + --output="job-$RUN_NAME-n$SLURM_NNODES.out" \ + --error="job-$RUN_NAME-n$SLURM_NNODES.err" \ + bash -c "torchrun \ + --log_dir='logs' \ + --nnodes=$SLURM_NNODES \ + --nproc_per_node=$SLURM_GPUS_PER_NODE \ + --rdzv_id=$SLURM_JOB_ID \ + --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ + --rdzv_backend=c10d \ + --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ + $TRAINING_CMD" +elif [ "$DIST_MODE" == "deepspeed" ] ; then + echo "DEEPSPEED training: $TRAINING_CMD" + MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i + export MASTER_ADDR + export MASTER_PORT=29500 + + srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ + --job-name="$RUN_NAME-n$SLURM_NNODES" \ + --output="job-$RUN_NAME-n$SLURM_NNODES.out" \ + --error="job-$RUN_NAME-n$SLURM_NNODES.err" \ + python -u $TRAINING_CMD --deepspeed + + # # Run with deepspeed launcher: set --ntasks-per-node=1 + # # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables + # export NCCL_IB_DISABLE=1 + # export NCCL_SOCKET_IFNAME=eth0 + # nodelist=$(scontrol show hostname $SLURM_NODELIST) + # echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile + # # Requires passwordless SSH access among compute node + # srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed + # rm .hostfile +elif [ "$DIST_MODE" == "horovod" ] ; then + echo "HOROVOD training: $TRAINING_CMD" + srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ + --job-name="$RUN_NAME-imagenet-n$SLURM_NNODES" \ + --output="job-$RUN_NAME-n$SLURM_NNODES.out" \ + --error="job-$RUN_NAME-n$SLURM_NNODES.err" \ + python -u $TRAINING_CMD +else + >&2 echo "ERROR: unrecognized \$DIST_MODE env variable" + exit 1 +fi + From d2a9085d239224a7abc3dea90e65588a8babcdcb Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 12 Apr 2024 16:58:02 +0200 Subject: [PATCH 098/171] CLEANUP unused configuration --- .../torch-scaling-test/ddp-config.yaml | 20 ----------------- .../torch-scaling-test/deepspeed-config.yaml | 21 ------------------ .../torch-scaling-test/horovod-config.yaml | 19 ---------------- .../torch-scaling-test/itwinai-config.yaml | 22 ------------------- 4 files changed, 82 deletions(-) delete mode 100644 tutorials/distributed-ml/torch-scaling-test/itwinai-config.yaml diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml b/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml index c9c5e369..e872ffc9 100644 --- a/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml @@ -1,21 +1 @@ -# # Data and logging -# data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ -# log_int: 10 -# verbose: True -# nworker: 4 # num workers dataloader -# prefetch: 2 - -# # Model -# batch_size: 64 # micro batch size -# epochs: 3 -# lr: 0.001 -# momentum: 0.5 -# shuff: False - -# # Reproducibility -# rnd_seed: 10 - -# # Distributed ML -# backend: nccl -# no_cuda: False backend: nccl \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml b/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml index d66ffe21..e872ffc9 100644 --- 
a/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml @@ -1,22 +1 @@ -# # Data and logging -# data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ -# log_int: 10 -# verbose: True -# nworker: 4 # num workers dataloader -# prefetch: 2 - -# # Model -# batch_size: 64 # micro batch size -# epochs: 3 -# lr: 0.001 -# momentum: 0.5 -# shuff: False - -# # Reproducibility -# rnd_seed: 10 - -# # Distributed ML -# backend: nccl -# no_cuda: False - backend: nccl \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml b/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml index 40cadbf3..fce89755 100644 --- a/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml @@ -1,22 +1,3 @@ -# # I/O -# data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ -# log_int: 10 -# verbose: True -# nworker: 4 # num workers dataloader -# prefetch: 2 - -# # Model -# batch_size: 64 # micro batch size -# epochs: 3 -# lr: 0.001 -# momentum: 0.5 -# shuff: False - -# # Debugging -# rnd_seed: 10 - -# # Distributed ML -# no_cuda: False fp16_allreduce: False use_adasum: False gradient_predivide_factor: 1.0 \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/itwinai-config.yaml b/tutorials/distributed-ml/torch-scaling-test/itwinai-config.yaml deleted file mode 100644 index b3f37492..00000000 --- a/tutorials/distributed-ml/torch-scaling-test/itwinai-config.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Data and logging -data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ -log_int: 10 -verbose: True -nworker: 4 # num workers dataloader -prefetch: 2 - -# Model -batch_size: 64 -epochs: 3 -lr: 0.001 -momentum: 0.5 -shuff: False - -# Reproducibility -rnd_seed: 10 - -# Distributed ML -backend: nccl -no_cuda: False - - From 6bdc5affd8aa0aadd6b2a0ea92590ceb4bc3bc9c Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 12 Apr 2024 17:01:23 +0200 Subject: [PATCH 099/171] Reorg configurations --- .../torch-scaling-test/{ => config}/base-config.yaml | 0 .../torch-scaling-test/{ => config}/ddp-config.yaml | 0 .../{ => config}/deepspeed-config.yaml | 0 .../{ => config}/horovod-config.yaml | 0 .../distributed-ml/torch-scaling-test/runall.sh | 12 ++++++------ 5 files changed, 6 insertions(+), 6 deletions(-) rename tutorials/distributed-ml/torch-scaling-test/{ => config}/base-config.yaml (100%) rename tutorials/distributed-ml/torch-scaling-test/{ => config}/ddp-config.yaml (100%) rename tutorials/distributed-ml/torch-scaling-test/{ => config}/deepspeed-config.yaml (100%) rename tutorials/distributed-ml/torch-scaling-test/{ => config}/horovod-config.yaml (100%) diff --git a/tutorials/distributed-ml/torch-scaling-test/base-config.yaml b/tutorials/distributed-ml/torch-scaling-test/config/base-config.yaml similarity index 100% rename from tutorials/distributed-ml/torch-scaling-test/base-config.yaml rename to tutorials/distributed-ml/torch-scaling-test/config/base-config.yaml diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml b/tutorials/distributed-ml/torch-scaling-test/config/ddp-config.yaml similarity index 100% rename from tutorials/distributed-ml/torch-scaling-test/ddp-config.yaml rename to tutorials/distributed-ml/torch-scaling-test/config/ddp-config.yaml diff --git 
a/tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml b/tutorials/distributed-ml/torch-scaling-test/config/deepspeed-config.yaml similarity index 100% rename from tutorials/distributed-ml/torch-scaling-test/deepspeed-config.yaml rename to tutorials/distributed-ml/torch-scaling-test/config/deepspeed-config.yaml diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml b/tutorials/distributed-ml/torch-scaling-test/config/horovod-config.yaml similarity index 100% rename from tutorials/distributed-ml/torch-scaling-test/horovod-config.yaml rename to tutorials/distributed-ml/torch-scaling-test/config/horovod-config.yaml diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index f52a34ce..57d1befe 100644 --- a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -21,35 +21,35 @@ rm *.out *.err *.csv #*checkpoint.pth.tar # DDP baseline DIST_MODE="ddp" RUN_NAME="ddp-bl-imagenent" -TRAINING_CMD="ddp_trainer.py -c base-config.yaml -c ddp-config.yaml" +TRAINING_CMD="ddp_trainer.py -c config/base-config.yaml -c config/ddp-config.yaml" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh # DeepSpeed baseline DIST_MODE="deepspeed" RUN_NAME="deepspeed-bl-imagenent" -TRAINING_CMD="deepspeed_trainer.py -c base-config.yaml -c deepspeed-config.yaml" +TRAINING_CMD="deepspeed_trainer.py -c config/base-config.yaml -c config/deepspeed-config.yaml" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh # Horovod baseline DIST_MODE="horovod" RUN_NAME="horovod-bl-imagenent" -TRAINING_CMD="horovod_trainer.py -c base-config.yaml -c horovod-config.yaml" +TRAINING_CMD="horovod_trainer.py -c config/base-config.yaml -c config/horovod-config.yaml" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh # DDP itwinai DIST_MODE="ddp" RUN_NAME="ddp-itwinai-imagenent" -TRAINING_CMD="itwinai_trainer.py -c base-config.yaml -c ddp-config.yaml -s ddp" +TRAINING_CMD="itwinai_trainer.py -c config/base-config.yaml -c config/ddp-config.yaml -s ddp" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh # DeepSpeed itwinai DIST_MODE="deepspeed" RUN_NAME="deepspeed-itwinai-imagenent" -TRAINING_CMD="itwinai_trainer.py -c base-config.yaml -c deepspeed-config.yaml -s deepspeed" +TRAINING_CMD="itwinai_trainer.py -c config/base-config.yaml -c config/deepspeed-config.yaml -s deepspeed" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh # Horovod itwinai DIST_MODE="horovod" RUN_NAME="horovod-itwinai-imagenent" -TRAINING_CMD="itwinai_trainer.py -c base-config.yaml -c horovod-config.yaml -s horovod" +TRAINING_CMD="itwinai_trainer.py -c config/base-config.yaml -c config/horovod-config.yaml -s horovod" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh \ No newline at end of file From 74d1d9c9a797d9d1b4966ab585f4076318250dfc Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 12 Apr 2024 17:42:34 +0200 Subject: 
[PATCH 100/171] Refactor configurations and add documentation --- .../torch-scaling-test/README.md | 107 +++++++++++++++++- .../config/{base-config.yaml => base.yaml} | 4 +- .../config/{ddp-config.yaml => ddp.yaml} | 0 .../{deepspeed-config.yaml => deepspeed.yaml} | 0 .../{horovod-config.yaml => horovod.yaml} | 0 .../torch-scaling-test/runall.sh | 12 +- 6 files changed, 112 insertions(+), 11 deletions(-) rename tutorials/distributed-ml/torch-scaling-test/config/{base-config.yaml => base.yaml} (69%) rename tutorials/distributed-ml/torch-scaling-test/config/{ddp-config.yaml => ddp.yaml} (100%) rename tutorials/distributed-ml/torch-scaling-test/config/{deepspeed-config.yaml => deepspeed.yaml} (100%) rename tutorials/distributed-ml/torch-scaling-test/config/{horovod-config.yaml => horovod.yaml} (100%) diff --git a/tutorials/distributed-ml/torch-scaling-test/README.md b/tutorials/distributed-ml/torch-scaling-test/README.md index 7e7c6704..c815e97f 100644 --- a/tutorials/distributed-ml/torch-scaling-test/README.md +++ b/tutorials/distributed-ml/torch-scaling-test/README.md @@ -1,4 +1,105 @@ -# Scaling tests for PyTorch +# Scaling tests for PyTorch of ResNet152 on Imagenet -Examples of scaling tests which can be used as baselines for `itwinai` distributed. -Work in progress. +## Introduction + +This tutorial contains six training configurations: three baselines plus the itwinai +trainer, which allows to switch from DDP, Horovod, and DeepSpeed in a simplified way. + +The training scripts are: + +- `ddp_trainer.py`: baseline of distributed training with vanilla torch DDP +- `deepspeed_trainer.py`: baseline of distributed training with vanilla Microsoft DeepSpeed +- `horovod_trainer.py`: baseline of distributed training with vanilla Horovod +- `itwinai_trainer.py`: provides the same functionalities as all the above, +using the unified itwinai's distributed training interface. + +Configuration files are stored into `config/` folder. `base.yaml` provides the +configuration common to all training experiments, whereas `ddp.yaml`, `deepspeed.yaml`, +and `horovod.yaml` provide framework-specific configuration. +Thanks to `itwinai.parser.ArgumentParser`, the CLI arguments can be parsed from a list of +configuration files, while also allowing for online override. +Example: + +```bash +# Rather than requiring a LONG list of inline configuration params... +python ddp_trainer.py --data-dir some/dir --log-int 10 --verbose --nworker 4 ... + +# ...itwinai's ArgumentParser allows to load them from a set of configuration files +# with inline override, if needed +python ddp_trainer.py -c config/base.yaml -c config/ddp.yaml --log-int 42 +``` + +## Run a single training + +Training runs are meant to be submitted via SLURM, from a unified job script file: +`slurm.sh`. +You can select the distributed training algorithm and provide the command to execute +setting SLURM environment variables using the `--export` option: + +```bash +# Launch a distributed training setup with Torch DDP +DIST_MODE="ddp" +RUN_NAME="ddp-bl-imagenent" +TRAINING_CMD="ddp_trainer.py -c config/base.yaml -c config/ddp.yaml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" \ + --job-name="$RUN_NAME" slurm.sh +``` + +## Run all training configurations + +To run all training configurations you can use the `runall.sh` script, which provides +further insight how different training configurations can be launched using the same +SLURM job script. 
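Each block inside `runall.sh` follows the same pattern: choose a `DIST_MODE`, a `RUN_NAME` and a `TRAINING_CMD`, then submit `slurm.sh` with those variables exported. The following is only a condensed sketch of that pattern, not the actual script, which spells out the six submissions explicitly (as shown above for a single run):

```bash
#!/bin/bash
# Condensed, hypothetical rewrite of runall.sh: one sbatch submission per configuration.
# Variable names (DIST_MODE, RUN_NAME, TRAINING_CMD) match the ones expected by slurm.sh.
N=${1:-2}           # number of nodes
T=${2:-"01:00:00"}  # walltime

declare -A CONFIGS=(
  ["ddp-bl-imagenent"]="ddp|ddp_trainer.py -c config/base.yaml -c config/ddp.yaml"
  ["deepspeed-bl-imagenent"]="deepspeed|deepspeed_trainer.py -c config/base.yaml -c config/deepspeed.yaml"
  ["horovod-bl-imagenent"]="horovod|horovod_trainer.py -c config/base.yaml -c config/horovod.yaml"
  ["ddp-itwinai-imagenent"]="ddp|itwinai_trainer.py -c config/base.yaml -c config/ddp.yaml -s ddp"
)

for RUN_NAME in "${!CONFIGS[@]}"; do
  DIST_MODE="${CONFIGS[$RUN_NAME]%%|*}"   # part before the first '|'
  TRAINING_CMD="${CONFIGS[$RUN_NAME]#*|}" # part after the first '|'
  sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" \
    --job-name="$RUN_NAME-n$N" --nodes="$N" --time="$T" slurm.sh
done
```

To submit all configurations at once, simply run the script: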
+ +```bash +bash runall.sh +``` + +And check the newly created jobs in the SLURM queue: + +```bash +squeue -u YOUR_USERNAME +``` + +Each execution will generate a `.csv` file recording the time that each training epoch +took to complete. Below you can learn more on how to analyze these files to produce report. + +## Launch scaling test + +Similarly to `runall.sh`, there is another script which is meant to launch a scalability +analysis experiment. This will launch all the training configuration for different number +of node allocations. By default it will run the same distributed trainings on 1, 2, 4, and +8 nodes. Each independent execution will generate a separate `.csv` file which can be +analyzed later to produce a scalability report. + +Launch the scaling test: + +```bash +bash scaling-test.sh +``` + +And check the newly created jobs in the SLURM queue: + +```bash +squeue -u YOUR_USERNAME +``` + +## Analyze results + +Once all jobs have completed, you can automatically generate scalability report +using itwinai's CLI: + +```bash +# First, activate you Python virtual environment + +# For more info run +itwinai scalability-report --help + +# Generate a scalability report +itwinai scalability-report --pattern="^epoch.+\.csv$" \ + --plot-title "ResNet152 on Imagenet" --archive imagenet_results +``` + +The last command prints to terminal the average epoch time per training +configuration and per number of nodes, and it generated scaling test +analysis plot, which is saved as `.png` file. diff --git a/tutorials/distributed-ml/torch-scaling-test/config/base-config.yaml b/tutorials/distributed-ml/torch-scaling-test/config/base.yaml similarity index 69% rename from tutorials/distributed-ml/torch-scaling-test/config/base-config.yaml rename to tutorials/distributed-ml/torch-scaling-test/config/base.yaml index 0f4bf365..344d4237 100644 --- a/tutorials/distributed-ml/torch-scaling-test/config/base-config.yaml +++ b/tutorials/distributed-ml/torch-scaling-test/config/base.yaml @@ -1,5 +1,5 @@ # Data and logging -data_dir: tmp_data/ #/p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ +data_dir: /p/scratch/intertwin/datasets/imagenet/ILSVRC2012/train/ # tmp_data/ log_int: 10 verbose: True nworker: 4 # num workers dataloader @@ -16,5 +16,5 @@ shuff: False rnd_seed: 10 # Distributed ML -no_cuda: False +verbose: False diff --git a/tutorials/distributed-ml/torch-scaling-test/config/ddp-config.yaml b/tutorials/distributed-ml/torch-scaling-test/config/ddp.yaml similarity index 100% rename from tutorials/distributed-ml/torch-scaling-test/config/ddp-config.yaml rename to tutorials/distributed-ml/torch-scaling-test/config/ddp.yaml diff --git a/tutorials/distributed-ml/torch-scaling-test/config/deepspeed-config.yaml b/tutorials/distributed-ml/torch-scaling-test/config/deepspeed.yaml similarity index 100% rename from tutorials/distributed-ml/torch-scaling-test/config/deepspeed-config.yaml rename to tutorials/distributed-ml/torch-scaling-test/config/deepspeed.yaml diff --git a/tutorials/distributed-ml/torch-scaling-test/config/horovod-config.yaml b/tutorials/distributed-ml/torch-scaling-test/config/horovod.yaml similarity index 100% rename from tutorials/distributed-ml/torch-scaling-test/config/horovod-config.yaml rename to tutorials/distributed-ml/torch-scaling-test/config/horovod.yaml diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index 57d1befe..78f1241e 100644 --- 
a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -21,35 +21,35 @@ rm *.out *.err *.csv #*checkpoint.pth.tar # DDP baseline DIST_MODE="ddp" RUN_NAME="ddp-bl-imagenent" -TRAINING_CMD="ddp_trainer.py -c config/base-config.yaml -c config/ddp-config.yaml" +TRAINING_CMD="ddp_trainer.py -c config/base.yaml -c config/ddp.yaml" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh # DeepSpeed baseline DIST_MODE="deepspeed" RUN_NAME="deepspeed-bl-imagenent" -TRAINING_CMD="deepspeed_trainer.py -c config/base-config.yaml -c config/deepspeed-config.yaml" +TRAINING_CMD="deepspeed_trainer.py -c config/base.yaml -c config/deepspeed.yaml" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh # Horovod baseline DIST_MODE="horovod" RUN_NAME="horovod-bl-imagenent" -TRAINING_CMD="horovod_trainer.py -c config/base-config.yaml -c config/horovod-config.yaml" +TRAINING_CMD="horovod_trainer.py -c config/base.yaml -c config/horovod.yaml" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh # DDP itwinai DIST_MODE="ddp" RUN_NAME="ddp-itwinai-imagenent" -TRAINING_CMD="itwinai_trainer.py -c config/base-config.yaml -c config/ddp-config.yaml -s ddp" +TRAINING_CMD="itwinai_trainer.py -c config/base.yaml -c config/ddp.yaml -s ddp" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh # DeepSpeed itwinai DIST_MODE="deepspeed" RUN_NAME="deepspeed-itwinai-imagenent" -TRAINING_CMD="itwinai_trainer.py -c config/base-config.yaml -c config/deepspeed-config.yaml -s deepspeed" +TRAINING_CMD="itwinai_trainer.py -c config/base.yaml -c config/deepspeed.yaml -s deepspeed" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh # Horovod itwinai DIST_MODE="horovod" RUN_NAME="horovod-itwinai-imagenent" -TRAINING_CMD="itwinai_trainer.py -c config/base-config.yaml -c config/horovod-config.yaml -s horovod" +TRAINING_CMD="itwinai_trainer.py -c config/base.yaml -c config/horovod.yaml -s horovod" sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh \ No newline at end of file From d1e1462f128435a1d126c7e652df10031f0a3a2f Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 12 Apr 2024 17:45:25 +0200 Subject: [PATCH 101/171] Update README --- tutorials/distributed-ml/torch-scaling-test/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tutorials/distributed-ml/torch-scaling-test/README.md b/tutorials/distributed-ml/torch-scaling-test/README.md index c815e97f..05338c31 100644 --- a/tutorials/distributed-ml/torch-scaling-test/README.md +++ b/tutorials/distributed-ml/torch-scaling-test/README.md @@ -102,4 +102,8 @@ itwinai scalability-report --pattern="^epoch.+\.csv$" \ The last command prints to terminal the average epoch time per training configuration and per number of nodes, and it generated scaling test -analysis plot, which is saved as `.png` file. +analysis plot, which is saved as `.png` file. 
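In practice the report is derived from the per-epoch timing CSVs: epoch times are averaged per training configuration and node count, and the speedup reported for N nodes is roughly the average epoch time on the smallest allocation divided by the average epoch time on N nodes. Below is a rough, illustrative sketch of that aggregation; the column names are assumptions for the sake of the example, not the exact schema written by the trainers or used by `itwinai scalability-report`:

```python
# Illustrative sketch only: approximates the aggregation behind `itwinai scalability-report`.
# Assumed columns: "name" (training configuration), "nodes", "epoch_time" (seconds).
import glob

import pandas as pd

df = pd.concat((pd.read_csv(f) for f in glob.glob("epoch*.csv")), ignore_index=True)

# Average epoch time per training configuration and node count
avg = (
    df.groupby(["name", "nodes"])["epoch_time"]
    .mean()
    .reset_index()
    .sort_values(["name", "nodes"])
)

# Speedup with respect to the smallest node allocation of each configuration
avg["speedup"] = avg.groupby("name")["epoch_time"].transform(lambda s: s.iloc[0] / s)
print(avg)
```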
This command will also +create a `.tar.gz` archive of all the analyzed `.csv` files and +the generated plots, allowing you to easily organize different experiments +and reducing the risk of overwriting the logs generated during the scaling +test. From cc113b85eab269ddcb50f4fffa085a0a275267aa Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Fri, 12 Apr 2024 17:54:06 +0200 Subject: [PATCH 102/171] ADD report image --- .gitignore | 2 +- .../distributed-ml/torch-scaling-test/README.md | 4 ++++ .../torch-scaling-test/img/report.png | Bin 0 -> 45730 bytes 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 tutorials/distributed-ml/torch-scaling-test/img/report.png diff --git a/.gitignore b/.gitignore index 187b261d..67a6c670 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -*.png +s*.png *.pdf *_logs TODO diff --git a/tutorials/distributed-ml/torch-scaling-test/README.md b/tutorials/distributed-ml/torch-scaling-test/README.md index 05338c31..74e316c0 100644 --- a/tutorials/distributed-ml/torch-scaling-test/README.md +++ b/tutorials/distributed-ml/torch-scaling-test/README.md @@ -107,3 +107,7 @@ create a `.tar.gz` archive of all the analyzed `.csv` files and the generated plots, allowing you to easily organize different experiments and reducing the risk of overwriting the logs generated during the scaling test. + +Example of scalability plot generated by `itwinai scalability-report`: + +![report](img/report.png) diff --git a/tutorials/distributed-ml/torch-scaling-test/img/report.png b/tutorials/distributed-ml/torch-scaling-test/img/report.png new file mode 100644 index 0000000000000000000000000000000000000000..53bb708ac3b94aaa63942b32742c84b1566a74b4 GIT binary patch literal 45730 zcmb@uWmHw`yElxYVt^Q6&?21@(xIThA|<3lxhWDC# z|Ic~Phx6fi#(2iq+wEX4=9=rC_kCTzx|Z)7Nnxy;csJ3|(6CSjD>-NftilPz}ot~ z6&E9;>3_d~!Q4`xk>upO0WNamy@-+(8XC4H^6#Z={wzZ@v?6TOtCw>239Dmvia5f= zKejf;EOy78-oBKRTg7#v#MAbFB=#yW?veJY@3#Y@z~@clDy-j(1L*hF?_80VlY6s7 zXg0y`T;cPX=t=`~^1@Vi{8(aq&s>}cWedE?$jvN2{ z)j;Hm%eiT6vQ2mU63w4i(0e)uH0PFG}~((b3^oq8Zd2S8VJx0w1$nixqDQr4GDF= z@r~t^?YHCewT=&+<=kZz6(6}A@BSPcBUDpU3kVF%d7JkD-khAAJfl31(qMAFJ>Gg_ zoDdruJBmTAHH=(r+~_ErLSp)0!p$vDw^mX}=-S#y*}_n<{+}e?`2IYFndxbn(f)fk zhxji}>tmR-eF6jNw~K5y#?M}!{-C*SzQZv$SfyHa5VRY5xYNl`&dbZ&($^=4_w3z= z5}lTNhUce8GFg&|WEnEld&~WK<#~#;GBPsh0RaKi`Au#`@23QAgqV$0zNMSPfAmOX ztkR|}GATBeIwL(@geG$N>i{JcmB{7v46*3ieoKCwG~wVVG-@uc%JRIh`nKf$rQR&D z4>&|bNycg~mM7}m+*_q{gwcTG_cHPibEB+x3!P9=PLHim6hWn zqN1V-oYn0T)EDMv)NnT1J`PCScXf3QOHS5ESvbk>p~?TSW_lAFJ4-f8vb?-}b5O%g zQ(r$zeW1?GO+Zj^=)183(?H>3wj51QZ*O;h|7!(>vhuv0gelJ1`=|!4Hgt5%lX`>FXT^qJ2~v1mcc=V;c5q~{K}mRjb!72FQL^n zHRSa4^oA$U z%g@i((bRl$^~x2IgN=#KVtV*9TDi=O{xLjsco;pf{|dGWerUKJz24s6k1F~8ot&GS zJ4d6=RYqJ~Jm?H-_R#4YFJqfjBI{5cV>DK_Q?mMb<8YC#SWA05eW&Bw0(CW51eu@` z*t2;D-(q89`@U%lQn8R zXV|U{qa-CI!%9m_XBQW#un0Kbh)PO!SY3?OI4Kwn6=mqWdq*-fG9tFK&>0P@o3C0y z+Fzid(K9$0osgKA@!fcEK37{@JYaXNY(f;4{GTgLO+Ar_Wq#e-+A32Z`%UZhQm+($ z`!}_rB3<$qmCi>+T&DG2SocWA$}I#FZKr-+%~2}W3kwYmMc(YWLDvf|hi#pp>Z0KG z3AYRSy|odAqN1WOaR~`>RO?Cf8I%VyILJBTvCKX2Ea0xvLqhK4n2wZ&1qKGDr=>mR z~+aGvi-q-~koCfJyI%%UgqVRy`KBBz&`g#}p)!^N(Y<&uXJBSSXM zM~o*mQD?CewJue1nc|O8Dk>_-GudAq+G(xSy@ib(jb;c(DU59HEUEQ8j%v}OHIJS0 zWYl5xFH??aB+WlNZhoHPt7-+a@VQhnUu5mYxzj&;(cRr$M@NV3x$%H_rS(#e@yHg} zV7^-T@?Zhu((nks;qZQSY5eZ>iwq zF8KVJB**1s|DVM&Gh@Jd^z4Nw3e}xGcI1GK3Y%Z}-01&MY<+zl*?DpqqPRyqtp&z~#vKYJDuNvqJ4uf~nsp=2^&`)o`2jhi>MV1IKCU}0fpK<(UJ z>h)$&t9+*2gn4>)rc`2Z_o;ADI_yj|@g$zu+k{+q2nYgH&o{b?zI>sJdu}WQbuO08 z6wm2kU3Yh}TSrfi8IJItBV1}OT#D}r3CV}uU28Uzzc1ijj-X~5!hQ(iI@=r6Fc~iK 
zwW8-?E~bMQm`nh4(~0?Zz&}fy`eSsV1k2}Nk#P{4Gqi4Vh{W=a|K8gu6nnrHEqk5~ zkHrc+2>YmAnyEqsqV>Y)raYh4?gk z?=-wq_ZQWmB?$X0STy7+{cFAapEA4uyj{>iXZ#W;q2u#wmQ^j BDf0jT literal 0 HcmV?d00001 From 8e418aa952525f4c371e5128deca6a4b5f10e0c6 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 15 Apr 2024 15:15:06 +0200 Subject: [PATCH 103/171] Improve plot resolution --- src/itwinai/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py index 437188d6..9c846da5 100644 --- a/src/itwinai/cli.py +++ b/src/itwinai/cli.py @@ -132,7 +132,7 @@ def scalability_report( sp_up_ax.grid() plot_png = f"scaling_plot_{plot_title}.png" plt.tight_layout() - plt.savefig(plot_png, bbox_inches='tight', format='png') + plt.savefig(plot_png, bbox_inches='tight', format='png', dpi=300) print("Saved scaling plot to: ", plot_png) if archive is not None: From ae816c09e396274507d39ebbc1741592a30d9938 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 15 Apr 2024 16:32:51 +0200 Subject: [PATCH 104/171] UPDATE scaling test --- .gitignore | 2 +- .../distributed-ml/torch-scaling-test/runall.sh | 15 +++++++++------ .../distributed-ml/torch-scaling-test/slurm.sh | 6 +++++- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 67a6c670..03431546 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,7 @@ exp_data/ # Custom envs .venv* -envAI_hdfml* +envAI_* # Logs logs/ diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index 78f1241e..a186c504 100644 --- a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -14,6 +14,9 @@ else T=$2 fi +# Common options +CMD="--nodes=$N --time=$T --account=intertwin --partition=batch slurm.sh" + echo "Distributing training over $N nodes. 
Timeout set to: $T" rm *.out *.err *.csv #*checkpoint.pth.tar @@ -22,34 +25,34 @@ rm *.out *.err *.csv #*checkpoint.pth.tar DIST_MODE="ddp" RUN_NAME="ddp-bl-imagenent" TRAINING_CMD="ddp_trainer.py -c config/base.yaml -c config/ddp.yaml" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD # DeepSpeed baseline DIST_MODE="deepspeed" RUN_NAME="deepspeed-bl-imagenent" TRAINING_CMD="deepspeed_trainer.py -c config/base.yaml -c config/deepspeed.yaml" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD # Horovod baseline DIST_MODE="horovod" RUN_NAME="horovod-bl-imagenent" TRAINING_CMD="horovod_trainer.py -c config/base.yaml -c config/horovod.yaml" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD # DDP itwinai DIST_MODE="ddp" RUN_NAME="ddp-itwinai-imagenent" TRAINING_CMD="itwinai_trainer.py -c config/base.yaml -c config/ddp.yaml -s ddp" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD # DeepSpeed itwinai DIST_MODE="deepspeed" RUN_NAME="deepspeed-itwinai-imagenent" TRAINING_CMD="itwinai_trainer.py -c config/base.yaml -c config/deepspeed.yaml -s deepspeed" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD # Horovod itwinai DIST_MODE="horovod" RUN_NAME="horovod-itwinai-imagenent" TRAINING_CMD="itwinai_trainer.py -c config/base.yaml -c config/horovod.yaml -s horovod" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" --nodes=$N --time=$T slurm.sh \ No newline at end of file +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/slurm.sh b/tutorials/distributed-ml/torch-scaling-test/slurm.sh index d3391788..e47ce5b3 100644 --- a/tutorials/distributed-ml/torch-scaling-test/slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/slurm.sh @@ -1,5 +1,7 @@ #!/bin/bash +# SLURM jobscript for JSC systems + # Job configuration #SBATCH --job-name=distributed_training #SBATCH --account=intertwin @@ -23,7 +25,9 @@ ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py # Activate Python env -source ../../../envAI_hdfml/bin/activate +sysN="$(uname -n | cut -f2- -d.)" +sysN="${sysN%%[0-9]*}" +source ../../../envAI_${sysN}/bin/activate # Job info echo "DEBUG: TIME: $(date)" From c8a3617cee4a3bdad40f28becf68c5efbe3d5904 Mon Sep 17 00:00:00 2001 From: 
Matteo Bunino Date: Tue, 16 Apr 2024 15:34:50 +0200 Subject: [PATCH 105/171] UPDATE launcher scripts --- .gitignore | 1 + pyproject.toml | 3 +- src/itwinai/cli.py | 8 ++--- .../torch-scaling-test/runall.sh | 15 ++++---- .../torch-scaling-test/scaling-test.sh | 2 +- .../torch-scaling-test/slurm.sh | 34 +++++++++++-------- 6 files changed, 36 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 03431546..dd495607 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ s*.png *.pdf *_logs +logs_* TODO /data nohup* diff --git a/pyproject.toml b/pyproject.toml index 15637745..10b54316 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,8 @@ dependencies = [ "jsonargparse[signatures]", "pyyaml", "omegaconf", + "rich>=13.5.3", + "typer>=0.9.0", # "wandb>=0.15.11", # "mlflow>=2.7", # "jsonargparse[signatures]>=4.17.0", @@ -49,7 +51,6 @@ dependencies = [ # torch = [] # tf = [] distributed = ["deepspeed>=0.13.1", "horovod[tensorflow,keras,pytorch]>=0.28.1"] -cli = ["rich>=13.5.3", "typer>=0.9.0"] dev = [ "pytest>=7.4.2", "pytest-mock>=3.11.1", diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py index 9c846da5..49d1d7bd 100644 --- a/src/itwinai/cli.py +++ b/src/itwinai/cli.py @@ -42,7 +42,7 @@ def scalability_report( records in sub-folders. Example: - >>> itwinai scalability-report --pattern="^epoch.+\.csv$" --skip-id 0 \ + >>> itwinai scalability-report --pattern="^epoch.+\.csv$" --skip-id 0 \\ >>> --plot-title "Some title" --logy --archive archive_name """ # TODO: add max depth and path different from CWD @@ -51,7 +51,7 @@ def scalability_report( import shutil import pandas as pd import matplotlib.pyplot as plt - import numpy as np + # import numpy as np regex = re.compile(r'{}'.format(pattern)) combined_df = pd.DataFrame() @@ -124,8 +124,8 @@ def scalability_report( sp_up_ax.legend(ncol=1) sp_up_ax.set_xticks(df["NGPUs"].values) - sp_up_ax.set_yticks( - np.arange(1, np.max(df["Speedup - ideal"].values) + 2, 1)) + # sp_up_ax.set_yticks( + # np.arange(1, np.max(df["Speedup - ideal"].values) + 2, 1)) sp_up_ax.set_ylabel('Speedup') sp_up_ax.set_xlabel('NGPUs (4 per node)') diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index a186c504..973d59b5 100644 --- a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -16,43 +16,46 @@ fi # Common options CMD="--nodes=$N --time=$T --account=intertwin --partition=batch slurm.sh" +PYTHON_VENV="../../../envAI_hdfml" echo "Distributing training over $N nodes. 
Timeout set to: $T" +rm -rf logs_slurm +mkdir logs_slurm rm *.out *.err *.csv #*checkpoint.pth.tar # DDP baseline DIST_MODE="ddp" RUN_NAME="ddp-bl-imagenent" TRAINING_CMD="ddp_trainer.py -c config/base.yaml -c config/ddp.yaml" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD # DeepSpeed baseline DIST_MODE="deepspeed" RUN_NAME="deepspeed-bl-imagenent" TRAINING_CMD="deepspeed_trainer.py -c config/base.yaml -c config/deepspeed.yaml" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD # Horovod baseline DIST_MODE="horovod" RUN_NAME="horovod-bl-imagenent" TRAINING_CMD="horovod_trainer.py -c config/base.yaml -c config/horovod.yaml" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD # DDP itwinai DIST_MODE="ddp" RUN_NAME="ddp-itwinai-imagenent" TRAINING_CMD="itwinai_trainer.py -c config/base.yaml -c config/ddp.yaml -s ddp" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD # DeepSpeed itwinai DIST_MODE="deepspeed" RUN_NAME="deepspeed-itwinai-imagenent" TRAINING_CMD="itwinai_trainer.py -c config/base.yaml -c config/deepspeed.yaml -s deepspeed" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD # Horovod itwinai DIST_MODE="horovod" RUN_NAME="horovod-itwinai-imagenent" TRAINING_CMD="itwinai_trainer.py -c config/base.yaml -c config/horovod.yaml -s horovod" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" --job-name="$RUN_NAME-n$N" $CMD \ No newline at end of file +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh b/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh index dbec90e1..29a32705 100644 --- a/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh +++ b/tutorials/distributed-ml/torch-scaling-test/scaling-test.sh @@ -3,7 +3,7 @@ rm *checkpoint.pth.tar *.out *.err *.csv timeout="03:30:00" -for N in 1 2 4 8 +for N in 1 2 4 8 16 32 64 128 do bash runall.sh $N $timeout echo diff --git a/tutorials/distributed-ml/torch-scaling-test/slurm.sh b/tutorials/distributed-ml/torch-scaling-test/slurm.sh index e47ce5b3..93dd4349 100644 --- a/tutorials/distributed-ml/torch-scaling-test/slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/slurm.sh @@ -24,13 +24,11 @@ # Load environment modules ml Stages/2024 GCC OpenMPI CUDA/12 
MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py -# Activate Python env -sysN="$(uname -n | cut -f2- -d.)" -sysN="${sysN%%[0-9]*}" -source ../../../envAI_${sysN}/bin/activate - # Job info echo "DEBUG: TIME: $(date)" +sysN="$(uname -n | cut -f2- -d.)" +sysN="${sysN%%[0-9]*}" +echo "Running on system: $sysN" echo "DEBUG: EXECUTE: $EXEC" echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" @@ -56,27 +54,33 @@ fi # Env vairables check if [ -z "$DIST_MODE" ]; then - >&2 echo "ERROR: \$DIST_MODE env variable is not set. Allowed values are 'horovod', 'ddp' or 'deepspeed'" + >&2 echo "ERROR: env variable DIST_MODE is not set. Allowed values are 'horovod', 'ddp' or 'deepspeed'" exit 1 fi if [ -z "$RUN_NAME" ]; then - >&2 echo "WARNING: \$RUN_NAME env variable is not set. It's a way to identify some specific run of an experiment." + >&2 echo "WARNING: env variable RUN_NAME is not set. It's a way to identify some specific run of an experiment." RUN_NAME=$DIST_MODE fi if [ -z "$TRAINING_CMD" ]; then - >&2 echo "ERROR: \$TRAINING_CMD env variable is not set. It's the python command to execute." + >&2 echo "ERROR: env variable TRAINING_CMD is not set. It's the python command to execute." exit 1 fi +if [ -z "$PYTHON_VENV" ]; then + >&2 echo "WARNING: env variable PYTHON_VENV is not set. It's the path to a python virtual environment." +else + # Activate Python virtual env + source $PYTHON_VENV/bin/activate +fi # Launch training if [ "$DIST_MODE" == "ddp" ] ; then echo "DDP training: $TRAINING_CMD" srun --cpu-bind=none --ntasks-per-node=1 \ --job-name="$RUN_NAME-n$SLURM_NNODES" \ - --output="job-$RUN_NAME-n$SLURM_NNODES.out" \ - --error="job-$RUN_NAME-n$SLURM_NNODES.err" \ + --output="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.out" \ + --error="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.err" \ bash -c "torchrun \ - --log_dir='logs' \ + --log_dir='logs_torchrun' \ --nnodes=$SLURM_NNODES \ --nproc_per_node=$SLURM_GPUS_PER_NODE \ --rdzv_id=$SLURM_JOB_ID \ @@ -92,8 +96,8 @@ elif [ "$DIST_MODE" == "deepspeed" ] ; then srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ --job-name="$RUN_NAME-n$SLURM_NNODES" \ - --output="job-$RUN_NAME-n$SLURM_NNODES.out" \ - --error="job-$RUN_NAME-n$SLURM_NNODES.err" \ + --output="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.out" \ + --error="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.err" \ python -u $TRAINING_CMD --deepspeed # # Run with deepspeed launcher: set --ntasks-per-node=1 @@ -109,8 +113,8 @@ elif [ "$DIST_MODE" == "horovod" ] ; then echo "HOROVOD training: $TRAINING_CMD" srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ --job-name="$RUN_NAME-imagenet-n$SLURM_NNODES" \ - --output="job-$RUN_NAME-n$SLURM_NNODES.out" \ - --error="job-$RUN_NAME-n$SLURM_NNODES.err" \ + --output="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.out" \ + --error="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.err" \ python -u $TRAINING_CMD else >&2 echo "ERROR: unrecognized \$DIST_MODE env variable" From 82eb1ae41b2fac115cb65a01d74e874f2563facc Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 16 Apr 2024 16:52:01 +0200 Subject: [PATCH 106/171] FIX linter --- .github/workflows/lint.yml | 2 ++ .vscode/settings.json | 3 --- src/itwinai/cli.py | 2 +- src/itwinai/tensorflow/trainer.py | 1 + src/itwinai/torch/trainer.py | 12 ++++++------ tests/components/test_components.py | 2 +- tutorials/distributed-ml/jube-tutorial/README.md | 2 +- .../torch-scaling-test/config/base.yaml | 4 ---- 
.../torch-scaling-test/deepspeed_trainer.py | 2 +- .../distributed-ml/torch-scaling-test/runall.sh | 4 ++-- 10 files changed, 15 insertions(+), 19 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 379229a3..8eca0a3c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -44,6 +44,8 @@ jobs: VALIDATE_HTML: false VALIDATE_GITLEAKS: false VALIDATE_BASH_EXEC: false + VALIDATE_CHECKOV: false # activate to lint k8s pods + VALIDATE_SHELL_SHFMT: false # Only check new or edited files VALIDATE_ALL_CODEBASE: false diff --git a/.vscode/settings.json b/.vscode/settings.json index 58e902ec..6f581e8c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -54,9 +54,6 @@ "[python]": { "editor.defaultFormatter": "ms-python.autopep8" }, - // "[markdown]": { - // "editor.formatOnSave": false - // }, "python.testing.pytestArgs": [ "tests" ], diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py index 49d1d7bd..275d853a 100644 --- a/src/itwinai/cli.py +++ b/src/itwinai/cli.py @@ -42,7 +42,7 @@ def scalability_report( records in sub-folders. Example: - >>> itwinai scalability-report --pattern="^epoch.+\.csv$" --skip-id 0 \\ + >>> itwinai scalability-report --pattern="^epoch.+\\.csv$" --skip-id 0 \\ >>> --plot-title "Some title" --logy --archive archive_name """ # TODO: add max depth and path different from CWD diff --git a/src/itwinai/tensorflow/trainer.py b/src/itwinai/tensorflow/trainer.py index ce6bab15..d8c40012 100644 --- a/src/itwinai/tensorflow/trainer.py +++ b/src/itwinai/tensorflow/trainer.py @@ -7,6 +7,7 @@ from ..components import Trainer, monitor_exec from itwinai.tensorflow.distributed import get_strategy + def import_class(name): components = name.split('.') mod = __import__(components[0]) diff --git a/src/itwinai/torch/trainer.py b/src/itwinai/torch/trainer.py index 141ed32f..f0ad1c03 100644 --- a/src/itwinai/torch/trainer.py +++ b/src/itwinai/torch/trainer.py @@ -26,12 +26,12 @@ from ..loggers import LogMixin, Logger, ConsoleLogger from ..utils import dynamically_import_class from ..cluster import ClusterEnvironment -from .distributed import ( - TorchDistributedStrategy, - DDPDistributedStrategy, - DSDistributedStrategy, - HVDDistributedStrategy -) +# from .distributed import ( +# TorchDistributedStrategy, +# DDPDistributedStrategy, +# DSDistributedStrategy, +# HVDDistributedStrategy +# ) def preproc_dataloader(dataloader: DataLoader, gwsize, grank): diff --git a/tests/components/test_components.py b/tests/components/test_components.py index 364b4917..3ec55453 100644 --- a/tests/components/test_components.py +++ b/tests/components/test_components.py @@ -105,7 +105,7 @@ def test_adapter(): assert result == (0, 0, 0, 0) adapter = Adapter( - policy=[f"{prefix}{i%2}" for i in range(4)] + policy=[f"{prefix}{i % 2}" for i in range(4)] ) result = adapter.execute(0, 1, 2, 3) assert result == (0, 1, 0, 1) diff --git a/tutorials/distributed-ml/jube-tutorial/README.md b/tutorials/distributed-ml/jube-tutorial/README.md index 0f564c99..8dc6514c 100644 --- a/tutorials/distributed-ml/jube-tutorial/README.md +++ b/tutorials/distributed-ml/jube-tutorial/README.md @@ -1,3 +1,3 @@ # Benchmarking tutorial using JUBE -TODO: complete \ No newline at end of file +TODO: complete diff --git a/tutorials/distributed-ml/torch-scaling-test/config/base.yaml b/tutorials/distributed-ml/torch-scaling-test/config/base.yaml index 344d4237..3cbadd07 100644 --- a/tutorials/distributed-ml/torch-scaling-test/config/base.yaml +++ 
b/tutorials/distributed-ml/torch-scaling-test/config/base.yaml @@ -14,7 +14,3 @@ shuff: False # Reproducibility rnd_seed: 10 - -# Distributed ML -verbose: False - diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py index ba353025..691712e8 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py @@ -94,7 +94,7 @@ def train(args, model, train_loader, optimizer, epoch, grank, gwsize): print( f'Train epoch: {epoch} [{batch_idx * len(data)}/' f'{len(train_loader.dataset) / gwsize} ' - f'({100.0 * batch_idx *len(data) / len(train_loader):.0f}%)]' + f'({100.0 * batch_idx * len(data) / len(train_loader):.0f}%)]' f'\t\tLoss: {loss.item():.6f}') t_list.append(timer() - t) loss_acc += loss.item() diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index 973d59b5..4f9efdcf 100644 --- a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -15,8 +15,8 @@ else fi # Common options -CMD="--nodes=$N --time=$T --account=intertwin --partition=batch slurm.sh" -PYTHON_VENV="../../../envAI_hdfml" +CMD="--nodes=$N --time=$T --account=atmo-rep --partition=booster slurm.sh" +PYTHON_VENV="../../../envAI_juwels" echo "Distributing training over $N nodes. Timeout set to: $T" From b3b6b1e77f0a2c55264f94a54789c4c2995a5049 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 16 Apr 2024 17:02:22 +0200 Subject: [PATCH 107/171] REMOVE jube tutorial --- .../jube-tutorial/DDP_trainer.py | 479 ------------------ .../distributed-ml/jube-tutorial/README.md | 3 - .../jube-tutorial/general_jobsys.xml | 142 ------ .../distributed-ml/jube-tutorial/jube_ddp.sh | 61 --- 4 files changed, 685 deletions(-) delete mode 100644 tutorials/distributed-ml/jube-tutorial/DDP_trainer.py delete mode 100644 tutorials/distributed-ml/jube-tutorial/README.md delete mode 100644 tutorials/distributed-ml/jube-tutorial/general_jobsys.xml delete mode 100644 tutorials/distributed-ml/jube-tutorial/jube_ddp.sh diff --git a/tutorials/distributed-ml/jube-tutorial/DDP_trainer.py b/tutorials/distributed-ml/jube-tutorial/DDP_trainer.py deleted file mode 100644 index 08fcbf98..00000000 --- a/tutorials/distributed-ml/jube-tutorial/DDP_trainer.py +++ /dev/null @@ -1,479 +0,0 @@ -""" -Scaling test of torch Distributed Data Parallel on Imagenet using Resnet. 
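(Editor's sketch, not part of the patch: combining the runall.sh settings above with the sbatch template shown earlier, a single DDP-baseline submission for N=2 nodes and T=03:30:00 expands roughly to
    sbatch --export=ALL,DIST_MODE="ddp",RUN_NAME="ddp-bl-imagenent",TRAINING_CMD="ddp_trainer.py -c config/base.yaml -c config/ddp.yaml",PYTHON_VENV="../../../envAI_juwels" --job-name="ddp-bl-imagenent-n2" --nodes=2 --time=03:30:00 --account=atmo-rep --partition=booster slurm.sh
where the exact shell quoting of TRAINING_CMD may differ.)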
-""" -import argparse -import sys -import os -import time -import random -import numpy as np -import logging - -import torch -import torch.distributed as dist -import torch.nn as nn -import torch.nn.functional as F -import torchvision -from torchvision import datasets, transforms - -import argparse - -#from itwinai.parser import ArgumentParser as ItAIArgumentParser -#from itwinai.loggers import EpochTimeTracker - - -def pars_ini(): - parser = argparse.ArgumentParser(description='itwinai - parsed arguments') - - # IO parsers - parser.add_argument('--data-dir', default='./', - help=('location of the training dataset in the ' - 'local filesystem')) - parser.add_argument('--restart-int', type=int, default=10, - help='restart interval per epoch (default: 10)') - parser.add_argument('--verbose', - action=argparse.BooleanOptionalAction, - help='Print parsed arguments') - - # model parsers - parser.add_argument('--batch-size', type=int, default=64, - help='input batch size for training (default: 64)') - parser.add_argument('--epochs', type=int, default=10, - help='number of epochs to train (default: 10)') - parser.add_argument('--lr', type=float, default=0.01, - help='learning rate (default: 0.01)') - parser.add_argument('--momentum', type=float, default=0.5, - help='momentum in SGD optimizer (default: 0.5)') - parser.add_argument('--shuff', action='store_true', default=False, - help='shuffle dataset (default: False)') - - # debug parsers - parser.add_argument('--testrun', action='store_true', default=False, - help='do a test run with seed (default: False)') - parser.add_argument('--nseed', type=int, default=0, - help='seed integer for reproducibility (default: 0)') - parser.add_argument('--log-int', type=int, default=10, - help='log interval per training') - parser.add_argument('--benchrun', - action='store_true', default=True) - - # parallel parsers - parser.add_argument('--backend', type=str, default='nccl', - help='backend for parrallelisation (default: nccl)') - parser.add_argument('--nworker', type=int, default=0, - help=('number of workers in DataLoader ' - '(default: 0 - only main)')) - parser.add_argument('--prefetch', type=int, default=2, - help='prefetch data in DataLoader (default: 2)') - parser.add_argument('--no-cuda', action='store_true', default=False, - help='disables GPGPUs') - - args = parser.parse_args() - - if args.verbose: - args_list = [f"{key}: {val}" for key, val in args.items()] - print("PARSED ARGS:\n", '\n'.join(args_list)) - return args - - -def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): - model.train() - lt_1 = time.perf_counter() - loss_acc = 0 - if grank == 0: - print("\n") - for batch_idx, (data, target) in enumerate(train_loader): - # if grank == 0: - # print(f"BS == DATA: {data.shape}, TARGET: {target.shape}") - data, target = data.to(device), target.to(device) - optimizer.zero_grad() - output = model(data) - loss = F.nll_loss(output, target) - loss.backward() - optimizer.step() - if batch_idx % args.log_int == 0 and grank == 0: - print( - f'Train epoch: {epoch} [{batch_idx * len(data)}/' - f'{len(train_loader.dataset)/gwsize} ' - f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\tLoss: ' - f'{loss.item():.6f}') - - loss_acc += loss.item() - if grank == 0: - logging.info('epoch time: {:.2f}'.format(time.perf_counter()-lt_1)+' s') - return loss_acc - - -def test(model, device, test_loader, grank, gwsize): - model.eval() - test_loss = 0 - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - data, target = 
data.to(device), target.to(device) - output = model(data) - # sum up batch loss - test_loss += F.nll_loss(output, target, reduction="sum").item() - # get the index of the max log-probability - pred = output.argmax(dim=1, keepdim=True) - correct += pred.eq(target.view_as(pred)).sum().item() - test_loss /= len(test_loader.dataset) - if grank == 0: - print( - f'Test set: average loss: {test_loss:.4f}\t' - f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') - acc_test = 100.0 * correct * gwsize / len(test_loader.dataset) - return acc_test - - -def save_state( - epoch, distrib_model, loss_acc, - optimizer, res_name, grank, gwsize, is_best -): - """Save training state.""" - rt = time.time() - # find if is_best happened in any worker - if torch.cuda.is_available(): - is_best_m = par_allgather_obj(is_best, gwsize) - - if torch.cuda.is_available(): - if any(is_best_m): - # find which rank is_best happened - select first rank if multiple - is_best_rank = np.where(np.array(is_best_m))[0][0] - - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - # write on worker with is_best - if grank == is_best_rank: - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} ' - f'in {time.time()-rt} s') - else: - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} ' - f'in {time.time()-rt} s') - - -def seed_worker(worker_id): - worker_seed = torch.initial_seed() % 2**32 - np.random.seed(worker_seed) - random.seed(worker_seed) - - -def par_allgather_obj(obj, gwsize): - """Gathers any object from the whole group in a list (to all workers)""" - res = [None]*gwsize - dist.all_gather_object(res, obj, group=None) - return res - - -def main(): - # get parse args - args = pars_ini() - - # check CUDA availibility - args.cuda = not args.no_cuda and torch.cuda.is_available() - - # get directory - program_dir = os.getcwd() - - # start the time.time for profiling - st = time.time() - - # initializes the distributed backend which will take care of synchronizing - # nodes/GPUs - if torch.cuda.is_available(): - dist.init_process_group(backend=args.backend) - - # deterministic testrun - if args.testrun: - torch.manual_seed(args.nseed) - g = torch.Generator() - g.manual_seed(args.nseed) - - # get job rank info - rank==0 master gpu - if torch.cuda.is_available(): - # local world size - per node - lwsize = torch.cuda.device_count() if args.cuda else 0 - gwsize = dist.get_world_size() # global world size - per run - grank = dist.get_rank() # global rank - assign per run - lrank = dist.get_rank() % lwsize # local rank - assign per node - else: - gwsize = 1 - grank = 0 - - # some debug - if grank == 0: - print('TIMER: initialise:', time.time()-st, 's') - print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) - print('DEBUG: sys.version:', sys.version, '\n') - - print('DEBUG: IO parsers:') - print('DEBUG: args.data_dir:', args.data_dir) - print('DEBUG: args.restart_int:', args.restart_int, '\n') - - print('DEBUG: model parsers:') - print('DEBUG: args.batch_size:', args.batch_size) - print('DEBUG: args.epochs:', args.epochs) - print('DEBUG: args.lr:', args.lr) - print('DEBUG: args.momentum:', args.momentum) - print('DEBUG: args.shuff:', args.shuff, '\n') - - print('DEBUG: 
debug parsers:') - print('DEBUG: args.testrun:', args.testrun) - print('DEBUG: args.nseed:', args.nseed) - print('DEBUG: args.log_int:', args.log_int, '\n') - - print('DEBUG: parallel parsers:') - print('DEBUG: args.backend:', args.backend) - print('DEBUG: args.nworker:', args.nworker) - print('DEBUG: args.prefetch:', args.prefetch) - print('DEBUG: args.cuda:', args.cuda) - print('DEBUG: args.benchrun:', args.benchrun, '\n') - - # encapsulate the model on the GPU assigned to the current process - device = torch.device( - 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) - if args.cuda: - torch.cuda.set_device(lrank) - # deterministic testrun - if args.testrun: - torch.cuda.manual_seed(args.nseed) - - # dataset - # Initialize transformations for data augmentation - transform = transforms.Compose([ - transforms.Resize(256), - transforms.RandomHorizontalFlip(), - transforms.RandomVerticalFlip(), - transforms.RandomRotation(degrees=45), - transforms.ColorJitter( - brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) - ]) - - # Load the ImageNet Object Localization Challenge dataset - train_dataset = datasets.ImageFolder( - root=args.data_dir, - transform=transform - ) - # test_dataset = ... - - # restricts data loading to a subset of the dataset exclusive to the - # current process - args.shuff = args.shuff and not args.testrun - if torch.cuda.is_available(): - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) - # test_sampler = torch.utils.data.distributed.DistributedSampler( - # test_dataset, num_replicas=gwsize, rank=grank, - # shuffle=args.shuff) - - # distribute dataset to workers - # persistent workers is not possible for nworker=0 - pers_w = True if args.nworker > 1 else False - - # deterministic testrun - the same dataset each run - kwargs = {'worker_init_fn': seed_worker, - 'generator': g} if args.testrun else {} - - if torch.cuda.is_available(): - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) - # test_loader = torch.utils.data.DataLoader( - # test_dataset, batch_size=args.batch_size, - # sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - # persistent_workers=pers_w, prefetch_factor=args.prefetch, - # **kwargs) - else: - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size) - # test_loader = torch.utils.data.DataLoader( - # test_dataset, batch_size=args.batch_size) - - if grank == 0: - print('TIMER: read and concat data:', time.time()-st, 's') - - # create CNN model - model = torchvision.models.resnet152().to(device) - - # distribute model to workers - if torch.cuda.is_available(): - distrib_model = nn.parallel.DistributedDataParallel( - model, - device_ids=[device], - output_device=device) - else: - distrib_model = model - - # optimizer - # optimizer = torch.optim.Adam(distrib_model.parameters(), lr=args.lr) - optimizer = torch.optim.SGD( - distrib_model.parameters(), lr=args.lr, momentum=args.momentum) - - # resume state - start_epoch = 1 - best_acc = np.Inf - nnod = os.environ.get('SLURM_NNODES', 'unk') - res_name = f'ddp-{nnod}N-checkpoint.pth.tar' - if os.path.isfile(res_name) and not args.benchrun: - try: - if 
torch.cuda.is_available(): - dist.barrier() - # Map model to be loaded to specified single gpu. - loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { - 'cpu:%d' % 0: 'cpu:%d' % lrank} - checkpoint = torch.load( - program_dir+'/'+res_name, map_location=loc) - else: - checkpoint = torch.load(program_dir+'/'+res_name) - start_epoch = checkpoint['epoch'] - best_acc = checkpoint['best_acc'] - distrib_model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - if torch.cuda.is_available(): - if grank == 0: - print(f'WARNING: restarting from {start_epoch} epoch') - else: - print(f'WARNING: restarting from {start_epoch} epoch') - except Exception: - if torch.cuda.is_available(): - if grank == 0: - print('WARNING: restart file cannot ' - 'be loaded, restarting!') - else: - print('WARNING: restart file cannot be loaded, restarting!') - - if start_epoch >= args.epochs: - if torch.cuda.is_available(): - if grank == 0: - print('WARNING: given epochs are less than the one in the' - ' restart file!\n' - 'WARNING: SYS.EXIT is issued') - dist.barrier() - dist.destroy_process_group() - sys.exit() - else: - print('WARNING: given epochs are less than the one in the ' - 'restart file!\n' - 'WARNING: SYS.EXIT is issued') - sys.exit() - - # start trainin/testing loop - if grank == 0: - print('TIMER: broadcast:', time.time()-st, 's') - print('\nDEBUG: start training') - print('--------------------------------------------------------') - #epoch_time_tracker = EpochTimeTracker(series_name="ddp-bl") - - et = time.time() - for epoch in range(start_epoch, args.epochs + 1): - lt = time.time() - # training - if args.benchrun and epoch == args.epochs: - # profiling (done on last epoch - slower!) - with torch.autograd.profiler.profile(use_cuda=args.cuda, - profile_memory=True) as prof: - loss_acc = train(distrib_model, device, train_loader, - optimizer, epoch, grank, gwsize, args) - else: - loss_acc = train(distrib_model, device, train_loader, - optimizer, epoch, grank, gwsize, args) - - # # testing - # acc_test = test(distrib_model, device, - # test_loader, grank, gwsize, args) - - # save first epoch timer - if epoch == start_epoch: - first_ep_t = time.time()-lt - - # final epoch - if epoch + 1 == args.epochs: - train_loader.last_epoch = True - # test_loader.last_epoch = True - - if grank == 0: - print('TIMER: epoch time:', time.time()-lt, 's') - #epoch_time_tracker.add_epoch_time(epoch-1, time.time()-lt) - # print('DEBUG: accuracy:', acc_test, '%') - if args.benchrun and epoch == args.epochs: - print('\n----------------------------------------------------') - print('DEBUG: benchmark of last epoch:\n') - what1 = 'cuda' if args.cuda else 'cpu' - print(prof.key_averages().table( - sort_by='self_'+str(what1)+'_time_total')) - - # save state if found a better state - is_best = loss_acc < best_acc - if epoch % args.restart_int == 0 and not args.benchrun: - save_state(epoch, distrib_model, loss_acc, optimizer, - res_name, grank, gwsize, is_best) - # reset best_acc - best_acc = min(loss_acc, best_acc) - - # finalise - # save final state - if not args.benchrun: - save_state(epoch, distrib_model, loss_acc, - optimizer, res_name, grank, gwsize, True) - if torch.cuda.is_available(): - dist.barrier() - - # some debug - if grank==0: - print(f'\n--------------------------------------------------------') - logging.info('training results:') - logging.info('first epoch time: {:.2f}'.format(first_ep_t)+' s') - logging.info('last epoch time: {:.2f}'.format(time.time()-lt)+' s') - 
logging.info('total epoch time: {:.2f}'.format(time.time()-et)+' s') - logging.info('average epoch time: {:.2f}'.format((time.time()-et)/done_epochs)+' s') - if epoch>1: - logging.info('total epoch-1 time: {:.2f}'.format(time.time()-et-first_ep_t)+' s') - logging.info('average epoch-1 time: {:.2f}'.format((time.time()-et-first_ep_t)/(args.epochs-1))+' s') - if args.benchrun: - tot_ep_tm2 = tot_ep_t - first_ep_t - last_ep_t - logging.info('total epoch-2 time: {:.2f}'.format(lt-first_ep_t)+' s') - logging.info('average epoch-2 time: {:.2f}'.format((lt-first_ep_t)/(args.epochs-2))+' s') - # memory on worker 0 - if args.cuda: - logging.info('memory req: '+str(int(torch.cuda.max_memory_reserved(0)/1024/1024))+' MB') - logging.info('memory summary:\n'+str(torch.cuda.memory_summary(0))) - - # timer for current epoch - if grank==0: - logging.info('epoch time: {:.2f}'.format(time.perf_counter()-lt_1)+' s') - - if grank == 0: - print(f'TIMER: final time: {time.time()-st} s\n') - nnod = os.environ.get('SLURM_NNODES', 'unk') - #epoch_time_tracker.save( - # csv_file=f"epochtime_ddp-bl_{nnod}N.csv") - - print(f" - TRAINING FINISHED") - - # clean-up - if torch.cuda.is_available(): - dist.barrier() - dist.destroy_process_group() - - -if __name__ == "__main__": - main() - sys.exit() diff --git a/tutorials/distributed-ml/jube-tutorial/README.md b/tutorials/distributed-ml/jube-tutorial/README.md deleted file mode 100644 index 8dc6514c..00000000 --- a/tutorials/distributed-ml/jube-tutorial/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Benchmarking tutorial using JUBE - -TODO: complete diff --git a/tutorials/distributed-ml/jube-tutorial/general_jobsys.xml b/tutorials/distributed-ml/jube-tutorial/general_jobsys.xml deleted file mode 100644 index 273b2224..00000000 --- a/tutorials/distributed-ml/jube-tutorial/general_jobsys.xml +++ /dev/null @@ -1,142 +0,0 @@ - - - - General benchmark script - - - - - 1,2,4,8 - - 8 - - DDP_trainer.py - - - - - if [ -f /etc/FZJ/systemname ]; then cat /etc/FZJ/systemname | tr -d "\n"; else uname -n | head -c 3; fi - sbatch - $iterNO - $iterNW - ready - jube_ddp.sh - - { "hdfml": 4, - }["${systemname}"] - - intertwin - - 00:30:00 - - { "hdfml": "batch", - }["${systemname}"] - - - 00:10:00 - - { "hdfml": "batch", - }["${systemname}"] - - - - - { - "hdfml": "ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py", - }["${systemname}"] - - source /p/project/intertwin/rakesh/T6.5-AI-and-ML/bench/../envAI_hdfml/bin/activate - { - "hdfml": "export CUDA_VISIBLE_DEVICES=0,1,2,3" - }["${systemname}"] - - - - - - $job_file - $script - - - - - - - - - - - - - - - - - - - - - paramset - executeset - envirset - files,sub_job - echo "nID: $jube_wp_id" - - $submit_cmd $job_file - - - - - - ${jube_wp_id} - ${nodes} - ${nnw} - \s*INFO: total epoch-2 time:\s+$jube_pat_wrd\s* - \s*INFO: average epoch-2 time:\s+$jube_pat_wrd\s* - ${avgEpochT}/${nodes} - \s*INFO: memory req:\s+$jube_pat_wrd\s* - ${memory}/1024 - - - - - pattern - - stdout - job.out - - - - - - analyse - - ID - Nnodes - Nworkers - calcTime - avgEpochT - Naet - memoryGPU -
-
- - - - analyse - - ID - Nnodes - Nworkers - calcTime - avgEpochT - Naet - memoryGPU -
-
- -
-
- - - diff --git a/tutorials/distributed-ml/jube-tutorial/jube_ddp.sh b/tutorials/distributed-ml/jube-tutorial/jube_ddp.sh deleted file mode 100644 index 2c6b2446..00000000 --- a/tutorials/distributed-ml/jube-tutorial/jube_ddp.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=JUBE_DDP -#SBATCH --account=#ACC# -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job.out -#SBATCH --error=job.err -#SBATCH --time=#TIMELIM# - -# configure node and process count on the CM -#SBATCH --partition=#QUEUE# -#SBATCH --nodes=#NODES# -#SBATCH --ntasks-per-node=#NGPU# -#SBATCH --cpus-per-task=#NW# -#SBATCH --gpus-per-node=#NGPU# -#SBATCH --exclusive - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source ../dist_trainer_v2/envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set comm -export CUDA_VISIBLE_DEVICES="0,1,2,3" -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi - -# launch training -TRAINING_CMD="#SCRIPT#" - -srun --cpu-bind=none bash -c "torchrun \ - --log_dir='logs' \ - --nnodes=$SLURM_NNODES \ - --nproc_per_node=$SLURM_GPUS_PER_NODE \ - --rdzv_id=$SLURM_JOB_ID \ - --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ - --rdzv_backend=c10d \ - $TRAINING_CMD" From ac1bfe57ffc2d3632389028c2f5edb0ba7ec09f7 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 16 Apr 2024 17:13:45 +0200 Subject: [PATCH 108/171] Restore ConfigParser --- src/itwinai/parser.py | 177 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 176 insertions(+), 1 deletion(-) diff --git a/src/itwinai/parser.py b/src/itwinai/parser.py index 24c521cd..0001627b 100644 --- a/src/itwinai/parser.py +++ b/src/itwinai/parser.py @@ -5,11 +5,186 @@ import logging import os -from typing import List, Type, Union, Optional +from typing import Dict, Any, List, Type, Union, Optional from jsonargparse import ArgumentParser as JAPArgumentParser from jsonargparse import ActionConfigFile from jsonargparse._formatters import DefaultHelpFormatter +import json +from omegaconf import OmegaConf +from pathlib import Path + +from .components import BaseComponent +from .pipeline import Pipeline +from .utils import load_yaml + + +def add_replace_field( + config: Dict, + key_chain: str, + value: Any +) -> None: + """Replace or add (if not present) a field in a dictionary, following a + path of dot-separated keys. Adding is not supported for list items. + Inplace operation. + Args: + config (Dict): dictionary to be modified. + key_chain (str): path of nested (dot-separated) keys to specify the + location + of the new value (e.g., 'foo.bar.line' adds/overwrites the value + located at config['foo']['bar']['line']). + value (Any): the value to insert. 
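Example (editor's sketch, not part of the original patch; values are
illustrative): given the semantics documented above, the function is
expected to behave as follows.
    >>> cfg = {'foo': {'bar': [{'line': 1}]}}
    >>> add_replace_field(cfg, 'foo.bar.0.line', 42)    # replace inside a list item
    >>> add_replace_field(cfg, 'foo.new.key', 'hello')  # add a missing nested field
    >>> cfg
    {'foo': {'bar': [{'line': 42}], 'new': {'key': 'hello'}}}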
+ """ + sub_config = config + for idx, k in enumerate(key_chain.split('.')): + if idx >= len(key_chain.split('.')) - 1: + # Last key reached + break + + if isinstance(sub_config, (list, tuple)): + k = int(k) + next_elem = sub_config[k] + else: + next_elem = sub_config.get(k) + + if not isinstance(next_elem, (dict, list, tuple)): + sub_config[k] = dict() + + sub_config = sub_config[k] + if isinstance(sub_config, (list, tuple)): + k = int(k) + sub_config[k] = value + + +class ConfigParser: + """ + Parses a pipeline from a configuration file. + It also provides functionalities for dynamic override + of fields by means of nested key notation. + Args: + config (Union[str, Dict]): path to YAML configuration file + or dict storing a configuration. + override_keys (Optional[Dict[str, Any]], optional): dict mapping + nested keys to the value to override. Defaults to None. + Example: + >>> # pipeline.yaml file + >>> pipeline: + >>> class_path: itwinai.pipeline.Pipeline + >>> init_args: + >>> steps: + >>> - class_path: dataloader.MNISTDataModuleTorch + >>> init_args: + >>> save_path: .tmp/ + >>> + >>> - class_path: itwinai.torch.trainer.TorchTrainerMG + >>> init_args: + >>> model: + >>> class_path: model.Net + >>> loss: + >>> class_path: torch.nn.NLLLoss + >>> init_args: + >>> reduction: mean + >>> from itwinai.parser import ConfigParser + >>> + >>> parser = ConfigParser( + >>> config='pipeline.yaml', + >>> override_keys={ + >>> 'pipeline.init_args.steps.0.init_args.save_path': /save/path + >>> } + >>> ) + >>> pipeline = parser.parse_pipeline() + >>> print(pipeline) + >>> print(pipeline.steps) + >>> + >>> dataloader = parser.parse_step(0) + >>> print(dataloader) + >>> print(dataloader.save_path) + """ + + config: Dict + pipeline: Pipeline + + def __init__( + self, + config: Union[str, Dict], + override_keys: Optional[Dict[str, Any]] = None + ) -> None: + self.config = config + self.override_keys = override_keys + if isinstance(self.config, (str, Path)): + self.config = load_yaml(self.config) + self._dynamic_override_keys() + self._omegaconf_interpolate() + + def _dynamic_override_keys(self): + if self.override_keys is not None: + for key_chain, value in self.override_keys.items(): + add_replace_field(self.config, key_chain, value) + + def _omegaconf_interpolate(self) -> None: + """Performs variable interpolation with OmegaConf on internal + configuration file. + """ + conf = OmegaConf.create(self.config) + self.config = OmegaConf.to_container(conf, resolve=True) + + def parse_pipeline( + self, + pipeline_nested_key: str = "pipeline", + verbose: bool = False + ) -> Pipeline: + """Merges steps into pipeline and parses it. + Args: + pipeline_nested_key (str, optional): nested key in the + configuration file identifying the pipeline object. + Defaults to "pipeline". + verbose (bool): if True, prints the assembled pipeline + to console formatted as JSON. + Returns: + Pipeline: instantiated pipeline. 
+ """ + pipe_parser = JAPArgumentParser() + pipe_parser.add_subclass_arguments(Pipeline, "pipeline") + + pipe_dict = self.config + for key in pipeline_nested_key.split('.'): + pipe_dict = pipe_dict[key] + # pipe_dict = self.config[pipeline_nested_key] + pipe_dict = {"pipeline": pipe_dict} + + if verbose: + print("Assembled pipeline:") + print(json.dumps(pipe_dict, indent=4)) + + # Parse pipeline dict once merged with steps + conf = pipe_parser.parse_object(pipe_dict) + pipe = pipe_parser.instantiate_classes(conf) + self.pipeline = pipe["pipeline"] + return self.pipeline + + def parse_step( + self, + step_idx: Union[str, int], + pipeline_nested_key: str = "pipeline", + verbose: bool = False + ) -> BaseComponent: + pipeline_dict = self.config + for key in pipeline_nested_key.split('.'): + pipeline_dict = pipeline_dict[key] + + step_dict_config = pipeline_dict['init_args']['steps'][step_idx] + + if verbose: + print(f"STEP '{step_idx}' CONFIG:") + print(json.dumps(step_dict_config, indent=4)) + + # Wrap config under "step" field and parse it + step_dict_config = {'step': step_dict_config} + step_parser = JAPArgumentParser() + step_parser.add_subclass_arguments(BaseComponent, "step") + parsed_namespace = step_parser.parse_object(step_dict_config) + return step_parser.instantiate_classes(parsed_namespace)["step"] + class ArgumentParser(JAPArgumentParser): def __init__( From ce5e011e9786256d686f92c06144e72bca5aef01 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 16 Apr 2024 17:17:53 +0200 Subject: [PATCH 109/171] FIX type hinting --- src/itwinai/parser.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/itwinai/parser.py b/src/itwinai/parser.py index cbd930be..0001627b 100644 --- a/src/itwinai/parser.py +++ b/src/itwinai/parser.py @@ -5,7 +5,7 @@ import logging import os -from typing import List, Type, Union, Optional +from typing import Dict, Any, List, Type, Union, Optional from jsonargparse import ArgumentParser as JAPArgumentParser from jsonargparse import ActionConfigFile from jsonargparse._formatters import DefaultHelpFormatter @@ -186,7 +186,6 @@ def parse_step( return step_parser.instantiate_classes(parsed_namespace)["step"] - class ArgumentParser(JAPArgumentParser): def __init__( self, From 2ceb9322a5080cc60525f6c78abb96219c5eb321 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 16 Apr 2024 17:29:11 +0200 Subject: [PATCH 110/171] ADD dev dependencies --- Makefile | 2 -- env-files/torch/createEnvJSC.sh | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 52183fd2..9883659d 100644 --- a/Makefile +++ b/Makefile @@ -11,13 +11,11 @@ torch-gpu-jsc: env-files/torch/createEnvJSC.sh tf-gpu-jsc: env-files/tensorflow/createEnvJSCTF.sh sh env-files/tensorflow/createEnvJSCTF.sh - # Install PyTorch env (CPU only) torch-cpu: env-files/torch/pytorch-env-cpu.yml micromamba env create -p ./.venv-pytorch --file env-files/torch/pytorch-env-cpu.yml -y micromamba run -p ./.venv-pytorch python -m pip install -e .[dev] - # Install TensorFlow 2.13. Creates ./.venv-tf folder. # Ref: https://www.tensorflow.org/install/pip#step-by-step_instructions tf-2.13: env-files/tensorflow/tensorflow-2.13.yml diff --git a/env-files/torch/createEnvJSC.sh b/env-files/torch/createEnvJSC.sh index 6b0fa226..68cf292c 100644 --- a/env-files/torch/createEnvJSC.sh +++ b/env-files/torch/createEnvJSC.sh @@ -185,7 +185,7 @@ done # Install itwinai pip install --upgrade pip -pip install -e . 
+pip install -e .[dev] # cleanup rm -rf horovod *.tar.gz From 6dd74caca05729f8d6cf6bfe92f8f71fafbb4ee3 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 16 Apr 2024 18:41:50 +0200 Subject: [PATCH 111/171] REMOVE experimental scripts --- experimental/cli/example.yaml | 9 - experimental/cli/itwinai-conf.yaml | 14 - experimental/cli/itwinaicli.py | 29 -- experimental/cli/mycode.py | 35 -- experimental/cli/parser-bk.py | 46 --- experimental/cli/parser.py | 29 -- experimental/cluster.py | 97 ----- experimental/distrib_launcher.py | 117 ------ experimental/distributed_tools.py | 68 ---- experimental/example_0.py | 125 ------ experimental/example_1.py | 106 ----- experimental/example_2.py | 107 ----- experimental/example_3.py | 77 ---- experimental/launcher.py | 295 -------------- experimental/launcher_factory.py | 144 ------- experimental/strategy.py | 150 ------- experimental/trainer/DS_config.json | 15 - experimental/trainer/general_startscript | 135 ------- experimental/trainer/general_trainer.py | 482 ----------------------- experimental/workflow/train.yaml | 53 --- src/itwinai/experimental/executors.py | 127 ------ 21 files changed, 2260 deletions(-) delete mode 100644 experimental/cli/example.yaml delete mode 100644 experimental/cli/itwinai-conf.yaml delete mode 100644 experimental/cli/itwinaicli.py delete mode 100644 experimental/cli/mycode.py delete mode 100644 experimental/cli/parser-bk.py delete mode 100644 experimental/cli/parser.py delete mode 100644 experimental/cluster.py delete mode 100644 experimental/distrib_launcher.py delete mode 100644 experimental/distributed_tools.py delete mode 100644 experimental/example_0.py delete mode 100644 experimental/example_1.py delete mode 100644 experimental/example_2.py delete mode 100644 experimental/example_3.py delete mode 100644 experimental/launcher.py delete mode 100644 experimental/launcher_factory.py delete mode 100644 experimental/strategy.py delete mode 100644 experimental/trainer/DS_config.json delete mode 100755 experimental/trainer/general_startscript delete mode 100755 experimental/trainer/general_trainer.py delete mode 100644 experimental/workflow/train.yaml delete mode 100644 src/itwinai/experimental/executors.py diff --git a/experimental/cli/example.yaml b/experimental/cli/example.yaml deleted file mode 100644 index ef6a342e..00000000 --- a/experimental/cli/example.yaml +++ /dev/null @@ -1,9 +0,0 @@ -server: - class_path: mycode.ServerOptions - init_args: - host: localhost - port: 80 -client: - class_path: mycode.ClientOptions - init_args: - url: http://${server.init_args.host}:${server.init_args.port}/ \ No newline at end of file diff --git a/experimental/cli/itwinai-conf.yaml b/experimental/cli/itwinai-conf.yaml deleted file mode 100644 index 0cb662df..00000000 --- a/experimental/cli/itwinai-conf.yaml +++ /dev/null @@ -1,14 +0,0 @@ -pipeline: - class_path: itwinai.pipeline.Pipeline - steps: [server, client] - -server: - class_path: mycode.ServerOptions - init_args: - host: localhost - port: 80 - -client: - class_path: mycode.ClientOptions - init_args: - url: http://${server.init_args.host}:${server.init_args.port}/ \ No newline at end of file diff --git a/experimental/cli/itwinaicli.py b/experimental/cli/itwinaicli.py deleted file mode 100644 index 6a22bfb1..00000000 --- a/experimental/cli/itwinaicli.py +++ /dev/null @@ -1,29 +0,0 @@ -""" ->>> python itwinaicli.py --config itwinai-conf.yaml --help ->>> python itwinaicli.py --config itwinai-conf.yaml --server.port 333 -""" - - -from itwinai.parser import ConfigParser2 
-from itwinai.parser import ItwinaiCLI - -cli = ItwinaiCLI() -print(cli.pipeline) -print(cli.pipeline.steps) -print(cli.pipeline.steps['server'].port) - - -parser = ConfigParser2( - config='itwinai-conf.yaml', - override_keys={ - 'server.init_args.port': 777 - } -) -pipeline = parser.parse_pipeline() -print(pipeline) -print(pipeline.steps) -print(pipeline.steps['server'].port) - -server = parser.parse_step('server') -print(server) -print(server.port) diff --git a/experimental/cli/mycode.py b/experimental/cli/mycode.py deleted file mode 100644 index 5da07624..00000000 --- a/experimental/cli/mycode.py +++ /dev/null @@ -1,35 +0,0 @@ -# from dataclasses import dataclass -from itwinai.components import BaseComponent - - -class ServerOptions(BaseComponent): - host: str - port: int - - def __init__(self, host: str, port: int) -> None: - self.host = host - self.port = port - - def execute(): - ... - - -class ClientOptions(BaseComponent): - url: str - - def __init__(self, url: str) -> None: - self.url = url - - def execute(): - ... - - -class ServerOptions2(BaseComponent): - host: str - port: int - - def __init__(self, client: ClientOptions) -> None: - self.client = client - - def execute(): - ... diff --git a/experimental/cli/parser-bk.py b/experimental/cli/parser-bk.py deleted file mode 100644 index 8f87bf37..00000000 --- a/experimental/cli/parser-bk.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Provide functionalities to manage configuration files, including parsing, -execution, and dynamic override of fields. -""" - -from typing import Any -from jsonargparse import ArgumentParser, ActionConfigFile, Namespace - -from .components import BaseComponent - - -class ItwinaiCLI: - _parser: ArgumentParser - pipeline: BaseComponent - - def __init__( - self, - pipeline_nested_key: str = "pipeline", - args: Any = None, - parser_mode: str = "omegaconf" - ) -> None: - self.pipeline_nested_key = pipeline_nested_key - self.args = args - self.parser_mode = parser_mode - self._init_parser() - self._parse_args() - pipeline_inst = self._parser.instantiate_classes(self._config) - self.pipeline = pipeline_inst[self.pipeline_nested_key] - - def _init_parser(self): - self._parser = ArgumentParser(parser_mode=self.parser_mode) - self._parser.add_argument( - "-c", "--config", action=ActionConfigFile, - required=True, - help="Path to a configuration file in json or yaml format." - ) - self._parser.add_subclass_arguments( - baseclass=BaseComponent, - nested_key=self.pipeline_nested_key - ) - - def _parse_args(self): - if isinstance(self.args, (dict, Namespace)): - self._config = self._parser.parse_object(self.args) - else: - self._config = self._parser.parse_args(self.args) diff --git a/experimental/cli/parser.py b/experimental/cli/parser.py deleted file mode 100644 index f400466f..00000000 --- a/experimental/cli/parser.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Example of dynamic override of config files with (sub)class arguments, -and variable interpolation with omegaconf. 
- -Run with: ->>> python parser.py - -Or (after clearing the arguments in parse_args(...)): ->>> python parser.py --config example.yaml --server.port 212 -See the help page of each class: ->>> python parser.py --server.help mycode.ServerOptions -""" - -from jsonargparse import ArgumentParser, ActionConfigFile -from mycode import ServerOptions, ClientOptions - -if __name__ == "__main__": - parser = ArgumentParser(parser_mode="omegaconf") - parser.add_subclass_arguments(ServerOptions, "server") - parser.add_subclass_arguments(ClientOptions, "client") - parser.add_argument("--config", action=ActionConfigFile) - - # Example of dynamic CLI override - # cfg = parser.parse_args(["--config=example.yaml", "--server.port=212"]) - cfg = parser.parse_args() - cfg = parser.instantiate_classes(cfg) - print(cfg.client) - print(cfg.client.url) - print(cfg.server.port) diff --git a/experimental/cluster.py b/experimental/cluster.py deleted file mode 100644 index 78ae8ead..00000000 --- a/experimental/cluster.py +++ /dev/null @@ -1,97 +0,0 @@ -import abc -import os -import time - -from lightning.pytorch.plugins.environments import ( - ClusterEnvironment as LightningClusterEnvironment, - SLURMEnvironment as LightningSLURMEnvironment, - TorchElasticEnvironment as LightningTorchElasticEnvironment, - LightningEnvironment -) - - -class ClusterEnvironment(LightningClusterEnvironment): - @abc.abstractmethod - def num_nodes(self) -> int: - """Returns the number of nodes allocated for the current job.""" - - @abc.abstractmethod - def job_id(self) -> str: - """Returns the current job ID inferred from the cluster.""" - - -class SLURMEnvironment(LightningSLURMEnvironment): - def num_nodes(self) -> int: - """Returns the number of nodes allocated for the current job.""" - if os.environ.get('SLURM_JOB_NUM_NODES'): - return int(os.environ['SLURM_JOB_NUM_NODES']) - if os.environ.get('SLURM_NNODES'): - return int(os.environ['SLURM_NNODES']) - raise RuntimeError('Number of nodes not found in SLURM env variables') - - def job_id(self) -> str: - """Returns the current job ID inferred from the cluster.""" - return os.environ['SLURM_JOB_ID'] - - -class TorchElasticEnvironment(LightningTorchElasticEnvironment): - def num_nodes(self) -> int: - """Returns the number of nodes allocated for the current job.""" - gwsize = int(os.environ['WORLD_SIZE']) - lwsize = int(os.environ['LOCAL_WORLD_SIZE']) - return gwsize//lwsize - - def job_id(self) -> str: - """Returns the current job ID inferred from the cluster.""" - return os.environ['TORCHELASTIC_RUN_ID'] - - -class LocalEnvironment(LightningEnvironment): - - _job_id: str = None - - def world_size(self) -> int: - # if os.environ.get('WORLD_SIZE'): - # return int(os.environ.get('WORLD_SIZE')) - print( - "WARNING: world_size() method in 'LocalEnvironment' returns " - f"a fixed-value placeholder world_size={self._world_size}. " - "Use it carefully!" - ) - return self._world_size - - def global_rank(self) -> int: - # if os.environ.get('RANK'): - # return int(os.environ.get('RANK')) - print( - "WARNING: global_rank() method in 'LocalEnvironment' returns " - f"a fixed-value placeholder global_rank={self._global_rank}. " - "Use it carefully!" 
- ) - return self._global_rank - - def num_nodes(self) -> int: - """Returns the number of nodes allocated for the current job.""" - return 1 - - def job_id(self) -> str: - """Returns the current job ID inferred from the cluster.""" - if self._job_id is None: - self._job_id = str(time.time()) - return self._job_id - - -def detect_cluster() -> ClusterEnvironment: - """Defines a protocol to select the ClusterEnvironment - depending on availability and priority. - """ - - if SLURMEnvironment.detect(): - cluster = SLURMEnvironment() - elif TorchElasticEnvironment.detect(): - cluster = TorchElasticEnvironment() - elif LocalEnvironment.detect(): - cluster = LocalEnvironment() - else: - raise NotImplementedError("Unrecognized cluster env") - return cluster diff --git a/experimental/distrib_launcher.py b/experimental/distrib_launcher.py deleted file mode 100644 index d8f4e881..00000000 --- a/experimental/distrib_launcher.py +++ /dev/null @@ -1,117 +0,0 @@ -import os - -import torch -from torch import nn -from torch.utils.data import DataLoader, Dataset - -from strategy import Strategy, DDPStrategy -from launcher import DummyTorchElasticLauncher, TorchElasticLauncher -from launcher_factory import ( - LauncherFactory, - SimpleLauncherFactory, - TorchElasticLauncherFactory -) -from distributed_tools import DistributedTooling - - -class UniformRndDataset(Dataset): - def __init__(self, x_size: int, y_size: int, len: int = 100): - super().__init__() - self.x_size = x_size - self.y_size = y_size - self.len = len - - def __len__(self): - return self.len - - def __getitem__(self, index): - return torch.rand(self.x_size), torch.rand(self.y_size) - - -def trainer_entrypoint_fn(a, strategy: Strategy): - """Dummy training function.""" - strategy.setup() - print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " - f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") - - # Local model - model = nn.Linear(3, 4) - optim = torch.optim.Adam(model.parameters(), lr=1e-3) - loss_fn = nn.MSELoss() - # Distributed model - model: nn.Module = strategy.distribute_model(model) - optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) - - # Data - train_set = UniformRndDataset(x_size=3, y_size=4) - train_loader = DataLoader(train_set, batch_size=10, num_workers=1) - # Distributed dataloader - train_loader: DataLoader = strategy.distribute_dataloader(train_loader) - - for epoch in range(2): - for (x, y) in train_loader: - # print(f"tensor to cuda:{strategy.device}") - x = x.to(strategy.device) - y = y.to(strategy.device) - - optim.zero_grad() - y_pred = model(x) - loss = loss_fn(y_pred, y) - loss.backward() - optim.step() - - if strategy.is_main_worker(): - print(f"Loss [epoch={epoch}]: {loss.item()}") - - strategy.teardown() - return 123 - - -LAUNCHER = 'torch-elastic-no' -STRATEGY = 'ddp' - -RUN_ID = "my_run_id" -MIN_NODES = 1 -MAX_NODES = 1 -NPROC_PRE_NODE = 4 -MAX_RESTARTS = 2 - -if __name__ == "__main__": - # # STRATEGY BUILDER - - # # Instantiate Launcher Factory - # # launcher = DummyTorchElasticLauncher( - # # n_workers_per_node=NPROC_PRE_NODE, - # # min_nodes=MIN_NODES, - # # max_nodes=MAX_NODES - # # ) - # # launcher = TorchElasticLauncher( - # # rdzv_id=RUN_ID, - # # nproc_per_node=NPROC_PRE_NODE, - # # nnodes=f"{MIN_NODES}:{MAX_NODES}", - # # max_restarts=MAX_RESTARTS - # # ) - # if LAUNCHER == 'torch-elastic': - # launcher_builder: LauncherFactory = TorchElasticLauncherFactory() - # else: - # launcher_builder: LauncherFactory = SimpleLauncherFactory() - - # # Instantiate 
launcher - # launcher = launcher_builder.createLauncher( - # n_workers_per_node=NPROC_PRE_NODE - # ) - - # # Instantiate Strategy - # if (STRATEGY == 'ddp' - # and torch.cuda.is_available() - # and torch.cuda.device_count() > 1): - # strategy = DDPStrategy(cluster=None, backend='nccl') - # else: - # raise NotImplementedError - - dist_tools = DistributedTooling(n_workers_per_node=NPROC_PRE_NODE) - launcher, strategy = dist_tools.getTools('ddp') - - # CLIENT CODE - # Launch training from launcher - launcher.run(func=trainer_entrypoint_fn, args=("foobar", strategy)) diff --git a/experimental/distributed_tools.py b/experimental/distributed_tools.py deleted file mode 100644 index 83bf241f..00000000 --- a/experimental/distributed_tools.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import Tuple -import abc - -from launcher import Launcher -from strategy import Strategy, DDPStrategy -from launcher_factory import TorchElasticLauncherFactory - - -class Assembler(abc.ABC): - """Abstract Assembler class.""" - - -class DistributedTooling(Assembler): - """ - Assembles a set of objects used to enable distributed ML. - Suggests working presets of Launcher and Strategy, providing - an easy entry point for the end user. - """ - - def __init__(self, n_workers_per_node: int = 1) -> None: - super().__init__() - self.n_workers_per_node = n_workers_per_node - - def getTools(self, strategy: str) -> Tuple[Launcher, Strategy]: - if strategy == 'ddp': - return self.getTorchDDPTools() - if strategy == 'deepspeed': - return self.getDeepSpeedTools() - if strategy == 'horovod': - return self.getHorovodTools() - raise ValueError(f"Unrecognized strategy={strategy}") - - def getTorchDDPTools(self) -> Tuple[Launcher, Strategy]: - """ - Returns a suggested preset of Launcher + Strategy - for torch distributed data parallel. - """ - import torch - if not torch.cuda.is_available(): - raise RuntimeError( - "Torch DDP cannot be used. GPUs not available." - ) - if not torch.cuda.device_count() > 1: - raise RuntimeError( - "Torch DDP cannot be used. Only one GPU is available." - ) - launcher_builder = TorchElasticLauncherFactory() - elastic_launcher = launcher_builder.createLauncher( - n_workers_per_node=self.n_workers_per_node - ) - strategy = DDPStrategy(backend='nccl') - return elastic_launcher, strategy - - def getDeepSpeedTools(self) -> Tuple[Launcher, Strategy]: - """ - Returns a suggested preset of Launcher + Strategy - for DeepSpeed distributed ML. - """ - # TODO: complete - raise NotImplementedError - - def getHorovodTools(self) -> Tuple[Launcher, Strategy]: - """ - Returns a suggested preset of Launcher + Strategy - for Horovod distributed ML. 
- """ - # TODO: complete - raise NotImplementedError diff --git a/experimental/example_0.py b/experimental/example_0.py deleted file mode 100644 index 5a67cfd8..00000000 --- a/experimental/example_0.py +++ /dev/null @@ -1,125 +0,0 @@ -""" -Run this with torchrun -""" - -import os - -import torch -from torch import nn -from torch.utils.data import DataLoader, Dataset - -from strategy import Strategy, DDPStrategy, HorovodStrategy - - -class UniformRndDataset(Dataset): - def __init__(self, x_size: int, y_size: int, len: int = 100): - super().__init__() - self.x_size = x_size - self.y_size = y_size - self.len = len - - def __len__(self): - return self.len - - def __getitem__(self, index): - return torch.rand(self.x_size), torch.rand(self.y_size) - - -def trainer_entrypoint_fn(a, strategy: Strategy): - """Dummy training function.""" - strategy.setup() - print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " - f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") - - # Local model - model = nn.Linear(3, 4) - optim = torch.optim.Adam(model.parameters(), lr=1e-3) - loss_fn = nn.MSELoss() - # Distributed model - model: nn.Module = strategy.distribute_model(model) - optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) - - # Data - train_set = UniformRndDataset(x_size=3, y_size=4) - train_loader = DataLoader(train_set, batch_size=10, num_workers=1) - # Distributed dataloader - train_loader: DataLoader = strategy.distribute_dataloader(train_loader) - - for epoch in range(2): - for (x, y) in train_loader: - # print(f"tensor to cuda:{strategy.device}") - x = x.to(strategy.device) - y = y.to(strategy.device) - - optim.zero_grad() - y_pred = model(x) - loss = loss_fn(y_pred, y) - loss.backward() - optim.step() - - if strategy.is_main_worker(): - print(f"Loss [epoch={epoch}]: {loss.item()}") - - strategy.teardown() - return 123 - - -def trainer_entrypoint_fn_mario(a, strategy: Strategy): - """Dummy training function.""" - - print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " - f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") - - # Local model - model = nn.Linear(3, 4) - optim = torch.optim.Adam(model.parameters(), lr=1e-3) - loss_fn = nn.MSELoss() - # Data - train_set = UniformRndDataset(x_size=3, y_size=4) - train_loader = DataLoader(train_set, batch_size=10, num_workers=1) - - strategy.setup(model, train_set, optim) - # Distributed model - model: nn.Module = strategy.distribute_model(model) - optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) - # Distributed dataloader - train_loader: DataLoader = strategy.distribute_dataloader(train_loader) - - for epoch in range(2): - for (x, y) in train_loader: - # print(f"tensor to cuda:{strategy.device}") - x = x.to(strategy.device) - y = y.to(strategy.device) - - optim.zero_grad() - y_pred = model(x) - loss = loss_fn(y_pred, y) - loss.backward() - optim.step() - - if strategy.is_main_worker(): - print(f"Loss [epoch={epoch}]: {loss.item()}") - - strategy.teardown() - return 123 - - -STRATEGY = 'ddp' - - -if __name__ == "__main__": - - # Instantiate Strategy - if STRATEGY == 'ddp': - if (not torch.cuda.is_available() - or not torch.cuda.device_count() > 1): - raise RuntimeError('Resources unavailable') - - strategy = DDPStrategy(cluster=None, backend='nccl') - elif STRATEGY == 'horovod': - strategy = HorovodStrategy() - else: - raise NotImplementedError - - # Launch distributed training - trainer_entrypoint_fn("foobar", strategy) diff --git a/experimental/example_1.py 
b/experimental/example_1.py deleted file mode 100644 index 3cc2e452..00000000 --- a/experimental/example_1.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -Introduction of launcher. Torchrun is not needed anymore. -""" -import os - -import torch -from torch import nn -from torch.utils.data import DataLoader, Dataset - -from strategy import Strategy, DDPStrategy, HorovodStrategy -from launcher import TorchElasticLauncher, SimpleLauncher - - -class UniformRndDataset(Dataset): - def __init__(self, x_size: int, y_size: int, len: int = 100): - super().__init__() - self.x_size = x_size - self.y_size = y_size - self.len = len - - def __len__(self): - return self.len - - def __getitem__(self, index): - return torch.rand(self.x_size), torch.rand(self.y_size) - - -def trainer_entrypoint_fn(a, strategy: Strategy): - """Dummy training function.""" - strategy.setup() - print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " - f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") - - # Local model - model = nn.Linear(3, 4) - optim = torch.optim.Adam(model.parameters(), lr=1e-3) - loss_fn = nn.MSELoss() - # Distributed model - model: nn.Module = strategy.distribute_model(model) - optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) - - # Data - train_set = UniformRndDataset(x_size=3, y_size=4) - train_loader = DataLoader(train_set, batch_size=10, num_workers=1) - # Distributed dataloader - train_loader: DataLoader = strategy.distribute_dataloader(train_loader) - - for epoch in range(2): - for (x, y) in train_loader: - # print(f"tensor to cuda:{strategy.device}") - x = x.to(strategy.device) - y = y.to(strategy.device) - - optim.zero_grad() - y_pred = model(x) - loss = loss_fn(y_pred, y) - loss.backward() - optim.step() - - if strategy.is_main_worker(): - print(f"Loss [epoch={epoch}]: {loss.item()}") - - strategy.teardown() - return 123 - - -LAUNCHER = 'torch-elastic' -STRATEGY = 'ddp' -RUN_ID = "my_run_id" -MIN_NODES = 1 -MAX_NODES = 1 -NPROC_PRE_NODE = 4 -MAX_RESTARTS = 2 - -if __name__ == "__main__": - - # Instantiate Launcher Factory - if LAUNCHER == 'torch-elastic': - launcher = TorchElasticLauncher( - rdzv_id=RUN_ID, - nproc_per_node=NPROC_PRE_NODE, - nnodes=f"{MIN_NODES}:{MAX_NODES}", - max_restarts=MAX_RESTARTS - ) - elif LAUNCHER == 'simple-launcher': - launcher = SimpleLauncher( - nproc_per_node=NPROC_PRE_NODE - ) - else: - raise NotImplementedError - - # Instantiate Strategy - if STRATEGY == 'ddp': - if (not torch.cuda.is_available() - or not torch.cuda.device_count() > 1): - raise RuntimeError('Resources unavailable') - - strategy = DDPStrategy(cluster=None, backend='nccl') - elif STRATEGY == 'horovod': - strategy = HorovodStrategy() - else: - raise NotImplementedError - - # CLIENT CODE - # Launch training from launcher - launcher.run(func=trainer_entrypoint_fn, args=("foobar", strategy)) diff --git a/experimental/example_2.py b/experimental/example_2.py deleted file mode 100644 index 14685753..00000000 --- a/experimental/example_2.py +++ /dev/null @@ -1,107 +0,0 @@ -""" -Unified interface for launchers. -Most of the complexity is hidden inside "factory" classes. 
-""" - -import os - -import torch -from torch import nn -from torch.utils.data import DataLoader, Dataset - -from strategy import Strategy, DDPStrategy, HorovodStrategy -from launcher_factory import ( - LauncherFactory, - SimpleLauncherFactory, - TorchElasticLauncherFactory -) - - -class UniformRndDataset(Dataset): - def __init__(self, x_size: int, y_size: int, len: int = 100): - super().__init__() - self.x_size = x_size - self.y_size = y_size - self.len = len - - def __len__(self): - return self.len - - def __getitem__(self, index): - return torch.rand(self.x_size), torch.rand(self.y_size) - - -def trainer_entrypoint_fn(a, strategy: Strategy): - """Dummy training function.""" - strategy.setup() - print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " - f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") - - # Local model - model = nn.Linear(3, 4) - optim = torch.optim.Adam(model.parameters(), lr=1e-3) - loss_fn = nn.MSELoss() - # Distributed model - model: nn.Module = strategy.distribute_model(model) - optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) - - # Data - train_set = UniformRndDataset(x_size=3, y_size=4) - train_loader = DataLoader(train_set, batch_size=10, num_workers=1) - # Distributed dataloader - train_loader: DataLoader = strategy.distribute_dataloader(train_loader) - - for epoch in range(2): - for (x, y) in train_loader: - # print(f"tensor to cuda:{strategy.device}") - x = x.to(strategy.device) - y = y.to(strategy.device) - - optim.zero_grad() - y_pred = model(x) - loss = loss_fn(y_pred, y) - loss.backward() - optim.step() - - if strategy.is_main_worker(): - print(f"Loss [epoch={epoch}]: {loss.item()}") - - strategy.teardown() - return 123 - - -LAUNCHER = 'torch-elastic' -STRATEGY = 'ddp' -NPROC_PRE_NODE = 4 - -if __name__ == "__main__": - # STRATEGY BUILDER - - # Instantiate Launcher Factory - if LAUNCHER == 'torch-elastic': - launcher_builder: LauncherFactory = TorchElasticLauncherFactory() - elif LAUNCHER == 'simple-launcher': - launcher_builder: LauncherFactory = SimpleLauncherFactory() - else: - raise NotImplementedError - - # Instantiate launcher - launcher = launcher_builder.createLauncher( - n_workers_per_node=NPROC_PRE_NODE - ) - - # Instantiate Strategy - if STRATEGY == 'ddp': - if (not torch.cuda.is_available() - or not torch.cuda.device_count() > 1): - raise RuntimeError('Resources unavailable') - - strategy = DDPStrategy(cluster=None, backend='nccl') - elif STRATEGY == 'horovod': - strategy = HorovodStrategy() - else: - raise NotImplementedError - - # CLIENT CODE - # Launch training from launcher - launcher.run(func=trainer_entrypoint_fn, args=("foobar", strategy)) diff --git a/experimental/example_3.py b/experimental/example_3.py deleted file mode 100644 index d38dd78c..00000000 --- a/experimental/example_3.py +++ /dev/null @@ -1,77 +0,0 @@ -""" -Hide the selection of launcher and strategy inside a class. 
-""" -import os - -import torch -from torch import nn -from torch.utils.data import DataLoader, Dataset - -from strategy import Strategy -from distributed_tools import DistributedTooling - - -class UniformRndDataset(Dataset): - def __init__(self, x_size: int, y_size: int, len: int = 100): - super().__init__() - self.x_size = x_size - self.y_size = y_size - self.len = len - - def __len__(self): - return self.len - - def __getitem__(self, index): - return torch.rand(self.x_size), torch.rand(self.y_size) - - -def trainer_entrypoint_fn(a, strategy: Strategy): - """Dummy training function.""" - strategy.setup() - print(f"{a}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " - f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") - - # Local model - model = nn.Linear(3, 4) - optim = torch.optim.Adam(model.parameters(), lr=1e-3) - loss_fn = nn.MSELoss() - # Distributed model - model: nn.Module = strategy.distribute_model(model) - optim: torch.optim.Optimizer = strategy.distribute_optimizer(optim) - - # Data - train_set = UniformRndDataset(x_size=3, y_size=4) - train_loader = DataLoader(train_set, batch_size=10, num_workers=1) - # Distributed dataloader - train_loader: DataLoader = strategy.distribute_dataloader(train_loader) - - for epoch in range(2): - for (x, y) in train_loader: - # print(f"tensor to cuda:{strategy.device}") - x = x.to(strategy.device) - y = y.to(strategy.device) - - optim.zero_grad() - y_pred = model(x) - loss = loss_fn(y_pred, y) - loss.backward() - optim.step() - - if strategy.is_main_worker(): - print(f"Loss [epoch={epoch}]: {loss.item()}") - - strategy.teardown() - return 123 - - -STRATEGY = 'ddp' -NPROC_PRE_NODE = 4 - - -if __name__ == "__main__": - dist_tools = DistributedTooling(n_workers_per_node=NPROC_PRE_NODE) - launcher, strategy = dist_tools.getTools('ddp') - - # CLIENT CODE - # Launch training from launcher - launcher.run(func=trainer_entrypoint_fn, args=("foobar", strategy)) diff --git a/experimental/launcher.py b/experimental/launcher.py deleted file mode 100644 index d9733b8f..00000000 --- a/experimental/launcher.py +++ /dev/null @@ -1,295 +0,0 @@ -import datetime -import os -import shutil -import abc -import time -import uuid -from typing import Callable, Tuple, Any, Union, List, Optional - -from torch.distributed.elastic.agent.server.local_elastic_agent import ( - LocalElasticAgent -) -from torch.distributed.elastic.agent.server import WorkerSpec -from torch.distributed.elastic.rendezvous.dynamic_rendezvous import ( - DynamicRendezvousHandler -) -from torch.distributed.elastic.rendezvous.c10d_rendezvous_backend import ( - C10dRendezvousBackend -) -from torch.distributed import TCPStore -from torch.distributed.elastic.multiprocessing import Std, start_processes - -from torch.distributed.launcher.api import LaunchConfig, elastic_launch -from torch.distributed.run import config_from_args - -from cluster import ClusterEnvironment, detect_cluster - - -class Launcher(abc.ABC): - cluster: ClusterEnvironment - - @abc.abstractmethod - def run(self, *args) -> Any: - """Launches the distributed execution.""" - - -class DummyTorchElasticLauncher(Launcher): - """Simplified Torch Elastic launcher.""" - - def __init__( - self, - cluster: Optional[ClusterEnvironment] = None, - n_workers_per_node: int = 1, - min_nodes: int = 1, - max_nodes: int = 1, - max_restarts: int = 1 - ) -> None: - super().__init__() - # detect_cluster() is preferred - self.cluster = cluster if cluster is not None else detect_cluster() - print(f"DummyTorchElasticLauncher with 
cluster '{self.cluster}'") - self.n_workers_per_node = n_workers_per_node - self.min_nodes = min_nodes - self.max_nodes = max_nodes - self.max_restarts = max_restarts - self.run_id = str(time.time()) - - if cluster.creates_processes_externally and n_workers_per_node > 1: - print("WARNING: the cluster may already spawn worker " - "processes for you... Consider setting " - "'n_workers_per_node=1'") - - g_world_size = cluster.num_nodes() * self.n_workers_per_node - - store = TCPStore( - host_name=cluster.main_address, - port=cluster.main_port, # could conflict! - world_size=g_world_size, - is_master=cluster.global_rank() == 0, - timeout=datetime.timedelta(seconds=3) - ) - backend = C10dRendezvousBackend(store, self.run_id) - self.rdzv_handler = DynamicRendezvousHandler.from_backend( - run_id=self.run_id, - store=store, - backend=backend, - min_nodes=self.min_nodes, - max_nodes=self.max_nodes - ) - - def run( - self, - func: Callable, - args: Tuple = (), - redirect: bool = False, - log_dir: str = 'launcher_logs', - tee_ranks: Union[str, int, List[int]] = None - ) -> List[Any]: - """Launches the distributed execution with Torch Elastic.""" - # Suppress all printing to console: - # redirects={0: Std.ALL} # do no print, but save to file. - # linked to Agent's log_dir - redirects = Std.ALL if redirect else Std.NONE - - # Fore back printing to console, while redirecting to file - # tee={0: Std.ALL} reactivates print to console + save to - # log file for RANK 0 - if tee_ranks == 'all': - tee = Std.ALL - elif tee_ranks is None: - tee = Std.NONE - elif isinstance(tee_ranks, int): - tee = {tee_ranks: Std.ALL} - elif isinstance(tee_ranks, list): - # tee_ranks is a list of int - tee = {rnk: Std.ALL for rnk in tee_ranks} - else: - raise ValueError(f"unrecognized 'tee_ranks={tee_ranks}'") - - spec = WorkerSpec( - role="worker", - local_world_size=self.n_workers_per_node, - entrypoint=func, - args=args, - rdzv_handler=self.rdzv_handler, - max_restarts=self.max_restarts, - # monitor_interval=monitor_interval, - redirects=redirects, - tee=tee - ) - - agent = LocalElasticAgent(spec, start_method="spawn", log_dir=log_dir) - # try: - run_result = agent.run() - if run_result.is_failed(): - print(f"worker 0 failed with: {run_result.failures[0]}") - result = None - else: - print(f"worker 0 return value is: {run_result.return_values[0]}") - result = run_result.return_values - # except Exception ex: - # # handle exception - return result - - -class TorchElasticLauncher(Launcher): - """ - Official Torch Elastic launcher. - Does NOT support passing values as environment variables. 
- - Adapted from: - https://github.com/pytorch/pytorch/blob/main/torch/distributed/run.py - """ - - def __init__( - self, - nnodes: str = '1:1', - nproc_per_node: str = '1', - rdzv_backend: str = 'static', - rdzv_endpoint: str = '', - rdzv_id: str = 'none', - rdzv_conf: str = '', - standalone: bool = False, - max_restarts: int = 0, - monitor_interval: float = 5, - start_method: str = 'spawn', - role: str = 'default', - module: bool = False, - no_python: bool = False, - run_path: bool = False, - log_dir: Optional[str] = None, - redirects: str = '0', - tee: str = '0', - node_rank: int = 0, - master_addr: str = "127.0.0.1", - master_port: int = 29500, - local_addr: Optional[str] = None - ) -> None: - super().__init__() - # emulate CLI args - # TODO: include logic for 'action=check_env' or 'action=env' - self.nnodes = nnodes - self.nproc_per_node = nproc_per_node - self.rdzv_backend = rdzv_backend - self.rdzv_endpoint = rdzv_endpoint - self.rdzv_id = rdzv_id - self.rdzv_conf = rdzv_conf - self.standalone = standalone - self.max_restarts = max_restarts - self.monitor_interval = monitor_interval - self.start_method = start_method - self.role = role - self.module = module - self.no_python = no_python - self.run_path = run_path - self.log_dir = log_dir - self.redirects = redirects - self.tee = tee - self.node_rank = node_rank - self.master_addr = master_addr - self.master_port = master_port - self.local_addr = local_addr - # Placeholders - self.training_script = "placeholder.py" - self.training_script_args = [] - - def config_from_args( - self - ) -> Tuple[LaunchConfig, Union[Callable, str], List[str]]: - return config_from_args(self) - - def run( - self, - func: Callable, - args: Tuple = () - ) -> Any: - if self.standalone: - self.rdzv_backend = "c10d" - self.rdzv_endpoint = "localhost:29400" - self.rdzv_id = str(uuid.uuid4()) - # log.info( - # f"\n**************************************\n" - # f"Rendezvous info:\n" - # f"--rdzv_backend={self.rdzv_backend} " - # f"--rdzv_endpoint={self.rdzv_endpoint} " - # f"--rdzv_id={self.rdzv_id}\n" - # f"**************************************\n" - # ) - - config, _, _ = self.config_from_args() - elastic_launch( - config=config, - entrypoint=func, - )(*args) - - -class SimpleLauncher(Launcher): - """Simple launcher based on multiprocessing. - Use ONLY for single node applications. 
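TorchElasticLauncher above drives torchrun's machinery by emulating its CLI arguments. For reference, the same launch can be expressed directly against torch.distributed.launcher.api, as in this rough sketch; field names and defaults may vary slightly between torch versions, so treat it as illustrative rather than a drop-in replacement.

from torch.distributed.launcher.api import LaunchConfig, elastic_launch


def elastic_run(func, args=(), nproc_per_node=4, run_id='my_run_id'):
    # Build the LaunchConfig directly instead of emulating torchrun CLI args.
    config = LaunchConfig(
        min_nodes=1,
        max_nodes=1,
        nproc_per_node=nproc_per_node,
        run_id=run_id,
        rdzv_backend='c10d',
        rdzv_endpoint='localhost:29400',
        max_restarts=2,
    )
    # elastic_launch returns a callable; invoking it spawns the workers and
    # returns a dict mapping local rank to the entrypoint's return value.
    return elastic_launch(config=config, entrypoint=func)(*args)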
- """ - - def __init__( - self, - nproc_per_node: int, - run_id: Optional[str] = None, - master_addr: str = "127.0.0.1", - master_port: int = 29500 - ) -> None: - super().__init__() - self.nproc_per_node = nproc_per_node - self.run_id = run_id if run_id is not None else f"RunID:{time.time()}" - self.master_addr = master_addr - self.master_port = master_port - self.log_dir = f'{self.__class__.__name__}_logs' - if os.path.exists(self.log_dir): - shutil.rmtree(self.log_dir) - os.makedirs(self.log_dir) - - def run( - self, - func: Callable, - args: Tuple = () - ) -> Any: - # Adapted from: - # https://pytorch.org/docs/stable/elastic/multiprocessing.html - w_args = {i: args for i in range(self.nproc_per_node)} - # Emulates the env variables set by torch Elastic - w_envs = { - i: dict( - RANK=str(i), - LOCAL_RANK=str(i), - GROUP_RANK=str(0), - ROLE_RANK=str(i), - WORLD_SIZE=str(self.nproc_per_node), - LOCAL_WORLD_SIZE=str(self.nproc_per_node), - ROLE_WORLD_SIZE=str(self.nproc_per_node), - TORCHELASTIC_RUN_ID=str(self.run_id), - MASTER_ADDR=str(self.master_addr), - MASTER_PORT=str(self.master_port) - ) - for i in range(self.nproc_per_node) - } - ctx = start_processes( - name=self.__class__.__name__, - entrypoint=func, - args=w_args, - envs=w_envs, - log_dir=self.log_dir - ) - ctx.wait() - return ctx.return_values - - -class DeepSpeedLauncher(Launcher): - """Official DeepSpeed launcher.""" - - def __init__(self) -> None: - super().__init__() - - def run( - self, - func: Callable, - args: Tuple = () - ) -> Any: - # TODO: complete - raise NotImplementedError diff --git a/experimental/launcher_factory.py b/experimental/launcher_factory.py deleted file mode 100644 index fce12a0c..00000000 --- a/experimental/launcher_factory.py +++ /dev/null @@ -1,144 +0,0 @@ -""" -Factories to instantiate Launcher classes. -They introduce a level of indirection to provide a unified interface -for all the launchers. The common interface is provided by the -`createLauncher` factory method. -""" - -from typing import Optional, Dict, Any -import abc - -from launcher import ( - Launcher, - TorchElasticLauncher, - SimpleLauncher, - DeepSpeedLauncher -) -from cluster import detect_cluster - - -class LauncherFactory(abc.ABC): - """ - Factory class to instantiate a Launcher classes. - It introduces a level of indirection to provide a unified interface - for all the launchers. The common interface is provided by the - `createLauncher` factory method. - """ - - def createLauncher( - self, - n_workers_per_node: int, - run_id: Optional[str] = None, - master_addr: Optional[str] = None, - master_port: Optional[int] = None, - **kwargs - ) -> Launcher: - """ - Simplifies the instantiation of a Launcher. - Advanced configuration is pre-computed in the body - of this method, leaving few parameters to the end user. - """ - - -class TorchElasticLauncherFactory(LauncherFactory): - """Factory class to instantiate a TorchElasticLauncher class.""" - - def createLauncher( - self, - n_workers_per_node: int, - run_id: Optional[str] = None, - master_addr: Optional[str] = None, - master_port: Optional[int] = None, - **kwargs - ) -> Launcher: - """ - Simplifies the instantiation of a TorchElasticLauncher. - Advanced configuration is pre-computed in the body - of this method, leaving few parameters to the end user. 
- """ - cluster = detect_cluster() - - kwargs['nproc_per_node'] = n_workers_per_node - # If given, propagate the args - if run_id: - kwargs['rdzv_id'] = run_id - if master_addr: - kwargs['master_addr'] = master_addr - if master_port: - kwargs['master_port'] = master_port - - # Compute and add TorchElastic specific args, if not - # provided as **kwargs - n_nodes = cluster.num_nodes() - safe_add(kwargs, 'nnodes', f"{n_nodes}:{n_nodes}") - safe_add(kwargs, 'rdzv_id', cluster.job_id()) - is_host_flag = '1' if cluster.node_rank() == 0 else '0' - safe_add(kwargs, 'rdzv_conf', f'is_host={is_host_flag}') - safe_add(kwargs, 'rdzv_backend', 'c10d') - safe_add( - kwargs, - 'rdzv_endpoint', - f'{cluster.main_address}:{cluster.main_port}' - ) - safe_add(kwargs, 'max_restarts', 3) - - return TorchElasticLauncher(**kwargs) - - -class SimpleLauncherFactory(LauncherFactory): - """Factory class to instantiate a SimpleLauncherFactory class.""" - - def createLauncher( - self, - n_workers_per_node: int, - run_id: Optional[str] = None, - master_addr: Optional[str] = None, - master_port: Optional[int] = None, - **kwargs - ) -> Launcher: - """ - Simplifies the instantiation of a SimpleLauncher. - Advanced configuration is pre-computed in the body - of this method, leaving few parameters to the end user. - """ - - kwargs['nproc_per_node'] = n_workers_per_node - # If given, propagate the args - if run_id: - kwargs['run_id'] = run_id - if master_addr: - kwargs['master_addr'] = master_addr - if master_port: - kwargs['master_port'] = master_port - - return SimpleLauncher(**kwargs) - - -class DeepSpeedLauncherFactory(LauncherFactory): - """Factory class to instantiate a DeepSpeedLauncher class.""" - - def createLauncher( - self, - n_workers_per_node: int, - run_id: Optional[str] = None, - master_addr: Optional[str] = None, - master_port: Optional[int] = None, - **kwargs - ) -> Launcher: - """ - Simplifies the instantiation of a DeepSpeedLauncher. - Advanced configuration is pre-computed in the body - of this method, leaving few parameters to the end user. - """ - # TODO: complete - raise NotImplementedError - return DeepSpeedLauncher(...) - - -def safe_add(map: Dict, key: str, value: Any) -> None: - """ - Add a key-value pair to a dict if the key - is not already present. 
- """ - if map.get(key) is None: - map[key] = value diff --git a/experimental/strategy.py b/experimental/strategy.py deleted file mode 100644 index 59dd7a4f..00000000 --- a/experimental/strategy.py +++ /dev/null @@ -1,150 +0,0 @@ -import os -import abc -from typing import Any, Optional - -import torch -from torch import nn -from torch.nn.parallel import DistributedDataParallel -from torch import optim -from torch.utils.data import DataLoader, DistributedSampler -from torch.distributed import init_process_group - -# from lightning.pytorch.plugins.environments import ClusterEnvironment -from cluster import ClusterEnvironment, detect_cluster - - -class Strategy(abc.ABC): - cluster: ClusterEnvironment - - @property - @abc.abstractmethod - def device(self) -> int: - """Device used by this worker""" - - @abc.abstractmethod - def setup(self) -> None: - """Setup the strategy once in a distributed environment.""" - - @abc.abstractmethod - def teardown(self) -> None: - """Frees the distributed strategy resources.""" - - @abc.abstractmethod - def is_main_worker(self) -> bool: - """Returns True if called from the main process of the pool.""" - - @abc.abstractmethod - def _is_env_setup(self) -> bool: - """Checks whether the distributed environment is correctly setup.""" - - @abc.abstractmethod - def distribute_model(self, model: Any) -> Any: - """Distributes a neural network.""" - - @abc.abstractmethod - def distribute_optimizer(self, optimizer: Any) -> Any: - """Distributes an optimizer.""" - - @abc.abstractmethod - def distribute_dataloader(self, dataloader: Any) -> Any: - """Distributes a dataloader.""" - - -class DDPStrategy(Strategy): - def __init__( - self, - backend: str = 'nccl', - cluster: Optional[ClusterEnvironment] = None - ) -> None: - super().__init__() - self.cluster = cluster - self.backend = backend - - @property - def device(self) -> int: - """Returns the local rank. Assumes one worker per GPU.""" - return self.cluster.local_rank() - - def setup(self, **kwargs) -> None: - """Setup the strategy in a distributed context.""" - if not self._is_env_setup(): - raise RuntimeError( - "Distributed environment not setup correctly. 
Use a launcher.") - - # detect_cluster() is preferred - if self.cluster is None: - self.cluster = detect_cluster() - print(f"DDPStrategy executed on '{self.cluster}' cluster") - - # Initializes the default distributed process group - # and the distributed package - init_process_group(backend=self.backend) - - def teardown(self) -> None: - torch.distributed.barrier() - torch.distributed.destroy_process_group() - - def _is_env_setup(self) -> bool: - if (os.environ.get('RANK') is not None): - # and torch.distributed.is_available()): - return True - return False - - def is_main_worker(self) -> bool: - return self.cluster.global_rank() == 0 - - def distribute_model(self, model: nn.Module) -> nn.Module: - model = model.to(f"cuda:{self.device}") - return DistributedDataParallel( - model, - device_ids=[self.device], - output_device=self.device - ) - - def distribute_optimizer( - self, - optimizer: optim.Optimizer - ) -> optim.Optimizer: - return optimizer - - def distribute_dataloader( - self, - dataloader: DataLoader, - shuffle: bool = True - ) -> DataLoader: - """Makes a torch DataLoader distributed by substituting its sampler.""" - sampler = DistributedSampler( - dataloader.dataset, - num_replicas=self.cluster.world_size(), - rank=self.cluster.global_rank(), - shuffle=shuffle - ) - # Recreate dataloader, with updated sampler - return DataLoader( - dataloader.dataset, - batch_size=dataloader.batch_size, - sampler=sampler, - num_workers=dataloader.num_workers, - collate_fn=dataloader.collate_fn, - pin_memory=dataloader.pin_memory, - drop_last=dataloader.drop_last, - timeout=dataloader.timeout, - worker_init_fn=dataloader.worker_init_fn, - multiprocessing_context=dataloader.multiprocessing_context, - generator=dataloader.generator, - prefetch_factor=dataloader.prefetch_factor, - persistent_workers=dataloader.persistent_workers, - pin_memory_device=dataloader.pin_memory_device - ) - - -class LocalStrategy(Strategy): - ... - - -class HorovodStrategy(Strategy): - ... - - -class DeepSpeedStrategy(Strategy): - ... 
diff --git a/experimental/trainer/DS_config.json b/experimental/trainer/DS_config.json deleted file mode 100644 index 544cab17..00000000 --- a/experimental/trainer/DS_config.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "train_micro_batch_size_per_gpu": 32, - "gradient_accumulation_steps": 1, - "optimizer": { - "type": "Adam", - "params": { - "lr": 0.01 - } - }, - "fp16": { - "enabled": false - }, - "zero_optimization": false -} - diff --git a/experimental/trainer/general_startscript b/experimental/trainer/general_startscript deleted file mode 100755 index 455466b4..00000000 --- a/experimental/trainer/general_startscript +++ /dev/null @@ -1,135 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=TorchTest -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job.out -#SBATCH --error=job.err -#SBATCH --time=00:15:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=4 -#SBATCH --ntasks-per-node=1 -#SBATCH --cpus-per-task=32 -#SBATCH --gpus-per-node=4 -#SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# parallelization strategy (DDP, HVD, DS) -strategy='DS' - -# parameters -debug=false # do debug -bs=32 # batch-size -epochs=1 # epochs -lr=0.01 # learning rate - -# AT -dataDir="/p/scratch/raise-ctp2/data_MNIST/" - -# set modules -ml --force purge - -ml Stages/2022 NVHPC/22.1 ParaStationMPI/5.5.0-1-mt NCCL/2.11.4-CUDA-11.5 cuDNN/8.3.1.22-CUDA-11.5 -ml Python/3.9.6 CMake HDF5 PnetCDF libaio/0.3.112 mpi-settings/CUDA - -# set env -source /p/project/intertwin/rakesh/T6.5-AI-and-ML/dist_trainer/envAI_hdfml/bin/activate - -# sleep a sec -sleep 1 - -# job info -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set comm -export CUDA_VISIBLE_DEVICES="0,1,2,3" -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" > 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi - -COMMAND="general_trainer_v2.py" - -#launch -if [[ $strategy == *"HVD"* ]]; -then - EXEC="$COMMAND \ - --strategy $strategy \ - --batch-size $bs \ - --epochs $epochs \ - --lr $lr \ - --data-dir $dataDir" - - srun --cpu-bind=none python3 -u $EXEC - -elif [[ $strategy == *"DDP"* ]]; -then - EXEC="$COMMAND \ - --strategy $strategy \ - --batch-size $bs \ - --epochs $epochs \ - --lr $lr \ - --nworker $SLURM_CPUS_PER_TASK \ - --data-dir $dataDir" - - srun --cpu-bind=none bash -c "torchrun \ - --log_dir='logs' \ - --nnodes=$SLURM_NNODES \ - --nproc_per_node=$SLURM_GPUS_PER_NODE \ - --rdzv_id=$SLURM_JOB_ID \ - --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ - --rdzv_backend=c10d \ - --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ - $EXEC" - -else - EXEC="$COMMAND \ - --strategy $strategy \ - --batch-size $bs \ - --epochs $epochs \ - --lr $lr \ - --nworker $SLURM_CPUS_PER_TASK \ - --data-dir $dataDir" - - #### do not change this part - # create node-list - sysN=$(eval "scontrol show hostnames") - for i in $sysN; do - 
x+=\"$i\":[$CUDA_VISIBLE_DEVICES], - done - WID=`echo {${x::-1}} | base64 -w 0` - - # modify config file with parameters - sed -i "2s|.*| \"train_micro_batch_size_per_gpu\": ${bs},|" DS_config.json - sed -i "7s|.*| \"lr\": ${lr}|" DS_config.json - #### - - # launch - srun python -m deepspeed.launcher.launch \ - --node_rank $SLURM_PROCID \ - --master_addr ${SLURMD_NODENAME}i \ - --master_port 29500 \ - --world_info $WID \ - $EXEC --deepspeed_mpi --deepspeed_config DS_config.json - -fi diff --git a/experimental/trainer/general_trainer.py b/experimental/trainer/general_trainer.py deleted file mode 100755 index 33c21ced..00000000 --- a/experimental/trainer/general_trainer.py +++ /dev/null @@ -1,482 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# author: RS, adapted from https://gitlab.jsc.fz-juelich.de/CoE-RAISE/FZJ/ai4hpc -# version: 211029a - -# std libs -from typing import Any, Union -import argparse -import sys -import os -import time -import numpy as np -import random -import abc - -# ml libs -import deepspeed -import torch -import torch.distributed as dist -import torch.nn as nn -import torch.nn.functional as F -from torchvision import datasets, transforms - -from itwinai.torch.distributed import ( - DDPDistributedStrategy_old, - DSDistributedStrategy_old, - HVDDistributedStrategy_old -) - -# parsed settings - - -def pars_ini(): - global args - parser = argparse.ArgumentParser(description='PyTorch MNIST Example') - - # IO parsers - parser.add_argument('--data-dir', default='./', - help='location of the training dataset in the local filesystem') - parser.add_argument('--restart-int', type=int, default=10, - help='restart interval per epoch (default: 10)') - - # model parsers - parser.add_argument('--strategy', type=str, default='DDP', - help='strategy for parallelization (DDP, HVD, DS)') - parser.add_argument('--batch-size', type=int, default=64, - help='input batch size for training (default: 64)') - parser.add_argument('--epochs', type=int, default=10, - help='number of epochs to train (default: 10)') - parser.add_argument('--lr', type=float, default=0.01, - help='learning rate (default: 0.01)') - parser.add_argument('--concM', type=int, default=100, - help='conc MNIST to this factor (default: 100)') - parser.add_argument('--momentum', type=float, default=0.5, - help='momentum in SGD optimizer (default: 0.5)') - parser.add_argument('--shuff', action='store_true', default=False, - help='shuffle dataset (default: False)') - - # debug parsers - parser.add_argument('--testrun', action='store_true', default=False, - help='do a test run with seed (default: False)') - parser.add_argument('--nseed', type=int, default=0, - help='seed integer for reproducibility (default: 0)') - parser.add_argument('--log-int', type=int, default=10, - help='log interval per training') - - # parallel parsers - parser.add_argument('--backend', type=str, default='nccl', - help='backend for parrallelisation (default: nccl)') - parser.add_argument('--nworker', type=int, default=0, - help='number of workers in DataLoader (default: 0 - only main)') - parser.add_argument('--prefetch', type=int, default=2, - help='prefetch data in DataLoader (default: 2)') - parser.add_argument('--no-cuda', action='store_true', default=False, - help='disables GPGPUs') - parser.add_argument('--local_rank', type=int, default=-1, - help='local rank passed from distributed launcher') - - try: - parser = deepspeed.add_config_arguments(parser) - except: - pass - - args = parser.parse_args() - - -class Net(nn.Module): - def 
__init__(self): - super(Net, self).__init__() - self.conv1 = nn.Conv2d(1, 10, kernel_size=5) - self.conv2 = nn.Conv2d(10, 20, kernel_size=5) - self.conv2_drop = nn.Dropout2d() - self.fc1 = nn.Linear(320, 50) - self.fc2 = nn.Linear(50, 10) - - def forward(self, x): - x = F.relu(F.max_pool2d(self.conv1(x), 2)) - x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) - x = x.view(-1, 320) - x = F.relu(self.fc1(x)) - x = F.dropout(x, training=self.training) - x = self.fc2(x) - return F.log_softmax(x) - -# train loop - - -def train(model, device, train_loader, optimizer, epoch, grank, gwsize, args): - model.train() - t_list = [] - loss_acc = 0 - if grank == 0: - print("\n") - for batch_idx, (data, target) in enumerate(train_loader): - t = time.perf_counter() - data, target = data.to(device), target.to(device) - optimizer.zero_grad() - output = model(data) - loss = F.nll_loss(output, target) - loss.backward() - optimizer.step() - if batch_idx % args.log_int == 0 and grank == 0: - print( - f'Train epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' - f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\tLoss: {loss.item():.6f}') - t_list.append(time.perf_counter() - t) - loss_acc += loss.item() - if grank == 0: - print('TIMER: train time', sum(t_list) / len(t_list), 's') - return loss_acc - -# test loop - - -def test(model, device, test_loader, grank, gwsize, args): - model.eval() - test_loss = 0 - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - data, target = data.to(device), target.to(device) - output = model(data) - # sum up batch loss - test_loss += F.nll_loss(output, target, reduction="sum").item() - # get the index of the max log-probability - pred = output.argmax(dim=1, keepdim=True) - correct += pred.eq(target.view_as(pred)).sum().item() - test_loss /= len(test_loader.dataset) - if grank == 0: - print( - f'Test set: average loss: {test_loss:.4f}\t' - f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') - acc_test = 100.0 * correct * gwsize / len(test_loader.dataset) - return acc_test - - -# save state of the training -def save_state(epoch, distrib_model, loss_acc, optimizer, res_name, grank, gwsize, is_best, my_trainer): - rt = time.time() - # find if is_best happened in any worker - if torch.cuda.is_available(): - is_best_m = my_trainer.par_allgather_obj(is_best, gwsize) - - if torch.cuda.is_available(): - if any(is_best_m): - # find which rank is_best happened - select first rank if multiple - is_best_rank = np.where(np.array(is_best_m) == True)[0][0] - - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - # write on worker with is_best - if grank == is_best_rank: - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') - else: - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} in {time.time()-rt} s') - - -# deterministic dataloader -def seed_worker(worker_id): - worker_seed = torch.initial_seed() % 2**32 - np.random.seed(worker_seed) - random.seed(worker_seed) - - -# -# -# MAIN -# -# -def main(): - # get parse args - pars_ini() - - # check CUDA availibility - args.cuda = not args.no_cuda and torch.cuda.is_available() - - # Strategy for distributed 
training - if args.strategy == 'DDP': - my_trainer = DDPDistributedStrategy_old() - - elif args.strategy == 'DS': - my_trainer = DSDistributedStrategy_old() - - elif args.strategy == 'HVD': - my_trainer = HVDDistributedStrategy_old() - - # limit # of CPU threads to be used per worker - torch.set_num_threads(1) - - # get directory - program_dir = os.getcwd() - - # start the time.time for profiling - st = time.time() - - # initializes the distributed backend which will take care of sychronizing nodes/GPUs - my_trainer.init_backend(backend=args.backend) - - # deterministic testrun - if args.testrun: - torch.manual_seed(args.nseed) - g = torch.Generator() - g.manual_seed(args.nseed) - - # get job rank info - rank==0 master gpu - if torch.cuda.is_available(): - # local world size - per node - lwsize = my_trainer.local_world_size() if args.cuda else 0 - gwsize = my_trainer.global_world_size() # global world size - per run - grank = my_trainer.dist_grank() # global rank - assign per run - lrank = my_trainer.dist_lrank() # local rank - assign per node - else: - gwsize = 1 - grank = 0 - - # some debug - if grank == 0: - print('TIMER: initialise:', time.time()-st, 's') - print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) - print('DEBUG: sys.version:', sys.version, '\n') - - print('DEBUG: IO parsers:') - print('DEBUG: args.data_dir:', args.data_dir) - print('DEBUG: args.restart_int:', args.restart_int, '\n') - - print('DEBUG: model parsers:') - print('DEBUG: args.batch_size:', args.batch_size) - print('DEBUG: args.epochs:', args.epochs) - print('DEBUG: args.lr:', args.lr) - print('DEBUG: args.concM:', args.concM) - print('DEBUG: args.momentum:', args.momentum) - print('DEBUG: args.shuff:', args.shuff, '\n') - - print('DEBUG: debug parsers:') - print('DEBUG: args.testrun:', args.testrun) - print('DEBUG: args.nseed:', args.nseed) - print('DEBUG: args.log_int:', args.log_int, '\n') - - print('DEBUG: parallel parsers:') - print('DEBUG: args.backend:', args.backend) - print('DEBUG: args.nworker:', args.nworker) - print('DEBUG: args.prefetch:', args.prefetch) - print('DEBUG: args.cuda:', args.cuda, '\n') - - # encapsulate the model on the GPU assigned to the current process - device = torch.device( - 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu', lrank) - if args.cuda: - torch.cuda.set_device(lrank) - # deterministic testrun - if args.testrun: - torch.cuda.manual_seed(args.nseed) - -# read data - data_dir = args.data_dir - mnist_scale = args.concM - largeData = [] - for i in range(mnist_scale): - largeData.append( - datasets.MNIST(data_dir, train=True, download=False, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - ) - - # concat data - train_dataset = torch.utils.data.ConcatDataset(largeData) - - mnist_scale = args.concM - largeData = [] - for i in range(mnist_scale): - largeData.append( - datasets.MNIST(data_dir, train=False, download=False, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - ) - - # concat data - test_dataset = torch.utils.data.ConcatDataset(largeData) - - # restricts data loading to a subset of the dataset exclusive to the current process - args.shuff = args.shuff and not args.testrun - if torch.cuda.is_available(): - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) - test_sampler = torch.utils.data.distributed.DistributedSampler( - test_dataset, 
num_replicas=gwsize, rank=grank, shuffle=args.shuff) - -# distribute dataset to workers - # persistent workers is not possible for nworker=0 - pers_w = True if args.nworker > 1 else False - - # deterministic testrun - the same dataset each run - kwargs = {'worker_init_fn': seed_worker, - 'generator': g} if args.testrun else {} - - if torch.cuda.is_available(): - train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) - test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, - sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs) - else: - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size) - test_loader = torch.utils.data.DataLoader( - test_dataset, batch_size=args.batch_size) - - if grank == 0: - print('TIMER: read and concat data:', time.time()-st, 's') - - # create CNN model - model = Net().to(device) - - # distribute model to workers - distrib_model = my_trainer.distribute_model(model, device) - - # optimizer - optimizer = torch.optim.SGD( - distrib_model.parameters(), lr=args.lr, momentum=args.momentum) - - my_trainer.broadcast_params(distrib_model, optimizer) - - optimizer = my_trainer.distribute_optimizer(optimizer, distrib_model) - -# resume state - start_epoch = 1 - best_acc = np.Inf - res_name = 'checkpoint.pth.tar' - if os.path.isfile(res_name): - try: - if torch.cuda.is_available(): - dist.barrier() - # Map model to be loaded to specified single gpu. - loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { - 'cpu:%d' % 0: 'cpu:%d' % lrank} - checkpoint = torch.load( - program_dir+'/'+res_name, map_location=loc) - else: - checkpoint = torch.load(program_dir+'/'+res_name) - start_epoch = checkpoint['epoch'] - best_acc = checkpoint['best_acc'] - distrib_model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - if torch.cuda.is_available(): - if grank == 0: - print(f'WARNING: restarting from {start_epoch} epoch') - else: - print(f'WARNING: restarting from {start_epoch} epoch') - except: - if torch.cuda.is_available(): - if grank == 0: - print(f'WARNING: restart file cannot be loaded, restarting!') - else: - print(f'WARNING: restart file cannot be loaded, restarting!') - - if start_epoch > args.epochs: - if torch.cuda.is_available(): - if grank == 0: - print(f'WARNING: given epochs are less than the one in the restart file!\n' - f'WARNING: SYS.EXIT is issued') - - my_trainer.clean_up() - sys.exit() - else: - print(f'WARNING: given epochs are less than the one in the restart file!\n' - f'WARNING: SYS.EXIT is issued') - sys.exit() - -# start trainin/testing loop - if grank == 0: - print('TIMER: broadcast:', time.time()-st, 's') - print(f'\nDEBUG: start training') - print(f'--------------------------------------------------------') - - et = time.time() - for epoch in range(start_epoch, args.epochs + 1): - lt = time.time() - # training - loss_acc = train(distrib_model, device, train_loader, - optimizer, epoch, grank, gwsize, args) - - # testing - acc_test = test(distrib_model, device, - test_loader, grank, gwsize, args) - - # save first epoch timer - if epoch == start_epoch: - first_ep_t = time.time()-lt - - # final epoch - if epoch + 1 == args.epochs: - train_loader.last_epoch = True - test_loader.last_epoch = True - - if grank 
== 0: - print('TIMER: epoch time:', time.time()-lt, 's') - print('DEBUG: accuracy:', acc_test, '%') - - # save state if found a better state - is_best = loss_acc < best_acc - if epoch % args.restart_int == 0: - save_state(epoch, distrib_model, loss_acc, optimizer, - res_name, grank, gwsize, is_best, my_trainer) - # reset best_acc - best_acc = min(loss_acc, best_acc) - -# finalise - # save final state - save_state(epoch, distrib_model, loss_acc, optimizer, - res_name, grank, gwsize, True, my_trainer) - # if torch.cuda.is_available(): - # dist.barrier() - - # some debug - if grank == 0: - print(f'\n--------------------------------------------------------') - print('DEBUG: training results:\n') - print('TIMER: first epoch time:', first_ep_t, ' s') - print('TIMER: last epoch time:', time.time()-lt, ' s') - print('TIMER: average epoch time:', (time.time()-et)/args.epochs, ' s') - print('TIMER: total epoch time:', time.time()-et, ' s') - if epoch > 1: - print('TIMER: total epoch-1 time:', - time.time()-et-first_ep_t, ' s') - print('TIMER: average epoch-1 time:', - (time.time()-et-first_ep_t)/(args.epochs-1), ' s') - print('DEBUG: last accuracy:', acc_test, '%') - print('DEBUG: memory req:', int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') \ - if args.cuda else 'DEBUG: memory req: - MB' - print('DEBUG: memory summary:\n\n', - torch.cuda.memory_summary(0)) if args.cuda else '' - - if grank == 0: - print(f'TIMER: final time: {time.time()-st} s\n') - - my_trainer.clean_up() - - -if __name__ == "__main__": - main() - sys.exit() - -# eof diff --git a/experimental/workflow/train.yaml b/experimental/workflow/train.yaml deleted file mode 100644 index c21d4141..00000000 --- a/experimental/workflow/train.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# AI workflow metadata/header. -# They are optional and easily extensible in the future. -version: 0.0.1 -name: Experiment name -description: This is a textual description -credits: - - author1 - - author2 - -# Provide a unified place where this *template* can be configured. -# Variables which can be overridden at runtime as env vars, e.g.: -# - Execution environment details (e.g., path in container vs. in laptop, MLFlow tracking URI) -# - Tunable parameters (e.g., learning rate) -# - Intrinsically dynamic values (e.g., MLFLow run ID is a random value) -# These variables are interpolated with OmegaConf. -vars: - images_dataset_path: some/path/disk - mlflow_tracking_uri: http://localhost:5000 - training_lr: 0.001 - -# Runner-independent workflow steps. -# Each step is designed to be minimal, but easily extensible -# to accommodate future needs by adding new fields. -# The only required field is 'command'. New fields can be added -# to support future workflow executors. 
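The ${vars....} placeholders used in the steps below are OmegaConf-style interpolations, as stated in the header above. Assuming omegaconf is installed and this file is saved as train.yaml, resolving a value could look like this sketch:

from omegaconf import OmegaConf

conf = OmegaConf.load('train.yaml')
# '${vars.training_lr}' resolves against the top-level 'vars' block on access.
lr = conf.steps['training-step'].command.init_args.lr
print(lr)  # 0.001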
-steps: - preprocessing-step: - command: - class_path: itwinai.torch.Preprocessor - init_args: - save_path: ${vars.images_dataset_path} - after: null - env: null - - training-step: - command: - class_path: itwinai.torch.Trainer - init_args: - lr: ${vars.training_lr} - tracking_uri: ${vars.mlflow_tracking_uri} - after: preprocessing-step - env: null - - sth_step: - command: python inference.py -p pipeline.yaml - after: [preprocessing-step, training-step] - env: docker+ghcr.io/intertwin-eu/itwinai:training-0.0.1 - - sth_step2: - command: python train.py -p pipeline.yaml - after: null - env: conda+path/to/my/local/env - diff --git a/src/itwinai/experimental/executors.py b/src/itwinai/experimental/executors.py deleted file mode 100644 index 2c89f1c3..00000000 --- a/src/itwinai/experimental/executors.py +++ /dev/null @@ -1,127 +0,0 @@ -"""Executors to execute a sequence of executable steps.""" - -from typing import Any, Dict, Iterable -from abc import abstractmethod - -import yaml -import ray -from ray import air, tune -from jsonargparse import ArgumentParser - -from ..components import Pipeline, BaseComponent -from ..utils import parse_pipe_config - - -class LocalExecutor(Pipeline): - def __init__(self, pipeline, class_dict): - # Create parser for the pipeline (ordered) - pipe_parser = ArgumentParser() - for k, v in class_dict.items(): - pipe_parser.add_subclass_arguments(v, k) - - # Parse, Instantiate pipe - if isinstance(pipeline, str): - parsed = parse_pipe_config(pipeline, pipe_parser) - elif isinstance(pipeline, dict): - parsed = pipe_parser.parse_object(pipeline) - else: - raise "Type of pipeline is not supported" - - pipe = pipe_parser.instantiate_classes(parsed) - # Make pipe as a list - self.pipe = [getattr(pipe, arg) for arg in vars(pipe)] - - def execute(self, args): - for executable in self.pipe: - args = executable.execute(args) - - def setup(self, args): - for executable in self.pipe: - args = executable.setup(args) - - -class RayExecutor(Pipeline): - def __init__(self, pipeline, class_dict, param_space): - self.class_dict = class_dict - self.param_space = param_space - - # Read pipeline as yaml - with open(pipeline, 'r') as f: - self.pipeline = yaml.safe_load(f) - - # Init ray - ray.init(ignore_reinit_error=True) - print('Ray is initialized') - - def worker_fn(self, config, pipeline, class_dict): - # Should have same structure pipe and params - def replace(pipe, params): - for param in params: - if not isinstance(pipe[param], dict): - pipe[param] = params[param] - else: - replace(pipe[param], params[param]) - return pipe - - doc = replace(pipeline, config) - - executor = LocalExecutor(doc, class_dict) - executor.setup(None) - executor.execute(None) - - def execute(self, args): - print('Execute') - tuner = tune.Tuner( - trainable=tune.with_parameters( - self.worker_fn, - pipeline=self.pipeline, - class_dict=self.class_dict - ), - param_space=self.param_space, - run_config=air.RunConfig(name="tune_run") - ) - results = tuner.fit() - print( - "Best hyperparameters found were: " - f"{results.get_best_result().config}" - ) - - # Setup is done per worker via Tune execution - def setup(self, args): - pass - - -class ParallelExecutor(Pipeline): - """Execute a pipeline in parallel: multiprocessing and multi-node.""" - - def __init__(self, steps: Iterable[BaseComponent]): - super().__init__(steps) - - def setup(self, config: Dict = None): - return super().setup(config) - - def execute(self, args: Any = None): - return super().execute(args) - - -class HPCExecutor(ParallelExecutor): - 
"""Execute a pipeline on an HPC system. - This executor provides as additional `setup_on_login` method - to allow for specific setup operations to be carried out on - the login node of a GPU cluster, being the only one with - network access. - """ - - def __init__(self, steps: Iterable[BaseComponent]): - super().__init__(steps) - - def setup(self, config: Dict = None): - return super().setup(config) - - @abstractmethod - def setup_on_login(self, config: Dict = None): - """Access the network to download datasets and misc.""" - raise NotImplementedError - - def execute(self, args: Any = None): - return super().execute(args) From 489000394bc997a50fd88b410579f021951f6ea8 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 17 Apr 2024 09:58:04 +0200 Subject: [PATCH 112/171] UPDATE scaling report --- src/itwinai/cli.py | 51 ++++++++++--------- .../torch-scaling-test/README.md | 2 +- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py index 275d853a..717b25c0 100644 --- a/src/itwinai/cli.py +++ b/src/itwinai/cli.py @@ -27,8 +27,8 @@ def scalability_report( plot_title: Annotated[Optional[str], typer.Option( help=("Plot name.") )] = None, - logy: Annotated[bool, typer.Option( - help=("Log scale on y axis.") + log_scale: Annotated[bool, typer.Option( + help=("Log scale on x axis.") )] = False, skip_id: Annotated[Optional[int], typer.Option( help=("Skip epoch ID.") @@ -43,15 +43,16 @@ def scalability_report( Example: >>> itwinai scalability-report --pattern="^epoch.+\\.csv$" --skip-id 0 \\ - >>> --plot-title "Some title" --logy --archive archive_name + >>> --plot-title "Some title" --log-scale --archive archive_name """ # TODO: add max depth and path different from CWD import os import re import shutil import pandas as pd + import matplotlib import matplotlib.pyplot as plt - # import numpy as np + import numpy as np regex = re.compile(r'{}'.format(pattern)) combined_df = pd.DataFrame() @@ -83,7 +84,10 @@ def scalability_report( if plot_title is not None: fig.suptitle(plot_title) - for name in set(avg_times.name.values): + markers = iter("ov^s*dXpD.+12348") + + series_names = sorted(set(avg_times.name.values)) + for name in series_names: df = avg_times[avg_times.name == name].drop(columns='name') # Debug @@ -104,32 +108,31 @@ def scalability_report( df["Efficiency"] = df["Threadscaled Sim. Time / s"].iloc[0] / \ df["Threadscaled Sim. 
Time / s"] - # Plot - # when lines are very close to each other - if logy: - sp_up_ax.semilogy( - df["NGPUs"].values, df["Speedup"].values, - marker='*', lw=1.0, label=name) - else: - sp_up_ax.plot( - df["NGPUs"].values, df["Speedup"].values, - marker='*', lw=1.0, label=name) - - if logy: - sp_up_ax.semilogy(df["NGPUs"].values, df["Speedup - ideal"].values, - ls='dashed', lw=1.0, c='k', label="ideal") - else: - sp_up_ax.plot(df["NGPUs"].values, df["Speedup - ideal"].values, - ls='dashed', lw=1.0, c='k', label="ideal") + if log_scale: + sp_up_ax.set_yscale("log") + sp_up_ax.set_xscale("log") + + sp_up_ax.plot( + df["NGPUs"].values, df["Speedup"].values, + marker=next(markers), lw=1.0, label=name, alpha=0.7) + + sp_up_ax.plot(df["NGPUs"].values, df["Speedup - ideal"].values, + ls='dashed', lw=1.0, c='k', label="ideal") sp_up_ax.legend(ncol=1) sp_up_ax.set_xticks(df["NGPUs"].values) - # sp_up_ax.set_yticks( - # np.arange(1, np.max(df["Speedup - ideal"].values) + 2, 1)) + sp_up_ax.get_xaxis().set_major_formatter( + matplotlib.ticker.ScalarFormatter()) sp_up_ax.set_ylabel('Speedup') sp_up_ax.set_xlabel('NGPUs (4 per node)') sp_up_ax.grid() + + # Sort legend + handles, labels = sp_up_ax.get_legend_handles_labels() + order = np.argsort(labels) + plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order]) + plot_png = f"scaling_plot_{plot_title}.png" plt.tight_layout() plt.savefig(plot_png, bbox_inches='tight', format='png', dpi=300) diff --git a/tutorials/distributed-ml/torch-scaling-test/README.md b/tutorials/distributed-ml/torch-scaling-test/README.md index 74e316c0..89d6460b 100644 --- a/tutorials/distributed-ml/torch-scaling-test/README.md +++ b/tutorials/distributed-ml/torch-scaling-test/README.md @@ -97,7 +97,7 @@ itwinai scalability-report --help # Generate a scalability report itwinai scalability-report --pattern="^epoch.+\.csv$" \ - --plot-title "ResNet152 on Imagenet" --archive imagenet_results + --plot-title "ResNet152 on Imagenet" --log-scale --archive imagenet_results ``` The last command prints to terminal the average epoch time per training From 774e6d48d26ff004a4530dcadef594fa5cd88818 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 17 Apr 2024 10:34:51 +0200 Subject: [PATCH 113/171] Add SLURM logs --- src/itwinai/cli.py | 13 +++++++++++++ .../distributed-ml/torch-scaling-test/slurm.sh | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py index 717b25c0..282107ca 100644 --- a/src/itwinai/cli.py +++ b/src/itwinai/cli.py @@ -48,6 +48,7 @@ def scalability_report( # TODO: add max depth and path different from CWD import os import re + import glob import shutil import pandas as pd import matplotlib @@ -154,6 +155,18 @@ def scalability_report( os.path.basename(csvfile))) shutil.copyfile(plot_png, os.path.join(archive, plot_png)) avg_times.to_csv(os.path.join(archive, "avg_times.csv"), index=False) + print("Archived AVG epoch times CSV") + + # Copy SLURM logs: *.err *.out files + if os.path.exists('logs_slurm'): + print("Archived SLURM logs") + shutil.copytree('logs_slurm', os.path.join(archive, 'logs_slurm')) + # Copy other SLURM logs + for ext in ['*.out', '*.err']: + for file in glob.glob(ext): + shutil.copyfile(file, os.path.join(archive, file)) + + # Create archive archive_name = shutil.make_archive( base_name=archive, # archive file name format='gztar', diff --git a/tutorials/distributed-ml/torch-scaling-test/slurm.sh b/tutorials/distributed-ml/torch-scaling-test/slurm.sh index 93dd4349..ba89e421 100644 
--- a/tutorials/distributed-ml/torch-scaling-test/slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/slurm.sh @@ -15,7 +15,7 @@ #SBATCH --partition=batch #SBATCH --nodes=2 #SBATCH --gpus-per-node=4 -#SBATCH --cpus-per-gpu=8 +#SBATCH --cpus-per-gpu=4 #SBATCH --exclusive # gres options have to be disabled for deepv From 494b6ce0012fa1b5c0110057844b852b6a705bc3 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 17 Apr 2024 11:04:34 +0200 Subject: [PATCH 114/171] Refactor log scale --- src/itwinai/cli.py | 12 ++++-------- .../distributed-ml/torch-scaling-test/README.md | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py index 282107ca..8f16d676 100644 --- a/src/itwinai/cli.py +++ b/src/itwinai/cli.py @@ -27,9 +27,6 @@ def scalability_report( plot_title: Annotated[Optional[str], typer.Option( help=("Plot name.") )] = None, - log_scale: Annotated[bool, typer.Option( - help=("Log scale on x axis.") - )] = False, skip_id: Annotated[Optional[int], typer.Option( help=("Skip epoch ID.") )] = None, @@ -43,7 +40,7 @@ def scalability_report( Example: >>> itwinai scalability-report --pattern="^epoch.+\\.csv$" --skip-id 0 \\ - >>> --plot-title "Some title" --log-scale --archive archive_name + >>> --plot-title "Some title" --archive archive_name """ # TODO: add max depth and path different from CWD import os @@ -85,6 +82,9 @@ def scalability_report( if plot_title is not None: fig.suptitle(plot_title) + sp_up_ax.set_yscale("log") + sp_up_ax.set_xscale("log") + markers = iter("ov^s*dXpD.+12348") series_names = sorted(set(avg_times.name.values)) @@ -109,10 +109,6 @@ def scalability_report( df["Efficiency"] = df["Threadscaled Sim. Time / s"].iloc[0] / \ df["Threadscaled Sim. Time / s"] - if log_scale: - sp_up_ax.set_yscale("log") - sp_up_ax.set_xscale("log") - sp_up_ax.plot( df["NGPUs"].values, df["Speedup"].values, marker=next(markers), lw=1.0, label=name, alpha=0.7) diff --git a/tutorials/distributed-ml/torch-scaling-test/README.md b/tutorials/distributed-ml/torch-scaling-test/README.md index 89d6460b..74e316c0 100644 --- a/tutorials/distributed-ml/torch-scaling-test/README.md +++ b/tutorials/distributed-ml/torch-scaling-test/README.md @@ -97,7 +97,7 @@ itwinai scalability-report --help # Generate a scalability report itwinai scalability-report --pattern="^epoch.+\.csv$" \ - --plot-title "ResNet152 on Imagenet" --log-scale --archive imagenet_results + --plot-title "ResNet152 on Imagenet" --archive imagenet_results ``` The last command prints to terminal the average epoch time per training From d7855526c4a3ed4a9eee8257c27a9cd16482cb13 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 17 Apr 2024 11:05:28 +0200 Subject: [PATCH 115/171] Update scalability report --- .../torch-scaling-test/img/report.png | Bin 45730 -> 198864 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tutorials/distributed-ml/torch-scaling-test/img/report.png b/tutorials/distributed-ml/torch-scaling-test/img/report.png index 53bb708ac3b94aaa63942b32742c84b1566a74b4..4e81996e1e22505410cb7b19852a7a9558e3c0d5 100644 GIT binary patch literal 198864 zcmeFZWmHyc+cm6P-EMmu7)Tg^lprlg*homjB`s3Y(hY7~L{L(Ylx_tPX}3s&DB&eY zh=7!Ubi+GO^uE9E`ToB@zA>IPhGV1qVqI&UaUAoQa~|h?1v$wr8)-MLTD5A6)P=K3 zt5*Hlvuf4agunj8zl_D$AH;uz?ap1YQ?@d)bJVjjTy;^;?wYxkow>=C0}h5ZwkB4V zd|b!5csUPTwX?fsE5glf@!t<{S=ktKADCZyhnsA;c0t{C)he#Ta( zI(tgRDeOm^qmzp2Qu%0IjY0j14TX-64sE<)r}Fo%{m(K5pX+H3%I~Jx_3w>7_Sb4E zyZ0C#y>#v9U+m|0t&P1u;rBsi<3X{8;St}*6$UJNER{?r+cSDA{abD2XWMxk{~QtO 
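The scalability_report command updated in the commits above averages epoch times per (name, NGPUs) pair and derives speedup and efficiency relative to the smallest-GPU run. The core arithmetic, reduced to a few lines with made-up numbers (illustrative only, not the CLI's exact code):

import pandas as pd

df = pd.DataFrame({
    'NGPUs': [4, 8, 16],
    'time': [100.0, 52.0, 28.0],  # made-up average epoch times in seconds
})
base = df.iloc[0]
df['Speedup - ideal'] = df['NGPUs'] / base['NGPUs']       # linear scaling
df['Speedup'] = base['time'] / df['time']                 # measured speedup
df['Efficiency'] = df['Speedup'] / df['Speedup - ideal']  # fraction of ideal
print(df)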
zD4T~)k(3kg0R6hIZG3XVF&)~W7yG^kzvB)0KmXt9tfo9WZ-Or>9|eWe1Xj}2>wDwl zr%zJ?IVigsBLSL8`%2lp>&ly^|56Yf-X3Mw%6PK*wdD0oYu{CdT@0(M%EZ2XA*$o6 zT=o#5zgYZi5*6{-qCT1^s)>Up^qH`|0VpL<7p3LIf`^tBf%9v_G=ASCu4`!(Zt_RK zL)W#e`+t4QazM@gwL5=Dzm+%o|JnZBlr#+9KU{CGO|BO2=9s)QY=I5^0YIC?KGC#y z`!&-}hJX8E5U4a4bY0jAJc27~04{2FDCY2@sK8YMNNSMPoO~kI-$o=H)=Vz(_F~Us z*w}v$-!&E(w^dQ?1EB6=V370swN#_L$ZZtie1!+>Kz^G}TQt(qzeZWapucb5zD$j@ zWZ*405>rlau6*%zT2RCaQe)%e*>Kw^yM$ELKdCi>^6x~qW(E`go*#A=lq!l~oTO@g z1x=Oh5$t~iezlXUSiQ=WrR3O4<2nX(3{V_dS8lmV;19l@>l@KLL<%A43iN`(Il&G` zf?CRCKzn|9za6`fP|nP85s}cv%Q?Wbv200mH$qTWhZUF}yu&7zPqzw+stQg($>v zRr8IWmZeFZh{5ozYOr3H-a_TJpQh-eihRhPxzdoWc3zrJb<-K>z1)oZ)+t~rWtXXy4 zQ`$ofTxdq2;;%td$R(fpYLi<~CC#_07jt)W$44ZzV)K%%JXw?l*5@lzMZ zpGSUUE%M2*WOHXFBd-AWlU3D^ylj{gw0XV1v7?$M9cU=GCQ&`(x262VXTGk1fmemn z&H7J>&8mefd+%%<;u6;4y8WbfV@_z@J?5>ybNkmuBR`KYrsna&>NBU~9$ib57mNS0 zJhC$*h>*A6mgSnlDN{-n-#OVta=<4GS?SnH^FyT>W>x<4E{UL=~bt3bnsJ~(uM0L2W{{1+a>SP&i_VT zjcG1eEE;+8-Ad%y&|O>UG=JUh58rP#x40eC?65qZsAhk~PO3a_pz&DIz#l8EQi$xK zQMG@YR15i4AB?jI{ISlj^2ZWyez82m-n2SOh{{zV{+DM8;aX=S~{Z6d@I)|QJErC|H z$rYV`dVAr-+PH~_gSYY7lWQs84yCqu9GA?Xd#^bC{eE7N>&|m`N2dr7Cs*L1L#N)C zx%2myEcb=VWgAwuFz=I_&Xp(@@bQb+R+FjSkw)7nqQrH$*vU#*inphGE@zkFCGUX7 z7BQLhuAc|AWNS;6ZCCllE~?*l8SfRV;<{F`W5=_XVW;DIK6hWsD;Pin!|A8Ww*?b; zZ7nvwcoImrGr9Z8wY1NHNk{M6Onf+(*2Mal`RMNj5%p|0msa9khM8cGW&JE0D+x+Y z&bDB>+e!vU*$yf4YWa26hu_pLmB^=apvlc--p9L-OTFYUv%gne^&~|bPwpt?W$|^X zee4h$dr}_PtoK=^t=FgMYpE$R4twhEoOl$H^2gKpI*!wvEX-%`u(NZX?NrOEVe!|( zL*~mkpW8}b>v&{%s}T)5M@M?u{l`j06K@yN=E_SgF8mIyt4<1AbQEdpmwA5-)?Lbu zY<(@i^dVc`k&>!ktP*gzRLFt*g!+xnYl%@68g6vkBbj7OT&oP@FRTkuathl;%dDPS zuTD{$>S=UBFrnqjsfad_7*;5%O>)au&ga|I%`3Z+=cx*}_~{Lg)k555O}1`&!OHbE zInTONB-D!fZ|Wa3pTk-_m?*XWF-%g*o}c5TD0Y|Gs+G`8L#iplG=i`}#z}{;mjX=) z<+WPeLw`s}#Iz5_G4sn1X@^5Cn@n2bTJ|!-FL4zN48+ocy~p= z-?NgVrWzd5Y7eG$DFFt8Au1f~Dla+@wuC>QxI&SBKFD1u3zojue`X5K2H~rpN;6tweih@s7@TPL@cNqHx9QCf=eM5tn9kh zX(+{U_jexI&BzULAGwJFk`9tn0;pm%5m%&WU%B!O%^-4OP4AeFJZm@TA4<%kPL@1ui=H3U{%%x8L!O( zDY=mwa(Aj9H4>2bQc}5ePmAk}EAP1B1&tRR%7fEJ>kcn8D1nc6nfj50o`D(J3aR1P zHfULmxwgjeFC=Pp$%gVvfvDpWICw;gxUh(Wjr5}4(F-sO+iCIn`6&>!pU?u*#dq*X zZieSrcO_wul$4b@M2(`5OYdxBOoxMLp4^-2*SO5+1-_j2WSj~trRrVRh}?cPEa{>U!942{=`Oiy)f; zEN2Ll8o4%clDU-zN~D<-eZ;;Y*68BR`{u=gCYkTo%bX#W5n7M962{)WJ8R!xe-557 z%GtALt!D|Vg;q+zpPNMfg;zlUU9R*{zFwzdu@K7)xE8Lg=J75gY`DCyH#SLMl>M_B zroA-Kp#?L?(eR4KA0^muchS=iDdwOHoybsx#*5?mJvK%#^`>y;|KNp<>;lGrLc}Ut zg9wBqzMx?$C{9s`?bSa9Tdj|LXJShTtGk@F>?Xb^g!T5Ve1B^T?xVedUkw0R1RC3G z+$PI7m5HGR2B(Lj)z8ItQd9mHmnXZfpE=WNGu+}e6iq72sMXC zR)J&CsOE`cfnytVa}Z)aHo9M6m42|({P2124~QbfEkk%y!i#_fBCiLt`!_*wwM00q z5;KY-t?0zY;1`RVleQfdIA}g)=w_gchZU$84A~0=9EH}h9vc%VzS2@sNtwrgV>UiI z$V1!2CLR&)6&hZPad=94-U>|%4l)YOV~)|4OxeY)i?((aQY_QnLYkJBSMTb1U2NcY zWgxe@MYmI@Xve8jSZL8|@vnu3Mb$0`x0$fMGgopu+hfib^0$fVy;7#T4ROe^zN4ne zr27mynZxLdN`rd%;wJ`DGLpfXGuS@``q%>F=0@^?CIFw!Z z4y%KHdin-urOph6K3F;0$Nb!$;o$DvriWgc%$gAfyvqVI!>M{o7p~q;-(Z*$X2P?o ziaDVo`mVSJrKbxJ=i&Oy9=m;2*}~m+`!Y{xiG+?UJ=ly5Ha5gQy(hmR4%vNHFu|wi zOZMbt9|yYkqfgRQY2tm3Yrg1=I}(?C8gz6KI-S3D{rq|DV%YJI8Lx;#8x}j}r?n3C zdF8=IqL$c%UiXD(@1&AuP;}=d!2&Tyqih1V**8id)?hzmBn5|8uH(&6@62HQGNq<# z-!7XjhsAVIfbU~4@_irtT2oFJC4kYzAl04vePqNcf&0zoILM=+G2hXO=$#NvKA}BE z?K@+D>s9OMiSpjz$dazuxBA1)Hazxm1^ntZ4ZBP@9x>e(v0-jgG{XF~O(!Nw;q)q0552x|XZ z7wKYwjO~$q5jg_;<{PCA=YH&2Hb+oIV_~sAoq%aThrPQn)S4z(qlyR4OG*x6_n~72 zXC>e@sgQ4fIJQA0bfH_u<_M(!O%J`lhRQmWNB?|MuPazx_`FCLUZ|*Z<}$Fz#)knK zTkMwUv?(^iv8iJ9(H(7-!X@fj0<9i3;oknSZ#gR?xI;LH*G4ee7nF(3Z#{Jm(c&LD zt@9rh9`PEoj4a6q#xbR|R0VQ~`M{gUsNm{302i|+Y10!QhTzw6UqXs>_MGGiRQYJe 
zK1;jq3}Rwq^qKX=gD|npZrrkHmbY|g%YK8LFE_W-{#vx}kD;z#2AFD0R^=dGD8nVi zIcVrnh^oO>QM>m5QVtu0#VFV`GvA#~)-)>N@;^$o4brlsucSLvD)=aLimhYjufI&> zqy7}J-XP^K`>#~ZKF`S!v(`{1^htLefAsDV6FCZ|PybzcIng~Kd03qssyd-lImg@; z+{)L0uEvFd43Js`@La9&f1!~^ga+8h4@j}Ivy*ngMU5qRV#y_d1A&N{V3JsU;GQzG zo;|ZoEEe#y^2!U9_a~kE>!;S$`!L46r9;YBf6GKE>V7G8P_I;VDCh#s?RRWk{4L>c zY`B4~E*)l-^QZ{-T9l_NyDnbVPp9 z%g4IlqO-pZO<}y>s7(D9q1Gr!tMg5|NQTgkXg@qzVr~A8Y_@v(SF*Vtst1mRh{L1- zA;T7C?k1uSlMbLB#y7`e4?m5w7>_MXQ4!@kl;;x=Io!Vgy8Se4jusUfeq3PrnBi@i*`z^(sOAlNPp-Zb>pG8Bz``Mv<8KB`E zORW^Hcjn30#%hclD>?n2y*+Ip@U$t};pMkOr{&v!Zbk#|pfw#hbm zeFA~r5_{RZ1S+emQ&O+}nxSiVF38JM(Y;NYtl*%Fv?s?FkY?+i{crUA9hMJS%*%;{ z4!F0|a@{V^pZ41Hz=4``zJf+so=elnT0r?!yzB1pQkS=!xr4E^d2iM253t>p&cX>3 zXq|9`VI^_`_|XjV1{Q}BjJB{@We`A&z8GcTq2v=P7fge8#gX^hX{mZX{i6E1txfkg zqy>bSbCrDf=r)~K%HzpG|CBjUqSH2yBjj^sUEQ|CLW%oPUOvBH>zt%yu&cSJYMGhR z<9EO1lJ}gjR}ljb>?4;3Hp-A@j>aVArhLbdH|r~^A33;j56zrnuVTx$9AB#Ml!$%4 zciJ&QJN|8I@%#N;A+`;z5|Y$FStGWjY?We}uh!A67pkI_ylDlbmuKrd-6A09H^3M7 z5?%W+e4HcS1S?1xyU*Xu;Z#mVQuV+7D z0*PyId1++%rTe@?q&NL9pDJSpR zcQ)&Qqha=ZOFMeUL+Q>EY!kNv!&`K@jy4}U z$1Ej$*T7~M`#Y5bcZIb&r#RX8--Q?Og~aC3$B7Ir`fdw^EXsZko_hGm_=%@q$BLiW z{z%sZ$xxp2Upfr--dREe#b9PN7YCI=K2h|HNMqAfiLkWKhK7#?hiE=)w>(u%460O9 z=lo~+_N{8etB&W3P@Zj%-pyQoi^f;kAO6SME+?W40E;Y;;~@O}Kbb&-MZ!*l-|T|T za@PSqKIQxOmueLzjJ>PObu*V>ccVm)E&=GwOW+>c%o?a}|e>ctxPH0^!R0+^_Xv+(a6}%ZY*E_?CQK^I4(EXp8<&F7cTY4r5?O;e zH~8^9utGAcNI<-8j1$=pEK*y#N(^~qW(9*C47)3bIrTMOZgWVG$*&&|pmaZKHdpF; zcOis{P;+uyx^8Y?GKm4RP26VUOF$<$ibwtYgJL>J>m45RX}b3FlY^9nEVOXIP1fz` z4Ub6#aDX~&5jan!o6?4sycN`Ec%+O$7LPcVZ&}Ig3`$(Rx${@x`^rBKRGDn4T_^Py zvCojxtI%Y%ywj%tOPuZ!v-_-Lzi*wie1-aH!VSvk`f=?&B5*usg89frmT7bF73oJS zWB;Z^y9R|ZQDkBD_)|MBHfX8zzVtbz^gqAgtPJsmpJxhz;9P(|C$T@TcD}t0sbF)< z6L`+_M(a=~ZfZIHvtx@`C@WWe^7Fs5LYdB2TnQTU3Im#!+~+KqyE&$2imOE7e8fe~ ztv$PrbcY_1WNS55)QVN5I$9~6qu30P0|`_q+Ny5EaJ*qB%S@94P%%SRc9jjP3Ku#F z&@WY$!j*qM_w!hqHLeXmLCxT);>CGd#RG(;sCvIN$hZ%EtqPzg*hPyZbl^uX4(i8E zVkm$379RVR=>CT55Mf5fkMKO?^bz<_8)i{1jFrjV4VcT~_Y8Y^x!~pep2}-;>H7nj z>~p2^#x5p(8B)?Zv;2p7y{6ZNN1V~ThjuJ+@^|y!t@`Uhuz{b5_MonAq8+EMv)5-R zC2HwfNraHX2|IHzjfyhF3z^iPh3_Je_Y>rON{TLng^+I2H-)21 zwvpzqxL3}|ag(4i=ETaPPTg#T*@_%q=4bcw>#2u(c}~OZaMZrJ6wAx(-My-iV;>;3I@Lpe#RfH zuM^08*3>oW<!_RR94A$ElFA;VD%BWWkJ)qRCsRg0%Sz|& zenDBdpW(!Kun^j|BN0tuq*%+t5f0K9!SlrR;AkBIbS>(vG?A1&Ck;@MbvU?$ObGSZ zL8a+QvuB_0{eAoPlcr3Emjk7etnaP_m6)&|O}LQHH}d%D@lP%5VYC<#u;sv6Fldj=fsN^? 
z6!9bK>O&DD{_gml@fn^dD2X*T+iCRMw_8!&a&n}zl0X93q-elRlkU*@xln#<&qu!d zh2)6k9%n@jSTDgDgm&1}^xW^X!r|ygufsIk!DABW+B){`H5!)*L$q>I(>Hi&OmO~* z2q+Ozqp|a6&-Upq&r$wcZx7hQzVr|W3X#+SOP=b$*?sV7q0hCqdqb{wc>NePj(a3f zu!c?Spz#aoqg(-Z3Jz$`#5O)Ro84hkp8Z-O^L*7`8-m+*uq6n~I>^Mj_E-7T_@&6n z>Lr~gHaZww6`A$HToJz?F>Z=g9o_wa7}3Fa;LHTluU^_qRum5GAU^e_i-ltM5l%0n z@bT}C_;lj(o9i2=?^iV(5ZChEVxBHfrT;Xy#yOdOXMFdQWAq%)ITAjk{{h~9Yw+fh zMf z`Y9H9;b_@@awy6IZ48*{(Bsz6BHYF*zhx43`OWp!psIu53r0D3h|o~$)~zeGUuo8Z zb+HBg{kiED=maGIoOBZrL*yJ@tqHc@8Qj3VZv_$1@xyRYDpZ3Tc<$CniEn`51EBf)Ud!NU$tm^|m1K!Qd zw`6?E+zlS4QJlLDTox6OI0p#J0eQ84&ULhOPvH(5Vpk{3FwTh6^*dob4XiOh&Ut{3 zh$+{&ff&{2@U6S{?4b;g+S=N}_L=x?A20lMT=v3ZDv^oL|9<$R{Ulqtmd5khd#~uL z6y0vAj&47)Y}Ewxb;m9-g``==J`-{dAC6=D0N*n?mWC#H|D9-T3=^;fI+MiOA1c4(#X-Z}~nen`KqErsUAEyTeL6SzMUoaM4;&X`sg!U-E0g;SxHMYBe}&F;UF z^l?7u48%&}uPlCtYnnT3JT6E} z*8{5w7*_3SO6i@?P{QGsgg%F7jx>Gr&>DYxcV8pV&LkvD>r1v-0zC7w83kj~xn-2d!u=V7vJ9kVGk6aM@$aaXd$L+a_X0D`Z=c_rk zT1(PM{$>J`pW^^kG9;uaD&)}vZ|_F&)(O*;uB{u2vxz2rFH~>Z{eUWIdlN6?mmQ2 z5XM%MQ)Laani_-;m-u}mnG~4L@otD zhR7cZ*NDbA%lLpkFZA~WH>$)cr(JZm9MS=*3VaVG?*aTii95joW?pEB zv9P6bX&3<)izv7uTDc9acX7m^g%)uPv=Z4U1n&to$j0JZgZUT}C=zYU8Ky`-H_YT1 z0!<2$Qmgkqyx^P&O(%{0{4(JCBJ?7Zz~fZ-X+)0_mJjt0tmI+|U>@a@1W{Bg?y~5bBVbbUDlBa^~-y5-d4O*79m`%Q5*9H5;ZD4x9pm@8LL(IoK~Q@)O;T zHyrueJexPuCivP~Ib5nEX%4#7@5;)`poHc^{kEU9sU)Ju&{%+w%4u2*WMud1fDaLs z3Dd}B=s##c<6(CFuS^z@t2rPhB!2R%Riq3=E>e~deF)_CMD~ADu|SQ^#9afKBWZ^T zS;{uD+dRvRxc#DS~9b<_ZB7CB79&Z!)#?IF4IW6 zx4!IHj-rVt+jozO+G$Ncj~*>4&!;WAfok;n!NivYlA1jWRfsfCC2)-Dp_irYWnDTB z&E0p=6}52Sc7e<#9fc?~9RWO@;?Ve?#Zp2+FFC!qxcJX-2(CmFGEas!@YjS$=8o9A zzO>%XDP=n;ru2)JpnQ; z*8u|-E@3G4qcUL^B102s;s(NX@KYnp_%JheQ_68OZp7ynCbq;Zkzd?$LWgwch0|8) zMoOMrZ`A;qaFqe$s!ueHX*T&QL*f$uz;*&sKtsj6ots&)9Gw6O+%Lx~IH0k|u@LL>NX5$Ghy(8zwWrMnOUN@N4-C69)P>S0FP5>Xr)V zJtEYQ6lX*o2cb11#qr6+d^<Occ>AJkNK7XF6tzB7M?!}1jNx{Li+grKEUu{veQ4VIsFnc2jRQ7_V z_*sQ0K#+f2_ytA4y;TUWh_Guk4HCPM3=9H`OaZ$zC|T$q{qwY6ilE~9hP)75MZjth zMs9Dw5bmK7#1_oWI=mlsM!T@aRdmNY;KvK!H*(DFzLLAOAJweAe?v)HWu!oTB<(D~ z<~pQZdxOJ56_x6nXr)WU#ijAyI&cmUUMI@Rz`|!RJ+kE494HWIDzt~w`w2i}!Y^U+ zb#id|6l6aVvExhjFx0c(>IF@@8W?gZPW*(I*o9zup0GCrkz|0w?l-YE_P%L>56Dq4 zw#B^swCn`nC7Gx$-(!iVgUdL-oC|GRi_*%BQ)xiH&E9Li&W>)L;!{G)veWgGxt^^< zeu#S4!?JXbFc?g%JI;{F{{=_pjw93!nD~1Ly}bvzB?uB=YoYbf+PH&1Co+bTsN~SB zT}#aaHf-WsrV*C4Jq@S{Qf&C zBX^{uXoW=IiN=$;)V|CjSC%WFDLApOH(KFQ(=-{9e#loE6StIuO@rxqMCyD7=MojwnER`B)}+PqJPWAefYvG!kr&yGeai zuQ*WO(GV=-;xG#tS2r#(!*^c1o!}od)m}YWBm= zLV^irbp4mS2kkZuXaebKE=?eHx7wSVn}75MK@E4Z8-%}(W&MVRhWrjM{2c*4QnJO% zA`B-)8yrXxzw%W<^A*%rWME%&)>_N-jOPfnjxaX*ly%i_H$^!!RD=y=Om=Xj=vzdm zXOJLI7eythUuYr~S8SIcCW>j>o?>UC;Z%bk@`qt#+B#9@x*xFdDAi2r0=v|a*Pq42)w)|oe8Y-0iJnjv?L$?8OM$C%lF~q}R$oJN4cC|A7Fv zB|L%Imd8qN_6~A?34v@NsHRATd;Yc`R(>Pu;=|rKw;P30G0NXmC{T9bUZ@$3NoMC-8S|aA za&mH6N6OMrFr8Xv7L#=PEVguI^(bt;zbtnGt zhPWc9>IR~(6GTZ@-c@Bzbc{7<1n`fSG`BEviyj;WZ4NE0Tpf0Nz;o`hfAS|p01hgU zh!SA2o*R;EKY*0Hz)ts{M~i6zOcp#fg(+~+Joa<%94v(EI3+CHOsMct^$_hLWh^IW z(mQ6)-6YPn4dP;=aMdiR)$(KV1cL42~zLiPT-Y+-@Se@sTMSVO0O z5UxvBKyG>3R>=v@RO#cFPZab~ukMWQ6cHHRl$y4UFZ5CsSfXP*=+fEm)>q3c-)@NQ zv|!-nQgwMNPZuWf%09$_AD0LNv;P_zG2~z;wk|qkw%b*kb@L~&AwS_j8u~&Oj`6E$ z^~|Y^1o-6zNhbws5)#AS31%#DMM6zU(82~uB%iGIs$qgc$V~|vqRp-9c__%4 ziq@|Dlc({>0RkNndPHd2kPpSX27GX;bCRW$U&9P(Ig8So#tcMDqi37z40NYonhm|+ zGZ$$60&+`rjEYCqKszOj1(2!|v75V-BInpMI;C%;# zOE)rnVvba8-ur*eA^`F)6tdW4;(0|i zd}(+36_2Rpz5ABl?+H~aE!orGm_Y5b`on-mdM}+tIf4al4#DTLG* zI#MbCUZO#IAKD+>-or@BZlD5?%-{Ly0U`E`fcozp`=#qA6v)d+(ggIr&`ub{^L8ii zQce$L=a#JtE}L79Cw(4r>f1QnlO)RjJHa<@&kXa)zuP?8@ zou)k!hs1&3rUc`uFm?w=j4enu(6Eg|*Fg(5&t%>=OG^p}y-grF3O8IwhBb5qIA5rY 
zOdSmZ(KH7N(d(3m!~!JmxDSK>emp-eO&C`rElcPm^ior~PF*{xGI(kFBqk{`&$VLc zMA_n-+RwEr($|rf!(^1ibp;c|zvGLH9Fbsb!UyKfHaP+g~74vpk+J!AV#&Eli+M%_Zs zcj1K&g6mh)pStR%MfNH2Teq5U^3yialDwP`;iD^+ejFwZJLS4nnH*Jnlo3s-7M0=S zfQSAJG+nn77L!Sqx`37!#UrHfST?vZ-H6A0Cl)*GQCjn(FQ|gY(AEkA7{iASz+Y_s z5o)3I`Sr<+5|&E?>UvpnYM{mSMEHH*)$5}9*aE=Ls1#_hXz-yJku#csWkmsss2dnm#NEx7=%D+y8(j0UcZWg`}=Ks z>hEkicH9SYXaww95B%==5=2=6uxk}akM*xUF#~ApXXuHG`y;(JaN9`Xe)#u`-S4h& z+cHAfqx#nrU&_9~8_vw@9r$)xyNy)E$OWRx#?kyrifV@0=h&F{Pl_s74y7ekT%@9I z8>PcNk@tIzY&U7K*JfmGhs;@<-p?)P$X#AA#ycHamB>uDv$&$-T20_*SQSIjO+6~2YTpd#S<*I0>r&=g>1gwEZW#bKS|UKb8dTuef&>#RqHrjWs&xY)YI#0R;?jvCB)wX&*iueNN`Y3m zfs6Np=qa2A0>D@!cs=MOj3U@Q80zllgKl#cYy(SBkpX3Km*ES4@n?a3x(<4}E1+Js z75+XU!l$Sz2Y4N@e?4Mbl~coI5F)V#2^BXtx7b!x z0@Acfk@fhB!Jxc3{$67{I_5T&wGGOhwP73Q*0;XvRIchxn(}p9*0^h$KZ|Odd!i$`84nQ}`bwtZ;!QD+#*l_Uv2EQp^DQz{4Y7WZ?!+2QY3b$Qw*LB&xA|e`$T_7@+ z0*?3+Dj8_!$QYaS_TQ8XzrjgRj@tcY9I>1gRHU&mRjzVyH)8vGsTSq*sF?FYBM~JL zlf;Mv{k*pBt$=1V)uC+YSvZ+5-L=u%i+)KVq$ES$j=_nh%`X>idedgIr?9Nqw8B91 z;l1vjH|Gn!>a}GD{vW0)RdQcB5{|pdN!Z#k3{jo0*rhpjres4Ne&$B0c^!SxrTGvU z(?o($g@8a5qp=Uo{b4vpX#9&gP=u)gz%>7t8&JOQ#hVr6K+n^&i-QNpVDeM4KB$Io z^&*MWd~;YH`(Ih7-=G?RtCEvB^o#26=(z-mrD17U?9~Z-jo9djsH3NQAQNdSj7lKU zL_DjoDxdVnnTjlr&4FT4RbpV`OAf8Z2?n8xT)C_dfg>DY1gnzmn3de7$%dFtV6=q9 z%yq?g#uoGi3viP4EKqkAgKLgloK%9Gy0m)pu&{sFEpxYZXpd0i(C%AbNG6FN>ymuJ^VX1fqQN=3f=vg# zR@QWHm2ZuWe{Bx?l2u0A=Xab5Ui1sMjYNmn*1%gRK9#i-k7@gU=Iwcbj<^Pu$FeVU zAHI^K%hr9Vo6xmk@)j05A(1nI6&AeoI`zC@I{`sjM$qhJDu%RkZ z`}^$Z?)P72W%jObdYO8Art^zhcV68lK!6BwH)Q7QQDon8d!bw8+ev4mwHNKc>D3Uo zCz>rta$`pQPJ0&p!n~y<+CAQ6*oK0>bi7VZF*^%s&pT6A-D0%T&8@E zL7(Va!-M0;jbb~4Ri(^JMGo7x`6-w{YdiJbdyI6oWM(8sJHgjuTWouF zC9g&m-~000&{ej8e5AP*WW-cpqL#;g>#sLJtH)%pUOvu{q&rrf&7WB>l`#+2!pF1C zYJE~CCt&t^sQ9f3)v!`jCuXQ#{${!JzG6pr*LSUKj#;W8R$asXBto6MTsgYZUXSv( zE$IF6cA0!i%zM2sU}p;GM$uU1NI2{4s(5s`miUV^ezuHm(!V9?Q%KWp;=*{F^WkIJ z7@T;uS}k^d>y5>f!3`ikQFv4HRjK-sw$dE&NtZ0$h<}YMrmT>SBb#G#DVkA8rX=tr zw(RnyXqGhk-)oWyv})LwezI@{MgID`OQygh>B{UlZ1#qG-KMVo(OWWwCbM$GC7yxH z5mV^vH>PNYEtQ)eBva@rWbfHdr4}@>p=SE!inar(h!!UnizS#i3rmfn-u{wnN$8^E zF^^9DY6gBc_U#`LHx^XNCuGAgSZ&cOS+3ixaJYoyD`3iZpfy$+CUE7S0YnmByLqIZ9c_bN~! zHr-0YXVwotaPE*Y^3S(9#gp7gyKcaG_^ycOsHup1t~9)(t9(D-!=+UFjll7}-IK|) zD&k(lE$(mZ_>(5KTRm*_j!S<;knh^hDEYjUSMV%LC#Yp8v^7o%DURd_dy|Ngl^3LC z6zcF8uoug}o-~~Obfr9aw7<#SvhQ8&^&U4diGBCb;4qE;t3QvOp??veSrGand?cV! zFV9=YAx4Q;CS*5*$!F{PP1tL6mR#Z!Kf)Isq%3r3J?mLG+c^?wqsNnkxK8%B+v~d? zOMfNUW!=b}2@#Rq2-ioroqzrr&SubV4E2yn-HB^w*YOf z{h@e(|G|uEAxp^aiA=FGF^5!M!@j7p{Uw38S8USQA-`93HBJ1r6R%zUPMpJsd<()? 
zw|G|R*MBRA7E+sT#OMZwiePue^2xHXJe09qSJ-hT(|14mJLZ3%eeXRm;;z!67&RzN z&2~pXO%QL%1BH7Tx4+4C_FZeTk1}BHP~^)NpO%Z6osYC_47S3s9X>?A*}I+n$kcvwODanypu5<`4X>Pw3 zIkPEF3_fzEEqT=@s+B^69XF32Ge=&1bxXtT84%!p&bI`1p0JOGwEkb(+(F@uzZN>j z0!K`sxFzX@VA-ctr!awj4?8>Nx?kNR^Ceb+dTER&iD;Pdqf9fiaf7A{hn(}uy)m3b zU%$E~v3IyKoV=E&lYM3s?a9DWQ{QW%>##t~v#Fv}PAZN%@Uexnkc6d&Zrc1nx1GX8 zCjU5kVwbXQ#(o%_J(X0ON8(9cV)qIh*clV5b?9^o2`80G0@bY*tGftuZ>eHOM2lw7 zgrNjhJ7g{&$;~-bmsyWcNGwF3rJo92cz>}^F9y_vM`Q` ziH^?r*N6`hae)yZOdC2Zh4;0`{$7YvI>}{L@5^lJV?3+6&B;v6v{+ z0Kc9TbjTL>Lh*P^$p){D!FxG41{6y;VoY7-%gIaIUf3#IHJchFBDuG zT3e%O9>L!D@#?*FyX8dI+kvmX%+jHjV`}{&u|K=6V#EOT+z#Xvg${|KD3(JY+r9%F zy7p7=*|q%*|HMUw-_j~?Wv65xi$;N?6c0E=Aagf_&Moldy(WSqLf9jSuV!_t7PxC5 z(snTX>{{9f+P^Fao(zEe&Y6wF-0b85pzef4fNM%7-0~nLDUXygz-)nD*y1o7I1nIo zAi!xzgKiU+kZr2M?;u5RC@6#SAYvPF#R1a{uyHB)&DX@iM24sYgO*8F$Gz`M1hKL- zlCL1is$9jhT=`&g@MCT!6#ImW_7SISc#W)0AOl-&xzC1MC6p+PrQ^DFTPe@hDsE>% z>ua4kzEeGN)4m78trlsl0nsRJt~_=uHM%i<#f^4BRFu`yOP!TxH74`>H}v82-LKOg z=G{>NQJJslSkczHF)v{oMbnl?MNP^`-t8nxpmFS>O8D_os=pnR!Wi$}cO-oKLCHgz zk&C(0rm;7+E!Y5UDHe#+4AD}o)4-|aoY6oXCdN8z#n^u$b&H~|22F+Rh?}?u1jxo)N^#C z=({HTE=WW(d-KGuH|B7OSBGcrfMUbuu{K^z`tchca6epS!aEo;V`mvEEYB-59xl>v z=gV%&`fYp{?ir%%C=KcG>GrCq#*6~X5q_=k3HlCF%UpC{Sb(M&(P&8$+>p5kTXW&C z4ujWVTsu!_^2Oj99#`$=YlmK+Nv_%JB< zjCm_Z#{+;oX=!N$zymVWG>m`)Mnu}~9sgSRFMF;Cq0S+{5;~;7%tBN%X_ld*hFYH> zdQHx_4Ep^wP@xJDH~_p?4Jf3sfM13DGUD7EAO)Pjf9@Lq3)LZ=Wahs6*WL*?VUKiS zeO2L2kNLeTN5@*LbnV_`&a%ix)T)21&pWSLvC6s~5gzM_4+81JHe=YA%N%!Q)^&ID z)+7QbmewQ`FlLi#ua}hEoP=q4;oM1WE?M3|f~HH!O8JZEah1AK{HAE8y(xU?YQXXg zi%34SKFjD>`65`G6O&avn!u*0!gko3MNEASNa?Peh3i?dq#MhKLk0*S!PFmg&NKj1 zf=DieA-Di;fOR1FGw`=Xgz|q=7(&^>CGZ3UjljCBXlg-S z4X{kRfQOTff*70yP>_aSQ=n0(h>*LYJoo0eXx`CCh@W%qe)4Bg{ZueyP*S+CMUnZ^ zhFjxh>+2MYMc0pI8t=OlJw6{G;c+;-2?%6q6v?_plX_$kc|$>8&oDEF;nfRfh~^@b zbUJBbSMT7WYTYFRBC5ZDwh}P{i8Plc%>_zQFu`IGaHJ&YR zSv9c3_Zm{t$$rMmsmKYtY8^69b!vzrN_sQUN&7Qq27>MKADP=pnD!!*d;x$b@ObQe zn@@102j?b4F+8HKhpl>%006BT>Um4r!@&BgfnVv2pd0`l4&bl`gh*E(7L82#r3a9D z90*bFNg!Td|F?Ld7>sFb?2P~~P(O%t7r?b9#&a!HN=m8^6rO0L4*=mR+=G}=tbmpR zjjIY9qE4YJhf^x@C-b;Z1ii|>3qO7Au|{M#75GfQSLEW05S5)~f5d1Ug>P+&JSykthN#-tb5|KOi#>wt*F`5yp?4?{v%AsMT>8i`t z%?o=Dk?Sik!M3F0reROs14m#4M1x4KA@CWW@GaOOR+Z z#^}()RT-kx1i}+K`PX^wF)Mcq&@#b<4F!-?1Fya4-@RttaROW69;-sqD$^>b5zF1BUDnO(X>4(1Cg=>P+fB?s6-?1 zLIax=5C=!Z$?m>g&vZtlR6WTeC`D$9Nc8@HSagT3zMvaA0gl~i zl)XYw+V8d7M2lmzVxhr+l3jdR5=ie>h96Oa)>?wfGKInFbr!M?aDDYiAwYMe7gO}( z!-o%O{C6x8;{!_>d4kvpxMFD)x3va%W!d5v+v0WcmcJZCvx`^|N3kk$CIWC@pk(~W zd)hZ^mlD-|KE`xxQpHWTJg^wp?*JDjqBH;q5BSouWHcK9;36zORVd;AL$y;2h;l%j z!4aiojcKstL#{k9v3_hn4pf-GOIW`*-5i-sH>YLQHVg``#pek1V~K2z)NK|hyFC3_ zZ~q6!g(x!u;{2jfadyh@HRFl*4%L7oi0fQtV5_{rADd#MYM3lWCw+CRoAB+>gOTrU z5qo#aZx8)mqZxie#9rF0TmEU`1@LptGU3($f4xj_Xs-J$;`x)D0u_5=|Jys;jC1*t zzkifJfxzuYY19`(&-bH>*Nsdum3hI>R*g=trKg8kx!&kxD=f^@yf1$KoEUB$?S1Ui z1BWJ2xj%ole*74Di6&BXzdo5h0_5fEmZqwuyQX>*9l5oI9`u`|u|Etd3g~`uaXTqU zT=eCni06$I+Zs84(Z=zIF&hVhXPmrOqoC!DOL^~i%u_8Fm%HcBx4vTprwP}(=OTVU z80X1Rq!&oC61W$m)8HfQ`+I@1HwRzzB9;G@KmS>xi6<{ne=vcsl>GP(mY2Tvw_wW{E)i$>&p^UOmzJoZ%L(1&--=jk^ZNEzp)e>o;!w6-4_NAS^Xo-{n=D?LCb$&B`p&>wZeE<87FT z7L|{^rc-@3elR@U{Nl@R;^t0@PAz@LSj^b=NsZDgH^nr&7#|Z{OFvB$b8RULWIXS{ zZKjO@-nq4%o85!{6=nIW40|e@gfSG8_8FKnUf7=(&JCYsm^nC=X3tt5I5`f|fn84o z!5;5H>uC0ILWTX#>p@&h&(0f#H4(QZVemOE9Np*S;sWmrb0x~Zza6~Ynx=}%|D7Sk0%g%AH9#3F;MFS3| z@8J_-jX2!l=JL^y((4|h(Ssx(b_sEHe|uiUev)9t7nVePf84s_0=WUoxn%h=X?FNQ zg=#!k9bH(nilqF1pWjgvb!ScIf@l2Gg+)pkb>qYDn0vy5X7v*4>eo+!*B=Rx^sPP~ z5VX0Y2#KO99ur}?Zhn6$el8J@x55F72gP<@qW{~)kQH$$C&jV@V@kG4@i$Vi!E~4G 
z%CxnD6uE^jVf?R@=jatZ7rXh1?0js{r*;1omCd_ulI>cKbte^{J#eSYVvq?cYmO{E zZu`OR+($xX!Iz7&7y7-pov&Tq@#H7q@mfHP+@zrDRLA8X(mJJ~VivVthKY3(0R>Y} zGg{93rS}_#d=r&&a&6#XW110FnI$%w2s%d=!XU2tN=f0OvW}Bk@bXGhGG`T$8ricB zuZy8OFE?P#p9>V6erEikb*FlF?M=lGmZ+GR#mYL+IRz^p<;{gkn@JaK8*WfM9k(c= zcNJI%rHbh%f_Lt0l}%p%C!T!{pw9l>a?nM<8HOHG-ly4u?fA%}i(leaKLzH|Hu=pvo50sUZPPvH{fwMEHmfb7}ZxK5Fd;pew?Ih*1#N4DOjMzPI6h|-p^Ch1Z z@SC!XziOTNEn|={h}p|CcB~pt()qM8g?;@=Zfx5>+1#ocivTS~Kv# zIpA#Nq#&tsM!#{$N+>P*E&Gc>2p6l@rHxXEyV3%$uQpt{1&ff*W`9G>{DW(Zt{u1L zcHLa!ot-vq2fEERe!rP(Iu%7`)!-Rx#6fJwM|R2!m=?QuT`(}!{NQF^Hvw`B#0Fkl z=T9kQM*}4N1*7V_q17>%@ek$c>wyr=c)8V$>Dm;5#B%KdFVNriF$F4-V8-4}G2{GB z`hQ|%@Zgu6GgePsnIsGP4-Jy8atq|+@t$bE?S@@)N-g!ngtTe+HLLJzdGZg=6Q&NS z1~g~Vf6k<5k?MtYoM|gs9LSWitt~ry-I!VgkA32@h3uD)+DgF886>@BLF`nG&BL7x z#DPQ-F55+rNBes!bLs1qm+|tq6qL0-Y%V%K z&q(V;MRK2caUDqhADi$M7eTk?vgWkWdk%2v0Kd#oP{8#~=}~RQavX~;iF(#`bc&Ch z!W!dZiPm*n(0W3AgJ76kR!xIjdy}E+%f)Nt?4eX_{A2~{s-w#R=2rW$F7;;!2 z;h<$dkzJ}V+H5TI;5rv@7JH{YE6dYe!3_Yazd%aPTlF>qQC}+QWLp4Lfuil#z(n+e ze^X~{UQh_Xfi-&XS?PLj9q6CP?Fx9OaQz~DH#tbauF>D^k!l58`%9?Ek^#M2Tx1r(1 z0o}z}N*eGBMl4OBDe@c$TTTMxfS3LXllW|BZ{Q~15)^!!z-5ML|0CxD>J0Bg;4Znt z&Ibzz4g1;%`6Ujh?4j9Vr)_h$0$Q#F)`7QJ_9h9BN$LAODv9-gmOJqDial5IV&tCcT z{8{L;3un#C+_-0qEvLS0F;uOQ%6X87qG|JrpY^;?`&K6TtlFQ79)B_PCuKsqL%6GS ze2w{TBnM&5i%Y*ghneSgAb5v*WQ6#vhn4K`-u(q_VJb>HkQQM zI+GY(F=Q~G`R-i__(^;`Y;6HYJ9i)zNXyC5FfoO~(l%IJ1LtoyVE7V)TW8$_F{jBz zNXWlJ@AgM5s6284DG4}p-}=(dy3{5CTMM0+=j{w_p=N-RDG0`Rf*{#z5VQb$qsPEX zX3?#RgbDN@D0G0FTYFpE$Azt5Z@V6dN`1qqEmXiP2K7RZjxs)F z|0kU%eV}xX2JiWiNc1LN(;B11KUGevNzNjDK9-ge^HLGF`uHBL*@ACpp;P` zPuEblYLMemZGF0m~h6+!&zvi^I9Zp`!=++K@R`D01wCNhF6|l4B2>KXJW=pDy*FpSTTKD zdhMFL-Rt+r$_M_n@~w)LR|qakJ`c{ElYH0}asX^|3H@f}ynzVR5QMw{3dK52^q)x6IJmCe;mSrkYmr`H zbbp~jRpz2PEn%rXKPN@c^RI2bM8nS|oew&kNW-6`Tp*IzP3=clP1_PqRS7d`hc_DX z5TKL%cdI*UHzt*NFvXi@V}}{8{PEEi!?-oNxHgB+zm7S*wynHbpstfwqmYdRn<-$g z34w&25FmSq!*AHW&O|fqlN^pk|6tAv!k>4^8|xrf(;?4EHtsDg6X2q{BU~)*^sxUM z$GK%GxFLg*mEneb66_e^HHuB8P|%5*Sw4$%eY4^rmhcnH@q0-bU{?VhJzSvokpjy~ z0Q|7ZSIo<`TOufXf!Dn%?2HKk8MJON6G^z|EdVHkOPbl>7Agevbi@P=_nz-9fR35~ z<^1<xyQLV{%uoevS+Zuw=ruALD+bJ&b~k+vQgUu7KD_WivFIK;1Et zBVUc+h2EvxMIG{*5jQ^&$#7B~VYO*{|6pX|5!iH!*K4_Nl0I7qevtqcvxmQ)5*$fC zHX%Gc&>Ot(=Jt!xAapwxjb2=rHB;;;iouY#V7@=fD^^v-{5Z6$R-jxXC%MsoNg{)q zZ%PVE^w0E?sl&s^`d8+4k#u!_sOwcEAK~7~{PYHQ$BC+V`U427080DLGBVhp0|UmtaR5q!DF*m6WC6QyXJxpv z=s1*!xo2k}I3#2a)^Q11^rql=ur^&Ak47`WN0Wx*1_mYHb`Q90v+EUHL)$^QP)liZCAWPxSS-79+cYr!Pc6KMk7c318{QdvqX<~vPJFy zFWE18207+O9oQbt2WP&(tp*z z@3O}mMf>AOq_`28E_{B}Sr<04M?5=F$IougO&W+t-T^n$+p;7u6ap22H)Y}1K;+5+ zTEN^^Z~yy7l?A;lR0gvz<$$$ed4X1j&tB$|;RZqe8zsJOCAvmE|MdF-mY>orFWJWq z7Ch$KFyOsECjA(Qb!Xa*)7DS*9f-*MbQAgw5$r3w@BU?&M*f)Oh1ndajj>V@%y4Wd zO|T89s4MG7M$~nL6f`~^oZHLG=F+?vz8uakbQSi+UB;NqFr(mLT-&td%l+I?E{hH+ znv<2SW2J3fA)FbgyZWD}y|R?hmCkg^N1biqI%lyZ^>?=pjvSX~LriEQ*#cv=-9@dW ze%0`CV~OWjNf)AowPI?hFa1z6bUf!KRM`I@=;;hbH(lU@&=SB}Ukf-hK_d&l$``O< z$d&-Zs>FlzUR88!Q<`us5*#oA0YjSPV@zaKdK3*F0%C2iIM_}X=quJ1UZv3|6tf!( zsGUjXYxt9gG$ivru#=Ph`QJwsO*B(O(bj&x@bS9q?WP!W0-h{2raAv=*^936N(Ec^ z7$FLxS}ekkIR)Md=VM0-#a<6t3YlL_!^R>F9kP3N>TOeaXy~&u@4i#3s=Y*SpIwGY z=NDv&LB|fK${?fP4hk0t%49}HY+YvN6cKNq*SbVieAQsW4l73S;w3kywEs(Z)_xgmMx>aaLM!q^Td)*EJL=SARsXo05dPAWwq6i0U zBhL?JN89gJ(~E0U3KJ8km?h7qsy9sZXb&Ue8w%!mGv@*-==p?#VW*f+@|-D97!(!0 zk?xMH{ePa_|4O<4k9Z5Y)}kW<7=1wf<-1g=`~2yXWSAeP})V z1x=-``m|Z^X?o3_+?i9|S>KeD?b0&*IdsjXxpocn_BjP!R=vCueeY3<5CuLGase;P z2Z8Pqa4dv4HSyU*X-YKK%X^8c9SuS0oztW*0K+S zQ+Xs+eU~7t_SvHdqVP$DELBI{TiFE9!ihUBsjh$7i?g)P_=L;AM|Aeo{sCW{Kw$Ah z&pVe4ZBF-DosH;osGY_k;q(Vw+qo%93W^@MNV;PfRXK=p!nX(#Rj1f)j8F1LS3Kr1 
zb#I0{kdJ&rdl%=Ol&H>b3*kia*k_KYgO)4fbXDg)SU!(G5aqYmhnvO-(|Zz!x&Y!VTA*Tf48g?n|5JL*+#Jn+~&bigsA!NBSFIy6$IY@KzNr`iCu?2< z(FU{uW4}1TDw-xyCiG$kQI@~qc?ryC&2B*%h2|{cwmUS2gPNMxJ+5I~SijtE`+|!h zNbjOga`*vWh>O5{eOFaQ><2ou-vPbe50e)<527A@w(RJ9*4#B|EqM#uOT)2vLKNld z!WsO5eXnOU(5+lCSme?~o`- zkntQEFAsvH6+?y}|4-9j^kV-! zoAe8ltv%hwXFK1@NOf)T5Az@TIJ+!xI&(2y5yxf1Mx9F=HuuUXUn4HI%uptbHP^UD zAt4J#z_0uhHDj*JfQtK(h?89D_ofJ6y}|Fx&~noXDl0t&?zti#IMtzWhweJ z-nd*G{i^1vUPEDeekxgQH%W0aBVSzP>zOhInaqXef)IHeiT)ws1Et{0N&XK1eoQKD z-C8NGB%PpQ-iD~xSI+X&4x3y@y%&rBXyf(#svrd+cMdTTL}9nS$fD0mx#x`gh|M=U z&ZWxT43O+s%Cu^-)#Jsx;VIeP>`$D_!uL7($;?;EhNFx3Jv;@2Mou2}hIC1q6H?|9ceUFeq6)y2#B_tjEGFBV;1wjIx@x*S(siC7(LD}tN|==AL{QSXhbS}c8*@$Suzv(;F& z5f70>Y?WTJ->zJoTchz|q?0|T^bHB)5Dp$k#Q?s_|B1o#`F+`XeCruq`Qs&FBoj}} zH~B9zEnZW2tr8%aA-T(E&*;Hao>nzABRLhiLN%p_13lzibG}TgR(7H-O{=vS>*H4+ z46ZAwYLOOCgQu5@o*t6@mg=pJe^j?Z9CXL-*ZiMtXRkh%tQv^EsOuFb(29wt&$wU0!du3=DPWW7ckFC}69*Cn&-IN%WZoi+<`>jpZK9F)R z)7n+$V+G{MAj6pA4zVUAIM{sVvdx3x5g9=8KG5zEKo<-nB2yQ`oqyO6I!PjGxYsglA?9)``o$50ue1NmN+ITlKo$!-oMR`4Am+HAL)P#9655KgYJ(-3$wLWMJTpP!%2eo z&AX3^DzNf1Qm+uk-8Y);T)xMb&{p+`P@<8KiZuFv4_sjBo5YmaG>=LA*9xy6+!6RJ z)g_^^{bY&bM~O`t=lwt*8giKv*(|{*b65UPLs$IC&yU&Q&h5F5C~ISI#?(NGC4u(| z1<=?n4^{?Nn`jWg3hA8i9r7SAA^zIV5rZ_a(N? zESF(7o)<&b_4PAtH;dt`GIBv&`NPvwXLouT7lM-9Gne)ZQ+-H2O~{~=4A0fd^rsrN z-Zyc=5uOYvwNy--t%i!i9i-4e+rpFWwey0^c9`bLE+^jF*t1Ctwx*;qA&YX-w>#Dh zdhDAo7&T|R>Iml>9Swh9Ao$lOxoGcVSNo{dmvQ!L-Y+aMTt2O5}Q zGE#ko&}6A}?ky#MnVBGk;+JJXA_+0gRkj@pof&kZ2E;d@I^bk zWbm^bQ^&sEXsaeXOFM15&|yWFYR-Kc|H(~5y2YqF{^|uEQ8t+eN^GRV@|o?xD!o|u z#O=+H7d0fTkD%0X0`8-GVO{|Flw)N0EGxB$@eTCKC0Gw`@$%J{W(~auqA7Uh+$H*n zSGwgz`TzAz{IAH%qM#lf`@t^5gl=Qp!e4aQUC*s?AFf9oxhnsoGyW!Wq@)!)rg8b;oB1#e*0gYW5Q)95cyyDQo3U z5qVo|ObiFe2hMn`vg)#Fe54!b*vVnYAteuLWT(nyEXqEQik%#t?b3nTWF%<4E%Rpc2{NTR*ZL*R*==wdm@_T3E*{YU{@)BadqTpIk z;Jllqr1whIOxQRwq$ghQVMCte6+$D677A=CQknnRTV_2Nzk-v%GTQp|g|pR8s!!DY zb%XS-eIC=dsoI)%Yq)O5?HV?Xu0tn~C}&#%VM_a%vB zG(w#xlJ_i$*~QN8rvRE6e^`r`f#!ALbGD^)<{%^P6yLPWD@)YK2=!klJux8)N6P&g z)5`#5&DpdhBHQA11D?sV`tw@+85X6 zb!q-*vjygLJ?HTaN_T3Qe6;fXrFa(IZtn&5U5WA&jVg9 z!N0#y|1C83p-&(!m*shfeIg#~@`uG74%j-~@rP%sZ?fC!Fh`#4Tir$S;`()vR)g$-M_2i+cqEM8dtjY*7sGVuf@mv3TAx-(`;`HU0*D0^%0{~U_1^?5=0}RQ2dJWD<16GiV)p|!>@pjUx%7}#iQ_22=4&Tut z)*BL|-W?z$^9BZTgD}`ZCTn2EbC^z=igZm&lG!IJ~2Aeat4{^eX8yC-Kh@ z2UbJpX!M=#XeZStctXgN`BNYPP5`FC*2}huu0QgAyeDQkxB6VChWWqSHNBdA>f-9m zc~6M~y>XYzn^v}TqXVCces^Tvs&}`-8}5~nPrcO$ZJU(!$Wq@uW7L#1(d8DF#K{mV3ZsENB1HQ)TnFYX#o8DY3S|xAt>-J!jRCb0?txM90 za7TnfIhwtC)x7xvZ!kfrptSRd% z{og!-ySxzw&UBiWE!S1T)AC;s_tIk--3KlmiobYYV~@onObmuBLv?XIp0Ng%k+khJ&O~Nn%ewfn@OFN)7P4Yms_J>k>UmTQL-(B~)t$(NV)leR zGWsp8Qj{**&-{Tluy>Zkr=gkfDxXu9`YwKDp{Ia_IdnPuC^*=R{(io(K6)BCAU&~nNC>7S8|`W==5+XjDAbPf+q&>yMb4s(6i*)_NF1%_07Snh`b>#eWqOlInT$YKKTpv9=FGS-x zzJ8kXOS1-#Ow_BA!kimgD~A)OF?>vT46E;c4}iss#sbS@|X-$FFTdpMdi<{7W2X^ z(`o^CA*wdRoLhcE1mI#w=JQ!vi(K7D@c&WwmQhu;(Z48ygtQ7GEe5G5AsrT=A_&qA z(%qd32qGd-|b&4YISw&T+j5EsU}jDj;6u2bw{GVdy_yOMI~bMk>>~W1>2?$Z3{+SR=r6EQ1 z`CfdmcNTL(0ArQwO!2y)B9O+MMClW?;yJ5^sreezOJsW zZyzzkEE+-cfH)g6psc1w4I=u<>FFn`s)0WYE;2K}093RCrX`Z*EZ44u0KN+Ua^KZe z5a`c!-@kJKL*(KmTIbTv@$sj?RgsXC#Dhs2h!5O zjwLP|3WZl@Weo%b@O50=qx<(8skf_gb0fio?<-`rK-mR^fC=CQ(1IrSF8ta#z(QJA zz{nCB5!~rh|GM!?2o`G4?Kt8=h+KO2=)W~nASoYL`{PCkaQ{> z{vQ-BtNcO>O48gyN?TnOF3z&os9aCQb4%4z8;sQH-DU)YH_0iIS#o9QwD3tT6)I4j zn(!m{LVC}xAJh4(3pF|?So10On$}TkIp?ohG>z2F`nA_-jBB7?&0H=*w?NnR7EV|I zsD3HX$>_G52;pA?6IL*0w2`#*1QSNE;o*QJ@KHP5w@%n5BbA~62O5G^MjB<3YL+r& zzblFD&0YdBAx=-bSphPagsA!(294ov@y$EuE(;?xy)UDArq+ z*L5o6U{hC6Rc-tk%d-o1DkGyr>W5r_hL`zY0I`gww7ioSxyBm*F0 
z5IKmwwA`M84}-HYVE9|Cl)S&AFrxA2!ZmSl;;=X%&&8=)zUX>&ql8i^W2Up9KlA>& z7~{OaiW%SaJ*@n>{}93Wrq;(%>Rq+%9Sg` zbOPaXooV>Qlhw2X2-h6qtO6izB~e)z4CI2(b3KPzm;jkS1+BM8%TqpsenS!rllsBF z)^}hR2ti=Yi_C9PJmEOYH^BtuIV?18*R{#lTQdf*JIlg1??UH*jc11i&(}19{6UzN z;|mEEH+VPd8@gCvLEO!TBe;U+XI;6=d<<6fv|Yzw5Yw6Vm=TP!4t+k5xKv3Ys)xOe zxx32B%D$~tRzpG%_|^o}mj(Fb!YvIj`K9K!q65~9sFKpNnZxX6gneRX$Bjr+WcO&$ z7JOod8w$t|LEw8|5+9@i)B?2^ejE$p)_=(G57mK_bs!2wBIK=aJ`e5FA4@re)0y^a z0)s#}k5moi<6BkYNoJcK<HXu*yhA z{d`fYOv+G=!w?WMA7r)&Bc-gpsr=R~K>qtFGqv&)yA#vwS+fG0zQs?bdP}G+;Z@yg z)$KSvJn%@-IXJ#5eyc94o_TF&75S)}gFf=q3qTjZ2D&c?w za*|bGdZpdQAOIPRvbzX>gjzRP6}^s)ecA11F}x1me4JGWb^!Lt0Dvaj--@gufEdjD z^^NjkA$y1X%$Yk69-IP*)kXbhq6%sIRU^AaXNUtF@*;M4=vfq;}WwY2mdutb1-;4v`v*vm@{7+b=C z&Q#_C`QQvKtu(wPWYl|QE5Nn`0xn#H#?8sO6g6`UysFq6df46F-Ck38zRy6(1?j*L zBBBcs9$p`2;f0zhRWxfr6!%!rtr_EF4;~vkA-WzbG+KQ$K)<=`m6zmF)=g^L(>zy9Eut%?iptI_f~lDZA=u z@7sH1&$d~JUmQ87%(r|@`xNoSKzDQaVijO&H>`VzjVLjrkViHK0RdLg7f`(04D-g{@^5*rv9Lii`VtTjBn&Ub}`QL~DW(V`VAa^G9!Qd}xgj zlR5+joCsLadp#S3koyL}?pd#2HwLq!OpT&Y=m?+@XavI6BvmkE`31tmQ8L_%djn)> zfGP$hugr)a$3mIr=H>=b4-R~^MRZYE+0vTMgbnQEK-o%ZwI9H2sqb3@MUu~I5NRr) zz0?P#48lYL(pGL>o*Ed-!p*z^eTS9YF(aI)MrIp9v6c%GD~LZvUKuQaSXfvBpdjwc zl*Goy{t$&a-DJNEF@ee#_`!P|tYwA4I~QU^$lyq%zr8dR6&1Ats+QeNZzutqgw0S_ zCa40Z_o;4G|(Korp`g=l&L#>zY9o{}tdt2+TF|`#?w?;+9>7nB>2O z%B!HLSO-ZW;`uBtnf2{m zl6yr3>5UPQP=?Cwcy&qT-zmrjv8e z+syLo-s)qq=1?Y~*iEmPevC;iGKl%*2F7+%u-Miufj0;Pke|R9Gaad{&>4aSJnT9{ zuXFg~k^Oe#rLY{hQzKi*0|bv6GOLIA_)11b=Ug$^`QLx=Ko26tD%U_4ywam#-Q6z< z9Voa(2Ut&rNv4c}wd5qk#_7R*o^?3#%>f6`ST21$c=IdF%=IcqUY?g+yXi@G?e+u6@O&o z8!dH1J}&Y-z>5Q(h>i}U%jh#9^oonfAZ(;wDW`!C(F_bpYB7)E5mBVkfsY6)s2kf`BX>o}HjJz^fTvOb^IZkBWJZJ?Dg)8&qP?`7$! z?a-I;9v_HuVa~prD)lFyq57>`uxLczDyg7LYj|KP!{&265GRmVBDr_g)`c#rui{y~ z^L~~2>dp98BkS_j7b6Sn-Vo+BXxl)~B03Fgc>-2$mNTuOprFS}4Sppi#XKA09A7g4 z^==muJ3tz9vFqUK{CX*3?E46! 
z?uuZ8hc}a~E}fe@(zClOd4+};4-Q82T8yKB78^o=V9BzF7MmN`s#U;}obFZw7p}wW zka@FW`8Yu&HVr}oq7pk^g@w()v$BC}gaz{iv-r&!q_os=LV*NjdtyF`4@9P{z_QpW z1SLZ~%*iQomVhGJ2k|Y|WuQ~@0KfwHPeQ*&5Hch*)e2iFpKpHuXQPsj#TOSPK_B7F zpIY85aGT|SIp3VRI#^x=>A6JUtiR&^BBFAhHi+z_J5I^z_*t~r^J!)Mg4_j_Wg*)) z-zAFUIVnOIqDJ-KMCtMdkrs7I7=CpwZPDWJ{*XG~*FjGy(QUCiht(|Je|I|iPTWnt zCZfW~_ebLJACLwalXlGA^pnItJ=Al@=It}Scp;vR&@yV?G1+9aqe&$Z)txwJIKw34 zenvg~l(Lq&lPi>^8n4_$KXtWLGq`!;;z)Fu)F$~yqHpOp$JdH&Ts2)3$s$tk!^F0V zoiPA?+s#ync8xE8$1!flk?*8G`1X4MRkzae^^dy6{=5apMNlw>)##?H48i4J_A>Ve zJuX#ByK1&Rg;Pt_`Ax)VQcvN>=}1j$=_4FCX!k%zi8_y@Lm_!^_9eHln$7l5S9d& z=Wrxe@4hPkwb(rKU@k~jvx~Ewx-{!)w$rYsbwZFY~WO_0$IXf#>tEa^{GZHMP&oR6t^w|3AnnR*x&^gPRDkQ9rUnSLzm z=e_S51i7=Ooh3xI-rm=7tSjTwcbsIrFrdgvTxvF zc_tafuAH|0`kqyH%TnY}LFi{YB^U~!Ulr=N(YUZru?f9!6v>!w_tI-5@;fhkC|AV> z;jQ)7A%##eUo*;r*1x4NP$z6L7umFL$-iI&C8zeRUOG2}OpsOA?x8_}+DyOQmMrD4$KR`WV|U z($3zdU3Fmxr8BDc*8>HUpuf*2#vfshTtO7TkK6b8R@kaVy49~PbLO6st)f#gocE#Dqq5RnY(by$V7rkjrEVFuFWWc?Rsr3@$ujLpJTJy<>8Y91YPO@$ zn5`rpzYi!HO;X|3cMlcA9?DGNnXwniDV3j3yGByf`3&izi0>7VFkfXRsF0V|v~&Ja zY|b4MS|x!k$aL>D;0JdI7&AJ9YB5;2aaJr?l38C{Bdmo&NJFH2r8Y-51h`B{DEbHq zZu%uCrA(=-Lpb(`6JX`E4jxcZWYu8A)^dFkbWf|)BfhZzs2j52@j)BdOkE(aBK(YZ z17-W$I49}pH4Jyf&;5Gf;vxXh)+H8}nU9;m$bTS1Qr@=x5{z_bd$UK_ z7XGyX;n^$SotFy#Dl%%Nfo3Y-lV-X;>_*A={+e!l-{h;3jY89V17@EDv?Mk zao7kqubR-MZ;3>`^PtGW4UxQ^(Mdkk^IS5+C%LWAoGkJ>;~kTqu@xhfcLIN_2~W^b zUoqYejkI@*OHX$UjTC+;PeH7XbMp%gofW=YNCJ&BiQyLa#YZhwv&;SSbp<~`t3c!6 z;W32kYtG-t`j+n%So5?}O?bsgIuRx&a+!PWfHt}jd3WI0s&h9kQ?S9=Cn;crKx z-m#b$G0pE+u>Cot*i?Rt$F}|fm9r(L&#Jj4b+ofX3rR#QC&T!)G49U7hSa3jmgp1T$RE?US9%JAjkz&YgQZ>I#EK5+-vN`0t>wv$itV#sXpXuhyKeK(eg!-rWJAzm6>O3Ij8-v-CQ zN4JA$MU+Or(z^;$lrOZJwwn~Vh`gEI$eI<(5vK}aczGpvjFA8yK%Vxs$3Ojxeb|ET z1b)~(t`>|yV~i}1)E$CC&q$qO5P9psd{HK1^Bp!9pFs3tkVVs?VLo?J@;C6QNTw$? z75+%==Dj7ndcs3X;a=A#6ly-oQI;4N)UGv<=hcf>V4oZqIFLgB*kT58_vzj6VgYb5 z%&l9vv$y5xXW=9z3V%q+c!PXFRrm@nRIvTjfFm;-=*X3jI2n-a20><00@L03ZO>tf zL-AFwNAW@kVK((PCJu4U)fJ|sZ0LEVnQL%LXejpZ)M;AUg(A;XaTd9%#5bvJQ6qm5 z)aBE1=>SiQGU^^qXUL8huYjA@5&~8$^fqMVoygk+J4-Gx*eDgOS zmE{t}s}~HeqXPSMg|-!b^hcAW!^6u;gG<2^^t9U0ImLuhF)YaX3R!o^{O3DL6=U%z z?56hb(;_}ZZBK8^QM)zBIFw9wL>Kci`5`o+6Z=hD5%7QpJ+uvRepfo1H0zfA;9N3U zO(u`NeLsxN<a)2*_kC&TKK9Eif)5?cKc~A~I+#EZ1nyy%{TXLukPJ9WJz#;E_So zCF>d2C;Rm2)7SZD8)GZ2ScU^i9hQgVmxjs^$__L#V8#nM2VSGldKyEOtaN+x=FP#9 zbCo`r5N;AGy-Pl384d%`nYZ6lTr8>#5*UBEXqB9WN`O4Mh6uX7aGmeP^xp7a%7yCp z0-vtExOR*e)@-e!6!Pk-GqI>uJWlk$G8z%4)Z7i26_~ba@*UX0@XQs6coEcCtN`UYN$=0FLvpVR3P_Tf4)kj>Igb z@?A#!EtwWLAfTo)COSkMJwf(JLVN*w4aO2JYaD#{r93&{5*!?ZZdj zvcO@MElJQ+FnziFr-Z%^7lk_S<%-AWxk%OMD#D(46n%G1N5$^&Rddb)&ULA$r(Wr< z;9Tz^og`LI4YAt2MS8)qg&oDwWLu^wU@>+6kx=mMv0^%uwL=`r&87_Y6C!!hZl8HA zW?B11oarFR+r|Vnbi;Tlf&^(pLFlP(%G#0x@06}NKf4o1^bSTLnxmwObh^$ztd>~? 
z_ao%(QcZXt)_RGmZk)U7M#RpJTZoP2_46o#MnxuNdsmYOPS#bJHykQ_vpaiT)?hY2 zvS$PbPRmH2r;t(Vg+_ACQi|kbcsz%|LMW1FsTL*;uAsv@Y=RTHr}`f)koA}Sb~+4* zzH6*Eh5*chb@_RFmBiDNwzGfbuWh3k%OLB;_pm!hvQ^_Z-Cq?S#3~K$_Rx?{QN50c zsDTrYIlcCT!+QNM_x{%!b1Do5ZELf$2151u9v4*P6&r^)&C8EDuU12+gK`^Wz(Gmj zDoJcAj@eQrmu9T^+e%f+VM*L>$4enI3mK-6wYhgvGLO1@;zM%V{!g=!7dI6tC=2@> z37I%GfxKe-Y@cgi3X7eNQea3nAEYp;1LiNgrOfl3^EG2Dh|AEjZ}Nhs1empjgr zY2Wp}cRlz)sA5b@0dd?W(L(3?0jcYBtT8Jq>xyNsz|hZqw)hQ^<6Es3U0j}zxL>v( zEqE01hDm+b#(Yzo_=Z^GBrri&(vk3w-d#X4!GNMe?S0_*MZB3lZ=c^R=cuete`zJX+Vm$Bh2RY97^@2{aX{o6JeZiGczwOsdph3 zg0HPLl$4ksCk$Rb(ejdJt@Lo}x_1qzE(;sN7)MMNoZ^ca`c|2yJ|o-cCJIKVzjm|6 zocax8+F|zU;|KLO!65Xdq$SQMA~Z*P!RG!sGcz+~ ziwj2l(||)+Z5;95QyBaUal3#ZGbj4lDleLcyk@EDlU>_jR4tyqRb7jumss#w`>0@S zc&ycg_Sdq!9G`~>rH09--tFo?+o*OinZSI=cidO`!bv%sGL&ui%cbY2$YD^a^-8sm`^9fcI{z{y6Ws{6{;KDLOdOC7 zLuihR(PQNVeBG`tpuK-w#sFf6zvYdtS$rua(sDlN>GLJ(m(6r$eE0D0`&ftjbIK>=*l+6B?E>eX^I=j`DLQ$xYmH|T4&C@~h7Me+ zhE%t2$mawe0F30Sr)^dTa(TBXcku1by>|A~+-J4V(`@|=eWkXMAcweggZeOONDnOq zdk|h2`q}*F+e%6!yl;jhY?DkLnUKhv5Hls2$cc_Rxru*V(DH8eO(~^N!j-;vSv{WA zURB@+PC(&kH`vZx^HOCYIYQf+#dP3($ba=SjvJ)^Dq+o~hM8hQ^rWcBW z0P27IK)i>FB)%;!&_+NK?jwOSj!E=r{%YKnctkUcy`v z(DYGgL(*@aAqBZK5~2tR@RF6sNA5Z$Kudst`wZvf8sWWF_|qjgNLqVaS#gGmZk-2n zBOa^mul&l;HyVK}ykmk&M)w;0=4Z}1S)5)5IZ|7m@nb|H-vQb13^9p`zmPizCSvfm z#=r$w9Ihy9b%JEYI+&);sBFQQ71{G(JjnxxMRWQIr?UPEcYdVC!eNt%iz`A!+tnY1 zs!lP!NkamJ;4eBp72wtR8=8dK2o=_V1>X_m+IIjX4WOi-Y}zF;vC7sX{!gi;e;)1w zi4<4CwTSEfag9p8P*%8f;?c_o%U@WEGE;k)z|*E2;sh*4D&wJx#gmZF5!5W-(-)%l zKU%K21LRfA#b2SfzLe*@b9}{pUn|Z(>nAA+Kda_>1v^>T{PZl{n;^Tz!>HuhcIp%^ z^cb&8N6$&7LS-Pd5IVkalcN+UYGav#ertdwxb_7^WG*#zfgwIm2^#nhuKh+(eg zp!j5B>xfIm?E0rJ?Vx0NSzCugMFI94`q+hohDFAblIE5WyJ7oo-~&E1Gh=_xZ;e-8 zGx-0|)dNoF*#PuS1pMG~TI_QxI88xO2R96vh9M^-EY6r$;B|uV8K3+5LR1Jnq|x_T z@WFK#1p8QDN#PR+bp!C&x5#t(j%IoL7ohnru;5+c3s@Q?5aRsJg!hTIwk(j!s@qbT z5YjN%RosW+YF2MnPEI%!Z1o11ju!Pae!K<7D=stf&d3**3H&$=)lTB$D<~;R7#T4u zXB7023a@VqIm72TH5(zA4iF$w*e!q>90e8tRc!nW2Dk7g@*|=Ij3OHaHCM_e46d6BUj* zq1$JJlZ4t>@ib-2oaY}(geg;n4x313^twp>qCYjRNH$BNF2YG!)ag5KJG!d)fmN3{ z^p<>*Oq#)f!$Yfnn4oL6!32HgQ_6|Dm-967wjbCs*z_@D1y~fy{0B82cPG(n4~V4= zHfy?rEyN@5D;T^kbmk-a|Mn8NY4VZo7J#dWDjw0Q@Ag(5O;HsCuk$O6GZYe10OCx` z}&29scd(%OqjKY@dn zWANk5ox67zhU(?h=Dp?Q$N}uq2jqEE%oz4P1jvMR?q?G z2e)`{=#OF7c?S2mpl2f-Y^o?IDdRU>B;-wczYMXVU13Kpo=tv*II=)c$od<15e696 zW4}mgAUbtQaB9yBqBSqW8Zl0AtzX_Rf=H3k`}N3)12O)95re{36&vs0IKL(q`?_w+ zd%65x74RIG(AH=v#Cc}Sh$M@<0L(B4bxx+cv6 zFhBGdH6D}_vwcGmJsF?7R%}Y*C=@@kVH;ADRjL+Q;)gC98cv@P?Y$=@1xy|qBoG~( znl^oE!jv-%0aE=LFB}7PMd1bAGg!UrAc4FA(3Q5wszfwI=TIe`>;Oke7!1||Vo?m! 
zn1yaxXfFD|O9>(KxPs9KV6MI+8y+m&G$9Dn4$^+@Aoa8zVD1;1=PpSG>%z@_lqePt z)qq`B!*U>h47wR3kYAV-47^TQhSQ$$V6iR23Cq;3cz1Ac@VW<#I!EEz8GXr;Z~+cC zw>|M^`}0^d3N3znQy{8dr!rApT5ysTM-(KtZ1SP=I7KyR?Qg!5p_%QvP2Dq3TR1=We z-U6Jd1x>22|FV79oOf0*`AbPumr!A?5q7+U#4G zq?1e>X|9e*K03F&5y>Rqu1KLS|NLi{>bg7y6+;n8yqI4&>sg_c=M{~@c_Ezluhd!8 zDNol;<$Pe}RHWFbw*i>UbyV)B-q*PDr^20n;21$N=+VVP>8VMzaAc1jZuJ zY`%pW9w&=`%e@!N9;kw+L<8 zjiUVV;|Dxs1R>KK(T`ew{=5uS)5S##IBUSYR=s)-ip~8;k3ItKKkFa_fy=VgJ3te8 z1(c5c!kFtit>EzNiH1*yOu7(e3T##m4!mF-P0T1ZXkAgsqnFpOTIO^OKx2q7SJ2W5 z`TmqiNa)?O8iRiX}`r>w3v-do{Sy?J9&zo&`4T&jiu;KVGxodsxfMPfT`yGNrqkwZ%ck zPy5zZi974A24^@&OUBb)>yoG`SL3gCf|*e>*58y9a9t9qaRaEi0;mP{?z3~22oK?r z8Dox*?hYT_RXi6np2;SW(tQTab*e%h=mN>im2EntGyvE7U{*51b zxZFoAse7=z$#&G|c&G1K?&X4y0S_4ksXYo#!!n z^t#Yi5$@1LLzs1!Ju@?Nf4LLok6kkV5Z}FL($*#0`OrQLEh{nRl=s zsjMB`3dhmg_v(wj>2`E%qs$SGO&lfE_;Ku%-r08A196!tr?Ro)Wb)e2QYZeAxCP?b z{eiB;5x##tQ`O&6er1mML8wvxM6H~V&MD93lZRi)v%qZ{>3$s(atEgkmcrZ{uVt)q zgyo)pE0)T^#Z;eK`c9}M&T1>|T=>@lg0N;^RO#2~j|yrz0Qv~aq}6<^J!5*$dPU5a zJ3yjMf9L-A>$3fpQHoBc2&WY@B>~-APVvj3nJ_SgpD! zw7K{wa-YVbiTk~8OiXHo4wTLJwn{3=*IH;BKQp|6Uh08HOu*3eJtZX+2dP{_)v1_T zVVn@;x3}my z2Yt;ss@?h75atcZv!YU({ZTPj{m4l!{#W_LOIbuz=uKKyskhtek?K_%Yl+`vuH-(s zTU0W?C9ZelJa*j2!VY%p3o3GBjE$fjN4mdsBhtwrGPVAKy&IGg!)&~m!aGgtPrjlF zL9koCP><{H)*I4kQgC^T{#Ng^yEjh8K6bNu{r%4N2bCPh{-;;v+q)FiYz65QV`6sa zi_KB;TBHtcqS5IlSfylT*a|tHOgU8vm}GtbtH*7pXtIO8Wf5HwKSxxwx>6f0N)~wRa=28R|;GqWRZtQSU`&7;oxS$%Y zJrOj7)8wKxV+}naucVh`!}KnFy?0Xk()IAVS9=i^oPAQhN?TGA2 z=&$FNZ!1;wnh|&Cz2pT+ELLEkxpemFl$U1cFEto88WI3unu8vTU1q)PHy>nYW}#GU zsa!4W`H6`&Fe#i|j$%(!5M0|F4zho+VUq zPWt0%{qXJ8Ji)8a8EU1@Bo5L`$jg>t0{K=iYj-97Y@sCxG4(&Zb|6a}UN1xdZj_V&Yiq+N<^?aSs#W+r20Z!rar>42htYA~j^R^+0 zz4!f;k4@s?=4Y**?#+Jji`_KtI&|A)DtkUuHTCpb7R*xqT1_cuu;3_VZt+dd*xjb>%t`0$YiWbinMxAby*w~G1UrKVs;a8K zvO03|wawCSy*zlNPF(&OiSdGC<|a7MO5J^7Y=K%EYX@CDHWB|}Y-gE!s zk~&OT`4(8;c4X5t!g)O07B6*N(Vg7{NsF#vEI0?$bl=B`D%!P!pG>3rVS6Y%eOsiX z4uM`J_DV%a7uioJegA72MQ4YysfJzo7N|Uo|!=8a;Dz`S;`|ci@*LQD*&X>+!!@nv$I6U39pAIir z-vIoe)9hkV`MVP7PP345b1Kv6h;}Uk)8t#Nn62=FtUym%A{)U$mq90X!Xo`^!@ejt ztTx(w)r&Ic9}Tk!TpAzfFt0O79w;=A6r+DEPq7zV`Ql)EZn56(ZzE4b664ab~G*`6d^57n|f@$6aGb>7tiY65N4z|Ei`5s^=WmTG7_)8w!u*)V1e|ex;y94(85z zdF*mVJ@bB^v%_O`9n6s7iB{}?FBWbuENWoEW^kN}J;+JM8SrN9K&G!L&%<#;lAb9- zazoO3lrm;8o}pIq=}UpU2%nH26ho*_Wf%O&HxbwGe@gwGI9)`I?Y_gb^$|l-EY3Gr zXUQKHz%k<^>s^mAq*7U(51T8%E|JZ;`iQ>U|H9pg;FDnbd3Vn6V^+(j`uRFA8k8@l zH(zB?>=q`J%jnd7c|(tIOH3^EpDHiE2k%S8{y&y*Ae_Ir5E&__7L84neD+v7E$8&t zn2}4DKUk;a$EqoQ&k#?Q$Jp$@akirW)+Ha$HAl-5&haA)dglPPUqv5KR+(4k6wg`I zVhVARz+|H~7+6uPgm3?TFy&iq)MWve`R|QC3B!q>aOeORUHl?6eNL-i^T6u3K3TX; zOKlSW&4*>w{>s`K(b^p0(xD_{H4!0a8$mp}sPL;k`PL{8NS8O5*DsYFtsvCdw;PlTj_Zz+} zq9m(_G+CHKDeRJWXR%AZSOP~x-q{HD(lH}%vlFozuU3jhL1`Q{^XX|3<<307qIWLJ z%)&WK{n|@U{5Ho?a^l<5JZ2i;7-jk2-<4C<)YKHT?IcP{=9pKn2*&+SRTS%glQH3Z zPn?<~Y3{6Z`^tJW=2QI~W20jj(bQuV2ceW(vYY9Xi=0fBS`49PRR4Om^4&<7PBRA^ zEngZho?O~E&v!=p-lS;}sCC9C|Jyuj=C~CQO$@hJxOmU;>BK(BHry-t@w4~?S#-_z z-SHbHbvL`Wrd7C?(;OCM!*pE3c5WQ;CZENNpU(fK<&YDpm0|z+>X-U|JzD5!$rN%r ztIMo)WmcN^G&S5p&dVsGos1o1;8B+Sue(K^zmyV_C6~o6|F2-_(Y!ZhP)%&n)m!B^ zK2TxGHhr6q$*yM66!hnpKOEjEak|i(VR>HRMWWXQ<7)+Z!c#YJ)hN?ZwB`?l-r%Bb z$}m!9Ko;9P89>`^0>$R^#(A;GoyjHEkn;k-)lUCoC;h8=3S56zJ2nc^9u~xo2+#Kf zDo7Wj5ngrHr$>)@xFSvb6qga_<52(MqFcQSx9s;W^+V6C{w(qdXMb|dD$UV|q8Lt~^ zp}@sW7Jd2G${sI=(b$N`IthdXvBJ!N3u6$j_0%7oTrp7_m-pepx*Zc3I5a$d7!g>$@`~>y-ILG_Lu18BZ!kC5XT<%!n zmTKcvu=upL&3g9RrpNJnf2MEL8f1F@ovlr?}3Dlnu3l7QF`1!@-ok zS}4XjbbR}+5FN+;^Vo;W*mrGSZWR$A3is5LO>M68)% zkdEDBN@X}brQyyqmg_Ve-lY5N#P*$l=J%O8@5!Rpwt^0!%uBz%SYm8*h%H#ue0RF% 
z;?}2gqjtsVUj%N!#n%kg|Bx~~tHm#C-I2`y?mh!=)W=Gkw)x5SZ1~kJC;9XK{q)7< zg9kAKz}PSerM9W+PoG*{C|zy5sYSp|=Snenej|sj~yF?{*(*YOO+E?LbMrJ{&`f#ZfB{0D zZ)(0S5TzXSujjErg{ARn^%*_bgZ_1{`td~yHiL=(8pGJiQj2?ufEH?<_DV0)z|TSS z-b$JD00U7K1<6M7X}4f}%T6&utr+>A`x)F|kfg6_N9?TK^P%-t>y$&Dw*nKj2BEER zysC7^=c|1$(3acXTyGszFNOdR@=slq5t0rh2a^!Ui6N)Dpro{_npJ5DbEPVhq;5X9 zGMZNsN2(NeG;I(ya%(GI62LqLhK2IT!v&Pinge2^>*Qea`d0bjz<`KZCh6bDqz)A+ zq$T0i<6ijB7OkoR#~+C`v_>FNc!Sy0$_90%rIM0uiDso`hHUTUF}106F{Rk3IJ=R`ygXyQAG9-L zU4T;(F{Ez4a0P=crq3p3V2JdvR0BC^LbhMhn_1NON$sp}f440rj(?qqa*d!pi<^KV zI`iod!0`ERZ+R56`$ z(>lLC$1jmDFYd?%GDsIY4;BBt*Cvb!vdrM|ufg!6a|P z+0c!NG0mWxow4&$^!xpyO`^#-Cw_PS>DL8W1Vwo-g1%u?Uu`F$7b1bYwGmNyf z^aQ3iVWvh~NiJGzUpM~L-U#l?TAIzf^(=WJSw4?@yOx@jMD=&Is`Q`5HKf?-8gtDp ztlKK0|DMKqW$wFOi@txU{sR5qMR}W9P5mA$!e&^6aWV7n)GZ)oZv(V1yTT5nZH|is z>dQ|XOC?nmWF1R#mWR;iwf?RVb*}S(bBaJV;%^@DHc0a-s%TsMax75iHeCi3u0YOO*-%wV&=}ZovFGGN{m0i z0yzbRyI|h{&USi$C7STQ>lCv#G`s@fIx_$Uxw+hnPQuKuUnV78fXJ}NfI0&bVLW); z)btRnDZUR^xP#dm`?|WgdlQv@UY-} zNNZK}9y||otw+QF8OcFoz+E8x?c2tr+r9vk7!OJUCJs0lf-eii%L14^2f2M7jek5a zf%brwOh7i?82FNWGs?rweZcxX3Jf(N!ur9-LA#exL3c}GgT2I>g^?+ z{IYPiq4?Ff9EE>VB#qOt)7x)2Z~oQ9vj@TQdNyiS{kuy#gMCM1o z5-@1(N)JI0l%xS40kGS()doiWBD6U@6*ct_V4}Fre@?p*`)G6NSe`V%7g0NG54(rK zQ4lvXg_RJ*Hxr1ce%GP6z6UG28O#OJdn%1D&g73F@59m(d3qm_s4}Q}K zLjp%u2L~bN0D-4Agnzhi)IRYg)J{Y#xmyUfo8CP%@t)(`%krQqV>xUCr?C}$qz_Ql z`s0W;o|%GThsr*Q#D;+}0hkdl_C=_qj(jf;bf8f2*{0RQ=#C~{xcI?T*!?-9^Ym&W ziC{rfb`@)=E!Fplbf<_)rbKW+Dqeg8U{1aA(y>^-NzYl>l{0$D|2l@743d3?6N7;J z&5?%tP>bZAJoyInQE;wzs|I_A4?|Qs9=8+4JXI?_c1;Vwo4p#C5XFEG?84mjx6{Hq zB!hQjY3MybF#PH9z{d#%3NJVK-V1?+gLWqMf&E5KC8-p0cLA=Mb<(Tybc{SutjGY7&mE*q1KB;5#{1QCS(0g@M%f zu!&LvcsHbg$AcJrlISV;6M#aH4ta`bx>}+3Py_6h6~4IR1VsvD>fpgLa`gY#u7OwP z|IMyp|9VaZ7;PZPKAfBEI_8+c3gIqf1+1!1K{dR5`7${j-6KcGn=s7)#C9jYHu2!V z!>a1g$0yV#uxc?Igr|95#{Q z;kGIs%)bYL6GvI0atR2; zK@Y6o8l_=UaBSbmk>!~qNW+0e858{vf`t20Z#nAU!Q&_L@Ww`egTU_AN*G+&YfEWx#KRbK0>$wuHKhz z#}lBM!_DXpN@hKPr;3?_1Dtb+5y%=o8XVNu-#;NoOp^Yuv{rcczi+M3VhB(9RgJ=6 z!{+GSiXI@IZN0(Y6YAg2__M<;1ukuRPuJEZsZfLF`pt6tIQ>SgrT%*?1^s;%>t+6^DXSIaa7IpNbXjQlj)@|HX!iM*boVqw ze)dNR_&pEh^#vyA8sa5UNGUM}>`x=aUBr51INR#!(~HaH>!I0RcT;b8Oh!ocRk#;f z@0Tuo(FYKW496*HQfM0x6W?#H4P+XXWjywvVw(|ifj2%(kbmL zK&@E=CMgnoyARUR=uL0yf=;)DR zItp9CHitJnm}*5=Mpa^f0tnr-3)nj0e97nqt`A}!0d&H^ z>Bi&*;C10MxFogmL1T%;OOKe;0SzhF38VgkjB&~NF=UDXQI-Gm=g)Nw4G`0OAURWz z54s&5P(ezD&`dd#Jz{>HEz+i*GIg5GDvyLlg@9U`V zWBKCZi4(^hGoF9`zN9F}q?xI5VDeb(?!psl%Nr^E>at72j|h2Up2X#yiAm{DX`skt zfn-CI{8-r-&HC9N-l^+8&F%9B9o$TQ#ccrd+#`CTMCoj3xbCf%mCi|pYQ|+CL!pvO zhZ-tF7lk{oi!dzjhFO$l^%zz4?>^X)lz8TsE4>a@%NbZx+6wCFAiHTj{k$XO)t_Gs3y73|W_z5PPzsHWccWUDRF z`sOtp3~yGRviLUfzFli(6eZ=atuTh*a$~{@sZa!A*)oUs zvvrCLGbU(%E~3m4kj8(kXozZ-Xo+Z7T)MPi|9O9Fmtnk&61UZc9k+X;#(r{83f<(s z#ujCCq^OD<_Y(|6zfQ|3E7wB`#T1$kp05PaGVKPjcT`7@%3gq?Ly(!dh{ zO0Se`*;J-%G`P+*B5ou^5wL)d<&~8sU0jNFD$e%m2&?UMo(G}oC%5kRpb0^M1E57= zWvb=9fRk1y)JS#5Fh*(>t7MzfOJAOfkz1k^CJLCW8Fq^%s1XF6e(t#$M3wKo7n! 
z#PUZ!c5kAowe`1Q9&)4Av!X#;fbT&D9ea_MFt)N7(f1RnNxkzQmX|-QzcD_r7A{ijcVXO_)#YM1wIFN0&C&xeK|5Z^ao?GDGv zf_aOf((WQo8kb&4bCYmK4`Gn|eESe?C@4J@X(4=e% z9nZn6Nv9%>IsXs)053x@Y|T{4E-roxX_@ z#=ZjxQ5i;um6%&28lzephpZLa%lzZ3ipcCSN$k%dBTYcJ*%VMElb~m=RJ%H8RloQ zkhRm4)=b=AC~@i!L-m{ouRVhc6)5x@h?ZI0X*;P45=DrI72AZF#$oD>a!@cHre4Vm zJKJj_>}6S+B1^0=&*M2Z_m@Bt-v@fUZ`O)^hzTLLArkmRVz%J0*V(rBY_G~~)2zx= z{{6fc@0RBX2n(`48|LNZv9Sv9VWgq6S)l)h?ZuS&$1+cza}{BLBh@W%D|12eCqI8) zMOOi#`1tcehP2+4bk3NkP2vsO;N)isRlXcEx6+RTSl+)vp*+5>UXU)FNw#-0aqPQ| z&NO@e(4HVmGc?Ttqz>(5gX|~Oi0?IM{4cKF1Dxx&?;p2Qb|iZ=kdU(X)!0lfloR}`~?gFIh!_%0RgxmxNuskcMw?AAE_MA>&v0Jh!=HwK3RxQKTJVh zMdhTB(Lcd?nnZo+%GaJ(C0{5)oP>;9I?ckWYj_dpRgS`U>z_c$u#F!+)iZLJ{hmQS zG2iZtu?@lwK0pr9aX$Myrr3$w}pU;lT{*vSrLRBr4+J6N zQZD1Knf}Ks4lb^UCOY?Ez5J-_oQ>USaG!404K&GqDy zLU#l;MPx+m5H3)}$jIKk^A?RT1V=^%>?7~5cJJK~)Sh}&YHN7#BgM*$J(9d~G}5s6 z!0TeCrFB2^9Jn_W)YEdl&p&!D9Is*22CtLJK}Fni`p`R{9W-A13^(8nFk)UAx1z zDlp3xf=q|GE57AqOTKb8F9+_@u6x^3xY9e){`U0U@y*Pv+SSHUkHHHl?n%C#mXc3{ zp{&4=*2RMLiuyeMZQ_A!AxviZx6yWhnaO`bbRS*qR5;k=L8&UUkS9Ps#Es91bgP2N zqc?Mrn~a2{YZ4;-X2lYAHaszL@Yk|N=0&Y*+kQnuU?CF9fc%Bs6FE}Bv9%4li*ImP zA7N|&B7lGcFJS6JDxJF<$JS=oVGnj_xTIFJAV_eXnfb6v-pDA`sC z*L8-{4qY%#jE8 z6AW0cod!aL1~`+`(XA9O-<&oJ6?=G2_U>JGt8-XboRHF~s@+6SkQ?#-eGdm}ea4Z# z^tG7(m#x*Y1W1_r=U7Lo$)J5_76vXO2xpCV2B(J>95fL_;K27>ktX=vyTF9ud{~Ym zngJ}-ZP00kN^qoxbZynf2xc~&?aTiNj5g3e$90Hv+`01(fyUs-9{;sgI}($6%zNW% zh|q8B?|DLu^Uvmf=(VaYG8jBWw?NS(Rq^9#Z`0X6$$3(UhK`1IUs^)B$K!i0M>rlK zFENeTHO_wNv976mbb=VmW}cjiaBI;z9e2Wx;jb3l9cN}+u?Q@ z|9yVEKp*BV^H@Z)*p6U%4Z5t3j%0mPmz9;R#3DluOM@o>aSk0($q9@!S%D2EP2@F9 zM_h3D%JqOEJh9aEqLS*|b(fxwl&d)rA3x6V99kpUg^`g59-bETW1*kBVqa8TFWfrY zB7O{ff%c2|4O|F>1WS(ytSEDa&6p0ikMhtw_tCTnS0i`?DeP}*ZLncAIj;R20YwUD z+PmTe!#o2c!THKa1JMeM234C>0s?92bHA@M`o&+yH~g(#W7PRMg91ZScToeLl4sImR|5NdHNFoZvO%jhdqn8zmDn70+PDs74AuO`f*w+t?1y#GYcRkOGRtgXkPWRp(f`cB%Eqy@wAs zEg@F)BcPV;xfVsT^<&wkqi%R$fBA#T@Ct*pXLyefJXR(ff`(oQTcvtdS1DzD_}fA! z?GE18Dk!(E0Qh zol<_^7k#}g; zc#42QRVF4ZEn8YP{)L3@lP8lXk-u|i7m4T5`duVM2u_}@Ks?19F&K)ABNh(9@_G$y zv1%OZF)VjE-@ZVhfS3ae>1!oJ{V@jFc6sjGihUd+;(eiQ)3isibyeE^wDJnnX@ur- zRPPR)r+#Z%>mOyiTW_UXM0OF={bs?oXg-R6KFOa>GNaV`1Xl27FuL|H z2>^gXh=3?#sC)AAe{vu_)DqRxgE*NWFtk52AE>UdVTibuqw@lTSU^zFre&^fP5fd` zWsZtOIGAFQ&Re7*T9b#@i>?&Tk-0itUm-0Oe$>Xm4nh)u=lA=>>}nS5Q!st#9CB2x zN!GMu?}3}B3GY@_r)3PnL&C;9V4-`=P?Us(vfxv2elPS`8<#)~t)cW{E4yU?>LT`6 zm_MFHSOMlKkZVMt3xPohU646L0j}&8@V~SGyz_!C;f_(D+TO>gs3u4-?y;PUfx|ug z)ATNdpww|fSY~F3vBY1vqN@vl#xm$;{={weE-lp6%(kk)wU^tqr zLP8?}QCqV60$MI=3HDuSccd-=-Fs?HpXMCVB>!{b#gpTn_&-rRS5pdY=7S-JjV1T{ zBNY|v%4J^9)59^o%;U(oxHiD~SI;-X@HW5eM9!}9Bb-r6cnDJEktkUtu+eg`sM%ys zSz8;~g@M!|1f=FsH1``wOyzS8$;;zJ+SMzATv12{F$Kg*BfV93L-$w~LfoBup-_w7 zx_Vm`$$v&%(wP-505`XZTEp0mB+`aplp$W}=G2?8wzh$3Q5Mznc0dA3yQr%cMW_ZW0mA!ON#M8FJvuY+%^PYmju3$Vo2SCi)0U`_QcHQWXQTp=e zEGoP}7v@Nl1P`!te>53`uxiM!H-P>mh=lE&b%msDBoBRxuV$YB#tH-Akd}b^YhHw! 
z@YiA|>tQ;Ff7;_(!I^|OAYnQVX}1%MgMTFfmaia~u6N2mR2m~b70f$hPmothR|t_9q-(pMvBRr@y-o*px&n;jh` zd|u~Vz8pa%b-crY+Akrx2z2d{Kl#rZyAn+Ilb@>@EOz)ULQw#C54h9A@* z3CXZcp|lpgmdxHr5H>ty2f)c5Yii&9^A+|oCI#Dm6tZsU9xtfH zH*Pf|#Fyk+Kt97;xT$~u=WAp`@beQT2m4R4Z~{PPUx4cR z77opAU>-<=%`Tpyq@)B1*Qz%eX#y%LwrhL+-{8DCRTOUog{=Rebg40slD}-RsP$ToP2@&O64US z>`IB+i*LOJ^l@vdpW$Lwyyodf!1yjv7>Oo@FZ0_nKy7_n;_rFK0!t|&Tmk|kARD#d zT|qFILR}RVB)9t4FUy`22vN1{eI(!wmw|LUAV?@w!%O9UZv!qDE|L5Y?u}`F8?2ks zKz^3N*G$#c2pr1QTOCb7#ct4y4dk%Zx*#($83Pse4gkt%h#F#|FC_)^xE+q~cTG*x z$58x9sq5!(aFAOMTkzQ4hzW=fz%yp zV!(@p4dLlW+^SLl!X7NWISFgF+3JZiwI zi~~{SNR-0%p(gCS4op+9lDH!*G*Gb`G+rj1^Nz?IgfIih$X@9Gw}q&k>!#{gJxCjd zUAbv?9cnJ(y-r+LDi%44G_rRkqDXf<3Ef38dwch$IWB*fsX1hs;<~NB?f)T{BKC%;JS0aOh^g-o zTYOy64kYkT05P9~6_`@M-f-%&EdhLJFuYs$I8bcuM@8*)9jZ{`ZR%0Y17Kucp2x(-> zU56F1&;rHrE+Rs+*9)Q*0jc6bA_$_i?9b!)CJpQW7O9vbn-9`H$e%ciwLm!`EGT3R zxdILa!PI8?9#FW7v?&70Bf7u8uVq_{%^uGMpC2s3O#oL!TXw_o4@to|*@&vy<5_9v zhExndavG4#X}C>yp$r}38rVSlOhnU*r;rC&*m zWZP5^CB_~y3WuskeYwW~qi?d(4RZ?=8|frl3AlhkTq8ULpr-zJZ}ha`g!&1F6p7^` zQICT}p?cI+HI++h)v0^2@@c5wFKTORmp5ednfRX?kM{zh@)QO|$cFixmq(n^bq@qJ z@I0u+0bvB#k^A)W9&JG;XLw;tB@;6i5sZS8!3T5)iwvyz_e=@q?WfA{oH8 z3(A`7zy-m(aFLjJTum49XQtsd-?TI_GIDLBVgsQMY^AW0JuB8gdgEc;g54A+NO&ObGv zcm@oKFCa=aK`t1EnbQNVUXk`(fP2$o>DxZm(pU%q@P ze<%p=lmRRSurdQl^-?y4k(>KHq{DW>zu>mL;M5{UY73s=b$6rEAtWjVEc2i}SSxDs z{ARxBRl|DoraD}PiTmV_H)-4tRRSE|9(mw)3GY*rB)g_<@WF5CI{Ahfcox1FeV5Lt z7gxw0p6kt0oAS-P;P2_Zyz0~4xA)xe7R9j;?zY!VL3*D=uFRsrT8W>?%8Sjd5$r}lq#)0Do#V4q6Wd_IkHKu;a z6T*fKuv-!1c6@+foKClKu_qScH`yCmDaqL0P@dwQG7H%q$(RcRt>1~gq8k@)nJ|AB=&?OiQ^>~=i z-SO+&cGHa(QAyVm)vu9tI8sWSuc%lY%lPS{rH!J{%N$pF?_+Ff7;6vukLE*<8>8Rc zi~rae>_TC2MrzHWe8zpJp|NqJ4(}ExG{Srl^K+jz=|3Xx&!ncud*iQ2BO3(KZb#?VJs(~HwnKX@PRmGwF3q&17n4skU(A#{<%7dscuS5^Ko$e3EFO0Yt3vdyK!4&EBF7qz1OTTCB=;1|X9f4eJzVzlBi6-(#% zaP{!v{jul4zo!nkBkglWFYOP_NjN^S&FNEb zZ``;hdmmZiCV&06Gt-0=?$#6*(Dd)&yr(yN@rWL` ze#I|-N?dVJZ<;-eqY#PtKMQ-^RDGC)vXikxh2Xv6?y~5|eR4OfK7*&gbVh=Z z4|n=yupr&1-%)-ntEp_r7RmMMKaabJIfgP5Pr?K%yX#mtj)c2{ygnC%2WxNw4T4;T zJ>e60>1=_4PeCol(EDcA`JC3ar;gimi_#4d$%>-Y-u4{rna}SP{MrBH`^2)q`&p}| zCvSVxL%KdQgLMvuc>Ir?pT-03vGJY@QvDpEZn$GkO*2k7?I!);?+t;@zQ3Q>*+W@= z;6qRDJ@Wf`O*>izS|?SNWaYC$-?Y>6moDE1g1cu7-rOH=oR8SsRI0hFTUE0%T=7kK z42A9W=UM+2AImVRj5LH0XACQ&TgcEkwsroBiNg8$82cLn6}}7aj?*(+vqBIv#~tso zX=xn!SV`qwI<$}3ggV-G;Y-g}#W}8tH(gaa{4FB+_f6$JdsAcI!chMUkQyO(iZp~) z25>vK&>!9&4``*Y#kXh|2WDoH8LQ>ncyM5=S7!XrOWg%4 zJSgo&UZayTb!Xdny{|~j8(bLwFf&hcW1Wy!3Uganu=tjCJ?79c9b8aA_y6yHX0OiJ zj#Kacek(Jtmy0elb@JQvvEts?H}jLV2D>|fUqvhDsEo%IP&<=-irQVKMS9u71OCZQ z9gafl)eDZO5G8J6;=V*?>;vudzSd|~c&$1mJ}r|}J2_&F`NxO<-hL4(nOZV)1~%UJ zo(8v=T<36$Dk5w&RW)hZv?ArLGXR-1wm)aDU56M1fVTcmXL7&}IR*OtsfjfrVw!WQ z{v)-NwpBf~!3uclpA@Gyd(dcwg_-=AnPo_*^n5p#{^X|ept17P5=Z^7!=XK-@X$5FG3Y^Iu z^rrM5gjv@=>3Z%VSKXz}$6=>Og~{}YiT_H0VoZyM5METO!&eE0%Or;5`57K>2tJ5| znCr)Y%zr#Sf|90cbE5d6)^;6{H#^WRF8SD3 z$EZ)Kj&$03@*Yk?PObV2!>0sqTI2*vCz743p}neENV#^@Nr24QDy70uEOTMQ zJf*jMvC(P1JHjXG(e+Gsq680>o_<%?0rZU)i|U<+XP>O)e?677si!I{kK4(xRhnwe ze53DJ(+v35aI}u!%YSv*hVDmO%0=6X*!N_Y`Nk45B?m~-h5wB9q1s;J1v;UE7VXis zwx~II;@|W|w&1ZXX~#TVIng~^(dV@UV0Oihr<91*#~ah~}))1^SdGT*`$gU2e~7b%*X zh=Q*{Klo$sLw2FGtXJ~1#g8KidYy^~x!pQVCAuV^5>a?EZ^h%w`RQ6EeHm;#A#gRc z9+Htcab7BE!C@!E@ZI)n^6&B*Zn#E7+qGpWmiWtZYG!7hj2m}4**8SkOEl{{i`32-Xl!jIeEf7?q?LoW=jTA-!x9>ktNoVT80<3b~z|~hbD4{ zou)#+MCF^bF$Na!8o>C`BMSlOG?d~S*qD@@`gdwc*l)$DKe3vbF`6e+;lZEAc?(?Ms7^R)xXL z5#85c>bX-(Xfi|NVku|sV~<8Da1^VDx2Fbggx?fUZCl|{rx~hwb+-8L9z;`xfT0rA za}7uP&WH3Bk)wsuvcZkA___@T>FsY~o7RLhAJLDkD5IXpa<$ z6aJjoBxG25>i)TK^UzbUWg=Vi@9<*PDOn4xih_)h5)mA!nhR)TVvGHkRMe3V??I-O 
z&f!%3FxT|o%i@5$6dUnxN1|Tli)o`3qxj#u7TXO}H9{dHa!y5+UXvI=P0k)GjI?pX zp9BejR(Ux!oTMcu&~G8JnnN&an@Q7-N^}4NcP~ErLBwnYO=86`*W$B7-((+j)XSV@ zNw&(~=34gsCEZiJORhtCL#))rx3Lxi0*J0!)nUE!4;9baJY89ehx|{Jx?bgVIh84E*tZ#PogCO5}89VPvYfWP#iu)w0 zbr@($W$wmt$xaF9IM>`d<4-g_UI~0yI#_3D>gLnWFx4Ve00#@1ej=)~_Oo2p?Q)9DLrOxixr3fsG4`M=I_y2&{O>XU2Pf?3=Uyb zvWLDeUp@!Xw!Mz7%v&Rk+#4-BA}-Y~n3~4t ziCe#iwJnXQBk>@CTc}X>VFqiqQAae}ol=|6fAl(fFgQ@Y(-tQZZ4{1BMe$|lE+{&GR zbx4^PRJAEDKZ;yOx-ftb6W)*dzl#u4dgh=Z?lx2(t%3dndS8Em;0$aB>0WWi&yf1{5q9*Ia&lT7xv zTT$IIeeKVjkIP>D9?tH!`T%byfkxd_Jq5Sfm1e2CiM??Lns4s5vLVBhN_M@4SH~QK zv2L_vIM!K=3Fby_-2a)F2|#-Wrr$aM<<-mWuS3RY$RGe4NHrZ03mWkvf9xxy%HT@h zNsTsbp%W7m7SAkY1=lb9o)8aeFq@W^E2pZI%o_>qP0h#G{$LIM3Equ$KU%eEK7Uv3B&! zW(FHY#Q7oiv*!8#zJtV)J%#@qvT$X%-+{~SfGhSW0ao07Gj4z_y!^uN8f*u?;f5H4cMw&{X%oY?wq}GmJ0*wChte}2qj!@Q-wn+xUL@q zlK&a4ydi>a1De_LLB^GP>~*}TTMp#y+(>2VX`IUCa^JFgv6m?&^Ui`-x1#+9O<09a zHpomLq}yTZIlYwXg}KQjLqLn?sh#3fCkcg~o=HjEy4vWRnn$TdVVuXJeSeKUZEwTj zV->X&UUUgJVax>9zxUi)4rk5$7Qz2xRz0|T)YdC`nA5NWr$WO~bXw_kY3fz7_yZn8= zh}S%YZz14*fYIEH(tM=g_uTLfdi4fl9rbwmkhb|^vFoCK{!H6%&&E{O``rVW@NsW8 zKpK%ES;Hoe&1h9S-8+6FsAa5O+|8+mGgHT+Amk2Pms<4Jv_pR5;IiP^>oGYzYhR8S zgy+Fn2SG-gls~r@dV_$6sM<%_axl{jV6`SVd(jR^NBC$d+fZy0AJw3e#y;rbCYMx1 zdsuk+d2?o332JdXzJ#d}g09k=n5nV5J7<@1p_hl%&)Hxsnm&U^jjzHlGm}Q{)=Ugv zWd2Eg=?#Gc2grVa0i~Bc){alN;ZluPz{>IO!6lyhiDdHxA1(9Ce;>k_;g|n&LgimY zmVV%QKPGsc49u=~c0NWbed!=oJ6JYlRsJaZvOk8%YpDgt0C^vr2~@mFYrduh@`vBC zEH9<^Rbu)Dj$T^5dF~4u+PECW_(AZ+XQErMyQ^mF^>0}SdwiIQ8Q2*iwBJ4P>(%Of zevRtY_?q}{rr@Np{-E1f(a~Os8uO3!d4g2wK9|7WmHUu|NPZzik@;-0+l>0Zlig(1 z+nAVEQU&4KyYwE@n`K}<0g_iAGLXO~c(C%h0+-N}$lnXiU9S6bH|0pH(ibg4Mmy0M zvKhm-l{S26mt7tEBxSd@hJfHA#Kgt3QAQyJXO~aNxS^Oacv%JV!Uzjt$^lZV!`oE}L zz@{_=jsgOR7{YTdLdEhw&Yp7-MN-8ap z)ybQ{x+_}p%qrJq(#5@R&}7M+<5yH6U^eeAS7y&#s7vM1=cdfaU`=AH@!qI2Un*9a z!)Js)D?vRY(2~RWml{!njUWW>8t-4xA@(IX$0OOT7VazyFdda=WP(~Aa@!c$JfQcoq$GE z@m5ayC!I@kqQA6naeT2!akC>(Ax9OQbo=sK_D<6ukoV4f*U=2@wwCImcw8h%CR3Y1 z@mNuj%%w9m^8!bMw`!MO21eGvn}x5~4KI`D@_UuX9p@CuXmKJ!*UC6w>+rtOsppW) zvZw)xmz%O-D11x*=^YOFIJ+JU!ihb3=RCN_;J+3r`tw9ML!1>3&9$s(1+3L0;X z(%>)TWCR>0mtKE2eFyQBlfkF{jGfcFW%3Q9QXT%BhQs}ASE$GE;?7f6-Z!!2mipKwydq=w8lw9?UZrX;`K3B4jai!Gc0|74y3P z$U|kifgbznviGgEj>idLQ7R!aeNi3Dx zSApj2QYsdUq2>$UV$(Ch9*V|z`|?E!-p?bL=n)&mg@Z=X{O4kbjvR~S`WYS5VAhZ%(yWY$|&yN`ve`XDzWSJ z^J0~v3mVzvCfPXt-LG3!W>W6bjuR3WS1@3_Ov(F1^hEJpU~!|*?L3dsgCVD)SUxpc zbF!ZVi@qDI)~0g3;#uzjbl_h&8UD0xX*!{ z{Q#o6VA-pnhXmn#H0bl%Lu*7FretQAeI9`zF9* zzaFt>78Q*Lz3Ag+ z|K?GNIss3LEi^6?lau%DU2`We%yC8qWpP-3alx+8eX4egqqxD%j{P>yLcl{71`s!? 
zlP|CBd#(G6(+A3J3nL>bQ1_=B^7X4UcDB}eHJm}<5yNj5%4E?NCqjarcU7ZUuuRhK z)*>^CPSjB;_J(zfIKA3XfnJ!3m_AFYhpF})lL&dtL6Idndwj9cnFV3jC#G?BOxk={ zr)#`fKEt<82<3N9TlVC9U|=gsOnaj>dGwCxWGTPk2Gw}vQ0eXe)*i@MDas1->S1lD z!WTwr-2I3Q+Ph!YPFHOF2&(tqYxmw&MZ4S9Ze}bkF9&tsp5#3CBJz5qryHl_W%xcm85tNBTb-F^n0-FU3B?`edridR1NDx*S%&C45ihCv zXFU$?=i6kF&yw`$)RmO}w)2WjBQJa)R=f>85VBmosnTaczzJYVAAUSo^Xt=zhL)dw zMR&b7@rF#O>({SW&Su1N2wYHm@Yw5?-^k+@mrb?r9BSV}HEu!gT(g#)z20y8pBF`( zgH5#WH{scttTEq5vp6^z|Fkhe?Cc<}1)--q7D2#FC|1o>=>Ft)A}S%F0EsR5Xx?7t zXuXt;ji(6w0o3^L{qFNK5?XoIY6WpV=?|?+xBYS_Rc_rDV={hL4C+}r8tp#e;t<1% z+Jl~DZWXnI)kS?3cK$0CT`J>li8(BG@{>0X+vnXA@d{ho`G|hcvA?wxplgnp1@D7? zDP+bw>-aS($CxF0Y9J#U3T_~oRgL?)M&~QE|HA~uk3Nd7 z2}vo-C!(m%vsQ3v+vEALCc(j`D6A%SJX+Z|c$?ZzMY9I?)Z(6Q!1WI5-rPcx(+p4a z@s>Awm;5G#wFBGQm7OC~`;?N04#<^EdosG{H5i(G2r&QsqXD!QNgDs>o1zd|nkghg z7zk}N8aw9&; zswtdV6A~Y!(GT!*S6TDdqT*%d&6KSGVYrigqGjUUu0Me?U$Ie?ZxiLYTfwC zu69lI`p~|7jEwCPoQy+4LgK3DL`%hzu)XO}xOGn{phJUbJ$sZN(Q2R+MWy%Twy-!1 zq5>swK1N!j@QjMzlp}`jq_aGoEapCS>4sXtix zMTTn+ZxuNR`I3S~4n~G<9*7Kq@Be;Ois3rCwMG#eVmxFH&<_N(#VvJlvtRBqSn4Vj zSafBe2TEt{aAzS!foxZnT6zBOVy=e!6;X31a_(#RmI(z$SU)Q^0o;I+(C$gLbUwX1 z%8BREIc^cmDChe;tbz%Ty)MyQY)aF7Bb3&UaFf%Jhq3?YmfV8}OlZrm|NBXkk#7|JNj-N-J;!dm@$7;3w=O3^q|nR?(XVp8pB5%W_L6vh zjytfNmGWie_htN-aQq7s6Z{*WV3CW#{8Ej9n4)1mto}-qGhRy>bNy(L@O>u}hLa1Z z1SQ{d9=H3C8XEhP_E`xsX2kv)K7Y;v=zi995CW7EjWkwrb0Q+lT`!BixriPGtyHbW z#io}So3W;@%&5?2P@*+*kT*8VfF(_CFsD90AX*Ok`v)T*%c_e_Q1%q#?GY4&2{!R(y3}yLigMhp`{&{P`%%IJ zPOc}d%$^U2E)ZM1zDs+8xGV~FJX4%LtkPxC0L6IM$voyFbE*#$yXYHUd|(u)%#E2* zjT5LVbzG`!a#Gmi7k0wKP07<=*JTAezJuvT{irrwZRC%@vxSPB`x@VCc1Jde80itFwY`8&^1ZTE=BGWH2V2Y++UTf!M}|7K|?6 z6;#FjFn{~WQ$Mjn?{uBn)*qe_DSnWZl}E{lT6#awj4@%?Qu|EKaU+m{asOPjr|RSj z>tCL{lmb&B{JRh?XQ;vfufCohZ4fAv=)JocB_g2qd}njZbP&tL$jBHS&lcS+g9%Sh zDVYrZjX#~%LiZpdI6OF9>Qh0DZ+(JTOv~nu(5)c*b7`*e=D#uG0k+!@#Y6!NeSGbt1Bii3Dm%8 zOW80sWO}AO^)9d)2a7H?#oLolvL)Rue3kNd!UWr8JbNUwZn0H=qq5bbf^(f_E$53T z`R>o_U^+c4$c25Wt2Fj+S>Z3R()VHu?-`8-n_ZWuX2YfF*>T$i`c*65d+Xk3U4@1n zRX+!c5xkBro%tej^w2vgYt_5Ncf5+jWnVR`JlO+(r*mHC)n9nch}SB9#^ZW|Suxq< zuzqncv&$D{BZEpKk4xw(-uvQZZJJx2Z_&WGl!0=6)kVraxWo6L27Vt7C|z)oKa(V( z`~FEXIXQXYWT95k)vNQ%n=7lmhS7aeKp%EP>`~b=A9`Q3yA)kF!|S~%C%o7uwn%^k zbG5u~N1j`^ERsniG&M|uBm#e)FQeiAX5Nc^b)H}QXs97Zdh#s{X?5SF;%{7EIJiD2!B!g`7rAd6y;R$L%bef{% zj#_4-mz!WHMH6RJV(Q|$m*L|}<#-KDlST?9#u`O`9urPctVgb0kiyp(Zi@QNk~P7m zTV&vpeW2nlc0o4>jDvGdW&fO6s#@%tiua_Y=io7P!nCeDMExzfyI~wK31fX7C!;ip z2tNf`POi&qVVjNx^PSbaqb5ft=x|marn`h}Yk0hG``VxWsqQskfWidGYou!AQ=U&u zN{-%X}CXh7O;>2^{5G5AI11YC5q%L=8*Dk_#f(|oXnq10vW zasTgTKCB!j{bl|@mVB5h+U@E7)e?>~LA%OD6PW`Vg| zLWbF} z_v`K{hjGu{Jv2)72vx5C3OMt$xYGTc3T zdwbmcDfg>$Llq@%D>KSn^;yoqDva&*`prq6Eu6l0$w9s?{QRWIogEE^MaHkKmDwD- z`g7ykuir4L411hbC>2qO-8+TG_o4B^N|huxhU~>Rv6C)WThH$5bDn88-Ap_*^`)J( zHm^HtjXgT3x$myY$kdT0n*E>u%*WXOum8(G4GD^qk!5j1XikHf9khiKpP!$PK`%eG{O6j;?itzi$ZuC^&?n$4H)#nCmKY}d zMPszv-(={X7(q({P}5J`R*Bt22o|S8k7enDI|9m5V{`0||`G0r)AXD1E zGcxtB`dx>}l$K)zEK*>o6zkPf-_5?BO|pWI*Rcc5i|dv z9e|J`BO+2EbNKxE9-=$0aa&a>w;7(WuL#X$NNmr99LQpR0Ed*>mX?+xf#V1HZ8n@z zbT%!5V>Zx-SoEm=NWNCl9Wj>$H7~DPBO@b1o;JVJ{D3-=WkZ_{6iKsi_6`m>aMJmP zbp{p|7LcUNDkx|`ymT6{$V37_)PSLZK?0UXKPQAnhyx7XtIiOv97*LB#O4KXfJs!g z?hcy-E6d6TYUJyrL%2eIO<7qP{VmD2(s4FrcV`P}5XC-0_JPzG08^NmBgVfTHAspk!d|OH^dwCEdC+GtAKV>cOK$8;_FBl4c zx_^K%-U3cUHBZuP=F-+qI@Yo~C#o@&E?3vKMjd#U!qQ#eR97hbAn4E%N<}(#UNz4? 
zK72bFfx)cg8t%v<_)kG%bHaYT1WFDyd%IgC$R1XA)<>buG*YJwoM6mN*W%q@{*qa6 zwUY~nvJ^xhYX(+7o668bS-0GV8yD9**X?uQkRwpwa~ZKVq6=P{urOXbB+A&k@C3TR zZ-!QRet(_g(OwEgxmIw@C68M>0gg`yD`cAv;w&=};;L8AA%t4M{KJ+HH^Pr?;Wo5u0vGyng*`--Paemae7u(c~Fwb;VgK6 zCmQml>+AlN<4CtiB)e*8#qK^bt*Rbf>}V}~Z!T7^d6oZzkg zR%+G1lvTLcQhC(m@#7dgPgUL>2RV%ELd7&kx_44FY%%q*-fX7v-%r1auMZj1HnErN zz_yHm9Y(Lp5D*YBg(@w|(CVSg81IA+bff`l+2C7=Yu>YA;P;uls|-VOWz-CZ?uQ zRfSM?qgR#m-xJZ`ty#gKWMs0G3}Gbe&8c0ba+voe-n3d-v(&^JRk^iwAwje|En=#B$11wM8=XkZ3I!r4deEa7nNMH!nt`1PuV;2-u2fGP* zV+K308&nW0jWo71(RJIV6{HhKo>$JK1g-R&B~rR`iHK;=50*zajduVejx%lwebppn zJ&*=*5T3L)q7OM-aLx(vdXVhUDyUtvp$d%d$$NXy*9Yn!889LD6<=X3DW|80GDD=H=HpeD78P(5s~7#JxN@o{F?S4FPb;2zw2g zRSr_V?rc1ub`KH6*bE8ByH35OzWMZ>sN>9KD8;pyzzju@8g#*^JD2&P3jVk~&>&dO zd`l+H+j{F3)FROcv1vx>P_;w}&#R>7wzeMdG8xF6NC*&P+wh_a{ozWdF5paD;xx%= z6JblB%7^Ub?v02{$E6}AJVvQtHfQIdg<+65u2~`bF^Gy+qM%mDb|eq7L$SCi{*yVn zrgdf*geabcLOTvAfm&@XfN{)#AUzD$;=tteG&Jisbx>;RB}XWTqCosjR37wRYG#FO zQbBUZQ5*{1qT|8*SKII-wVwZU{dmKe_OW?^lHy{=21?DDCI;`wrg4!G{C&D`m~{!B z;sNZsu1jWd$uC~K_+<WK?n-w?sf;S(2LEK>mmjc6KRg>s*GgSBKY@v)it!8SAZZ56Hn9kb{4%`+S&>_i^e{$Zj6WJx-5=N@_1K69z1LV0xiBU zQ;`eZ;?r|!G5T3YlWx3$N>LFPUWTvCIu`m5R^FG7ktqmB+b{z<&U_0Gns4rFj3EB~ z@WLxMqy`}va#7eB>8=SkId|^dS9s4wt9AMC;}OqweF#QIFu`3ld)=owf>pXy$g1yJ zL`;lML1pOpo8aK!aUw%j7A2Y}Qu9Z5l9d7`A=;QSy#D7C$E-ZPIRze=%VlR}WyN@5 z(2kbh=<};5qvK6sLiI0Caz21xVI)aF z@i$DTeQ`Y&gfaIBz(y=TxOS1NWCZj>I#!&;fnS{n@&JMO!G48!Yd2Dw+uJ44n-8y< z!_pIt3e)>b%ouPB;e(o37r8oPL3m-*uf;EEDf9Jatug8gq$UUNyTkSqYf%nUU$Ze@ z;5=YxTn0#4reK>3DN1anX#)eq&c>Eix-Q)>Y1OmQ{mQgfJW_2JOXXB)8x7O-BRz!fKm=`#L)bL})+GQnP|YBj#*% zZ`g2$s7XoBPs9TG69aIFy!-f(=ToI!3PDe~^_}}orSmo8Z;K?hn<0SZiJ?vTpvTsH zxq1%+v5ZLGg5;b8-V%)-h{aOp@FlJJa1Gq!g>RlJLQdo^V9E)~2ho^OG4<&k^X=@&4q!?pA}if4%3a2NXPT zo^8Dp$S$Tl_LqR`l8N7)E2#UUHsWAfz{}ifl>uBXLq$OmS+x*u3II%x-T0t_Bj6F0 zdx*rz=~T#HS@aT#yjS%pM>VX!CEdP9QhInWO9A9yfg4x{2m+xN>Fq^NVb82~NHd@~ zbbNOb_qH=zG(8XD)bB4I_n5kE{bXWJBS(DEtafkLJzu+oB~D^}$e|mOa%53|kD~0$>g8}#6kg(Q?@X7g9}H$VmFTLZ9qKVGi<^d#X^Dn8bGt_f zra0HOUM6HIOP%L+#D9G_G`7MRb~6POo(Q)RgQ&h7?mMV~0nM0R zfBhi^VFZ)_<552gHtb&)A0Hc=3wQ{L@yKYvl#~=g8tQz!eG7Z#+JNOMNMjG*$5~hDD%O+qH-(L8S^x$U zK@=i^HbgeDE=e1kj=cPZlJ>gN#FuqG0ExN3MAZF}YCQrSAyF!Q+#2&s{D)SQleaDi zTnZ|Q9IGJA02D1NBclXXFW|NPR2=+zLz(|!Sye5qEJ#gfV{K~@dNB*SDA{=jXSoMt2HcN*^XvOVhjFgAfrhE{sV7jG&8Tg@DqvjW)l2 zeXuhnRM!Ek|H&1CP4C9<@m$mDTh`7KM+ZXb#AGqGyCh3}*U6}}A5Ub@{LBYYYQ8~@ z4x-)bE;N)B-}+hNae{{R1(LkLsB2GQFk7hJdV_RdcHRv9e2kE-!M%}+=>*+uPqInn z&i0L=Aw8rQ)NOMzIw)!sLN$U5!Af;pMm@`sBuV})-+lV9%pp(M#F=jJU_Q+PX=VWB z?$*1I`V?*>sRl!ZE>GR!Jh}BEE_oSLHa{`3jNs;Jo*| zI;b(_imVrhZOTcjbD0ip zBL58k2F^z1E@PfY)b;7PtB3(CuXHKS0hb=F=4d_-DVNf0oxnkbdyo)zL*biu2d*{x zy41&qO2G1V^0(hi4dSVU!BLJF$T2~joCAP*7O+C_#*Lwwj#QY!Z`=Kyk_^`bw8+&? z*g9^IEja-~i?pO1f_zp0kV_3{8!Dk{k&AwFt703@{|~oGckHjo@bF~trgOl35+Hf0 z52JIl6||rDBFoSe?=o`fx;In>3xH$Lbd&thLA%L6Qf7^yUY%NxP35@@6cnJQqwmBp z%dLPQhx5vnu`DOx1|8>dy)V1!Hd}24Kz$Z?hBk{4HYResa2Jr4M_aDPB509Qvw#3D z;A%Eo))PcFnOD-*2|*iny=e3JZ}Y{G8WLy_S72p@jCA*qm#Z~6lOJfA>r#~L-rihh zTmouOZOmDgypayDND8pOmf-C~K_e?*Jy80JG9|qk(k~qyoqLcXBp@2U-xBWxK;K>o zYlidKw6B5vLP`2@oETOwDMZV2@gxv0_jO9m!|7co&LYFZIw8YljK-_CGUn+;3I2I} zP$AdY0YHBu6QY1ZJOnR6;j2l`6$_Pfj-4phSxB5tGW@DGX~r0h9+T9AUE~Drea@wB zE8Z$rKlwoR%-z|s8#Y5(d+-{!6Gtg@%5Cxu_ej0OuFNo~t037~Yn%xG589)^L?N{v z5|nClC#-mqsuMidw8d8ms-{?x(mP|Y^f{RBLvR(LfLMoLRYNZNnpY!v0P1LQadEGz zAzipm`x<)600|`^V=Qn0FI-(+;ptTvfSCW4dZ(fH4h~^R5u`0Wzl@Z+!0MS$fQXR! 
z8l1laea^Qdle1yd`E`|9OVcXa2_UEmxY=AJTLBGh1QWt^03*tSQz$g3SwZS~AaH0d zIzzjv=a`y^SH#y8^bWk?^d=dX`PMZvM*x!fnFsZjrz?g5eV!F+%u z_h4wX3b>A_sHk+?x*ZMM>fbLREB@pJGm)I2A3$wyqe+ruRY%F20(M~S)>q_)9b&3I$sq{|@T;;+`B>+h(d4x1@*Xw`lHRp; zRl{d@M(pw2O#}^CR-j2bl^5+G#TS5l8l7DyNfydNLH-u}4(ksm5GI=d!C+88g0x-` z^8gssR>-x?kldyOY8nx5-&#OQj*^!bgjjSP>aJO{Fv6p#_ldoIvG)I}?z`i9-rx6e zjAI`oNz~Dhrdg5-l~HNXq(w=47wvE;LJ@`2mbR9(7b>MmTT@HgT4?Ea-JSFKet*CJ zf8+7<_?+{2bUt{$U(eU`dEeK4UDtg-)6BJ=x}a-!ZFt4CHQPjdr_LleH*)x*#nTZ$4~z$E}(j{jK%)7Gx{bjK~dX*mHp`~PDJ4t-i$wR{yS)i zj822tS`~}`degZ@QZDMk-3CD2)3I8@i1o?6?K?fD29YuLfzlQ3^Qg|B zLk#LKP#pRYystty z-xQ*A)s(!f>^`kb+uL8mkU)0`3Z_&|E#T1wT;$CCt|jnz65*@#1vB_D-2KYd7l?x# z-pUBNnN52Rn~^8k1q~wRwX+%cpz;VZp16GPtuFfMoVy}r!|#jBz)k85+)+F+T^*ys zOV6wJu{Km73m2z(<^&PPYF)3Opl|>s>4W7rB~I`&uUcA;*rGHrCWQ;uN@lk1MX|MB z=Lamzmq0G)e}9Uq33LI|X^Zi%AG>qKYqVA}3hYOXuv6_52(u@MfjJE(-+Cj1#pCzX z6ikP8298ZEoHkuJ@ft+jc;w+(?tou;bJK^(J4^BD`|`z$GT~daL&bgbN0bE^mZ98f zb0Q8wS%Ro-%yCL1!jL@C?PmE^)zyLkyfk)G&|&$JxOHf-;HwMQX4$(d^+OciD@3ZXkb9OeX6PL*QVC^ z1iXEk_x4J+aD~2sK`i|FF!j`K3tOCd2C zL{{2mM>UZ_1<=|r)|4h^5(ED2^P6>(G=2jn(pX)4m1lduIJ;o3RMaw8RIVOvmLPUC zaXbyn*ZdxJ6Z7;lZ`sIEDFcepu;dYUAN-ams=7z)Yn~ljNygCOyt&*1C#caUfmn6h8n(sQolC%*EknMHd!1CRG7Fu;+_CXD|jDe!RcuhJdQEX9(lL7GWfSO4uIk`Ebl}Y z$>T^+mcBn+z&98{-CgI9kn{aqbhm6~-lx5pm>Fa1ieN*?TXxN4y#Q(SXMih)&5<5z^V=fhPy{W|~M7p?~ZC*8_!GS8BOVOeFb{Aa1tRUVD(hc`YB2c@wt*4tS;7-EeI zx883&8i<$(oI1xA2VNO$3@<`k#*@Z<@4g}})BtYgI|w|Mr7Q?*xlMED60Z~3{F(x4 zQrZ#{*O5(YtkbNzWo=jZt#8Y#iFBlKjM=5I0jsQw# zy>Nqw_Ej+Xb_0CgI|&n+#+&nr1ur>WTzhfV1tH#{UlpQbWw&hg2Li56MIu}Lz59ZM zRO{_O9>uP<`6Y$>6{0%Vw8>k5JfP+h6U+Ta2FC$RPdtaA`iphmWDx$6g3F6hYwDZd zSy;0SZ=Kgqt<~(%^Jj4a-kR%@u!x;3jEXAQu`e)oOsWfG@@JnkVjxxj1aPB!wV!_P zO-Ahx_^!^Pl8brdK%lc%p+*y^_tW**n&I=gICvOkN2clv_3hDyzHb4={}{Pv6_N`| zp1zYH?$oqw4OOWoSIx~PC%ksHDu8~qg2y2x*66a3a?ZfpN?FG{bTz;H>An->>Xcew ziiqt9&y1%_QWy_Km36qOBp~41X@(QV7}A;KboQ8d-pLbrc?-J6nzNO@^qMk&Q*ND^X;W&P`cwGjlN@&~j$(WFM7Jpc$uGBg0`U=S zJ=B^urw3CQibv8QG|?qMRCC zP;SP^FtIB#dCf5Cd^+5=c5bFa*bC&La(w0P6Xt(SB4`SAjL_+q=v-{<(hf?1Tg8w|>?4D+(0W@Hq(O*m_AYS9>) z`q6SlCy9SoB)WPAG?#`%Y@6H{6g+&Sd+)oieX$93i0s=cfRTw1h1kjHpDd79aa;s2Z;sO$8lnA??{p3Suh3u2a!ST*enqv3PT_p^}o}FP&YTa_pFP znqah0#pH%1Xfi&Mqgus0=xP4Iwu?DJ(ja8PgbL6^4eH%n*^qx#@S4(`jB27jT6wRG zGC$GEDlG`Lstp z^+xx;wIwehNQ1Bt(X~^@E*QtAR~BBNdG`97kgBRGw$T!ii0jkL#&_8B8nA?`V`a?( z#<kMPA$tyxM<&n^ z+bFU8V!wWN`ih8$yrejvYKE>l=_)i?pjklMG~Cb#bCEYa5FcF?1P(ODpfZSorJiQ# z`0}c`SCjCAAh6WjqX!8ckb^QSGSv~n>zQcl|k2? z5VbwA?W_%zT;_r(D>x-Wc#pjz-8!qq7&lu)cXI)Y&?9|skrfA@AnP&VI*ES%5@ zV0o$M-pv&+BWmmZ*^M<;M95nPvgZjhZrc_I)S^;%|LVEI{t2qg*DVvLa0tO%;0iO4 zi08>GNPAW9mIXu!WPf{mE2Q}@)4p*Cb{h{J$>AW)N-7fwy?cfE0>NHt*lE~8DhXAh zPwjvl&9rcH&$h6XIu0VbzcHb*V$hyV<6uqW`kX>b1 zSuLMULc@CUhDSrFBa!-Z)A)P!$1MF=UE(E>^V@c?G)D_uoQ$WYOmwoEO8+B zJ?0HLq-MVDd8L+0SRkcd`%ZDz=j$uI&J82Hc%5u>o10yy6U0*vZ|fE-R*-*TV29Ll z-z@T&tPcM591OQ*WB){7fK5eo#=>n8LY0#nRtJB_#ZRpOq5ZeU!FJn$(K#{m>=g$< zMwR|!txXT=!(|y)vxufw75yGy2qchX;WRhmjrl}}XB3SC6?Z-d%rODPY|ltC&Ch0^^q-R-_dEQiQ zf^7+Rgx9t$Bvncfs?SK$H&FgCB8=-KM+|;dXB0U}PH}j?hvmYLF{x#W? 
z2@%ZeoCVnD!qz*K6OEx)gnO;!@*^zh^8M2}h(5x*pe+M;g@cS#HJYVL@^2xta~XO2 zc^L+sZGbzD1voVT!c)@pX^BxIW)CnY3RWNn!FQJ~*LUL3!HF=H(Nb8d#G3C%gqX7U< z{kC+~hh45CD`~kP!gz?XC`cW|#ZjgWAs|ZCVE27>MoEdCWv-KTZu7cC;bv?UK};c> zbk4QZMwvP>-8QEUp)BEpy`HYFE#8M()EY%5n24zx6snGg+8^04NRY3V1u*?q?Lyqe z?bsXs*cRw|=YazU0@|R2&35l{i~gSU8-t4P@YDYygBK1WR-L7a$kXhB+y2#C_V2XO zub>gpc#PxDx4V$T>WCMF3ZjB1xg~rEIa_NA$4tv~$$hgX-_+}Cx#E{^*s*b%t(t#+ z+Cj~ko})%|HcVG`NJ8BYkB!fiA(B-fqia279Ap^&?>D`$yTfH4_kpIVrA)Oa5|0F$ zROw5duLod7FW!2ch1+zKC9z_WC^7rsow?VrybH~mv@Pkv0;$CdDup}muGr>rEwdMD zRXy0rXaXoy-V{Gzw?jl?oL;1@s_G{`JFX-)^)+k?xQCskzb5)2ikYFX?S=DDTlhxi=xyJNMj+K+^7atRFaXADPR42)KXG3 zFt>nB_3Ks)JWs%%+zF}(XqQOj+ZyW4fRGH0OP5ATL#?d`V(^N+N7jYvNQGU=Wr)c1 zLK*#W!G#)Burr#P0chi7N(%h6J#V^YcpPAMYyIYHo?r3%lXPA>=4eysMt>FRsFL^Z zKcYl~1x`tq8KE^?8?(92q=f{)KU%QM(QDc8N5$%S3KW8vfL|LNBL5PHjoA*#$gBmO zNT_6ol3A_IdI#o8kG#cf(*L@uc;Z}YbW|pTdjDfsiXD{1K~fzkl{*2dGmwl;(71KI z1r0wO0VMT>Q48An+aBX>y(E2+txNgceXB)>M6Wt!lLX&4X%e1eVGJ7?QbH8s87g-Z zP&eyzW`vysXz;|%E3N6EU&kR+F_u~Y=E9;0rwN}~YxY65&0Gw#+3HZHaW2n>{!;w0 zbuw4VhcqK-2tau6yREtT`Q^B|xjBdSf55|*2Orh}S9H)=$9G~<7eSQFHn+W1>H`vX zsdFz|aMiQ*Tbcg>1vO2pV@Ye10SUSIU7`r}KS7YzOosB$~e$7l0<(>j9!7>+4 zEkrd8R%}Y0-#z9GzbP++3!sBh$crqEG+qQ5#w^^`aEWm;dO9SNl9Gt0W>3|FY0l!d zPU?Nst0K~<0jx4y-!BfaMzJo3<6Vz@5CLp-T-5KugVUzR3fp02ph{}P!11_5&~GF= zmF-2a{($}S4Ch`geU=&nu^DJ423zj^EF7#ezt!Q+>^e}$ev4QUk z{a!}&kKo-Eg4g!J^es%5d@Qja((X23jqDngH69$CoV}RQ0s<`77PZkgS@n>O2&8q0 zRgpBx9wN_=@I?&!=sYfw$_=!dlvh81p*6h2qCD*gozaww;)-&e$2KAWskNqYUX zhB%d+e|i{SsXJ?61gd0H#2o?3V?`m0#0g#MahRJ~)cA&Gs4 zNeqAop`0uL`*8;dkAsCD^d_R_17>HG!j)BGqs(JuRBFTd)4cqm!led$ zgtynJ`}`?UEN%MW{>35I_>9SNjy(^{Zm}oybnfvka?TxVaedlW78d9|{A2#+7|T6A z|JCKa;Od624CQgG1HBg3#YHNGgJjtmrc$xM%g z|J<^E-_!C-HdYE`;6L*I0d~{ZuZHNS;N|2z2Hu%REj}PdIr?e9u)6-#@2uL4X&>hIsk;;}mT~T&7-OtjljpmM)D! za-nM^#v4r%j8Ok)y`aoyKy~&!8AKBR0JpX-&`Hga9x#i4HE2O2lRBwMjzX+0-e*OCOH~OPM ztzWFXw2e_fQ_;{cio#WG)M-f+s7kPzR)4YUE@1#4L<6EheNle1U9q`1Cb4J3jR;D8E1eMVMRQ%z}E8Nx;~25j!nt zTD&<11_u4x9;=u-9=VdG5vfw`HmEsZ=FPv?9O>hprb zP)|=J@a4vgO27pYL;UQ)LmnI2aO4+sg!2x+u&2MxBDQ>Hn~yhFvow(a_$n&{4@DwH zHtXxbNFfJH)pbxweEKRe${&$n_hU2r3TQPB<(}H{5PoH9Iv{$l*v3TGz@A_>!1IQlX!W@kx*tj3PxAHfvkM+PrzPt)j(ir~Y$<{KWiA6@o((%S7ZIFL-r#q)Ttw{&;G0xH%E5zaO}t=K!fd_oO5USv&*a;_^0Pahst-N1Nd?#iUrcFA zJLBh5^wK^HU;bn(-DJZXAWZtjAhbRS;F+MO5N74?>lHkAt~z;lh~HVeYa$ ziS{G9rDO0-MuAg0);Em9;J&Yy<~tz4Gej$>6~ns@5dX@eO?(!Go@%&^Z&!bREKo$W zb$5w86FVR+_6~&s7^fm}Fr$D{=Z)Bl76t4*ZVmul{pPP_G{6c#Bo+-WG(1Vhm#2F? 
z$r=Wl$1;TWH zxkvAvaGaZ3tWI9;_KNrK$!H&g<8o?WNfUXdahAv9u18&?{ zj0{$}9*k3Nv5OZ1qV?Lz>1oEMv(^_bTyXDy3c`f?QE5fcHc<^C&vvN!p#No@UK*Jw z2)#^#>mta_wZs?D>Z_`%2HEx1#Zmod2y{0(KF&8jf&K*=-bQn6z7MxqRL7%|j5!l7 z6B^&S1KA22_hhEkm-86X#lbBkq)K<4z~s9smfr#^90U3@jJV1*TuX6QXb24KBGs1O zU+^mePfD&hnhmlwD{5{GDHM~T#H(;)gQ@mDByJ#QKN7QA1-rIWc49v~8_N)3UzX5qmQ!W+$6ga6MPEBdTDnO?bFxP2xP}Fk?Z>H~O zWoem+3hisn9!>59)?Z_?o*N&-6)Z(=GtjCQPciEc)td{*EO!R8ikIk&lGJ>}FT+W# z4O1wc_(sspiqnw6Hqmz_bM#NXYvTe~3Dr_s>6%#HuA+j+(;|#uWG)z#}Qy z+uL9J(TZ(34yO$7?%lg9Z`>C{lpqryIiDPMbZHuqL<_glE9c2k(GAuzMpU~-cpQc_KZjRDHR_d|Q|`^L-mRelA>SNdbv zLdZV_R>l<5PgO6snOs0bIYu#(#bx|7(7j08jR4zDmMM5BVAYg7CM6{-stW3K)%)zj zb2G_`Ht54W^6MYm##sP)Zq@Bbm{Y#Oz9--B2KvUm;cSy?t3aeea4PrM_wNGkPrC+T z8mmWNmchdU(Dl!#84xd!oRFgv6Z}=1E`NXNn)lcB>nmF7kqjn4%xP34_BEv?kQs+hu`D#*@#(PwWIK?D6ONkGbi)u_y80?z^0|JIF1x4Vh?i~-M0XE=; zpvplxRANbfKWbAd9XB#F_kW<>91RAWMiGXWm6pn0yLN5Gs#Ug)n*topq8|;GtT4PW znal^P$aH`Z!Rh8UC))3g^A(@l+0juGA;&z}n$x5>g#C;B%DZ#tNmo}_p+VII?KoKT zn(XQ>frg>x3FO+w>@S_2VId)ZVw=8xIXH;qTm$mi|3@7Q3fdKG*3`j;I<_?yVUu3B zX+@)`&6m4K0tWVOgY82@@g6LW#p{a-3U>1Im$y3rQ~c@nPu5%i47r%RbNp79E=5$Q zE)%uxx>XS*X0nr4#wS8pea>(QP;GSV)qpR&GY_9oeaObnZrA>H$zV&C8mMnIs95p= z!lAHgd<7L)1qXiTOBNO0LG5-h-s4r?}2KN|Wg+ z#j5i-L`wOrE5mg(P!JIY+G0^+tsWX6q0AR*><~mZSbiYi1KP8yyR%#yAM?iJ(yCHY-H+AZF1lI(hVzzz?+c~q_I2S|=be9*6#rO{je!+5w;8obx~G+)GnGoa|+=xG&I!xrMSz~S9t&Fw*&Or zqOvBmiU2nH?VBQAI*8=FmY2iLv~eR9So3>cVE=#={u_lTC5U6kkN3el_ZTQ50f+Z1 z8W$65G+TiJHVcT2M->sNddrl&HseLSF_5@%uJaNpEt~`r6xFGLXYv> zqknsNcpMZGN>y~}PPOiqkrb2uf~V%v`4k88@y%`zxL2BL1;oW$2YP#r44Yu@674@X zCkHE^`q#;&%a$=uO@VqGeUWBd%airR?=if!V~D#e#EP=f8T173gl{Yw(_2HQJ@4M# zkJP6zMRUx?UKqHz;Z-s?mT7!KAzyCq10Md0kWSWR{?ZTFXR(bar7NsGeaXZGi!>1`l7@yVo^C4)tHBOcb7lbJb#8`1+OD7@a$wQ4{C}yTm zB@M(6^ZD|qRDt`5KylcuQPk3+1sCxCM+~;dbuhv!_h-jS$xlFl<{3-+fOO-5rI<@# zyLn(Si$7FnnX3hiLHg37A^?+z`E5ziT&BRw@mpaiH54%gsOgWQqN1jjP1vnz|J3t4FOcYjJbSJK8p$zOi;neGi*Tz&pW*aq97g{k zU`bkk`>MygbW{IKiwvOmvy^sC4Ar?o|gW+cKlm(`sLd-$?{mxY=ib za)BbibLQI-h$9;v%;Wc>w=KVC$6-*bk1jC3LG(68`=A2`dYkxxX5t%xrKk&MpxznK zATq7hv2;CV-3Y&`{fw?!^U_a$)nS_k^kkUV(OeRI3t8u`;&ocN&IcN7N2Rr8~eTN?&$^OTpoyOynGsX?yhA3;z@hh{!oWjO=Db2uVegH|5ADeetZ*>C-=gXNI* zQq^BEAZJWD^$l_?d=!p zH@CL7lKK*pkJI8wA}WBO@h$C=5-bT+I(n^`Q>slJdbh zx1ppTx%W*Emrllwf`~cNet({2k_o|rXka#IN&&Cxix04(t!N3JSn&)nju;AJ&`khb z-Sf%=KO>mlN0j0WLS;&R$Bo~Z8Hu=4?S8*)Kgcv;NsbFNVEAyV>YRH z-yW%YW*|PcSM(=SCIY>DwgbV4c|G^{D2=rE?G+|B)wHi)zwW8;s~LSJyatZ6tbPrP z`X@Tq4Uw(f8$R9$n}}&Z!QN1l;8 zY_YUYLPA3MSra&zRl)SYQ%v2gy!-dpTw?y%RA?%6NO=w|bXCA2oGtci5i+xM+kBG$ zgc&SEasmaO$;sy&92^ZZ_tvnAo9dUd&tiowyf{S49HbLW7lZN54N6gD+A-Pg=z z=n1?}&mxFxwl}7|WlzyPuwN zXL#-9yi0iMEI{K`y@94m%3`mz3}`qeUJjBWhcu4An!5UveBL7*P0uwEp-`+Vhs!Wz zHTGQDr0VSK{9yg>SkM|kyVd$whwc(zCH$k3b=fSdN4+_BW>hxtpv`f&nGugrQ63%{ zbT5-f2|G0O}k%Jzk#_TSEfLw;vuV{`K@hjC6$ z4*&aJ@OE0+_NRf^QnQ1QiC;~&TpU*Y%PT(H+7C%)W3-Fwa@aMMOKl)DdZ-G@kk|g9 zh1&u2Tr=sO7+&tw&)DO#{nHNdZ7rUg@wiL$!igDGlmJFi1#I2-h~aXIrnbQ~ggC~S zT?U{K{a|2QeEw@~3}{Rd$8p2Zp2w{HdHxemuP=EDj!j=S8(F3wf~VD&8qK#N%Z=9d zUwqeH`gnDcolg-Hu*pEo6|MvszPrtC8V1>F4lR{+!@*&l4gSqM71I(gb1Ir|U&L46 zxQ&so z&o9QZPF?aM(D*Vx9G=oh2m$#RL5;ytx#opatHhktg9}vv?j=nFYm88Lp~~$1*=0R4 zI?8iqdQO>vM>+AuQk!>9`Yy*KF8V9m*lu;Iyp)`~3#&|KBjNpif+s4RMZu|VPZdx5 z1J8L(@}HylM&_|V9E!6(kc26ZAjirB-T1w~XdRV6GB+5NprLf#5qH-R(t)+5wGtfImUt?qrQAUM>z23C*(uI|HR?Gq7>Ytxw*L? 
zgD{#%tYz}9#AL4zO7ofCx(l1kpPxN0?G&F<#9eHY6$jp|$E^jWK95TO;Rqy`%k=*^ zkdRszEJvF!E&NsPuChyGW;wfFJhVprlV)a$SLbtekQZm~>iQwHnyNz7;j==n-JWqN z0o(#}2VTTu(?Ip+7ixyvfOi#VIk>o*W)gK8SYONSct|P_*a{PX{1t209+*iFhD<^} zGDVl3!s==zG68dnTXo!!>?xUz{Dg@Ek`o)&ujlJ|FYN$fl&}BP2W#{pZ#}inJsRI>%laLTHf%L!W zvAwyaMbM~Qxpnr z2C12Qi+z7me%35Qt@5BGrWEj1CMPEk%FD~cev6Q3^YelkEKKh8xHF;ml}YlZ4IA{- zCol7YSln9x{PQvzwNw~t+4}XYD29N|_+~g{clTFzK`Y*BY@yYbd03h{w6>?ZdRr8> zP63J1ymfvnxNo3z_kK`6whn*iEtI?FO%2#?oD2U^L>yr^sRdgn>x^z|4>eVwxRNiY z5+=ub`ax#ezS}sw8BKlZ+r7NJ>hLzU4H*F2EcvdXoZ;5+ctzS0y`wKGqcFYsWvmu} zBDTs_S9PSsxOTgKo1uVS6sS{}96K04hpw0{9iKmc7VQ_C9j|u(2@rVM**bA@S^$G+ z;H0>mA_rGGh>z9!4lbL}9Qp!iNWCK}YG>AlkC5Ypf`43BZ)4PPnec5fyTf+qP*b9D zB1DiiX39cbOzSF>v1AXn^8QcESbvak%*{2Ja*(_BD9nxD16R&n=L!*NnU z!8G<_8g}0I`JHp#j&txX!81DaOd<6K`a;6MVVT@Uu|6Pg=>>~9pse4|q#~Amraev% z-(TDtki#uaO*sQw^ic1w2^2eM`T2Zm&OdH|J6eBG&)&UNy?V{dvBDhE6vaUcp*q-+KkEePsT_VX9QHG4DGJX9^nJY&wNh-y1=EaoC}Ym-+%Fydey7^lVhGUg^!7Pt98%0n+a#USI+Lpx#<;(#l;&)VCk1Kq1++8SZ< zp&AMTXm1}dtYNGv>|wE=1Ii9=TmXBNeghB-4CY-Yh|(1Jpk z2+GjyKq&k8^r@2?Ml%vW@G}qQ*nkEj;)(-Y)K}jZaUc4^*@}!{0cnzW&T}UbqqD^{1b^`0$n&EUi%I z=aNQ?DNVo;yjc84e#Y4Yu)w4L^(a_7RVk?(Ygj~|7)ZpU&?MdqY?mJ_c)kuAj zJNsROoSz@`HWpM?Rwn*!0AR@q|>ml|XB-9cb7C zBD?G9eR9F!YYvE9Nm7nJLraNcd2y@}u2Y6}>q4Peob%B`im8K3@uJO|a4`t|FR+Jt zu^Uq-*OZr)DI=gGk4Hl_+_`=G_R1RpD8jV#W`DTNrFAFwL(6zMjKPUleZ2lLMEnI; zc+3fJ7Wv-NrY4Ox=dpWm-_xbb`Aip=0Hs#ZVFTR>8<#X{cNK)4fW2G>C<7m%pM)90 zjsp2~VP^_j)Ss1+5slwVnu9Hv2{50H@m6Hh14@V`G3IpGJN|wrCnxI860!dlf`aj4 z{pOo=3s&hErrWS3yv$ZYmO6`2@c~FFV0c;+$CC!b5-88SJlLl=M~4m1{~V47@S_LX zY`$8XAhNlyWsQeRTDR%-WI0VG%1R4-SO^VC&^Cgk#J^;&1pi?ypQf!)O=kRa>rWKI^s(jz_r;m|s zT1T_-(-LjEifu*Z#Y>g*R+KcFEcYxdi~-gmkewK18-q=YUp_c^!1mPN*7#!cl)`za z9cYFp9U4oFbMg?xLv~IM)sN&3#K9IA6~@sMasHV|*b@{o$;K!g6cOxbJ_B%kSBA(( z6@-?J3=Evoz!d)NrZIC;q^`v+B@aHzgmWEa+=F8Y43~_YoC*n>fQlpp_?}8W0U4F{ zbwh)C;9)my2=GX&-Bjn2osi^H)wsWv-evkzECE?vgD`0~(s2y>YBcCBY9Z5FF5o03 zC1nW2BLZmv5~VWQUCH4o@M*8mk1J)^1pIA#C{jTm-1x^IAvmRnY77vCkx2PRa2i&} zN+9fi7M&LkQ0c4(n+J2q$;_lhy`ZznVt>a$_2!k>=UXig^um9KkH+%?N3~~*V*P~~ z{~ZbG`5b!{9)Y;$%)Aar=hY;8I}Kp7BA5D@bJQG06Yl?FzE}`aERNZ785u1ZwC*6M+Vg9H8)uZ&}ec95E`TtJ;x+hWDC# z|Ic~Phx6fi#(2iq+wEX4=9=rC_kCTzx|Z)7Nnxy;csJ3|(6CSjD>-NftilPz}ot~ z6&E9;>3_d~!Q4`xk>upO0WNamy@-+(8XC4H^6#Z={wzZ@v?6TOtCw>239Dmvia5f= zKejf;EOy78-oBKRTg7#v#MAbFB=#yW?veJY@3#Y@z~@clDy-j(1L*hF?_80VlY6s7 zXg0y`T;cPX=t=`~^1@Vi{8(aq&s>}cWedE?$jvN2{ z)j;Hm%eiT6vQ2mU63w4i(0e)uH0PFG}~((b3^oq8Zd2S8VJx0w1$nixqDQr4GDF= z@r~t^?YHCewT=&+<=kZz6(6}A@BSPcBUDpU3kVF%d7JkD-khAAJfl31(qMAFJ>Gg_ zoDdruJBmTAHH=(r+~_ErLSp)0!p$vDw^mX}=-S#y*}_n<{+}e?`2IYFndxbn(f)fk zhxji}>tmR-eF6jNw~K5y#?M}!{-C*SzQZv$SfyHa5VRY5xYNl`&dbZ&($^=4_w3z= z5}lTNhUce8GFg&|WEnEld&~WK<#~#;GBPsh0RaKi`Au#`@23QAgqV$0zNMSPfAmOX ztkR|}GATBeIwL(@geG$N>i{JcmB{7v46*3ieoKCwG~wVVG-@uc%JRIh`nKf$rQR&D z4>&|bNycg~mM7}m+*_q{gwcTG_cHPibEB+x3!P9=PLHim6hWn zqN1V-oYn0T)EDMv)NnT1J`PCScXf3QOHS5ESvbk>p~?TSW_lAFJ4-f8vb?-}b5O%g zQ(r$zeW1?GO+Zj^=)183(?H>3wj51QZ*O;h|7!(>vhuv0gelJ1`=|!4Hgt5%lX`>FXT^qJ2~v1mcc=V;c5q~{K}mRjb!72FQL^n zHRSa4^oA$U z%g@i((bRl$^~x2IgN=#KVtV*9TDi=O{xLjsco;pf{|dGWerUKJz24s6k1F~8ot&GS zJ4d6=RYqJ~Jm?H-_R#4YFJqfjBI{5cV>DK_Q?mMb<8YC#SWA05eW&Bw0(CW51eu@` z*t2;D-(q89`@U%lQn8R zXV|U{qa-CI!%9m_XBQW#un0Kbh)PO!SY3?OI4Kwn6=mqWdq*-fG9tFK&>0P@o3C0y z+Fzid(K9$0osgKA@!fcEK37{@JYaXNY(f;4{GTgLO+Ar_Wq#e-+A32Z`%UZhQm+($ z`!}_rB3<$qmCi>+T&DG2SocWA$}I#FZKr-+%~2}W3kwYmMc(YWLDvf|hi#pp>Z0KG z3AYRSy|odAqN1WOaR~`>RO?Cf8I%VyILJBTvCKX2Ea0xvLqhK4n2wZ&1qKGDr=>mR z~+aGvi-q-~koCfJyI%%UgqVRy`KBBz&`g#}p)!^N(Y<&uXJBSSXM zM~o*mQD?CewJue1nc|O8Dk>_-GudAq+G(xSy@ib(jb;c(DU59HEUEQ8j%v}OHIJS0 
zWYl5xFH??aB+WlNZhoHPt7-+a@VQhnUu5mYxzj&;(cRr$M@NV3x$%H_rS(#e@yHg} zV7^-T@?Zhu((nks;qZQSY5eZ>iwq zF8KVJB**1s|DVM&Gh@Jd^z4Nw3e}xGcI1GK3Y%Z}-01&MY<+zl*?DpqqPRyqtp&z~#vKYJDuNvqJ4uf~nsp=2^&`)o`2jhi>MV1IKCU}0fpK<(UJ z>h)$&t9+*2gn4>)rc`2Z_o;ADI_yj|@g$zu+k{+q2nYgH&o{b?zI>sJdu}WQbuO08 z6wm2kU3Yh}TSrfi8IJItBV1}OT#D}r3CV}uU28Uzzc1ijj-X~5!hQ(iI@=r6Fc~iK zEvP%k6B83tRa4tBSy0e$X^f&*LHfbg{{B3iPXd0|1o-gIE-vm19u!Wyi!UQ--p24a zFb5O!>#dJgByd=K^nb`Wzqs_fwKdhSH&ZT89$Jjv`_0LhdL0SA-rg9nZZ2zl#4LK4 z+RZ^3(rH4wYa`KGWkwTQ7FG|M>ADH7zZc-JA%{U;Fo9jlL`?Vgdq!7*@l3=+`hm!w&lU_iuKzzMfuB ziJ`2~V19eRW0tPLL5z2BaLFhsd&|r<=;-J;tmZM{2Q%kqCnn?7oP+1a@F!eSceupx zi&|9_vXIbfT@R-)E?>TEFj7kI|A0Q#crYLC1D&0n9W5+8d~0WC=BEb++#-g)zW({q z0?Lqq4CcJ{x+!`qWT(a%qJq~P2#F)_8U6mZ!rf0oJN za6RRKOUjfzsZtwaP_Guz(jrCHkR`d)bmZQ>d%xgbVtJj|prE2DDk6f+l$xf& zW_|Sd@fPg0ogw|?yLa!pdwRZ3I-1Xb!}&@`$a3<>CHVsN#5~@soRS6i@bE;dY}Xct ziYcL6%#=;IromR0%a!wG)@?<5f3k1tdU0kqTxK2|ACHghI{3hcM@JSjO<1dIYtPN_ ze?T$H)~Mry8jMNIM*z9`YL@m(kI!(y@tn6fdXOoga%O zMvb9wck*9k_wKKaP+kg4N=mYZ{c3YO-RMvLVho=gb#Y#KG9Pc=XGjw%iZlRdZizDr zn{73hmX?-e;1)8jVd2e+V31X)GN{$L@+yqjFDV52`nDK13#Dgei46=4Ag{7IZAH#F z`HSPFtYpjYx})V5oZH!o-?Z|_BclN>poWKsWdMO$?!Nky%&&1deZuubA~7o2_~Ixh zJoDt>t#*R?`CCHxFiVSzBG3n1g7z{c;wf5mnD?A!03wD7q=ZmNCd6ph0QeMDR#vX) z(Xc(-)V%S#pF3)7f2!VlVR(M7R5XlCB8i7nxJLbtc27ELxg;j$5lTu*Dy&7P#|sO; zZ;9P}{LkJw9=~ff(tSKXeVU~*KHQvw%fGpZ8v_&&MC1n@N0f_;s|S{w)MoXsbWLJ# zuv8FHFq`SH2=X};6=S&QnVDsnnVG|u2fpS>w(+q+XYA7`}{5+>HIhsfWCVV%aP?w>?kr5F12u+O@fQKrtEaOHSd!Pk^ zf&hT^8%8r~M4=guS34AZ)mGBBv`Sa(^oKLDc z;HEkG2IKZQkj8#Fy*Ep0foia!VK$Ig@~cW&RBHtF&hY7kbpk6dyM7%b|IutX!3rFB zMq%vaOkT-?y!gz8&#N0AH*slZADw*HX$d2!86)<i-`aCGFtz$TK*sypUp_rm)&Vc!xQrkJ?6`M(A6 z?Bc`D$$Wug<&LP7nhagAniW9X<8pNvZrZ2ELb0_sw*1Y z6WsfWA=%W_ghob2W>9htmxbGoppqsPt^^znOFCNP#4dd7|MTvtQ(I-=Cbww8V?YvjsxSo0^?{ z1r-!6`27H(Eu zf+m9yO)D!aOf0N#MMb|lJF!U=oOXuY;6re5agm0%SZ36fYnYE!Hp0cNzURRc`B8^H zVrpUn7fnBIJNWxe8#=3ZrOnE{$B$cpmLkXms`1%td_qDAOH0e7jrN`lu{4>Sa=Q)n zpxr?i1^{SG%*vcW-Z~#PB|#aGl;viQjXU(98?eg)?NHgtz=H z)|X1MdCtm8_x_0ST)8tUGV=9IQ=r5C>SN$4^&oCQKPTd`e*xtVxHjj&1>=T!q$u*5 z$6VG1Nh^96FVM2ps@R|skdl*o0$e!%%r0dDJ9|>vZd1wx7!#W^SkoEk${hP?KVNoN{w=#Xf%Vw z`}fb>&W|V&aK19|l}@qnL$d3!?>FscqX_TJg?R;sBd4Xe6S6>K@fMUK7CvM5V-Ry( zDlzK!BNvN;_irJH?%#&<^-(lqdf=-H=QVad*V7mPtOCiZZ!%;o0Mip_kUn|x1Z~;f z19&GW1JO{JQqt1W)Xjl4T~s&&u?3(eT<3ZQwfq+lNZ z;c?4Uqgt`UzfKj<;SFqTg@ZRZk*BK~66i)~@C1~<8#RKK6Z_ow0aC$XOLJZ;akRH5 zLESSmGvgdbe?12n{nMvUg_d)om9}fSlKcRbQljUkre11kr3y6{z^@>C2n(MLD%@+} zLrBrgS5rCjR4vw%*z+B0ZN2H@;sSSKHD?1$J!|Pudo2&38X*V(dy~rmJr` zKhw9VF~9%J6BvH9>S;QvRxH{%K>i5 zg3T?Sz!3^+$mhH~mBr>IZC8dBn&PD;_nY&;a1*yccr-c1}elRPe9g%E~C| z>9IbvM$&4VnzpCD_U|)9bJ|~hLPeF`wf`F0Vb{RG{6ex@$Cj}beC&qCM%)%YH)+@X z0(bY9wR+F6r;#c z%Li_n%Uvnx(7$z{1Xe^fHZ_HT5ayaVHp6VQEa&X(+!tL6bfrE;(3`_T4XJb?t7>BGRYXX&$H5BAsgL~aF-cVV( zFt7pA^uWNr8KD7$jKgLbN53OMI_tHta520SyV=RMzO;%8zPY(M8H)7jQ*^4+ncX(V zx>mSqCAVRTXz4pV|aH;FJZ|nX1coj&tC+HG5qMQX`$!98{k02NQ zqEuX1R@Mxn*r4eKqfvM2RUsiEOd_5~o&rDhlU<^;E1(bErx1VF;B$Ah!pa+<`#T+- zSN8TCBW31Ub{pgH+VZa9<+(Xecp*HddEm#7@bQab1qBFig`55kd9pfM5ib8#Imetu zN>p?j>I}$?p4@ip*-luZ5mfGgUVi@kxqPW`vzk)xZ=ohyMQsSBc+3xXbPS|1z+wuD ziTMNg508p!DlzQcR@10Z?Ep||0HBYM%laL(7FL6<55Uv>fyBoFLzB&wqXwl)32;|Z zGYca^%mK5<#KgSGSLK{M3dR8@1C^?OA!#s2F6SN^|LG<=G(>?Mx#^7!4YF{1z3Auf zexLye@DoV+45IFfpFzED-MMdRn*#BLgNb;vm5M1qQ`cP^{(hB6mc@GU4G>}pWWo1G z%=>%qf(Tr_r8=CBx(~P*5!Kn)*sff?%H($L3>s`s?NN4Nuq-TMvh(&0*aS16Bf*pK zLW<<#7)T{LaJyHoU6ZM>)D0ry&8;=sN>%8W=(P#~!Qj`Y8wi~RI@twV^jCeoQmylm zFniOIM{>~QYiK1h1?n|qK#3704xv^cyO6iXvoki&MnN^}fx}A2@_ecR{BhutiolC^@fu?;+x=j-erSKahYpCT*_>cysMB*hgX? 
zN-SoCY?K`xE7J=K7!8e$7h>)`9RbFb4y0NXc>@50a?qLydObvX>YC?4&8jHvTsezK zNT549IzsND2V{wKI4xqJR%gtD#*0XofHY)aXJ;TMrj?Nq1e7qe9N<};JpM+Wdc2M%CQOE{&~*XZOM&k8_}hP#_{y8H(bKH;oJfO1~Esy$O{ZTE7Pf!nwV)Z*yOP`WgF7Q@}z_?4LEA ziY)QVoBZ=f99)8~{{HsR{H(0zt(oSXQ3`tcKsecZ#*w0p#rmCvg@rFD!f_2@$`B2WK62ss9}jMe^1LDW z@V?u$)TwWsz(o}_4t(usttvT;I}aW-MQgaWfU6=+g>!g(ECaP0VU2Izyvg6wShw)= z_pcusirw6N54y?C8doS`1bogh0EyG&Xm0Ud9EAR*{FvF`^cpsfOy5HIr%zW#M@NyS z41&$~l?NjkTp&1O2N7@z$jIE=+S>X%G=%haAgw#98p6UDgoKHzOK={cYdv&4gN+Q@ zar3WV0ttr~Qmgl0%1+miUKh}$Ufx03CdFZ%I8e@8Kmq#E(Gdii7!(O%0E+%qqW_x1 z6*$Jhy|peUA!Ik8#{j&1%&c==U0ofjX&Of!`9E%PNESRv-~^ADw61`$)jT?yfbbfC z^$#4`RpsR$z(+h9X8U=u;s=$FVoo1iyG{chQ2-=6_pXYQdhXgUnhBr=vk>zgse&` zDrp4;$s3F^z{fxuhEipoG&uEii(%tFDojLDl7fcjCK@+)l~#Tz?5)j|qj%_8;91yo zf#@dvnMaNW7eEo{0ku{y1uPr}4}6o7h|r{^r4?0F{M&5dy#!Jm4{DEYqV{7bTwKny z@_jc;b%}RA1{mK*j`<*RJrk{jwq{`hEBHJq+0L8)an=Pi@BR;uefKsf7DVV6x9Qf6 zaPTCCpMv9kb}*q)aSHwNw+=I)8o6wnf3A6Y$35IDe>8Y*hdis?lE1V(j}%#Q0VcSf zSE-u)kCqXsA={~!77FAOK$QHeZX%Cu^W`1)USl2~)KpX2t9)Lcnc%iiv{MVtFk|fL za!3B46Q-8e@};5~TCmbF@$uTm#%*A2_qCCdk{XOvK8J;az0;c_h(W~f`bC{j@6RI| zx$GZ+Mf^bW1{i2Q%0NbT1zdaR!d89S00jZ>f}+J~`3XdT)T}ITP!+&Mpk!f@t>K}j z_64A*Wnka~KLu3TSKf|I%$E+$0jk2(`g#W(=`XTU2E>;$HNx#J!UcqAQzwf{WJ)a~x8lJKz{uD!aNfl_yKm5ndDO>xvZ*xc+5 z)dKwuz>@jS zsfcP{QgpvJ;Dr|*tHH;|$EiLtG;|ji_caI|@b=4-7;tv+Wo2a%dCWZ=JaR6B%dErs zfItCf$H0H20n)<4`vV$*sDc*4t@2$(M?IzB#rW@#zM zmDjL`j6xzV-PHy9ILOMEK=Kd@VjIks794$z+-|ijU*!*!luC-lMzClGDI^fkVll~< zCfvjn@#m`^Qu9ZM>IMxm$6`i`W^C z=?9EGTVrHo)Je zGl*S&oHo1=q=ATy+LIuVA($L@4Pb#jsWLQob%iAHIx*|~z5^($u(WjE zo7nXQ!yp~sM70A8$XS^{WDyGzp^(6hK5ozPjz+tU~*89kdQPpV_;w)LN9}A`HMzB+@s~Z0>qx< zKR--!Oau`C_7NdSpN4(ecfoGSl*tgq`)&$Q!Q^k@C1PS?fQpUKUNuJiDUhvf9xv{= zRoUi5!N*4ghXWcHXy6^+=$=21+}zwG*m*P;4SNRuClKvGCka05je0)n0By!oJLqGp z<2CW1i_GE`!#)cke(l2NpvJRRWVaJ9^)Cn^B>P{$9_jmkF;7>>^@M-G&mGqP|7Ezw zlMx`Y!ybNbWhE#j1~c?HLo3 z=8q4T8kL?SD}3jZs^9bX$DpYoWgGzMl^^~6ksvEs&Hstz8I%$edrD6)amEFo&7O}* z9`IC<4GS|f^!7;6$O7+~TB@oAT5jU0v%i!ykvCCczkmOJkwR6gG|q23l-v@{9}hQk zWC5Zt+s4En)J!-BpKU{PQ&v~o?@otY=)zEWiXij^`xTMr;pxpon&D5}bK1^5&}9Aq zQM1a}n5ZL$=1E`(RvlQqVKFfxj*iD}x%vVEzuboT>fq$RhK&lA{eOL^huIuT#J zwhxXjv_D}Gks6;W_(?;Y4C6MzXYhunLuo&f7|@-3Q%{jj2}tCKGIu+Sk~PcniEgx# zwGA)Ga|`_RDJO_}|JuHJ$D4ZQ=YWMkoQ_wlvZV(+mIfgg)kil&p_NB;o)hS zn1}-mv_C!k=6D3aB>PRuWe`J*gD}Qw&8ncw!DmD4TIpcui8E7EDXuQFzwbit_W{6W zYikRxJ5TdmVR||lD8GJSNx;wkBybi}*#;xv$Q|N{w`-gZGQp)dJwMq%dLn{%s#MMQ zBC+ybQJxkYG}Cn7K=Ty;Im^{}pB5mnEZ8mi>eoF5q)R-4dc&tsK0^aDRyCMJ>r%}=kc z!Zy?`x8EWKWoQ}{KDtVQgf3!30@039$*VEJM`>OH~6;@tP_<&HQp)uP(;u}?c^|hT%t6lAv6ZJn> zhMBJTF33(T>f-gRFW>1~WL!C>sv~9tX892EsqkF&vycjaH1uPts&kL`6?8 zZgGA-KG0~BIa+_+8XS$0+oDtCtoDob*|Xnvv7k30iX|N1fAHK_6=B$6JBi3~BdK15 z+5pV{3i#AWs0GP%Wt59Ke_Oc4Agqy@pv&oMczDZ(U>SnKB2q2sugGZyI)%|m1fzB| zDgwO5L?4Q&G!!q7zsS({)p>`tKbO+Kx^RJ~?BU)Y&(3fe0QNk1yUn$osR&@pfvU#wB>8aZU@J@q_>;aqd@8Ji=IV+0MC9~p^ z7Z&kv`A_TD+N9nW7{uz2uX!sp700cOm;@<*+vljkCVR@AZeBnlyZll7TBPF(c`k#w zqz6-`1^`*0W30x7+N#xV!lfDV8;^J$S%C`)Fhpw`7>INx@gf}k!Gi}-2X^YQ`K!MhUPrLO^uaVa zCrGG#C#x*LX0{Ax1ScolRoxsD{RVKAM(vrNuNooGz)x&u`=^sC_c0oZTskud9T$%~ zJS?PR&0h-bMo7)3`4(_biW2K242jLt39XxPNe>x;W)ULUEoAFes7k_m#ju;J|2K|{ zXEU9K997%5KR`v8|XUz{BvhS2zJ1lEkB!pOQ| z9e^l$LCi@uQ#=KdTL}StEvaq(*Bu{xD)`PQ7EhNEXZjpk!;MavigNys_>`ofv^o)a z8zM*7Bp30Gt@tTog?)3Vuu9%!2+OFL(>BZU3Fe!2jy1^jXyI_g&20qj{UQlhuU_9y z@(m7dZfN)fsyC1RZGdB)Ou^EwIdU|6n3<`AvlR!L$v92l9-dbP1YklGTmpQYhAS~2 zH!oE|XMsIsU}(4nIas7#Zt&G`?(Aav=hIEk7stze`zkQ8?UJK)a(XTEZ?+D@YL(b# zjxd!CZiM;ds|v@UMZ8m|{Osq>SM`dn=!7Lqlw`dl52jSs{S&=ZAF4y6MfOxud`X?k z)E+=5bN9?YX?=lOPj7D)phWUt6n~o6oi#nwaQ?Zz27A7TlJD9 
z%?Kayy=_+J&>g4sdyeZ&l|sXE=4&rwEB6gn=5Gs2F1qHOyV*Vr_M}VVw-&A~ls_Of z(H#G;p@1fcSJpT9-dFmj<#`;Y8ZR|cc+jE#Kto;Jz=R6+PGQlIsIzHOXC00WdlI~Z z<%tZd*F0C>lA>SF^R=!X`Po*DFtVUeukIW51vAb(B#lO;H&OXk>IB$$+VB2h3DHsh z&8K!)8h_j8KPoAfNZ2QFikXeN3q2s_(0e@9Yn&{2jyW_4PCpiwKQkH{)UpojmMhFh zd!b`Mw_rE%WV(q({L60kh3CVheiLbfIilqh8PsasmX9e$u=GZYW2GS>xmb#aYNV)Y zIZ=x=SHoDhU?-19AsaWhV0%76`%^VuaaGx8_M0Ct+c2mI3C8!=LG<|t_*c|$ zc(s6<1}<+%PODDm7%yE#(I@ULJEWG&tf3;l)A^@|U9Tf(uNbrQsf_>d-T!WT9gTN< zFnJNr<;=bSy3&Ly@c-Pp7<5_uaidWz7O&Lp@P=`#?#Qhl>DDTqC$clqO8iVblP`3` z{Ptog{-wF0`Kj1{CPQZL+`lgf4gVQLN`QS_*}pwoAnewCd@%5e?A4oz96s3<$;Odi z_>rR114GRT6HeY)*kOgVnjbGdY;#1ib#D3Oo1hG1zi``le@=^EO86GYuzg2{o;T%O z#=5oSs^xkbz4 zFFbc!)7%Dn=vV9YDGoGE|`C)Zgaf9fhX1MLg)~3_@Rj*Fjm&>cG7&3 zj-HOi+HV(zRezExB^t9idc(eYeEh-c1%5wC5$(Bd5}D2~>D+FURQ?qGHJAD{KIV%_ z$Wc=_n2!D~#-&BoIX&dn&>)yU3y}7>mOfs6#PdF^C8-k?a%Rr-pE(`pbjUmY23!m} z_B*g=cdxJhJpX-$ImGu#7!_k6MAOT5wr>b!_dn80ZTlRS-* z6ye`oPHf@+WdE^5>^A@bLRJYV_K)UfVZvu?E``ziC&N1&OHA#m@!eIL=Ato?B=npf zR#o*C&Wn6Y)Uo#RWsPMyw=&rr_}6D`tx1ha&yr^V`{610j^+wS%B!GB~` z{)&z-JH?OW4DaaD`7c!FB4-Y#&p&z=^3sq{y(*#1`s4|jd*{AuQ+Na=tDUjrhGmTf zyA37zlSJ7%Z;QQ+6CGVq`@y0#f&2Oa^t;=CE`)S=@DGSnbpBd;Q3sLs4tUYHQyIAg zj&^ya%VyahkKlv_5el|7UCT4acsHDa?#IqE{VM-!Al*}R`}2Aw^*_Py;URn3dacd= zNn<&+IKlpR4r?P2a)`_TLgn*SeUT2BC7I#$&Te2-vEF>pb;fX6R=M(fdbAYdsQ9Re zpTx}JyoxISKAH}5>wwK?fv1$1;@hr2&OCh{*{+UxZ`4GdrgOaZO1iNfSN}$S#WTH& z()PeC>8m1YxIKQ+-a(V!<|atH{McW{1vlnpl_&0o`i5Rc&20Ivcy4fhgKUC``5|~? z)79Hs-_qiH3H@3~L`1s!`tousCWR%i0)=oI`WBrt|wX7+BVFARC!0ah&8h8p3CrnbyE9b|OO zCPz+c&+=SPy+6AV$)tp`j8P1v59pIyUX=(Vvmo8Qp*>GeZfl6&B($FrIz?!}c6ID0 zdx*kndw75R4b#3D?|R|Sy4ry|eskrf6JDs5feq8HE5w??vZ8u7r!1yG=-%oKs_03| z;A;!5{np({Tvu_|^nu*!roWV%aDRZL7wI9!fdEi5cBA%Q$|Mg{DQsnX?y4JoR?6|P`lXq%gN z_Qxv>;Z;VKIt|}Teh^&e&0o{H_p9ZvZ;UXCVA$4Uk|nsidob0Amc8|uGc_%m`_-Eq zvfEdA=lg1HSpC&HA30pVp+_$LTruEfMW=|{Va(!^mCsluE;I8pv$Goy#;Vd}Q(rNy z`ldb!PuHp~*x>j1FPz5_piwCl7Z)cAbshp_U;^d#n)B3z>c5bSU@IN=Df287T7l%I zmZs)UB&h*fIT|cPJ;VWke6SdFk$`sT>*E8@LA^!|vQ62_-)SINMEdO69Y~%)s=mJs z(nUd#+J(acm!AgDe5oe`1dL$EL^u+6iHSSG#6Utc$oL8nekzUfq%Sf#aEn<2o;M4| z&f$cVm<)wwXH!{QpPyyOK)MDXE1WH#Xe(@=3K3q72D(j19dUGFvr2lEq0; zKGf(d!z12lI$Et!1F9mKc99B8u5O8{LYHR=Datl&g=rB-HejqyHckH}fX+Nz? 
ztQH&|?;9^v^9n6Ybow}LC7|vZ-(XRT4aJPvbiPGw+|66Jp8m%N=mp-ewYz%-krhD9 zlZHe+!3YE+G`B8BSQs^G|A2M^y4hNtX($<(df=l(FsR{u0115dH#{1=<*I#1e0D;k zdT(c!1&0I{;WFexA>tRbr}6!4t}VL%<)lB%X>67!^(O8YX@^`)wFM^fI*6V-$#}Bb0!U@7-+%9V{H2S4ey5A zS!3}xHIWMYyQT^XCT@71P&8g1f^UmhzT_Wj>&VHW~jfQ?je~e5EU0;NC%k;0IH5R z*xS*;F!+}i4SbtDE@Vaubg7DgyS%TVW)?(`)#Jd>A_Q0v$wlaa&?(Utck=pC#B6}h zaFmHyXA$qW=Y{0VWKo1p43-yqn0lqhtjp?gVQ)=BXe}|ssh5;Y&K9_~K%?c`!KTx> z(GYELBhXvRMCPOZNa>DAKL;}@X;*>j1?oeVO-;L7Sa6&hqoz&hAEILLzJjlF||#oqE;o7#eraR{bD?MunX@XX6w zpquY`tspXu}%6Jd^2rD z8d_=Q^<&0TL!}FsgUy;Xrwl}T)pacj3_N*)F*cgA^a}TWJ8*pWvCXYnY+*ss^j>nP z-(dR9Ols7wti`r36dB`V(VNNfC&5>}qNuU(pfE;;); z+00$?IPHX&z`5OB9SVOU==Dxh)BXGR=MV(*IKHO@#vwRApFvsz9W>kzm-ZeZVLRxr zn_F8cFl>m4&lUh@X$MB|?mv3u+uOH=J(PW4|t%fUw3m28=JHK!;IO zR%V8IL@wKv-}_Y~hNT&gmvXI#qm873i+BrrzEAJZ8L+MKwS_zR{w;spTU3Y5pi95) zyt!VSZe2>m6V%=4X&rUjL%_d8l(#FF9WBxO#w*E-$a1x+5MyKQ;ac8#4eOUnlW#Dc zCJ9M!pFVu}@W$`GK`_h$ON0IW!_lZA(K1*B0s19~&%wC0sQXGZ4DmofLj>ZlS3A;3 zAo-FGHogd~3^I($3d_*a*(r4lX*zP4Q-uLY7>9BVgP0>ZOok5)4WST0(He%qY=|HZ z8j8nmM)w#7(ZHGv3l0uuwU~ajX9@RV2(Fe4G-7gyCbtX>$j5Qpdq@u%F)@g^h9NJ!)C2FNQE_o|=I^ZwH3*UILLy5HLKBT@ zxG(JsJUu;85QB_>p(O~WBfU*5w39R=nemkFt%izPZgpi5>)szZY8qmqr0HXh_wQ=WH{7=JD?gfCu=!Lik7G^ebG0_0iX0gmM2aE0&y%fLw zwyfyZb5HnbY4ZpQvczLQdV3>b*haah{}`w0Fz^oZ6dJrNtRtpj}S{-(NKNO-{pmliGi8+Qc zEJQ;u%WNCr~7`T(MC0-8iX<^J&Rcb&(3L}H)cIZln;M7{nxp?jA z5YCk+=7_J>-F+7nLh$Sm4r3Z!YEn_<33!iGP^ejkaA~^#Dte0#zBjSu-X@kzI5U-W zOWaL~glEMr=aXa6{B{bYNtT)$qDK7cg2WD2q%-G4%*yW(dN4P9Q4&54di*);+4e#6 zu0La!ts7rphrg61l_XfSEmE}`f3C!U44De)F@z zkDQ*J{j~4I^Svs>4fZvt%E8bCYFLOJK|ob-INW>!6=({&WB?Iw92Bka&!Us|!xkOs zi7$f^mbv}TR%>)8XTE-sgc4BJ!! z)~#DzDY!YE)=b>C1oimrzk3ELONFqejpQg-E_#QZO<+J5T%Tg0=;iU)A2~1*249x2 z0|VUEmWkDlFzJ5uy?F7J$-g$Y(?#&e<|0LN!KTXZsz8Lp*35$zsgZ9*O*6TKeFG_&Tbr56lHf(LOqA#U3;RPPy}Bs5Df{tB0v{|`B`ac;WJx=LlDf! zCyf1XFVp^=-M()2s`IA=>hYLBm82Q#~|eV2n? 
zqn5zf*ci#Q{+ssvM0yuee;|!oynp`=vP3Wj>GPrBvf{(apw4|~uly(mz<@WX2w%v5 z`Dlf|anE`AuBh78$he9(`AJVJuT|7dvJ}Y?94=rk2`_Zfosop8`s?dn?%G@P+de@5 zCNJO{FR$T?)&+SzPEX!1dgJ?*P6wq`9~EJ}oUR%-f^dI_Q<~E2AEasJzQ&P1o>rbza}&M1?95oX{tX zM5i*b_NGWS%S7ZGdPmu=b9>zHW#%k5r<{MdFvFO*5F3Rhl^fXfCXe-T9{U~FTntes zClfEsrrBa!DoiZDlsV#jE$oCiR>NQ1%zSEyQ*d6~)rCaY+u?jennD}+>|x)Qrs<;c z@P=XVw;-koxl=F6q(q;p_zn$!v%3~&k16CG`!0Un>NF~)DzOaa^^*{t+03`$m{1hQ z|7ibZMnG;a)<76Pba%%{>(_%cm3)HR#d=zxi!oc^W{LRJ94En-(GcO%$RW8J2L{GP z@k!)COkKyq(w%Pb1)CNRmcns&(fya|Px2A5fC+A*`-Jze$>WJsg{|2?h-#`$HoxaU z`@qTx9q(W_^Hafd%h&54sA(8N-E7aj6c!7dX{y>>T+EZ?VuMeGc$oO}NmhnfF!5P` zn5{nArFrY9zhQs=1|)MqkW=qI*SfMFi^-P4E6{c;-&S;=H61QdenWIXIXxvNE*Iy+jjjm@64}`t8(RKer!d} zpW@|b3@fInf;=~8tCY}UoQJRZ!?v@vacQGJw}()(par*Wh%*p;Zacq!#CsA}RvZX3 z0q@`3-qwQn3G33|PjAcQyEQd1=&b1<$>?5Bb}X6u8EJ1TS<%#s;rlo8+0TpkZ4QU{ zg&}Sn+lwbJxfkB-ZCyLnwfLmWHtE|&RkEK%YSeNC2aK)#r9i_hNrTu}T){SevyH(k zx6PHBJVcny#>w`Yu%_)cu93J7GRWj>P|gk8Fca}mg;|D&g)I($rv-!zd08f~HwoI( zQd2h%4um0b1LFe_LW`%xhJpg$_5gF$oQ{r;iJFR$nyW^|wh<&5jZ`T2ggL)ZS_=7vT?^m3WW4gm!kbj*fVP7Qp%lAa@DE*E^W9`W^bT z;{DWR*p6tQfu|r}ZUD3Jz>VNr2S{KhT!9Y=uoj5hFyIvp?lPF7l!#yiQpEFof5bB% z1)%`uWHFJia)O8>pn&erYUo}BkAKy2pA<3`YU=cd9#)mk?=MlBsJn!I($6W6EK8Th zHaAh-NeVq~?WcXUmMpn;$Xos!XVm5pb0;8-itX%Y_;000eGqBsA%E1DPv68?gANHj z>NS`abnkS7G-SR?nE}s~BE)uKlt>#!NLWq&-uVDTetL27`U_B*U~+5=jsRK z`%xZa!kBy#e3=Fsl9d7q3yvevtp{{n*$i<0e(fx17#SPi$H#Ajpar6;><#y8>;4wy zB3=ym>Q2uue-Robc|Lkf85h{r8A^~W*&;en0weC8g6j))sIZ2k;)@Vx`)kj`FPdJP zy(o845Kof`VLS@gH*97y?7qB1G}gVhP2(khs{l!La;>l9?cMvi!)`nJeWfAjakz2p z*skGXd6lthW+td%|K-`U!aOh!FYl%6H*dnEus`seoMmc><&2g;<^;Ot^Sq_;WobSj<>vD@TGIE4Qn75Kxs%p~`L`t59v%G;-vvQ2!zC(H z6Tg)HU-8b??%}FZQZwVkkers$>e+e7JhUS$adDvm+lbaKzL*Q$} zcdK-7Jq8-Ct0_%3=kQ-%J!?lt2U}6Dfr~w%O7}|t% zB;@TwaiJlueK~{G0C&{7cUM+MmX+hq5`rO}Nyts2t3xu_ai+8U>)rom(UV+U-2Vdf z*>K>?WBC3u&8c6W;2|?X#%*`|k#nuXjvkFdzBt5zlpqcU{7@6-C11;!Bk{Ml`wfwB zrhko%A>wz4UR=|5$Q&JS366rc0!4q)|Hsx_M`g7}ZNC;Gs30JXAdPfNr$~1yAP7iz zH;8mgNrQlt(p`dpbT>#h0*Zo^)S1iuo_CDzobR7K#@=J>hv!-Ajv3eWo8ne0e|3a@ zy*{{1eJWc3y#SLQZdV`F)m9j+F0`{X>1x|MZ}ykKszi?`gsC3_(s%Nd6@ zYnQ>hugOMy@x#@+>gvA=fN2XX41SYq356nBHa09Md4O2t7!*_?VPUFJck|?hC5bhA%F0QI^i(CAho==@%Sh#vtO}s)U+I{-q5l-7|bHk_=N=nvN+ng+R=+ z1CX>$dV1J!=~_T%0k3j99D9(vs#ZCvd*8l@@NvHM3kpFU-BQSo$)dqOnlo$*s30J) zP~EP;cAc!2EQmdg`8Jx5dSf?HVfr*BpCZRzu;_7Fw1jEK=PMfB&klE^o_AuQ84-5oTzclCs0l|X^=+!Phus5SzcEq&F(&=lx`R+aXsN~~7fFPD-lRYWr z?H6Z4M;{#T^55^7b&wE_ytOnyrusYNo3vYecnVq-D@MNv%OAW@m7LOK0m7AcOre&p zLB9_FGaDVh|LiJV3}sKGNYAsv!yI4#HKsX1si*Kh$GP!RnnuWe^McV#|Lw%-tXXk9Y^LOV|azZn!T4yN! 
zb%vMxuyJt%ptuEhIbz|2D{5)9Cupsu_qn^{qnv(Oj=Xsto%DBWt@RBzBz4+5`V-%s zd)huqI3=`~!p@|1dF$qgr(2LvcixxY8(lTrWQTe^TQbr4{9468nzEKQzM@}1jm*Cu zcq{4>_^Oc}3L>IC*)swKX=0@__H7oHSKrHW{wb^6_e(uiyH9YjW<6c+qAIEi5k9*r zsaT+h|RY9>scomtJUbuI`5x^tpS(`|5U;L zM_SHbDj^vIizQ-=#f-yDEH$~NnN^}RP)@SH)!#{RF!QGKlr@27M2;%+93YlGxMNs2r2+wA|faUhu-MnY4sW2 z9x?J)vYejcjlJ*}4Zfb>T_wC2^o}7g-hiJMFv1KsEH`2|paVk(C|L~-Q>Xj*S)*8@yt{|8pOkS`8rkLGRyJQU z;cs-{cpp3(*J>DbQjz~y>DWuiSy1k3?~a3+tNz5q?n<`@DNcPao_gG)iO=q;dnHe% zBFI?Y(2y^LR&_oC_z_4tm7Bf2;41KgNuAV-rL5f4=a*1v0WXH#vS~6l1 zk$*5KKQ*$EscL)0eM=rZVgnlnIG$R8F9KE!M3$%fGfBwirOX9x8#G)2(R&_oN=nw(b2yz_m$i!qAqpc%qLOP*--C4{CcfO+~JR>S1G~VT`ytb zSN6QmMc%n8Hn~98G$z9{jLY_0odYdEMiKTCWP#8{M@3}H;FsUi@c~#1C?Fnd{)G%^ z*vG8L1KgKzwMfg!fr0u(TJezP@EI{n#5Mtbe2*~k?y5}5^e`^Y_NeI}O9E`pB zRahT@(pmum<-7#sE7;UBZ`0EUz%D*CGLi-I`K>jGpg;}q7Fy@osR$m-^~d&hFYF&s zQ076G2%a_i6?mYyFOG9~0N73@Yi_wUa5DSFiux0y?oIJy@;;0OAO`M?pFwoYHEFL61*jl0q9#=_QA7~mUa`8kS$OHV?-f_&_2C3 zxO~B!bkM}f&JN`tA!&K}r;3VE;G$aRJ8YeajgD@Cgc$lBWwm__S3p%90&W#32T(%^ zgOJ1IrTSnfSEdC|ykqFi$h*gC)A5@=YT|R!U%I;A)?207GMV_wU5q%aF8|y&U0{?O zv?f2yWsb(8k^07$=wuo7JmE^*;bHrjOeWp^gA?)5fxC3;U!ZZL3~&c<<=sFmT6NAp zh(HzF!(j`=1W`#zTFrWug1#!sit=(eeY=3ddR4-EJI^9NAiWHmlH-NUWI64oI8_Ln z8A0OaeV&C$guMob76?&5DS<9Pb{p@%RBwKM9!d3nFF*a?RB!x8zw>$@VZF<4u{R`7 zX7n0`!C+GB?tY{>Jw1~Sk(A~%{BL#Fsy@D6=*S7h zg-o;kT)7$khyguWY@#h0;4I|y=m=n{ipVIW$g-qEbudQMA@ zt4AIxLo6MWBC^`P(7u&>;Gvd;jczo2{pd&?&UQil%^<07sD*m+qwIqlGe9T!w) z0XFgf#cjtup%0KnxH(@!8ipx+_Btnr8d|rHc5>0d`{LoI>X(xsuYUL4P`G z%~W4e@ZitKzz>AJ<5OE(+sH}?3*cr5Es$_irCZz569jidxMoXP+MS>D(Piio(209oLet=l8ycV>jM^F0v$piEZ=T6Y~^bKP9Iq*g>f z4qA7_3*>Y58}*w3-V4!td*pCtz&Fr;`SMF4Zv|_SMbFmTaq+Z}tR#oq$;8gf^DVE6 zg}nklMNvo8dl;@9j*z&;i2p5W4xVR2OXJ{{t~C-D`}pYjxcTx#Gv;6QkBt>iZu(;; z$i+|<)S?S}95N!1FzC4=SU_koAVy$-d5VdMd?BuZzU==i67S>z3HHkRy2hd%GdP=$ zkB@a$LgrD~lfvOJ!FN|2J}p)JuW1U7Fn|T*J`KZLR+*Ee;NeLGRR#wbTl)I?c-)ui z^l$1HqCIbm9iy$ZHGi7@)mfZGk%Ct!4LiQDV5|>URs&4*feTYrB9_zX<%b0VaN z-9V_7AfX%_9HkW%+1m*(>MYz77D&%JD#lEX`KxnvXY*#~;-#eQ8FN0)s1zVlOcp^` zZjQ7M6&BJ{0*V1Zo;ea!7OH;;+!wePLb)26U(ITc2Bsmt-%4H!O9e6Ae-Ey*2 zOUEiP+{_P8l0(N#zENrO=?k4PH1c@`>ii+Oz55o5|NdFZ;hHZ|M329qprS&J7o<|S z2xW(3qffl~osfl15%q<+fYr{7@`0TLg>wgv0*TQ!pHgNS$4wfRIo!0f@F}kf>ACWE z{wZ3oXw0y-b+2tGU;kzP?cOv$ABo6u?SE<0kQ9K^09|4NHVi=a1gaBj*tKomoGGl1 zwq^2FwBlF#u8;HcJpGH85d|(0@5<;GEWx$}YvvG-FBLyP=Ga)~_laC8BdXznj~qfr#I~15uB6|o zRqJvX*0wyMNb$SJuF^eFxg+N`vE*RxeT2Ivr{{wj`>@gCKc)QYTQ6yAkmo~5Bg^8W?C#p< z=MD~c&A3N5Urt2CERDOlQbhBFIYI&B>!&~_tseYqG}08?>!PHW?C;_@qNJiUu`SGG z_a?paXSN;U_zbLhmd^iVT+0KU+rf=@K&}BB3%*PaEFu4z-F7xPs3L`@vnE$9Sxn!r z4hb*3!$;&3qWz4TG((D_UE*n*bq?(3n*qxKqO$;W$m70O_3~!7CU&=+>aU^p2Pcc7 zW9Nm^15PMQf@<%-JR&Feg+r!HzwIVySzd2ozCoyC&1ZYJLF^ZxB@ONO%+0%CRR>WX zyVHc)>iDW<{pXOvw~Ty*HKWo~Mhv|fLSE<%kz{F* zJVWj8izYLYdqP_&K+9Np_0g@?h98i#+_-V0)UXrdO76Kpgl`IJ`LsO;u^9J=GXLA% zpBLdjn_}pOvW$eu_%5CW#Kc>TiN?mglRC4ZtQ(yT;(vmNLx!RGX@c5r&RWZAPVUFw zCJIw*Av%;TyT72_oQ6g!2qrZ0xU9a#fw5`?R6Sy!W;@LRo3M<93&~#+AWc8AbOTs@ zs8-$3I4z?+FopQ7w7rn#VGDa#SgmBiX2MA3fQTZAV88th5fz>LY+>G1#v2PCgw@}g z7xs@EGZpYG@_%ax(0QB%F%nSWT$1lDj9>LOAFc{$kdab^Hh{Y5r$|)#_!U!% z{NrpgV=0&MhmIxuZ{H<`7^*wab2+uo&OekSedJ?9?LIFTV=eF`7o|p~)^4L>cXQ}l z)Xk1%Li^UvJP}`2&~TvF5!pwZweg12 zNPat#a~tuWZI?6;lvx6{24L#BA_sTS(Dad9kk=s)swP(eQ@{>zydvEVP}w5?@t?{z zT^Q9I1UHCD7(u*&(ih<-_9R>$vE4*20}x^X3ywn*KtisUMn*;mCJ^d{L4JPMV@>2~ zKCAE*U7AKjziLhR{zQ4cl$=98z~iokX9}O8K~BlME@%_&Mz3Z0oJLf76m;F8Vpe06 zmCMt=!O0la?3?H(ee8-$XGPs4jcad)jr6urv4OB7me<2cZ zWd$4kOL!H)2fhY<5-83{0yRLp=o1vcR@!Clk(kjr?AtTv@>fqbZ!+4<3^RO}e8t@# z-D?#0X+z{BaA1=dzGNKXL2zQ7@NdgWHZ?PBLJS#ab&CM6Q|~|dTj9kjzmrE$mR3~W 
zf01TRXm-|(+>vGrxP*B&VPGNvuu4gj8a@dLkeUnKAL!}nK`+=JNb~g@+eWkssYtig z6By@T_FrzpQT|lNL-by#77{dcL>NNQ0LWiF{joNqx-@#FsbukW^}ED1?R&Z!@4q0^M|gDs zkzos25Ew#|EOy>1*}_GMq|`!Qe@lUYTv=I(khfsRZaKlK5C^#~LKg(0KsM|WL>q1~ z$&A>dk-ieR1*WU5h%YWK06qkb0;sLfcf3P;^Ggo~ysX;uA{GE(H^?P&`F=o`whW&N zUa@ld`!(Vzu0e2QW(;};kLtwWRjav;JP+V33E98R+s})3{2_!s$4DAq{i?yng{a27 z{kYtjZq=YwM$|R_8VZ6PABcyZJ3oT90Tm_qKsS7f6MxM@x(mF(uYgMiXdeWpZw-4; zp(_s>r<0{1G9W;p!XX1SDittvzzKXM5qi7e7NLef{p-jG;tB^Ty`GLd_KnQfunh=+ zf(uW9QM)?)rWNtMb*zru(-d2@gAa;*uI1L-*(M*Kv||i_heuNO^u*}#BTBzxt8LeM zM{={Hm7&MotIkIF7x}X(hw|r%D@IJl$9OmHc752HX-98?$R5N@V)^KW)8@FCY zm0!;V`TimTxNq^2{v-ct^2dufDF(?E+KgB?O?*QJK{z5?D(aa+FZGOjL!)S&+bPnE zo=X~g64h4|o~f2I($XJf%C69l+1A{<8vAR2VQas8dpRTl_uJwxkf|c&EMV@TGsr>^ z6{QOXc7uSMjyPU;92i6m-SpcO0rHT`;^)V`CP<3aJ?IgNW$qWx&h0ueAmt zn~(%}-4V(#43sbceL{JEZ>_0qwEPjbGV_OO%oP}=j~P>*r8@nC*{3HTc;BVcQf2LID;R- zv`zOufu*}@XC%h20G0Qi!)x@T?UCK3JLT!7`akr=a%^<$4j)U>#F85sMVDT(q%)_= z%P8^4w8{M%Hx5y?sAO7ca_K%t8Uf5g#MgH^C%u>V!oYm7ZymBq77Z zVLam%`egj^osZkTp9|(wZsXq&H6%h+&@9#&=1~P|Lc|0y@X3q6-v)(1o!fqHhtcPd z+8-Ayl#zA6nrgx419v{S)s;ZqD=|nQ@0es-{nR?oT}kz?P@0x=k?cY+loXyiWr2+YY;V5?EMCnN`dAG9K_^VDaT zDZkJyuJyrBgi$FiJ`5k!c~-`&s;U5s49A559M09%&P}@{fER|6U~F>o-+_72!|Kny zr{XbGAQugZjFezb$Sz_^&!cWkv7W7MftwU)K9o?BSxP}$upRpKuv1P^XLvoUNg$g#?~pd#>VG9DA1mX(lf6$$fEuE^1gr8 z2fH|IQqtm!OF%pl!j@$us7D#GZ7^Ejddx7KMJ4{E4T_EX80 zxC?xQ@QMnD^zGc7oalG&a=p_8Lkyv00N|mziG=P~5TQkb2?daPCu!yMKoLQp)8I$K z(^8x|{rO^^7Ad-j`HE!%-*D|D{oZ`~&N)kD+kPhrh{>U_+6@B~2-u8n3>#%nKd6iP zrYV%U;pg1NzUDe!n&U|+8P4b!&Cc!$4zRN3a~b!k2abL0rTOUtc&B_jTo>UO-nh?I zTSy-;@jeVWXY?HUuMB;g^J*@DS0D;~4^0H#5x+NYejOaZ$hD{0p0G`yfTjm7Cjrmn zw@_V2M!O*63!L6NCk$&W6slSI z7o{lFWSUPSNv`MJ37;KN-jwNzjb-EYy19=dsZ2#>_%b21I^ipN1r5oNwYa?G3wkXX zy7_(#JyBN{YM7Z*=U@^+^Mz4q5oUqOf_mZvybj|lQm}C0wpoRE0-7e7BDhh2Sp@1f zl-rVMg50TB-{5`gMSB$atL^6Ul+6jc zy1Dh}(ZWcQ{Q=3)ux5i22 zuil<4;qX3lg+hlMq#`F~!-|$s-v&a0_m9(TWY;8L3~^6n8fcsJjM<6E-p0ltU0&K- zkg9Ve%qbwzULc^DO-qV9;Vs9G_`dL-D*i=(Sc4hhMLvAV_PBfJ4j2xPeCR#l?1JnA zruK<}5G8BU*)CSP zADBZv)CchZuiG9KSVLfRAhC;!3(R*bQYHl;<_4tJ8&Kv0$c=J?vlXN!NT4f<&@+E& zARHgy1B0FH>)S_9YImJ71YM$(Xg5_qiu3E67fI$=7snNv`)z?O`&=L)F@ji$n#N!? zCd4b2;No#f&sq4@xWnS^Rd>?3(#MQHjUj5giGrSr%(7AxKK}}pVnC$6ac`9c3q7bX zv+WiuAw+s}p9LUxjEJuRCMF`#X4r7XzgAmj_ZvIo8)^z1wp8WPrwx`~isXoB3V0St zH{-UhL_37cRNbz-CMOWX`Y8G<#^-v*esk%5^!VpGlMiS?W?lNQkp=xe4{U`fqT>FA0zR*V0LwPFpgXzdtJlu6UEdt{-o z7lJd3Ng_HTOgqdz-rhrt;dLZ8(M4Pqh{su?k$&JbTl#o}>1Qt5o41PaTful&fD9CJ z1l(^-Ow5G?QY)HA0iPv!p;SSNGPEi>%S70cD&MTUudMwCr>~U7NM`M4@l*rx+g}?Q zvnTo}W;J5NRoq{iBpBH>AK;lh-f(%!@okd1Z6LWgaVxCzDq8F5M<`YR{u1U^_OPY! 
ze1T-pK|b^myuv0JPXNI2yI{$ok~UcU;42_c|TgJ8QDIm?_K&&4eKY44D`70?>NmDO&_9&b~Rt~3Prt3Bn~qm zkmgPd6;y%Rpg4&177-Md{-iQE&x%}s$ILD@W5zDU_>Q4b_^B4SWijwQx>>7Pnx*1^ zFBh^j-IIYfSN%XODKT!MrE$wJi^-87dMr#f^zWZNmUhf|l3{iFP-Gk3kAWZ#3Iujj+lU{);%Fkl%L!f zw{8L4?=^U^k$kVZnz_#%*2jHb=@@7cA*0~HA@aNbt4-}Lk+fwZo>Qbxh)T|r?_17l zmaD;MBX;TD+>!YVnnTsxoz=y6aJH40H`Oz3#PC(_d>UD3z9HW`m31%bDV9H`hVS(} zNZ}CF7UE1sKw1zKfN$K;ly02@>&DWY(~oalAR~=RNWd44xCq&M(4$(QhKt?y=QlrA zFgCT(7LQ8PJL=~_7mLyfHeIg2u6QZ(lMtJ{eCh2+`K>DH-4xmN3iLmhg*Oglo`-a1 z-D;J5FXB52l@vtK2RpAl1tak76#EncO$+FjnjCCeU>Jl4*ebtwZxfb^bM>w7i`Yb6etH z>ijKZ=4&*f$HDjMY!Q!H0qi)y?{xt6H|+wPW+Nbj6Z@13x^#VrECGBd1f`@=8!=&0 zJ^^yBzB|p@{mNVP`BQx;ib1ciCt1k%@&rNe&st;|ku;4mM)zHLbmc50h4(<|FA)(~z_fa~l!cw#4;Ia#|k%RPds%=$oc{7poJ?iJ$ zgMKQHCEDkJAsMRddyN*5o!)&tVLum|qoSjE3SeP2!Z|ho0rFVF>TGgDneZwdhKpDJ zQ6YIf4f@AQreH=xDxF%TwmE;hJIl#$4+Opv)>5}O$;&_;z*{b( zgXPC+Yd-P;4YWJ@kQQHogcuelApn*jdZQad2fXeoGP&d#j6nnq>L1d{6cdd{H!h>) z=BBJSlNj>%3IsLCPeaJ|DHXvUUTXe)=c8739-nAs6F*K&ld+4vJfDWcipT>OOZl&c z%0Qpi^u3k`$q_Q@4=}B;*^!xx$Z#PrVo$(?%_*n*!1pWy>!!YyRRpNsgm67?k&K4% z^xo;)ReL{su0rt4LOCagFxR9ehV_q&ff2jh_TQJ*DZb8sg??_u8ycM5Q^--v(zbGm zoUm`36%oJ#vh*w*)4Y6q_D~jrNsM1WXDuZs_rYW^9jeQqxY7LgSGz{xR)>e`O>#gv zpOLWSIzc_V@<~}}1eel-I-Y@n^~~a~^b?HOSNgs=K1OnP6|>|2@{9JEhf5R5o0>mw zvfLa9Ct@bprVnd)4hMf5xP@7)zZsm1x%JL**Hia`FtP@|o8&8nX9YX`lbr+8Ht%_J838{> ztty&UT-8%q0ZD~>{@3V~7CB)^12U}@-pqrem0a7&xb75H3e$p-%Pe8n+lJ&w9}JL z-fqrvI>vU#rGF{Zh$@j3^mZ;o!LlAzn4#H7U5@&!apyU{(ltGkie>8b+K1V6H1tz*|2YCO{sI5rp>=2Suzq4_aXJn>4;G1?JvuI_bGYb_cAezH(gI@L( zqy&O;8*jDlPWm8S`egQV2vN$z8f#hl6!C@_Apy8voZB)YzxF!(iV#iZ`&1!_z zY9DRADbO^i=#2$KouNkT?{YR0H2gyjV#- zG?JU~cVjGu@~>L4^I_Xjg>0eey_ZtRel=@n$+G+9OB>2Q8LEK+QRvRH;|GpMsa#eA zUA2D&Jk1poh?p6n91$PQGYMCr^O^-kEU;4T&i^_cY{PFEfnf}=r@lf{;{<{dV8Yt_ z(j!F}5Y;32^r1sIRvOX;+{B??NsFa~8UP!bl=LV^w;4kmmlzM2*7m$U_ z6b{!O9Dd<+yft#7cQR9oQl(-%%8+~e;=sbe;Mw&+?jvkbz;3lu=m{!7kY|7}!6%Wr zGW4(+Va^O9V*;=x;w;-!4*_9HIW%89Jw5lT(%=ack7^d%TI%uOMQY}+uRIwWrQ^H1 z85-uSo{BA#9Zw|NJwsgT<#=;cgNcny=)qi9k?lv?BtDuQw(osW(c{A{Z|3{wP8vUQ z+~h;?p7KSUd|i{zVJPBFsDd7B!!Xq$;8-4PL$AUXfFGXf582qFq4j}GF$bx=ZiBU! z77@f1Ta)k@uE3085X?oF*mFgvnJBNGH({TO4}Aqy)=91fS@CS6f3X2)-gZV?iM^)f z^4ySgNUr!VdrQ?PY(IubN8PQ4Ftj2)7d%Jq>`cGLbE71`MrD<@5Nim&p``Pk28F zA0b^PZMxYTuKjhQSXxe+o6+#``%_6)#W;+!kH?{U853W{6h#}b={=cnl*IMchI~K$ z3mE3Ho*vA_{-XfH7e$&>u#}P@1Z%#$;0JF$FHK{k{Te#-IfyoDpa+b6dyxKfY54$3 z1zem`|5(ZJ;=yD1rp#_>a0VuZ{ExQEuIYL!X8aUw>$V(Wb#QQ()U~sls#UKj@1s*o zVP7HSbH3r>)KYR}SgFu2>3m64W-KHZciB}ktm`_H!rHlz){)bW1OQzH`OO~XB`?h= z^n6F5WM&3~dcpclKtxmy)0bcn_;|0+=?`xB0$IwsP>x)gv1wj|v<@mp=9Lk^|4nz< zc@*D6agNQhb+#aBka6qRS?35`3Z@Xo#Gf;aE>sB&Zr$-DWi>jz%vN~ykx^zrC&GRM zgH(IcIIxB4v9M$2$4@8>XR=B)A97q7LxJ(3uC^d}(#@gg8w=VYsQyzkGh=}8^FI31 z$TcKZ9a5@5+eBtsKqVRG#cWl=6a^UT(*p;pw5)7zd^4ggfm9p7#Z!FKG3GQI(_A>= zIw>zn9m04jV(O|39PNl6bFtqVOY3gA#eXLAysY2_+Q<4N{3o~JU_{RqsmTXb;cAk# zb@2&WDI}~qpOYPS#Nd`g+=Ni9gT|=83rslg%-25h`7!6FgL`n%`Fm1-!R6lD7xz$5 zT-s-qugPDLE_D!kjwEC{4BN*f zz26kcKH9x(dK-`+&mL%fd+MT`#c7Vz=K+>jaHw3>?je}URRHpCU3vNeqF38BNQm3l5SBE8>^>e;L z?`#&&W~3W!B$wuEaGW2Qy!TmWAWi>vD=@cGE-^IPOsHC@?)dEQbrj6-q-pniorev! 
zkK}&nqLF`H(9C=@3L3&OX3<+$=|Wp!!9B0BexNa+;r|CcJ7HxzalZ7++B%x&&j+1W zBHWhRisF}JD$mXzYlUqUR=P*LKvA&VVZZ$}TU1Z6rDx+R(Nl6slkP11?SeTja{uED zwfsdYf2BzXe1HAXbX4)BTp|DtS z70ag|G{0I7>Up%Zhw`FHT?U>xJSj8rdmPvFS9m$=h<{(tT9WB<%iKj7o(U~>s{g%I zA+O}`9C;O1+mEjWjpc;6+-NJ>(0^GRv_0N-yqcElzA8I)<2W1t^pT~)YP!`6p^DU= zy#;gC-u6JuQ}HYeh$Ezz{BIwCRUv}Chb0{p6%=EoNwJ^`G~9|#W%6{)Zc1+S~Hk< zn(HH+cHZgp-YSaJ(S^tti)fblQsk~YJ+q%)Ykl(dU6xT(M^)8nYg`OvVCw|guGjb zHHk-QN#O&@iZoDPOJui5iN)Sq<|S{D4yIpREr<aT-`D^5=FmOEi4el0nfGkAMpCFOf4L97^oZ+LQ2OV8gRl6T1W5BQ z)z9cVf+9PITHoq^4jTGa%#v$r*IXMxTE9>v+d6L zmj5_$W7Y3W>5up9mlL%}!5^1o4fpJ+Oz* zF+rYv_xJk`P)kT&oyGl5eeQA7K~k1Nk3{i$$}Pt7ir;5WOk8Jo?1*q*Pygt*b*D^_ zx8T@a(kAIK6K|%`-LYAcKWI+;#pHia3|30K#(bN=yB(d@!?2k;6Q3v|p{tzUW!FU? z>p#OYUUc(jYVyV7iQH>j{ddAPi(34ir#F?+xEmV_`kIJEj7bmlB zJ^w=J?*-*Mre__yWz9HK6v5qT!qh9yl+5Kd(j#Hc3ovoLz__rf1I{Z*QH?p3va{M!4s1E6^2XvYya8G5_eO z$)v=fOk=BWygFA$;=EO*uYVzITdbSF?QuGko0~0nj3*X+=?m|Tp7p76a5L`UNVe<= z@3sjenU;am!aMKJ63R18T-9#ezvNDc7H9@|Dl0mE?eSF>l*&v-4&J6PRpWl!&g7N9 zO@q@Ea4(Ay`BvBH$8gIp_q_3}^u9*pO1i#1drht9<0oH_*<91x#>f9)*FkRyXF2pq zKHE)AjhGbcfsr@Ng_Hg`!#r>AvbZi;o8&rc!YN!w0nZAZN;9Q*VywusdY=P(k~+qT z{f{oW&FeqaCEu+cnPiwr{;eu%PtQ$%meTES zL+($93Ck+~iuK04@m!sgrPQ=YF~ z?u}oVeO~AlQSt5p0rt}1s(bMs#S8c(6Zj-0<*m-cn@F9TI2(1kXHI~#I19`{=txC#fZHh44AyZXruvg%s>=tbp98yfxZH6jmenNan9 z01mfe>GBS`L1g>eGD<)GG8bEigD;2Am=w~QL)v!D#%di1O7JkAaWw`j{}l4B*Gr&A zQ7yY`*_l7Vb<4g&xq(6Kmh#%a4;$X96xR!-*|NOf?Gno-Y%VBgz z;Q2Y!OCTY+_7J87!bOjRg_UXiTugxLsWOlIIUU15@!|%tu%TOL40r3o`!H>n3!70_ zqN-iFCnb+-=L8~m#|Q1u++U~tRy@l!89c>2b(m}xj<&ky);1^cqxcrNzw0%1Kcun^ zIdB`uE?|4Xh;@*&eC_N68d$!unOK~RsQ%T45oau?$Dw1V1-0`o?e+=Oo!L>N=K~KP z8@CUC%Ml(gA^m#Pp^|F8&^gcO%c=AHefU?ZS~5TFlNEiIe~*P-iHipctyNQd9=o4A z;fI|-Ws-^~1f7Xe*e_y(yPU@Dt_$gGrh@1Lk|u|T?uwk&1L=3H`O(7oNo#e(ItCvS zdR%VvJWJmQUvBd}_gS1iqo(mcerdt{-^UlQI-~+65a~Jd^6~-$PQW?k?96j-e?ROU zbU&^RnOB)sePb{A9_96}vx(u;M{k8a@J14dkzOn z>XEBtONEDwG(>wu7=wf^L8OHW6)2wECQ1xw9FX`23SF#zgMN<>5? z#EOi2c@@TPL9yeaJliKo;}d+2ZI(HVDS+v?og;?XN?W-(RN=T-CIgpT z{DgjYTiV!|1_J{l2VLRg!bg}j1M{BoUqfvS8^{cx;X7FlW0A20DP?$BnO%tUlP7KV zpIH-*E*P>mPEuC|`wUM@W}dq_Qzx*qC=ZsAhJ9mfRUE{$VX@I6nI*i>7jmy8*Hv#m zXfW-m+9xNx>oa0a>fh}77u9vmXTpUr*3DgV#9Y&b8N`B3J~bB8Dpn|t`;GdHes^q_ zi1&BZ__;o3)K+)5Q1SWT=SGjpBg?@ny;n)5Twn9~-VIqu8kia&Q|_I=zT12HtEnb& z%U61JhM-)bgoD#9B0&qqkQP~Y%XK;Cw#n>9=uqYdbp{NRZij9%GSm>Zka`89a=T7X z?bnYV)IEKEdvKa=F3tG4qZ~Y|eo07Xb4s8oiK$bewQOu4vyn(3@h_ z#dJ)d^}J0kj+GcXL8qT3fm8KK@#6%h0_|ro zq0wS`(sX&&fAE`E;Ixf#-$kb-3^|aP#5+gBB#HFNyuDOE>S}D&R`lD+K>ixf>M2$x zmKVWy(@f;170Z`;6YE+$H}JW?-eG&{ZMiRr6~==`zS8&o*TQ!R_b2jera;KXeE9PY zdYT+@Lh=|-4T)kyY%S;Ke*IAi63pZ$u0aZ%HFPId3gqxqqi)dt){*&~Afu*opWkOL zBr~1-v&G^1+BX)%!=XEXc*3~!3{@4TW~Xyx&m8y6xAP*KcO$kneb<7CS0ruvI1r;q zOW&LdSq5TfDez&lO87Un;YW&i5K^YPevEsM%sk<@zm-pr? 
zPiyg&q8DN6`vTsTXr1hi0!tBF+5gT;F@eAvw94PjoV2o7o+58xwV`{gvZ(u%X+AGo zQ*wYRRsZE~!gQG-KeO{s$6KWSG}Iw~n}1{n$}$GBZ1QZ4-9Cz3`L5Ng`R{;-T#F|} zhN0nMN5{Kj<)GSgN)1rxlZJj??>ti_k>n39G`Ry)+RUXX=K-IexJl~Xp>|a ze(`s3@T6I}M74h+LNqfrq_nd2XddINF}7B1FlwJ%)|B8|nATr+`Xh;3w=7z8QHI62JB6iR>DZb}VP@-1WZ0 zwWBI^*Q{&2+Txb^EM9~}6FIH_y(~+JdvIkWyl)Z@+8|Hwr$Fr!;+n^DR($b+<3w4* zrtoow9e}-xhXc>a(Csf@_p5kVkG=2>wDYcaQKBF}@d|dTc#_KffMf07$OF5siSg3JpfmVA)}MlDRAQWztRLW= z?XIlJtKc@~9=DwRuH4KlG_8G$H9WU8o@M-I;g_+zB=hroVz8amwk5M98DFth+P7!h!k!7 zbgs!Y`&Hkydr*pdedD#s9210^zE;{L8?>er5ra`Se+Z{s_I@gGP!s(n%v>KF2@njV}F{ILB|ff-Sh z7b=JYU-=>#PR)d36TJ=H3qelE*qiWQ&wX#`9$mxodX$WcNj5axKzVzHk?Kp+v8l49 zC8Ld^caWFxPEK+Js?gHavCmH7KQ)5bybEX*1ii00pmNny!$TF|LQ_~~39!Ln8Fm)F z6w+3yxz99c!`1xtG0Wjj?|RXKN`HqR7L&w&OpBMf(Alj|c7CPbYYD=cdaJCF-&+Vf z%g4;Rw@!|qJwhy}^edlNgb*x}M_Q4f533ytJQn0;YufGz#=nSay6N)U;o@ooRyLc> z0WJYS!c-Kz3uQvs>WUv!9ZhdnJtKN<58onk`X{ICZiiy}Os$n&Kvmi#_Dixa2wGP_ zpGcTAG*dfv>Cc`;x723~SGU|nPV;LW5t_XmiO3ppclY@$eUilQ!VEH_n5A}R0mfj=Affn$S! zko^s}{O-trqBJFS9S_dDcKrA)Em7}im8`id}P!(oakDBAV`a6IpQY6T;(MPSM> zR4>inY2>`#$J3i9P4At#;kN{{=b77ngT7AL8ZLtdNFl{_bVy*@Dt(!kx3>_? z@P670oy9ArRa%u=R!NgbokG2a3%JUV1eXkY_a{48l(JRpJ9k-9G*TR!?s!Se~>!+#6J@u%pdddPp>8>erN|bUI7VzOYqf&w~8s zW?F9U1Z3DjbhK}L{5Vcc5o?Qd-Y8-h0U5&}D2quO`d1`Ufguk9%>g~-T141>-2nqUr$|WaM8fl;{+6YAnHWu0rI_!!Y{NLJGhDAp!tdYX~e{V4IV~e!%eK3l&0T+}9 zf^aZ?o4?!FVHm^RJ8$1TSskNYHFY4+b>Z>5j%sLpY-&CA#ERUH^0Fn_Da^L|_e;x_ z1h@9}n-|v^%Lr=IQkqlO12Uhq+_%0lku^Mx^6QFar+1P4Ao5a8B*$Twj?saMq4#LP zCkk360GPM|bAgEfoX#cdQT%m2e64K?Q@SZd&Lm?X+=jKuloV$Ebxt>Pqns{~{PKJj zR2G4XZRT_RvP+~-?=R6Gvb%cqV>Y2bTCz>UFRX2EVl+}-#zj@$ymB;xHPqw5y#l;`} zH;uY4wa%?8$9`sa%)DUTe!NwNS;$#d{^)N{ZhDhYAa*ix)UZLChGX>q6n5tERBmk_ zSDJ7dOqEn9vydTkoJ^Tg+YpV)SagmGsfd#y^HGM(Lw05|L?}a~ka;R}LS+aUibS&a z^IM(gd56#Ed7rmG8pOWuweEXe>so7Fzwb{Hxndq$m1u08Y{~FRrE9M0hpBbLbJ=qr z`;!(r50Z{@r+R2U?yvUr*^&0n?EsbX#GORWnygLX&$N24cBrJyie$zs#a2&J$5a-U zSA5`}x5*#f|D(?N*?ku^CIiHNO%a`o*9}Q0dG~BoaQ$|&;2N=gwE2|5#GkmoI=Iq; zePNI}E>8K~lD21Xtc%%`3*-_;cLq%wPBlEPsR?K4b-iYRpIWq4c$k1$i+?7(KLm(A^k-@`Tb9FqBHZU9mQ?Ghjx`KfRau|mSL0FDT5jQ6rR>|* zq~>+WBZTR9n;`ldT;5z16xeLX zy-_9ClbS%2U+|FHapN09u|Br4_!7l%k2fu6qqlpTcb;FLsJ-u+W7A62AGX_CT=c8vxp7GI^|nKvvY+c zxq8{R$9uQdq<%c6oO!B{OlJZ*7qiY-I=+ym*SZ@`rM5j!XOVOZ9*oRat+h}H2nt!t z&h0YXZJ5{$+-%eGjMNMo#oY7C)1rC}~!2j7^TAhiqu(;di`pe;$9{r|-bkby;Qk z)7N7vZH%GxrIjIf0{(d+WD$`+Mpe>~Fl>rtH*8W?j!sl%-NNmfCOg&Su>5Ies}xtr z&3k`;_ev(iEQL!SR?S+CI??*C)if!wyVv;b)-@{)zk^Iw>2sgMz0!k~H?=J+CXLY3 z>pt`FE9}>xD~2}gZf2yi?&kRh%t5fE->FwuH zc9qtqUqttW=N>t#X8k&HM`=%~%>BFaZy&#{D|@5<_xrzEs5)~y=Q#7C#8+k25IRTr zK*+U4O~0qdYBrWpJGj@0yX!NF0-6mWM zx;P3IayC{7R%#ZvJ+O}wCc!D4zdAGcR7-i2S2&A8<>b0!aSs(;zp6Ztn=*gftKNFv zJKwR;Xp-$`>*RGWitawP`q&=jhrx_~)P`lj(4Jc3(!NreTvM|blJUVeo{jwTW9fHR zrPJGaW}_{+Ov*gIM=1JfC?wVaE0F~)~ujuWZ5bTd|JFL;wooh*BYgN^WbcBT9y2* zgb$AfZ!BstGdmkhv;{@nPm10#o8Tje>%CV*_VKczfe+k;R@EhgW@%d2WY5x2%%YRn zysB}(d!%jup@olc9fg}pczcu*Yn|$%D_mtZOLTWjn38t2{*hHOzH8SmXhx1a^kMVp zs|Xp8v{(_cCinEFs(zKmsqi;Cm7D#tLzH))@RRwGR3TMdVEV*_G*!ha(*955f0N{F z-#z8Vk~~jc&FDK>(AeN{yE;FAY?WJy=3%Pe%`&Z+9eIU|)B)3Cddl3;!(BeYV#7D~ zUyEHnnl&g3D3)z=tZB-AC8cdhR-_eg-K##ZbF*gh$1!j1XKnMHQt|}|-G16;{x{!k zxH9=tG~CM677iKqQM{r|i#hA~Ug}|HzU<_rUy-X#;YdB1byaU$$Sqh$x1o4m?d`~= z-G*>d0%mV5HWsC`1-?&6mU8eMNqL*?!M5X82{r*Z1tXa~W3@Q&xtz z1|o@$`1X}wMur_sBCz6V(uYm5ScO2Xfy;n?5%;!jcP9h7emJz0r?|~6snN3A!>br* zsZFhASATD?(^^M4`f`HgX(nG!xNt@~t#|x|S$c2FlVH9lw6dXCKSy$V}weZ3A`6z>_ChW02aZD}r)-zyu@#!ucr7jL3H&fsZm>9;_c^TZ7S(%{l2b{Qb^y}a@t1le|=`P`rOf*w5!{a(_ZMHF3KH>7CDe8vMgazi5hJ?eu^4OJog7GuzI| z5?A9%BNJnD^Lpj!a_^E7`Lwh&0ARdPd=oWZq56K&1^FRyZ$<;~K0+;ew?|M>F$PKr 
zQU(hPG^~V1u)p0}Y0jtV`v@r!#GZjWFLSZ6)j%t{qSWo11PJKW0|PwB##B^PK>ObV z1y5^vi{A3kj2$1y3t)hHB`5P1>r=W>xk8RvAaw_n<}$GQ@U-?qu`S$hOrNOS3yY86 z4zd@QlCyBd^{=7@G;CeNG{_5;DN+uv_JE$R09K5kprELUm`_GM5?ewe#=1f{P#rZg zL{bmWbuCIMDx!IA%&`J`3K1PKX(px1u=!3rpYTP&OQ^o3B@KjvAG1kxYgGJqWt)Y9 z6q9-CuIOXuG4*2mZgc3*DJM_?Cbe&$4Y;PFM+HThL$pb@i_*M=lQ9_?&Pv#Zo4SvH11G-^Sh+z-!{erW7=yw1>{&ikdShAY zcl-A3y5B*dA`2av^@n&%#%Wy;E-1Q;a{vx%A7#X6kf;)aH3)=5z95poG^G4nzZ;>3 zPIhRnh8=@=;lqb9=sr?|4gW_V2mDYTN>m9Qpe_{abEuo_4G4fQyU*0M+C9UdI~x z7Qz<4lM3=0Rp8hHA$`T_YtYXA1&I)>~GDgRRiKcfm^8JPv~p%0+>`>_B4u z*6%hAqpbtzS5zPpydoy+tW&W^C|9n?!M;SR5!Hz(Fgasq=jZ3Q78Sk8&_%)oC=kGY zJhNwb|8QvX$8{|hKGFMdrP##EdK2G>@JG(hQ06X^k_o3ex#o47Xcp02pi?a^v zlcDL@->EdL_^;^S`v1-c|0AmYoKh%QmXP4dr>Bd;jsFUsCRbOlUcK=B`(>E5BR0K7 z85Tn3Usaw}U7l%BCZbxt>_;FfkM0ce*E*fU`VZL3ea&p=}FU6Qb=6mnU$u%Rj&1wzjilJWOMG3(^6i&GICT_{(2) zbL&K~W}CqIX(k*F5QQ<~s%`|2_5y%}QryDwh!g(OyeNGbjR*)}njLK&g(k}LaY%~r zFI)E#l41wo^5`!V3|oTsg@V4l*j=%FvlbB+M%}PSswH9f(FHEYimyk@<~1lBgh+tH z!TFxAfN|D#cNd>@xEki=wX*xqQ^#?>2D7&U)Gih4v!--lqq){83*18}S!6Kq)!q`v zLhz*%A*G*bQJ!vt7b2eiIVf<*y=@~8rhhHL|H6k;d6X2bS+96lvxJ-{VU+8YuS zM6f>?1gZruN=p?aPM7OoZ0qN|sc@4Ab-;bD{a~unBW>zgKt&K0&)C}shK6oLS|IwU z84`+*o#`9pLC$g#lUKo;oD(G*Ktu)f3;j&L zabmhECN@^}tAC?Vl6wvMV*vspi094%!Vl%d^4L5C0BMN&h%%cU=!l_Lzjm7)16>oC znN(F8CU6Z*NN`Y>#VEaxYIlAiA;#oxdm@VikpXNGr@4<5P(+{(wJPwyj#ma6N21c(}m7%zrX zdNj}&4vJaW3IyIoYq6DQ3ANrLfO3n?+tr0|r$@)diDBzgk6f|Ot||H+0GnIHlT@x5 z!RY14Y4^`sS`?09IP#b<4CCqo<3RP9sL~d!@MRn1OnQi#oC$_zS(U-u@`kAF{jESQ z!7v>+ONrU<_9#U-FbpDZ20eH8AMD>&3*)KodG!h#JHrqsn{jAS@P?#9&AKI9Tusda zGrlQIxUz*`Qk_oZ9^Z%QgdR{AD`j=d($+Q&@osbRq3FE_&i55%k`GQu$VCmYC`t`P zj$0f#l3e=uu_%xG%uW0ssh}6YgxTXEbST#3lgdVJIRgOVGz)&)z@wT21)vD!k1{Ejf%IH<|MVGjEsyFN@SI^%eV=G41C(Ix7SXqVakC&=FQAZ zkwdYve83VzfjXBtPPF=%InfZAE1%2}5RyulAqU+nB_;Cek#z$bz+2I{CF333y!`z4 zYrpV}j-W#Jep;I4`SSso-Vwiu?ma514#NK+ngO1Vl(h8sMk~N=IJrffE?pv!?x)l{ zyK;{k5p4UnZInbT1?1K)BjN(DX*=?k@UOh;c|tgof3izC7C zpoa&|;+441g3}uNExbO$7TA#9rJ@p#X$ZdDgO(EzROsmHio6>6_|czA-Hq`nFh4v! z-HOq&7lyS6ROVOn3kz400t{~MI7`nd!kK!t045vIM`lTr`1>-lO(}W#x~{GwgHTCL)7`rb)lF8Q zxW@{2uwBig+}w?*N_BPLfyF4tFOev{?u2@a#gl> zxdMUD5+})H>1>VA5Utti`xssp*!K6y%X2KS@LW|@NVo^M2Zif%=O}TgML}W?VFLra z=bCXkl4$M;5UY-!o+u@c{0RLZfwO_@E7_^Kj>1V+9j3qHjev(6FTo=f%;TOuzjYmG zhg~1k$LcV2MKVmRLviEDd3hG&RtRL$?h6Uz1Kc5c zz!zC7_|SF8k3fo$kkb3=hg1c*sUlK+=#y3ihTdlYW|JYb#y>9Z4Zyi5JOXDY1#>7) z5N=@x%XeSA%s^%tJYyZy?vonrOHNMYW~M8b{zUU=!<9+5kVxfK(Ug!AJ#A^(0540` zg+WHF$%A%Q?8Opc^B(i3#7;rgT>;18Ipzf^JsvcLDy+|K7iQieMukU2XaYV{@Fs&q zY7mNTj#h0ljSNOzsaE;Y)DwZ<+0V-YmEQ9d_tsPFumy?Cvw?vmWN7bE2ZUgmIKCBt zJ3(N-bi)N%w{ES+Ne~k=vnAdbt%0_OMMXp28Kj+XuXuV`SC3qTK=o7?HB2us0q zs0KqTZp^#FXDA2{hMj|>76Lewi=S#=7a9}8h4dG}+Z#DNQNLq)*GAsao?BlC{g}%2 zZ&S@;dFATlA;Xme6A9QpD??YlM?yoKh^wosNj6eWr!767XT?Xuaf&QXutRYlPud_! 
zwW8-?E~bMQm`nh4(~0?Zz&}fy`eSsV1k2}Nk#P{4Gqi4Vh{W=a|K8gu6nnrHEqk5~ zkHrc+2>YmAnyEqsqV>Y)raYh4?gk z?=-wq_ZQWmB?$X0STy7+{cFAapEA4uyj{>iXZ#W;q2u#wmQ^j BDf0jT From 4e58faa6bd02cd71da70263e8ba4d01b174fb99d Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 17 Apr 2024 16:37:14 +0200 Subject: [PATCH 116/171] Unify SLURM logs per job --- .../torch-scaling-test/runall.sh | 42 +++++++++++++++---- .../torch-scaling-test/slurm.sh | 12 ++---- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index 4f9efdcf..1baa4395 100644 --- a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -15,47 +15,71 @@ else fi # Common options -CMD="--nodes=$N --time=$T --account=atmo-rep --partition=booster slurm.sh" -PYTHON_VENV="../../../envAI_juwels" +CMD="--nodes=$N --time=$T --account=intertwin --partition=batch slurm.sh" +PYTHON_VENV="../../../envAI_hdfml" echo "Distributing training over $N nodes. Timeout set to: $T" rm -rf logs_slurm mkdir logs_slurm -rm *.out *.err *.csv #*checkpoint.pth.tar +rm *.csv #*.out *.err *checkpoint.pth.tar # DDP baseline DIST_MODE="ddp" RUN_NAME="ddp-bl-imagenent" TRAINING_CMD="ddp_trainer.py -c config/base.yaml -c config/ddp.yaml" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + $CMD # DeepSpeed baseline DIST_MODE="deepspeed" RUN_NAME="deepspeed-bl-imagenent" TRAINING_CMD="deepspeed_trainer.py -c config/base.yaml -c config/deepspeed.yaml" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + $CMD # Horovod baseline DIST_MODE="horovod" RUN_NAME="horovod-bl-imagenent" TRAINING_CMD="horovod_trainer.py -c config/base.yaml -c config/horovod.yaml" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + $CMD # DDP itwinai DIST_MODE="ddp" RUN_NAME="ddp-itwinai-imagenent" TRAINING_CMD="itwinai_trainer.py -c config/base.yaml -c config/ddp.yaml -s ddp" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + $CMD # DeepSpeed itwinai DIST_MODE="deepspeed" RUN_NAME="deepspeed-itwinai-imagenent" TRAINING_CMD="itwinai_trainer.py -c 
config/base.yaml -c config/deepspeed.yaml -s deepspeed" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + $CMD # Horovod itwinai DIST_MODE="horovod" RUN_NAME="horovod-itwinai-imagenent" TRAINING_CMD="itwinai_trainer.py -c config/base.yaml -c config/horovod.yaml -s horovod" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" --job-name="$RUN_NAME-n$N" $CMD \ No newline at end of file +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + $CMD \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-scaling-test/slurm.sh b/tutorials/distributed-ml/torch-scaling-test/slurm.sh index ba89e421..c53e3da5 100644 --- a/tutorials/distributed-ml/torch-scaling-test/slurm.sh +++ b/tutorials/distributed-ml/torch-scaling-test/slurm.sh @@ -72,13 +72,13 @@ else source $PYTHON_VENV/bin/activate fi +# Get GPUs info per node +srun --cpu-bind=none --ntasks-per-node=1 bash -c 'echo -e "NODE hostname: $(hostname)\n$(nvidia-smi)\n\n"' + # Launch training if [ "$DIST_MODE" == "ddp" ] ; then echo "DDP training: $TRAINING_CMD" srun --cpu-bind=none --ntasks-per-node=1 \ - --job-name="$RUN_NAME-n$SLURM_NNODES" \ - --output="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.out" \ - --error="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.err" \ bash -c "torchrun \ --log_dir='logs_torchrun' \ --nnodes=$SLURM_NNODES \ @@ -95,9 +95,6 @@ elif [ "$DIST_MODE" == "deepspeed" ] ; then export MASTER_PORT=29500 srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ - --job-name="$RUN_NAME-n$SLURM_NNODES" \ - --output="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.out" \ - --error="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.err" \ python -u $TRAINING_CMD --deepspeed # # Run with deepspeed launcher: set --ntasks-per-node=1 @@ -112,9 +109,6 @@ elif [ "$DIST_MODE" == "deepspeed" ] ; then elif [ "$DIST_MODE" == "horovod" ] ; then echo "HOROVOD training: $TRAINING_CMD" srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ - --job-name="$RUN_NAME-imagenet-n$SLURM_NNODES" \ - --output="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.out" \ - --error="logs_slurm/job-$RUN_NAME-n$SLURM_NNODES.err" \ python -u $TRAINING_CMD else >&2 echo "ERROR: unrecognized \$DIST_MODE env variable" From 9930023637855b76b176b9b394276d4fe5160d4f Mon Sep 17 00:00:00 2001 From: Matteo Bunino <48362942+matbun@users.noreply.github.com> Date: Tue, 23 Apr 2024 11:53:39 +0200 Subject: [PATCH 117/171] Update README.md --- tutorials/distributed-ml/torch-scaling-test/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tutorials/distributed-ml/torch-scaling-test/README.md b/tutorials/distributed-ml/torch-scaling-test/README.md index 74e316c0..4b722cb0 100644 --- a/tutorials/distributed-ml/torch-scaling-test/README.md +++ b/tutorials/distributed-ml/torch-scaling-test/README.md @@ -41,8 +41,12 @@ setting SLURM environment variables using the `--export` option: 
DIST_MODE="ddp" RUN_NAME="ddp-bl-imagenent" TRAINING_CMD="ddp_trainer.py -c config/base.yaml -c config/ddp.yaml" -sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD" \ - --job-name="$RUN_NAME" slurm.sh +PYTHON_VENV="../../../envAI_hdfml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh ``` ## Run all training configurations From a5912268eee87e142a3a4eedb8236af1c2eb5e93 Mon Sep 17 00:00:00 2001 From: Matteo Bunino <48362942+matbun@users.noreply.github.com> Date: Tue, 23 Apr 2024 11:55:38 +0200 Subject: [PATCH 118/171] Update README.md --- tutorials/distributed-ml/torch-scaling-test/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tutorials/distributed-ml/torch-scaling-test/README.md b/tutorials/distributed-ml/torch-scaling-test/README.md index 4b722cb0..521409a8 100644 --- a/tutorials/distributed-ml/torch-scaling-test/README.md +++ b/tutorials/distributed-ml/torch-scaling-test/README.md @@ -42,11 +42,12 @@ DIST_MODE="ddp" RUN_NAME="ddp-bl-imagenent" TRAINING_CMD="ddp_trainer.py -c config/base.yaml -c config/ddp.yaml" PYTHON_VENV="../../../envAI_hdfml" +N=2 # Number of nodes sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ --job-name="$RUN_NAME-n$N" \ --output="logs_slurm/job-$RUN_NAME-n$N.out" \ --error="logs_slurm/job-$RUN_NAME-n$N.err" \ - slurm.sh + --nodes=$N slurm.sh ``` ## Run all training configurations From d2d906c61863faa0d64946eae14f712bbf5c1bd2 Mon Sep 17 00:00:00 2001 From: Matteo Bunino <48362942+matbun@users.noreply.github.com> Date: Tue, 23 Apr 2024 11:56:13 +0200 Subject: [PATCH 119/171] Update README.md --- tutorials/distributed-ml/torch-scaling-test/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tutorials/distributed-ml/torch-scaling-test/README.md b/tutorials/distributed-ml/torch-scaling-test/README.md index 521409a8..1344504e 100644 --- a/tutorials/distributed-ml/torch-scaling-test/README.md +++ b/tutorials/distributed-ml/torch-scaling-test/README.md @@ -38,11 +38,11 @@ setting SLURM environment variables using the `--export` option: ```bash # Launch a distributed training setup with Torch DDP -DIST_MODE="ddp" -RUN_NAME="ddp-bl-imagenent" -TRAINING_CMD="ddp_trainer.py -c config/base.yaml -c config/ddp.yaml" -PYTHON_VENV="../../../envAI_hdfml" -N=2 # Number of nodes +export DIST_MODE="ddp" +export RUN_NAME="ddp-bl-imagenent" +export TRAINING_CMD="ddp_trainer.py -c config/base.yaml -c config/ddp.yaml" +export PYTHON_VENV="../../../envAI_hdfml" +export N=2 # Number of nodes sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ --job-name="$RUN_NAME-n$N" \ --output="logs_slurm/job-$RUN_NAME-n$N.out" \ From c61a2ce68ed6c4a7f6cb9f5d29f5712afda44dc1 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Wed, 24 Apr 2024 17:58:08 +0200 Subject: [PATCH 120/171] ADD itwinai installation --- env-files/tensorflow/createEnvJSCTF.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/env-files/tensorflow/createEnvJSCTF.sh b/env-files/tensorflow/createEnvJSCTF.sh index 8838347c..377940d4 100644 --- a/env-files/tensorflow/createEnvJSCTF.sh +++ b/env-files/tensorflow/createEnvJSCTF.sh @@ -104,5 +104,8 @@ if [ "$cont1" = true ] ; then 
pip3 install -r reqs_TF.txt --ignore-installed fi +# Install itwinai +pip install --upgrade pip +pip install -e .[dev] # eof From 4a787586331b353e97dfc57e74c428afa5570eb5 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 27 Apr 2024 15:16:23 +0200 Subject: [PATCH 121/171] UPDATE torch distributed tutorial 0 --- .../torch-scaling-test/runall.sh | 5 +- .../torch-tutorial-0-basics/README.md | 30 ++++- .../torch-tutorial-0-basics/ddp_slurm.sh | 66 ---------- .../deepspeed_slurm.sh | 75 ----------- .../torch-tutorial-0-basics/hvd_slurm.sh | 60 --------- .../torch-tutorial-0-basics/runall.sh | 42 ++++++- .../torch-tutorial-0-basics/slurm.sh | 117 ++++++++++++++++++ .../torch-tutorial-0-basics/train.py | 29 ++++- 8 files changed, 208 insertions(+), 216 deletions(-) delete mode 100644 tutorials/distributed-ml/torch-tutorial-0-basics/ddp_slurm.sh delete mode 100644 tutorials/distributed-ml/torch-tutorial-0-basics/deepspeed_slurm.sh delete mode 100644 tutorials/distributed-ml/torch-tutorial-0-basics/hvd_slurm.sh create mode 100644 tutorials/distributed-ml/torch-tutorial-0-basics/slurm.sh diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index 1baa4395..26451422 100644 --- a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -20,9 +20,12 @@ PYTHON_VENV="../../../envAI_hdfml" echo "Distributing training over $N nodes. Timeout set to: $T" +# Clear SLURM logs (*.out and *.err files) rm -rf logs_slurm mkdir logs_slurm -rm *.csv #*.out *.err *checkpoint.pth.tar + +# Clear scaling test logs +rm *.csv # *checkpoint.pth.tar # DDP baseline DIST_MODE="ddp" diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/README.md b/tutorials/distributed-ml/torch-tutorial-0-basics/README.md index 5ddcd635..43d42565 100644 --- a/tutorials/distributed-ml/torch-tutorial-0-basics/README.md +++ b/tutorials/distributed-ml/torch-tutorial-0-basics/README.md @@ -23,19 +23,43 @@ should be used to run it: If you want to distribute the code in `train.py` with **torch DDP**, run from terminal: ```bash -sbatch ddp_slurm.sh +export DIST_MODE="ddp" +export RUN_NAME="ddp-itwinai" +export TRAINING_CMD="train.py -s ddp" +export PYTHON_VENV="../../../envAI_hdfml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh ``` If you want to distribute the code in `train.py` with **DeepSpeed**, run from terminal: ```bash -sbatch deepspeed_slurm.sh +export DIST_MODE="deepspeed" +export RUN_NAME="deepspeed-itwinai" +export TRAINING_CMD="train.py -s deepspeed" +export PYTHON_VENV="../../../envAI_hdfml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh ``` If you want to distribute the code in `train.py` with **Horovod**, run from terminal: ```bash -sbatch hvd_slurm.sh +export DIST_MODE="deepspeed" +export RUN_NAME="deepspeed-itwinai" +export TRAINING_CMD="train.py -s deepspeed" +export PYTHON_VENV="../../../envAI_hdfml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + 
--output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh ``` You can run all of them with: diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/ddp_slurm.sh b/tutorials/distributed-ml/torch-tutorial-0-basics/ddp_slurm.sh deleted file mode 100644 index 1b53f04c..00000000 --- a/tutorials/distributed-ml/torch-tutorial-0-basics/ddp_slurm.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_DDP_tutorial-0 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job-ddp.out -#SBATCH --error=job-ddp.err -#SBATCH --time=00:15:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=1 -#SBATCH --cpus-per-task=32 -#SBATCH --gpus-per-node=4 -# SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source ../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set comm -export CUDA_VISIBLE_DEVICES="0,1,2,3" -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi - -# launch training -TRAINING_CMD="train.py -s ddp" - -srun --cpu-bind=none bash -c "torchrun \ - --log_dir='logs' \ - --nnodes=$SLURM_NNODES \ - --nproc_per_node=$SLURM_GPUS_PER_NODE \ - --rdzv_id=$SLURM_JOB_ID \ - --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ - --rdzv_backend=c10d \ - --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ - $TRAINING_CMD" - diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-tutorial-0-basics/deepspeed_slurm.sh deleted file mode 100644 index b12009de..00000000 --- a/tutorials/distributed-ml/torch-tutorial-0-basics/deepspeed_slurm.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_DeepSpeed_tutorial-0 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job-ds.out -#SBATCH --error=job-ds.err -#SBATCH --time=00:15:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=4 -#SBATCH --cpus-per-task=4 -#SBATCH --gpus-per-node=4 -# SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source ../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: 
$SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set env vars -export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi -export CUDA_VISIBLE_DEVICES="0,1,2,3" - -# launch training -MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i -export MASTER_ADDR -export MASTER_PORT=29500 - -TRAINING_CMD="train.py -s deepspeed" - -# Run without launcher: set --ntasks-per-node=NUM_GPUS -srun --cpu-bind=none python -u $TRAINING_CMD #--deepspeed - -# srun pwd - -# # Run with deepspeed launcher: set --ntasks-per-node=1 -# # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables -# export NCCL_IB_DISABLE=1 -# export NCCL_SOCKET_IFNAME=eth0 -# nodelist=$(scontrol show hostname $SLURM_NODELIST) -# echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile -# # Requires passwordless SSH access among compute node -# srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed -# rm .hostfile \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/hvd_slurm.sh b/tutorials/distributed-ml/torch-tutorial-0-basics/hvd_slurm.sh deleted file mode 100644 index a2a06e6c..00000000 --- a/tutorials/distributed-ml/torch-tutorial-0-basics/hvd_slurm.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_HVD_tutorial-0 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job-hvd.out -#SBATCH --error=job-hvd.err -#SBATCH --time=00:15:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=4 -#SBATCH --cpus-per-task=8 -#SBATCH --gpus-per-node=4 -# SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source ../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set vars -# export NCCL_DEBUG=INFO -export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi -export CUDA_VISIBLE_DEVICES="0,1,2,3" - -# launch training -TRAINING_CMD="train.py -s horovod" - -srun --cpu-bind=none python -u $TRAINING_CMD - diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/runall.sh b/tutorials/distributed-ml/torch-tutorial-0-basics/runall.sh index 17c0f190..956682ef 100644 --- a/tutorials/distributed-ml/torch-tutorial-0-basics/runall.sh +++ 
b/tutorials/distributed-ml/torch-tutorial-0-basics/runall.sh @@ -1,6 +1,38 @@ #!/bin/bash -# Run all versions of distributed ML -rm *.out *.err -echo "Torch DDP training: $(sbatch ddp_slurm.sh)" -echo "DeepSpeed training: $(sbatch deepspeed_slurm.sh)" -echo "Horovod training: $(sbatch hvd_slurm.sh)" \ No newline at end of file + +# Python virtual environment +PYTHON_VENV="../../../envAI_hdfml" + +# Clear SLURM logs (*.out and *.err files) +rm -rf logs_slurm +mkdir logs_slurm + +# DDP itwinai +DIST_MODE="ddp" +RUN_NAME="ddp-itwinai" +TRAINING_CMD="train.py -s ddp" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh + +# DeepSpeed itwinai +DIST_MODE="deepspeed" +RUN_NAME="deepspeed-itwinai" +TRAINING_CMD="train.py -s deepspeed" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh + +# Horovod itwinai +DIST_MODE="horovod" +RUN_NAME="horovod-itwinai" +TRAINING_CMD="train.py -s horovod" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/slurm.sh b/tutorials/distributed-ml/torch-tutorial-0-basics/slurm.sh new file mode 100644 index 00000000..c53e3da5 --- /dev/null +++ b/tutorials/distributed-ml/torch-tutorial-0-basics/slurm.sh @@ -0,0 +1,117 @@ +#!/bin/bash + +# SLURM jobscript for JSC systems + +# Job configuration +#SBATCH --job-name=distributed_training +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:30:00 + +# Resources allocation +#SBATCH --partition=batch +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=4 +#SBATCH --cpus-per-gpu=4 +#SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# Load environment modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# Job info +echo "DEBUG: TIME: $(date)" +sysN="$(uname -n | cut -f2- -d.)" +sysN="${sysN%%[0-9]*}" +echo "Running on system: $sysN" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$DEBUG" = true ] ; then + echo "DEBUG: NCCL_DEBUG=INFO" + export NCCL_DEBUG=INFO +fi +echo + +# Setup env for distributed ML +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_GPU" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_GPU +fi + +# Env vairables check +if [ -z "$DIST_MODE" ]; then + >&2 echo "ERROR: env variable DIST_MODE is not set. 
Allowed values are 'horovod', 'ddp' or 'deepspeed'" + exit 1 +fi +if [ -z "$RUN_NAME" ]; then + >&2 echo "WARNING: env variable RUN_NAME is not set. It's a way to identify some specific run of an experiment." + RUN_NAME=$DIST_MODE +fi +if [ -z "$TRAINING_CMD" ]; then + >&2 echo "ERROR: env variable TRAINING_CMD is not set. It's the python command to execute." + exit 1 +fi +if [ -z "$PYTHON_VENV" ]; then + >&2 echo "WARNING: env variable PYTHON_VENV is not set. It's the path to a python virtual environment." +else + # Activate Python virtual env + source $PYTHON_VENV/bin/activate +fi + +# Get GPUs info per node +srun --cpu-bind=none --ntasks-per-node=1 bash -c 'echo -e "NODE hostname: $(hostname)\n$(nvidia-smi)\n\n"' + +# Launch training +if [ "$DIST_MODE" == "ddp" ] ; then + echo "DDP training: $TRAINING_CMD" + srun --cpu-bind=none --ntasks-per-node=1 \ + bash -c "torchrun \ + --log_dir='logs_torchrun' \ + --nnodes=$SLURM_NNODES \ + --nproc_per_node=$SLURM_GPUS_PER_NODE \ + --rdzv_id=$SLURM_JOB_ID \ + --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ + --rdzv_backend=c10d \ + --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ + $TRAINING_CMD" +elif [ "$DIST_MODE" == "deepspeed" ] ; then + echo "DEEPSPEED training: $TRAINING_CMD" + MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i + export MASTER_ADDR + export MASTER_PORT=29500 + + srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ + python -u $TRAINING_CMD --deepspeed + + # # Run with deepspeed launcher: set --ntasks-per-node=1 + # # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables + # export NCCL_IB_DISABLE=1 + # export NCCL_SOCKET_IFNAME=eth0 + # nodelist=$(scontrol show hostname $SLURM_NODELIST) + # echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile + # # Requires passwordless SSH access among compute node + # srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed + # rm .hostfile +elif [ "$DIST_MODE" == "horovod" ] ; then + echo "HOROVOD training: $TRAINING_CMD" + srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ + python -u $TRAINING_CMD +else + >&2 echo "ERROR: unrecognized \$DIST_MODE env variable" + exit 1 +fi + diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/train.py b/tutorials/distributed-ml/torch-tutorial-0-basics/train.py index 614b56e4..1e5cbcac 100644 --- a/tutorials/distributed-ml/torch-tutorial-0-basics/train.py +++ b/tutorials/distributed-ml/torch-tutorial-0-basics/train.py @@ -2,14 +2,17 @@ Show how to use DDP, Horovod and DeepSpeed strategies interchangeably with an extremely simple neural network. 
""" -from typing import Any +from typing import Any, Dict import os import argparse +import time import torch from torch import nn from torch.utils.data import DataLoader, Dataset, DistributedSampler +import horovod.torch as hvd + from itwinai.torch.distributed import ( TorchDistributedStrategy, DDPDistributedStrategy, @@ -29,6 +32,9 @@ def parse_args() -> argparse.Namespace: "--shuffle_dataloader", action=argparse.BooleanOptionalAction ) + parser.add_argument( + '--batch-size', type=int, default=10, + help='input batch size for training (default: 10)') # DeepSpeed: needs to be removed import deepspeed @@ -56,7 +62,10 @@ def __getitem__(self, index): def trainer_entrypoint_fn( - foo: Any, args: argparse.Namespace, strategy: TorchDistributedStrategy + foo: Any, + args: argparse.Namespace, + strategy: TorchDistributedStrategy, + distribute_kwargs: Dict ) -> int: """Dummy training function. This emulates custom code developed by some use case. @@ -70,10 +79,8 @@ def trainer_entrypoint_fn( optim = torch.optim.Adam(model.parameters(), lr=1e-3) loss_fn = nn.MSELoss() # Distributed model - deepspeed_config = dict(train_batch_size=32) - # 'config_params' key is ignored if strategy != DSDistributedStrategy model, optim, lr_sched = strategy.distributed( - model, optim, lr_scheduler=None, config_params=deepspeed_config + model, optim, lr_scheduler=None, **distribute_kwargs ) # Data @@ -115,6 +122,7 @@ def trainer_entrypoint_fn( if lr_sched: lr_sched.step() + time.sleep(1) print(f" - TRAINING FINISHED") strategy.clean_up() return 123 @@ -131,13 +139,22 @@ def trainer_entrypoint_fn( raise RuntimeError('Resources unavailable') strategy = DDPDistributedStrategy(backend='nccl') + distribute_kwargs = {} elif args.strategy == 'horovod': strategy = HVDDistributedStrategy() + distribute_kwargs = dict( + compression=hvd.Compression.none, + op=hvd.Average, + gradient_predivide_factor=1.0 + ) elif args.strategy == 'deepspeed': strategy = DSDistributedStrategy(backend='nccl') + distribute_kwargs = dict( + config_params=dict(train_micro_batch_size_per_gpu=args.batch_size) + ) else: raise NotImplementedError( f"Strategy {args.strategy} is not recognized/implemented.") # Launch distributed training - trainer_entrypoint_fn("foobar", args, strategy) + trainer_entrypoint_fn("foobar", args, strategy, distribute_kwargs) From be69e218e3ca15f1cf55335302b2e2c7691c43b8 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 27 Apr 2024 17:08:24 +0200 Subject: [PATCH 122/171] UPDATE torch distributed tutorials --- src/itwinai/torch/reproducibility.py | 48 ++ .../torch-scaling-test/ddp_trainer.py | 7 +- .../torch-scaling-test/deepspeed_trainer.py | 9 +- .../torch-scaling-test/horovod_trainer.py | 7 +- .../torch-scaling-test/itwinai_trainer.py | 13 +- .../torch-scaling-test/runall.sh | 1 + .../torch-scaling-test/utils.py | 34 -- .../torch-tutorial-0-basics/runall.sh | 1 + .../torch-tutorial-0-basics/train.py | 14 +- .../torch-tutorial-1-mnist/README.md | 30 +- .../torch-tutorial-1-mnist/config.yaml | 25 +- .../torch-tutorial-1-mnist/ddp_slurm.sh | 66 --- .../torch-tutorial-1-mnist/deepspeed_slurm.sh | 74 --- .../torch-tutorial-1-mnist/hvd_slurm.sh | 60 --- .../torch-tutorial-1-mnist/runall.sh | 43 +- .../torch-tutorial-1-mnist/slurm.sh | 116 +++++ .../torch-tutorial-1-mnist/train.py | 462 ++++++++---------- 17 files changed, 465 insertions(+), 545 deletions(-) create mode 100644 src/itwinai/torch/reproducibility.py delete mode 100644 tutorials/distributed-ml/torch-tutorial-1-mnist/ddp_slurm.sh delete mode 100644 
tutorials/distributed-ml/torch-tutorial-1-mnist/deepspeed_slurm.sh delete mode 100644 tutorials/distributed-ml/torch-tutorial-1-mnist/hvd_slurm.sh create mode 100644 tutorials/distributed-ml/torch-tutorial-1-mnist/slurm.sh diff --git a/src/itwinai/torch/reproducibility.py b/src/itwinai/torch/reproducibility.py new file mode 100644 index 00000000..1513c82a --- /dev/null +++ b/src/itwinai/torch/reproducibility.py @@ -0,0 +1,48 @@ +""" +This module provides the tools to support reproducible execution of +torch scripts. +""" + +from typing import Optional +import numpy as np +import random + +import torch + + +def seed_worker(worker_id): + """Seed DataLoader worker.""" + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + + +def set_seed( + rnd_seed: Optional[int], + deterministic_cudnn: bool = True +) -> torch.Generator: + """Set torch random seed and return a PRNG object. + + Args: + rnd_seed (Optional[int]): random seed. If None, the seed is not set. + deterministic_cudnn (bool): if True, sets + ``torch.backends.cudnn.benchmark = False``, which may affect + performances. + + Returns: + torch.Generator: PRNG object. + """ + g = torch.Generator() + if rnd_seed is not None: + # Deterministic execution + np.random.seed(rnd_seed) + random.seed(rnd_seed) + torch.manual_seed(rnd_seed) + g.manual_seed(rnd_seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(rnd_seed) + torch.cuda.manual_seed_all(rnd_seed) + if deterministic_cudnn: + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + return g diff --git a/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py b/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py index 54f64fef..0a25ae5b 100755 --- a/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/ddp_trainer.py @@ -18,8 +18,11 @@ from itwinai.parser import ArgumentParser as ItAIArgumentParser from itwinai.loggers import EpochTimeTracker +from itwinai.torch.reproducibility import ( + seed_worker, set_seed +) -from utils import seed_worker, imagenet_dataset, set_seed +from utils import imagenet_dataset def parse_params(): @@ -121,7 +124,7 @@ def main(): dist.init_process_group(backend=args.backend) # Set random seed for reproducibility - torch_prng = set_seed(args.rnd_seed, use_cuda) + torch_prng = set_seed(args.rnd_seed, deterministic_cudnn=False) if is_distributed: # get job rank info - rank==0 master gpu diff --git a/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py index 691712e8..e6022021 100644 --- a/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/deepspeed_trainer.py @@ -18,8 +18,11 @@ from itwinai.parser import ArgumentParser as ItAIArgumentParser from itwinai.loggers import EpochTimeTracker +from itwinai.torch.reproducibility import ( + seed_worker, set_seed +) -from utils import seed_worker, set_seed, imagenet_dataset +from utils import imagenet_dataset def parse_params(): @@ -124,7 +127,7 @@ def main(): deepspeed.init_distributed(dist_backend=args.backend) # Set random seed for reproducibility - torch_prng = set_seed(args.rnd_seed, use_cuda) + torch_prng = set_seed(args.rnd_seed, deterministic_cudnn=False) if is_distributed: # Get job rank info - rank==0 master gpu @@ -248,7 +251,7 @@ def main(): print('TIMER: epoch time:', timer()-lt, 's') 
epoch_time_tracker.add_epoch_time(epoch-1, timer()-lt) - if torch.cuda.is_available(): + if is_distributed: dist.barrier() if grank == 0: diff --git a/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py b/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py index 501b545c..a4c3eaa4 100755 --- a/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/horovod_trainer.py @@ -19,8 +19,11 @@ from itwinai.parser import ArgumentParser as ItAIArgumentParser from itwinai.loggers import EpochTimeTracker +from itwinai.torch.reproducibility import ( + seed_worker, set_seed +) -from utils import imagenet_dataset, seed_worker, set_seed +from utils import imagenet_dataset def parse_params(): @@ -129,7 +132,7 @@ def main(): hvd.init() # Set random seed for reproducibility - torch_prng = set_seed(args.rnd_seed, use_cuda) + torch_prng = set_seed(args.rnd_seed, deterministic_cudnn=False) # is_main_worker = True # if is_distributed and (hvd.rank() != 0 or hvd.local_rank() != 0): diff --git a/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py b/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py index a1eacc20..8e81fdfc 100644 --- a/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py @@ -27,8 +27,11 @@ ) from itwinai.parser import ArgumentParser as ItAIArgumentParser from itwinai.loggers import EpochTimeTracker +from itwinai.torch.reproducibility import ( + seed_worker, set_seed +) -from utils import seed_worker, imagenet_dataset, set_seed +from utils import imagenet_dataset def parse_params() -> argparse.Namespace: @@ -182,13 +185,13 @@ def main(): # Limit # of CPU threads to be used per worker # torch.set_num_threads(1) - # start the timer for profiling + # Start the timer for profiling st = timer() # Set random seed for reproducibility - torch_prng = set_seed(args.rnd_seed, use_cuda) + torch_prng = set_seed(args.rnd_seed, deterministic_cudnn=False) - # get job rank info - rank==0 master gpu + # Get job rank info - rank==0 master gpu if is_distributed: # local world size - per node lwsize = strategy.dist_lwsize() # local world size - per run @@ -221,7 +224,7 @@ def main(): # Encapsulate the model on the GPU assigned to the current process device = torch.device( - strategy.dist_device() if use_cuda and torch.cuda.is_available() + strategy.dist_device() if use_cuda else 'cpu') if use_cuda: torch.cuda.set_device(lrank) diff --git a/tutorials/distributed-ml/torch-scaling-test/runall.sh b/tutorials/distributed-ml/torch-scaling-test/runall.sh index 26451422..22958c16 100644 --- a/tutorials/distributed-ml/torch-scaling-test/runall.sh +++ b/tutorials/distributed-ml/torch-scaling-test/runall.sh @@ -23,6 +23,7 @@ echo "Distributing training over $N nodes. 
Timeout set to: $T" # Clear SLURM logs (*.out and *.err files) rm -rf logs_slurm mkdir logs_slurm +rm -rf logs_torchrun # Clear scaling test logs rm *.csv # *checkpoint.pth.tar diff --git a/tutorials/distributed-ml/torch-scaling-test/utils.py b/tutorials/distributed-ml/torch-scaling-test/utils.py index cbd6aace..a5dc591e 100644 --- a/tutorials/distributed-ml/torch-scaling-test/utils.py +++ b/tutorials/distributed-ml/torch-scaling-test/utils.py @@ -1,40 +1,6 @@ -from typing import Optional -import numpy as np -import random - -import torch from torchvision import datasets, transforms -def seed_worker(worker_id): - worker_seed = torch.initial_seed() % 2**32 - np.random.seed(worker_seed) - random.seed(worker_seed) - - -def set_seed(rnd_seed: Optional[int], use_cuda: bool) -> torch.Generator: - """Set torch random seed and return a PRNG object. - - Args: - rnd_seed (Optional[int]): random seed. If None, the seed is not set. - use_cuda (bool): whether GPU is available. - - Returns: - torch.Generator: PRNG object. - """ - g = torch.Generator() - if rnd_seed is not None: - # Deterministic execution - np.random.seed(rnd_seed) - random.seed(rnd_seed) - torch.manual_seed(rnd_seed) - g.manual_seed(rnd_seed) - if use_cuda: - torch.cuda.manual_seed(rnd_seed) - torch.cuda.manual_seed_all(rnd_seed) - return g - - def imagenet_dataset(data_root: str): """Create a torch dataset object for Imagenet.""" transform = transforms.Compose([ diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/runall.sh b/tutorials/distributed-ml/torch-tutorial-0-basics/runall.sh index 956682ef..48a8f1e0 100644 --- a/tutorials/distributed-ml/torch-tutorial-0-basics/runall.sh +++ b/tutorials/distributed-ml/torch-tutorial-0-basics/runall.sh @@ -6,6 +6,7 @@ PYTHON_VENV="../../../envAI_hdfml" # Clear SLURM logs (*.out and *.err files) rm -rf logs_slurm mkdir logs_slurm +rm -rf logs_torchrun # DDP itwinai DIST_MODE="ddp" diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/train.py b/tutorials/distributed-ml/torch-tutorial-0-basics/train.py index 1e5cbcac..d48e5a3e 100644 --- a/tutorials/distributed-ml/torch-tutorial-0-basics/train.py +++ b/tutorials/distributed-ml/torch-tutorial-0-basics/train.py @@ -2,8 +2,7 @@ Show how to use DDP, Horovod and DeepSpeed strategies interchangeably with an extremely simple neural network. """ -from typing import Any, Dict -import os +from typing import Dict import argparse import time @@ -61,18 +60,13 @@ def __getitem__(self, index): return torch.rand(self.x_size), torch.rand(self.y_size) -def trainer_entrypoint_fn( - foo: Any, +def training_fn( args: argparse.Namespace, strategy: TorchDistributedStrategy, distribute_kwargs: Dict ) -> int: - """Dummy training function. This emulates custom code developed - by some use case. 
- """ + """Dummy training function.""" strategy.init() - print(f"{foo}: {os.environ.get('RANK')} {os.environ.get('LOCAL_RANK')} " - f"{os.environ.get('MASTER_ADDR')} {os.environ.get('MASTER_PORT')}") # Local model model = nn.Linear(3, 4) @@ -157,4 +151,4 @@ def trainer_entrypoint_fn( f"Strategy {args.strategy} is not recognized/implemented.") # Launch distributed training - trainer_entrypoint_fn("foobar", args, strategy, distribute_kwargs) + training_fn(args, strategy, distribute_kwargs) diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/README.md b/tutorials/distributed-ml/torch-tutorial-1-mnist/README.md index 6f22d3ef..70178f0d 100644 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/README.md +++ b/tutorials/distributed-ml/torch-tutorial-1-mnist/README.md @@ -33,19 +33,43 @@ should be used to run it: If you want to distribute the code in `train.py` with **torch DDP**, run from terminal: ```bash -sbatch ddp_slurm.sh +export DIST_MODE="ddp" +export RUN_NAME="ddp-itwinai" +export TRAINING_CMD="train.py -s ddp -c config.yaml" +export PYTHON_VENV="../../../envAI_hdfml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh ``` If you want to distribute the code in `train.py` with **DeepSpeed**, run from terminal: ```bash -sbatch deepspeed_slurm.sh +export DIST_MODE="deepspeed" +export RUN_NAME="deepspeed-itwinai" +export TRAINING_CMD="train.py -s deepspeed -c config.yaml" +export PYTHON_VENV="../../../envAI_hdfml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh ``` If you want to distribute the code in `train.py` with **Horovod**, run from terminal: ```bash -sbatch hvd_slurm.sh +export DIST_MODE="horovod" +export RUN_NAME="horovod-itwinai" +export TRAINING_CMD="train.py -s horovod -c config.yaml" +export PYTHON_VENV="../../../envAI_hdfml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh ``` You can run all of them with: diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml b/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml index cb221dec..8067987d 100644 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml +++ b/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml @@ -1,26 +1,29 @@ -# I/O +# Data and logging data_dir: ./ +log_int: 10 +verbose: True restart_int: 10 download_only: False -verbose: True +dataset_replication: 100 +shuff: False +nworker: 4 # num workers dataloader +prefetch: 2 # Model batch_size: 64 epochs: 2 lr: 0.001 -concM: 100 momentum: 0.5 -shuff: False -# Debugging -testrun: False -nseed: 10 -log_int: 10 +# Reproducibility +rnd_seed: 10 # Distributed ML -backend: nccl -nworker: 4 # num workers dataloader -prefetch: 2 +backend: nccl # ignored when using Horovod no_cuda: False +# Horovod: ignored when NOT using Horovod +fp16_allreduce: False +use_adasum: False +gradient_predivide_factor: 1.0 diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/ddp_slurm.sh 
b/tutorials/distributed-ml/torch-tutorial-1-mnist/ddp_slurm.sh deleted file mode 100644 index 3d5d4bb3..00000000 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/ddp_slurm.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_DDP_tutorial-1 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job-ddp.out -#SBATCH --error=job-ddp.err -#SBATCH --time=00:30:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=1 -#SBATCH --cpus-per-task=32 -#SBATCH --gpus-per-node=4 -# SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source ../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set comm -export CUDA_VISIBLE_DEVICES="0,1,2,3" -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi - -# launch training -TRAINING_CMD="train.py -s ddp -c config.yaml" - -srun --cpu-bind=none bash -c "torchrun \ - --log_dir='logs' \ - --nnodes=$SLURM_NNODES \ - --nproc_per_node=$SLURM_GPUS_PER_NODE \ - --rdzv_id=$SLURM_JOB_ID \ - --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ - --rdzv_backend=c10d \ - --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ - $TRAINING_CMD" - diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-tutorial-1-mnist/deepspeed_slurm.sh deleted file mode 100644 index 8e5f7881..00000000 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/deepspeed_slurm.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_DeepSpeed_tutorial-1 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job-ds.out -#SBATCH --error=job-ds.err -#SBATCH --time=00:30:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=4 -#SBATCH --cpus-per-task=4 -#SBATCH --gpus-per-node=4 -# SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source ../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: 
$SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set env vars -export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi -export CUDA_VISIBLE_DEVICES="0,1,2,3" - -# launch training -MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i -export MASTER_ADDR -export MASTER_PORT=29500 - -TRAINING_CMD="train.py -s deepspeed -c config.yaml" - -# Run without launcher: set --ntasks-per-node=NUM_GPUS -srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed - -# # Run with deepspeed launcher: set --ntasks-per-node=1 -# # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables -# export NCCL_IB_DISABLE=1 -# export NCCL_SOCKET_IFNAME=eth0 -# nodelist=$(scontrol show hostname $SLURM_NODELIST) -# echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile -# # Requires passwordless SSH access among compute node -# srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed -# rm .hostfile - diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/hvd_slurm.sh b/tutorials/distributed-ml/torch-tutorial-1-mnist/hvd_slurm.sh deleted file mode 100644 index 3774b6e1..00000000 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/hvd_slurm.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_HVD_tutorial-1 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job-hvd.out -#SBATCH --error=job-hvd.err -#SBATCH --time=00:30:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=4 -#SBATCH --cpus-per-task=8 -#SBATCH --gpus-per-node=4 -# SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source ../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set vars -# export NCCL_DEBUG=INFO -export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi -export CUDA_VISIBLE_DEVICES="0,1,2,3" - -# launch training -TRAINING_CMD="train.py -s horovod -c config.yaml" - -srun --cpu-bind=none python -u $TRAINING_CMD - diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/runall.sh b/tutorials/distributed-ml/torch-tutorial-1-mnist/runall.sh index b1470d75..5a89b4fe 100644 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/runall.sh +++ b/tutorials/distributed-ml/torch-tutorial-1-mnist/runall.sh @@ -1,6 +1,39 @@ #!/bin/bash -# Run all versions of distributed ML for MNIST -rm *checkpoint.pth.tar *.out *.err -echo "Torch DDP training: $(sbatch ddp_slurm.sh)" 
-echo "DeepSpeed training: $(sbatch deepspeed_slurm.sh)" -echo "Horovod training: $(sbatch hvd_slurm.sh)" \ No newline at end of file + +# Python virtual environment +PYTHON_VENV="../../../envAI_hdfml" + +# Clear SLURM logs (*.out and *.err files) +rm -rf logs_slurm +mkdir logs_slurm +rm -rf logs_torchrun + +# DDP itwinai +DIST_MODE="ddp" +RUN_NAME="ddp-itwinai" +TRAINING_CMD="train.py -s ddp -c config.yaml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh + +# DeepSpeed itwinai +DIST_MODE="deepspeed" +RUN_NAME="deepspeed-itwinai" +TRAINING_CMD="train.py -s deepspeed -c config.yaml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh + +# Horovod itwinai +DIST_MODE="horovod" +RUN_NAME="horovod-itwinai" +TRAINING_CMD="train.py -s horovod -c config.yaml" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/slurm.sh b/tutorials/distributed-ml/torch-tutorial-1-mnist/slurm.sh new file mode 100644 index 00000000..3eef38ae --- /dev/null +++ b/tutorials/distributed-ml/torch-tutorial-1-mnist/slurm.sh @@ -0,0 +1,116 @@ +#!/bin/bash + +# SLURM jobscript for JSC systems + +# Job configuration +#SBATCH --job-name=distributed_training +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:30:00 + +# Resources allocation +#SBATCH --partition=batch +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=4 +#SBATCH --cpus-per-gpu=4 +#SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# Load environment modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# Job info +echo "DEBUG: TIME: $(date)" +sysN="$(uname -n | cut -f2- -d.)" +sysN="${sysN%%[0-9]*}" +echo "Running on system: $sysN" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$DEBUG" = true ] ; then + echo "DEBUG: NCCL_DEBUG=INFO" + export NCCL_DEBUG=INFO +fi +echo + +# Setup env for distributed ML +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_GPU" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_GPU +fi + +# Env vairables check +if [ -z "$DIST_MODE" ]; then + >&2 echo "ERROR: env variable DIST_MODE is not set. Allowed values are 'horovod', 'ddp' or 'deepspeed'" + exit 1 +fi +if [ -z "$RUN_NAME" ]; then + >&2 echo "WARNING: env variable RUN_NAME is not set. 
It's a way to identify some specific run of an experiment." + RUN_NAME=$DIST_MODE +fi +if [ -z "$TRAINING_CMD" ]; then + >&2 echo "ERROR: env variable TRAINING_CMD is not set. It's the python command to execute." + exit 1 +fi +if [ -z "$PYTHON_VENV" ]; then + >&2 echo "WARNING: env variable PYTHON_VENV is not set. It's the path to a python virtual environment." +else + # Activate Python virtual env + source $PYTHON_VENV/bin/activate +fi + +# Get GPUs info per node +srun --cpu-bind=none --ntasks-per-node=1 bash -c 'echo -e "NODE hostname: $(hostname)\n$(nvidia-smi)\n\n"' + +# Launch training +if [ "$DIST_MODE" == "ddp" ] ; then + echo "DDP training: $TRAINING_CMD" + srun --cpu-bind=none --ntasks-per-node=1 \ + bash -c "torchrun \ + --log_dir='logs_torchrun' \ + --nnodes=$SLURM_NNODES \ + --nproc_per_node=$SLURM_GPUS_PER_NODE \ + --rdzv_id=$SLURM_JOB_ID \ + --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ + --rdzv_backend=c10d \ + --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ + $TRAINING_CMD" +elif [ "$DIST_MODE" == "deepspeed" ] ; then + echo "DEEPSPEED training: $TRAINING_CMD" + MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i + export MASTER_ADDR + export MASTER_PORT=29500 + + srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ + python -u $TRAINING_CMD --deepspeed + + # # Run with deepspeed launcher: set --ntasks-per-node=1 + # # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables + # export NCCL_IB_DISABLE=1 + # export NCCL_SOCKET_IFNAME=eth0 + # nodelist=$(scontrol show hostname $SLURM_NODELIST) + # echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile + # # Requires passwordless SSH access among compute node + # srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed + # rm .hostfile +elif [ "$DIST_MODE" == "horovod" ] ; then + echo "HOROVOD training: $TRAINING_CMD" + srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ + python -u $TRAINING_CMD +else + >&2 echo "ERROR: unrecognized \$DIST_MODE env variable" + exit 1 +fi diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py b/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py index 365a9048..bad6a3a5 100644 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py +++ b/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py @@ -1,21 +1,20 @@ """ Show how to use DDP, Horovod and DeepSpeed strategies interchangeably -with a simple neural network trained on MNIST dataset, showing how -to use checkpoints. +with a simple neural network trained on MNIST dataset. 
""" -import os +from typing import Tuple import argparse import sys import time -import numpy as np -import random +from timeit import default_timer as timer import torch -import torch.distributed as dist import torch.nn as nn import torch.nn.functional as F from torchvision import datasets, transforms -from torch.utils.data import DataLoader, DistributedSampler +from torch.utils.data import DataLoader, DistributedSampler, Dataset + +import horovod.torch as hvd import deepspeed @@ -26,9 +25,12 @@ DSDistributedStrategy, ) from itwinai.parser import ArgumentParser as ItAIArgumentParser +from itwinai.torch.reproducibility import ( + seed_worker, set_seed +) -def parse_args() -> argparse.Namespace: +def parse_params() -> argparse.Namespace: """ Parse CLI args, which can also be loaded from a configuration file using the --config flag: @@ -44,54 +46,61 @@ def parse_args() -> argparse.Namespace: default='ddp' ) - # IO parsers + # Data and logging parser.add_argument('--data-dir', default='./', help=('location of the training dataset in the local ' 'filesystem')) + parser.add_argument('--log-int', type=int, default=10, + help='log interval per training') + parser.add_argument('--verbose', + action=argparse.BooleanOptionalAction, + help='Print parsed arguments') parser.add_argument('--restart-int', type=int, default=10, help='restart interval per epoch (default: 10)') parser.add_argument('--download-only', action=argparse.BooleanOptionalAction, help='Download dataset and exit') - parser.add_argument('--verbose', - action=argparse.BooleanOptionalAction, - help='Print parsed arguments') + parser.add_argument('--dataset_replication', type=int, default=100, + help='concatenate MNIST to this factor (default: 100)') + parser.add_argument('--shuff', action='store_true', default=False, + help='shuffle dataset (default: False)') + parser.add_argument('--nworker', type=int, default=0, + help=('number of workers in DataLoader (default: 0 -' + ' only main)')) + parser.add_argument('--prefetch', type=int, default=2, + help='prefetch data in DataLoader (default: 2)') - # model parsers + # Model parser.add_argument('--batch-size', type=int, default=64, help='input batch size for training (default: 64)') parser.add_argument('--epochs', type=int, default=10, help='number of epochs to train (default: 10)') parser.add_argument('--lr', type=float, default=0.01, help='learning rate (default: 0.01)') - parser.add_argument('--concM', type=int, default=100, - help='concatenate MNIST to this factor (default: 100)') parser.add_argument('--momentum', type=float, default=0.5, help='momentum in SGD optimizer (default: 0.5)') - parser.add_argument('--shuff', action='store_true', default=False, - help='shuffle dataset (default: False)') - # debug parsers - parser.add_argument('--testrun', action='store_true', default=False, - help='do a test run with seed (default: False)') - parser.add_argument('--nseed', type=int, default=0, + # Reproducibility + parser.add_argument('--rnd-seed', type=int, default=0, help='seed integer for reproducibility (default: 0)') - parser.add_argument('--log-int', type=int, default=10, - help='log interval per training') - # parallel parsers + # Distributed ML parser.add_argument('--backend', type=str, default='nccl', help='backend for parrallelisation (default: nccl)') - parser.add_argument('--nworker', type=int, default=0, - help=('number of workers in DataLoader (default: 0 -' - ' only main)')) - parser.add_argument('--prefetch', type=int, default=2, - help='prefetch data in DataLoader (default: 
2)') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables GPGPUs') parser.add_argument('--local_rank', type=int, default=-1, help='local rank passed from distributed launcher') + # Horovod: ignored when not using Horovod + parser.add_argument('--fp16-allreduce', action='store_true', default=False, + help='use fp16 compression during allreduce') + parser.add_argument('--use-adasum', action='store_true', default=False, + help='use adasum algorithm to do reduction') + parser.add_argument('--gradient-predivide-factor', type=float, default=1.0, + help=('apply gradient pre-divide factor in optimizer ' + '(default: 1.0)')) + # DeepSpeed parser = deepspeed.add_config_arguments(parser) args = parser.parse_args() @@ -140,20 +149,21 @@ def train( if strategy.is_main_worker(): print("\n") for batch_idx, (data, target) in enumerate(train_loader): - t = time.perf_counter() + t = timer() data, target = data.to(device), target.to(device) optimizer.zero_grad() output = model(data) loss = F.nll_loss(output, target) loss.backward() optimizer.step() - if batch_idx % args.log_int == 0 and strategy.is_main_worker(): + if (strategy.is_main_worker() and args.log_int > 0 + and batch_idx % args.log_int == 0): print( f'Train epoch: {epoch} ' f'[{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\t' f'Loss: {loss.item():.6f}') - t_list.append(time.perf_counter() - t) + t_list.append(timer() - t) loss_acc += loss.item() if strategy.is_main_worker(): print('TIMER: train time', sum(t_list) / len(t_list), 's') @@ -172,9 +182,9 @@ def test(model, device, test_loader, strategy: TorchDistributedStrategy): for data, target in test_loader: data, target = data.to(device), target.to(device) output = model(data) - # sum up batch loss + # Sum up batch loss test_loss += F.nll_loss(output, target, reduction="sum").item() - # get the index of the max log-probability + # Get the index of the max log-probability pred = output.argmax(dim=1, keepdim=True) correct += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) @@ -186,58 +196,6 @@ def test(model, device, test_loader, strategy: TorchDistributedStrategy): return acc_test -def save_state( - epoch, distrib_model, loss_acc, optimizer, - res_name, is_best, strategy: TorchDistributedStrategy -): - """ - Save training state. - """ - grank = strategy.dist_grank() - rt = time.time() - # find if is_best happened in any worker - if torch.cuda.is_available(): - is_best_m = strategy.par_allgather_obj(is_best) - - if torch.cuda.is_available(): - if any(is_best_m): - # find which rank is_best happened - select first rank if multiple - is_best_rank = np.where(np.array(is_best_m))[0][0] - - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - # write on worker with is_best - if grank == is_best_rank: - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} ' - f'in {time.time()-rt} s') - else: - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} in ' - f'{time.time()-rt} s') - - -def seed_worker(worker_id): - """ - Seed dataloader worker. 
- """ - worker_seed = torch.initial_seed() % 2**32 - np.random.seed(worker_seed) - random.seed(worker_seed) - - def download_mnist(): """ Use built-in torch datasets functions to pull MNIST dataset. @@ -257,12 +215,46 @@ def download_mnist(): ])) +def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: + """Load MNIST train and test datasets, replicating them. + + Args: + dataset_replication (int): dataset replication factor. Default 1. + + Returns: + Tuple[Dataset, Dataset]: train dataset and test dataset. + """ + replicated_data = [ + datasets.MNIST(args.data_dir, train=True, download=False, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + for _ in range(dataset_replication) + ] + train_dataset = torch.utils.data.ConcatDataset(replicated_data) + + replicated_data = [ + datasets.MNIST(args.data_dir, train=False, download=False, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + for _ in range(dataset_replication) + ] + test_dataset = torch.utils.data.ConcatDataset(replicated_data) + return train_dataset, test_dataset + + if __name__ == "__main__": - args = parse_args() + args = parse_params() if args.download_only: - # Download datasets and exit + # Download datasets from a location with internet access and exit. + # This is convenient when submitting training jobs to + # a batch system where worker nodes have no internet + # access, like in some HPCs. download_mnist() sys.exit() @@ -273,193 +265,148 @@ def download_mnist(): raise RuntimeError('Resources unavailable') strategy = DDPDistributedStrategy(backend=args.backend) + distribute_kwargs = {} elif args.strategy == 'horovod': strategy = HVDDistributedStrategy() + distribute_kwargs = dict( + compression=( + hvd.Compression.fp16 if args.fp16_allreduce + else hvd.Compression.none + ), + op=hvd.Adasum if args.use_adasum else hvd.Average, + gradient_predivide_factor=args.gradient_predivide_factor + ) elif args.strategy == 'deepspeed': strategy = DSDistributedStrategy(backend=args.backend) + distribute_kwargs = dict( + config_params=dict(train_micro_batch_size_per_gpu=args.batch_size) + ) else: raise NotImplementedError( f"Strategy {args.strategy} is not recognized/implemented.") strategy.init() - # check CUDA availability - args.cuda = not args.no_cuda and torch.cuda.is_available() - - # limit # of CPU threads to be used per worker - torch.set_num_threads(1) - - # get directory - program_dir = os.getcwd() + # Check resources availability + use_cuda = not args.no_cuda and torch.cuda.is_available() + is_distributed = False + if use_cuda and torch.cuda.device_count() > 0: + is_distributed = True - # start the time.time for profiling - st = time.time() + # Start the timer for profiling + st = timer() - # deterministic testrun - if args.testrun: - torch.manual_seed(args.nseed) - g = torch.Generator() - g.manual_seed(args.nseed) + # Set random seed for reproducibility + torch_prng = set_seed(args.rnd_seed) - # get job rank info - rank==0 master gpu - if torch.cuda.is_available(): + # Get job rank info - rank==0 master gpu + if is_distributed: # local world size - per node - lwsize = strategy.dist_lwsize() if args.cuda else 0 + lwsize = strategy.dist_lwsize() # local world size - per run gwsize = strategy.dist_gwsize() # global world size - per run grank = strategy.dist_grank() # global rank - assign per run lrank = strategy.dist_lrank() # local rank - assign per node else: + # Use a single worker (either 
on GPU or CPU) + lwsize = 1 gwsize = 1 grank = 0 + lrank = 0 - # some debug if strategy.is_main_worker(): - print('TIMER: initialise:', time.time()-st, 's') - - # move the model on the GPU assigned to the current process + print('TIMER: initialise:', timer()-st, 's') + print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) + print('DEBUG: sys.version:', sys.version) + print('DEBUG: args.data_dir:', args.data_dir) + print('DEBUG: args.log_int:', args.log_int) + print('DEBUG: args.nworker:', args.nworker) + print('DEBUG: args.prefetch:', args.prefetch) + print('DEBUG: args.batch_size:', args.batch_size) + print('DEBUG: args.epochs:', args.epochs) + print('DEBUG: args.lr:', args.lr) + print('DEBUG: args.momentum:', args.momentum) + print('DEBUG: args.shuff:', args.shuff) + print('DEBUG: args.rnd_seed:', args.rnd_seed) + print('DEBUG: args.backend:', args.backend) + print('DEBUG: args.no_cuda:', args.no_cuda, '\n') + + # Encapsulate the model on the GPU assigned to the current process device = torch.device( - strategy.dist_device() if args.cuda and torch.cuda.is_available() - else 'cpu') - if args.cuda: + strategy.dist_device() if use_cuda else 'cpu') + if use_cuda: torch.cuda.set_device(lrank) - # deterministic testrun - if args.testrun: - torch.cuda.manual_seed(args.nseed) - - # read data - mnist_scale = args.concM - largeData = [] - for i in range(mnist_scale): - largeData.append( - datasets.MNIST(args.data_dir, train=True, download=False, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - ) - - # concat data - train_dataset = torch.utils.data.ConcatDataset(largeData) - - mnist_scale = args.concM - largeData = [] - for i in range(mnist_scale): - largeData.append( - datasets.MNIST(args.data_dir, train=False, download=False, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - ) - # concat data - test_dataset = torch.utils.data.ConcatDataset(largeData) + # Dataset + train_dataset, test_dataset = mnist_dataset(args.dataset_replication) - # restricts data loading to a subset of the dataset exclusive to the - # current process - args.shuff = args.shuff and not args.testrun - if torch.cuda.is_available(): + if is_distributed: + # Distributed sampler restricts data loading to a subset of the dataset + # exclusive to the current process. 
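To make the comment above concrete: a `DistributedSampler` gives each rank a disjoint shard of the dataset, and `set_epoch` must be called once per epoch (as the patch does further down) so the shuffle order changes between epochs. The standalone sketch below is illustrative only and not part of the patch; `world_size` and `rank` are assumed placeholder values standing in for what the strategy reports at runtime.

```python
# Illustrative sketch: how DistributedSampler shards data across ranks.
# `world_size` and `rank` are assumed values, not taken from a process group.
import torch
from torch.utils.data import DataLoader, DistributedSampler, TensorDataset

dataset = TensorDataset(torch.arange(16).float())
world_size, rank = 4, 1

sampler = DistributedSampler(
    dataset, num_replicas=world_size, rank=rank, shuffle=True)
loader = DataLoader(dataset, batch_size=2, sampler=sampler)

for epoch in range(2):
    # Without set_epoch, every epoch reuses the same shuffle order.
    sampler.set_epoch(epoch)
    seen = [int(x) for (batch,) in loader for x in batch]
    print(f"epoch {epoch}: rank {rank} sees {seen}")
```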
train_sampler = DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) - test_sampler = DistributedSampler( - test_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) - # distribute dataset to workers - # persistent workers is not possible for nworker=0 - pers_w = True if args.nworker > 1 else False - - # deterministic testrun - the same dataset each run - kwargs = {'worker_init_fn': seed_worker, - 'generator': g} if args.testrun else {} - - if torch.cuda.is_available(): + train_dataset, num_replicas=gwsize, rank=grank, + shuffle=(args.shuff and args.rnd_seed is None) + ) train_loader = DataLoader( train_dataset, batch_size=args.batch_size, sampler=train_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs + persistent_workers=(args.nworker > 1), + prefetch_factor=args.prefetch, generator=torch_prng, + worker_init_fn=seed_worker + ) + test_sampler = DistributedSampler( + test_dataset, num_replicas=gwsize, rank=grank, + shuffle=(args.shuff and args.rnd_seed is None) ) test_loader = DataLoader( test_dataset, batch_size=args.batch_size, sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs + persistent_workers=(args.nworker > 1), + prefetch_factor=args.prefetch, generator=torch_prng, + worker_init_fn=seed_worker ) + else: train_loader = DataLoader( - train_dataset, batch_size=args.batch_size) + train_dataset, batch_size=args.batch_size, generator=torch_prng, + worker_init_fn=seed_worker + ) test_loader = DataLoader( - test_dataset, batch_size=args.batch_size) + test_dataset, batch_size=args.batch_size, generator=torch_prng, + worker_init_fn=seed_worker + ) if strategy.is_main_worker(): - print('TIMER: read and concat data:', time.time()-st, 's') + print('TIMER: read and concat data:', timer()-st, 's') - # create CNN model + # Create CNN model model = Net().to(device) - # optimizer + # Optimizer optimizer = torch.optim.SGD( model.parameters(), lr=args.lr, momentum=args.momentum) - deepspeed_config = dict(train_batch_size=args.batch_size) - # 'config_params' key is ignored if strategy != DSDistributedStrategy - distrib_model, optimizer, _ = strategy.distributed( - model, optimizer, lr_scheduler=None, config_params=deepspeed_config - ) + # Distributed + if is_distributed: + distrib_model, optimizer, _ = strategy.distributed( + model, optimizer, lr_scheduler=None, **distribute_kwargs + ) - # resume state - start_epoch = 1 - best_acc = np.Inf - res_name = f'{args.strategy}-checkpoint.pth.tar' - if os.path.isfile(res_name): - try: - if torch.cuda.is_available(): - dist.barrier() - # Map model to be loaded to specified single gpu. 
- loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { - 'cpu:%d' % 0: 'cpu:%d' % lrank} - checkpoint = torch.load( - program_dir+'/'+res_name, map_location=loc) - else: - checkpoint = torch.load(program_dir+'/'+res_name) - start_epoch = checkpoint['epoch'] - best_acc = checkpoint['best_acc'] - distrib_model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - if torch.cuda.is_available(): - if strategy.is_main_worker(): - print(f'WARNING: restarting from {start_epoch} epoch') - else: - print(f'WARNING: restarting from {start_epoch} epoch') - except Exception: - if torch.cuda.is_available(): - if strategy.is_main_worker(): - print('WARNING: restart file cannot be loaded, ' - 'restarting!') - else: - print('WARNING: restart file cannot be loaded, restarting!') - - if start_epoch > args.epochs: - if torch.cuda.is_available(): - if strategy.is_main_worker(): - print('WARNING: given epochs are less than the one in the ' - 'restart file!\n' - 'WARNING: SYS.EXIT is issued') - - strategy.clean_up() - sys.exit() - else: - print('WARNING: given epochs are less than the one in ' - 'the restart file!\n' - 'WARNING: SYS.EXIT is issued') - sys.exit() - - # start trainin/testing loop + # Start training and test loop if strategy.is_main_worker(): - print('TIMER: broadcast:', time.time()-st, 's') + print('TIMER: broadcast:', timer()-st, 's') print('\nDEBUG: start training') print('--------------------------------------------------------') - et = time.time() + et = timer() + start_epoch = 1 for epoch in range(start_epoch, args.epochs + 1): - lt = time.time() - # training + lt = timer() + if is_distributed: + # Inform the sampler that a new epoch started: shuffle + # may be needed + train_sampler.set_epoch(epoch) + test_sampler.set_epoch(epoch) + + # Training loss_acc = train( model=distrib_model, device=device, @@ -470,7 +417,7 @@ def download_mnist(): args=args ) - # testing + # Testing acc_test = test( model=distrib_model, device=device, @@ -478,69 +425,44 @@ def download_mnist(): strategy=strategy ) - # save first epoch timer + # Save first epoch timer if epoch == start_epoch: - first_ep_t = time.time()-lt + first_ep_t = timer()-lt - # final epoch + # Final epoch if epoch + 1 == args.epochs: train_loader.last_epoch = True test_loader.last_epoch = True if strategy.is_main_worker(): - print('TIMER: epoch time:', time.time()-lt, 's') + print('TIMER: epoch time:', timer()-lt, 's') print('DEBUG: accuracy:', acc_test, '%') - # save state if found a better state - is_best = loss_acc < best_acc - if epoch % args.restart_int == 0: - save_state( - epoch=epoch, - distrib_model=distrib_model, - loss_acc=loss_acc, - optimizer=optimizer, - res_name=res_name, - is_best=is_best, - strategy=strategy - ) - # reset best_acc - best_acc = min(loss_acc, best_acc) - - # finalise - # save final state - save_state( - epoch=epoch, - distrib_model=distrib_model, - loss_acc=loss_acc, - optimizer=optimizer, - res_name=res_name, - is_best=True, - strategy=strategy - ) - - # some debug if strategy.is_main_worker(): print('\n--------------------------------------------------------') print('DEBUG: training results:\n') print('TIMER: first epoch time:', first_ep_t, ' s') - print('TIMER: last epoch time:', time.time()-lt, ' s') - print('TIMER: average epoch time:', (time.time()-et)/args.epochs, ' s') - print('TIMER: total epoch time:', time.time()-et, ' s') + print('TIMER: last epoch time:', timer()-lt, ' s') + print('TIMER: average epoch time:', (timer()-et)/args.epochs, ' s') + 
print('TIMER: total epoch time:', timer()-et, ' s') if epoch > 1: print('TIMER: total epoch-1 time:', - time.time()-et-first_ep_t, ' s') + timer()-et-first_ep_t, ' s') print('TIMER: average epoch-1 time:', - (time.time()-et-first_ep_t)/(args.epochs-1), ' s') + (timer()-et-first_ep_t)/(args.epochs-1), ' s') print('DEBUG: last accuracy:', acc_test, '%') - print('DEBUG: memory req:', - int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') \ - if args.cuda else 'DEBUG: memory req: - MB' - print('DEBUG: memory summary:\n\n', - torch.cuda.memory_summary(0)) if args.cuda else '' + if use_cuda: + print('DEBUG: memory req:', + int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') + print('DEBUG: memory summary:\n\n', + torch.cuda.memory_summary(0)) - if strategy.is_main_worker(): - print(f'TIMER: final time: {time.time()-st} s\n') + print(f'TIMER: final time: {timer()-st} s\n') + time.sleep(1) print(f" - TRAINING FINISHED") - strategy.clean_up() + + # Clean-up + if is_distributed: + strategy.clean_up() sys.exit() From 7d4f4863c78c5dbf2a84abe6e6e5eb0b30ec60f4 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 27 Apr 2024 17:11:19 +0200 Subject: [PATCH 123/171] REMOVE imagenet tutorial --- .../torch-tutorial-2-imagenet/README.md | 47 -- .../torch-tutorial-2-imagenet/config.yaml | 25 - .../torch-tutorial-2-imagenet/ddp_slurm.sh | 66 --- .../deepspeed_slurm.sh | 74 --- .../torch-tutorial-2-imagenet/hvd_slurm.sh | 60 --- .../torch-tutorial-2-imagenet/runall.sh | 6 - .../torch-tutorial-2-imagenet/scaling-test.sh | 11 - .../torch-tutorial-2-imagenet/train.py | 499 ------------------ 8 files changed, 788 deletions(-) delete mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/README.md delete mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml delete mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh delete mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh delete mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh delete mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh delete mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh delete mode 100644 tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/README.md b/tutorials/distributed-ml/torch-tutorial-2-imagenet/README.md deleted file mode 100644 index 780eb278..00000000 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# Tutorial: distributed strategies for PyTorch model trained on MNIST dataset - -In this tutorial we show how to use torch `DistributedDataParallel` (DDP), Horovod and -DeepSpeed from the same client code. -Note that the environment is tested on the HDFML system at JSC. For other systems, -the module versions might need change accordingly. - -## Setup - -First, from the root of this repository, build the environment containing -pytorch, horovod and deepspeed. You can *try* with: - -```bash -# Creates a Python venv called envAI_hdfml -make torch-gpu-jsc -``` - -The Imagenet dataset is assumed to be already downloaded to some location. 
- -## Distributed training - -Each distributed strategy has its own SLURM job script, which -should be used to run it: - -If you want to distribute the code in `train.py` with **torch DDP**, run from terminal: - -```bash -sbatch ddp_slurm.sh -``` - -If you want to distribute the code in `train.py` with **DeepSpeed**, run from terminal: - -```bash -sbatch deepspeed_slurm.sh -``` - -If you want to distribute the code in `train.py` with **Horovod**, run from terminal: - -```bash -sbatch hvd_slurm.sh -``` - -You can run all of them with: - -```bash -bash runall.sh -``` diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml b/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml deleted file mode 100644 index 2473d346..00000000 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/config.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# I/O -data_dir: /p/project/intertwin/datasets/Imagenet_sub/ImageNet_uncompressed/train/ #/p/project/intertwin/datasets/ImageNet_uncompressed/train -restart_int: 10 -verbose: True - -# Model -batch_size: 64 -epochs: 3 -lr: 0.001 -momentum: 0.5 -shuff: False -num_classes: 1000 - -# Debugging -testrun: False -nseed: 10 -log_int: 10 - -# Distributed ML -backend: nccl -nworker: 4 # num workers dataloader -prefetch: 2 -no_cuda: False - - diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh deleted file mode 100644 index 4e9749c2..00000000 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/ddp_slurm.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_DDP_tutorial-1 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job-ddp.out -#SBATCH --error=job-ddp.err -#SBATCH --time=00:30:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=1 -#SBATCH --cpus-per-task=32 -#SBATCH --gpus-per-node=4 -#SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source ../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set comm -export CUDA_VISIBLE_DEVICES="0,1,2,3" -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi - -# launch training -TRAINING_CMD="train.py -s ddp -c config.yaml" - -srun --cpu-bind=none bash -c "torchrun \ - --log_dir='logs' \ - --nnodes=$SLURM_NNODES \ - --nproc_per_node=$SLURM_GPUS_PER_NODE \ - --rdzv_id=$SLURM_JOB_ID \ - --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ - --rdzv_backend=c10d \ - --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ - $TRAINING_CMD" - diff --git 
a/tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh deleted file mode 100644 index 8f1c2d2d..00000000 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/deepspeed_slurm.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_DeepSpeed_tutorial-1 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job-ds.out -#SBATCH --error=job-ds.err -#SBATCH --time=00:30:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=4 -#SBATCH --cpus-per-task=4 -#SBATCH --gpus-per-node=4 -#SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source ../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set env vars -export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi -export CUDA_VISIBLE_DEVICES="0,1,2,3" - -# launch training -MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i -export MASTER_ADDR -export MASTER_PORT=29500 - -TRAINING_CMD="train.py -s deepspeed -c config.yaml" - -# Run without launcher: set --ntasks-per-node=NUM_GPUS -srun --cpu-bind=none python -u $TRAINING_CMD --deepspeed - -# # Run with deepspeed launcher: set --ntasks-per-node=1 -# # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables -# export NCCL_IB_DISABLE=1 -# export NCCL_SOCKET_IFNAME=eth0 -# nodelist=$(scontrol show hostname $SLURM_NODELIST) -# echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile -# # Requires passwordless SSH access among compute node -# srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed -# rm .hostfile - diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh deleted file mode 100644 index 69b9d51e..00000000 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/hvd_slurm.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -# general configuration of the job -#SBATCH --job-name=Torch_HVD_tutorial-1 -#SBATCH --account=intertwin -#SBATCH --mail-user= -#SBATCH --mail-type=ALL -#SBATCH --output=job-hvd.out -#SBATCH --error=job-hvd.err -#SBATCH --time=00:30:00 - -# configure node and process count on the CM -#SBATCH --partition=batch -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=4 -#SBATCH --cpus-per-task=8 -#SBATCH --gpus-per-node=4 -#SBATCH --exclusive - -# gres options have to be disabled for deepv -#SBATCH --gres=gpu:4 - -# set modules -ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py - -# set env -source 
../../../envAI_hdfml/bin/activate - -# job info -debug=false -echo "DEBUG: TIME: $(date)" -echo "DEBUG: EXECUTE: $EXEC" -echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" -echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" -echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" -echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" -echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" -echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" -echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" -echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" -if [ "$debug" = true ] ; then - export NCCL_DEBUG=INFO -fi -echo - -# set vars -# export NCCL_DEBUG=INFO -export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} -export OMP_NUM_THREADS=1 -if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then - export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -fi -export CUDA_VISIBLE_DEVICES="0,1,2,3" - -# launch training -TRAINING_CMD="train.py -s horovod -c config.yaml" - -srun --cpu-bind=none python -u $TRAINING_CMD - diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh deleted file mode 100644 index 21c02a22..00000000 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/runall.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -# Run all versions of distributed ML version -rm *checkpoint.pth.tar *.out *.err *.csv -echo "Torch DDP training: $(sbatch ddp_slurm.sh)" -echo "DeepSpeed training: $(sbatch deepspeed_slurm.sh)" -echo "Horovod training: $(sbatch hvd_slurm.sh)" \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh b/tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh deleted file mode 100644 index 275f7fb7..00000000 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/scaling-test.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -rm *checkpoint.pth.tar *.out *.err *.csv - -timeout="01:01:00" -for N in 1 2 4 8 -do - sbatch --job-name="DDP-imagenet-n$N" --nodes=$N --output="job-ddp-n$N.out" --error="job-ddp-n$N.err" --time=$timeout ddp_slurm.sh - sbatch --job-name="DS-imagenet-n$N" --nodes=$N --output="job-ds-n$N.out" --error="job-ds-n$N.err" --time=$timeout deepspeed_slurm.sh - sbatch --job-name="HVD-imagenet-n$N" --nodes=$N --output="job-hvd-n$N.out" --error="job-hvd-n$N.err" --time=$timeout hvd_slurm.sh -done \ No newline at end of file diff --git a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py b/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py deleted file mode 100644 index 6bd71214..00000000 --- a/tutorials/distributed-ml/torch-tutorial-2-imagenet/train.py +++ /dev/null @@ -1,499 +0,0 @@ -""" -Show how to use DDP, Horovod and DeepSpeed strategies interchangeably -with a large neural network trained on Imagenet dataset, showing how -to use checkpoints. 
-""" -import os -import argparse -import sys -import time -import numpy as np -import random - -import torch -from torch import nn -import torch.distributed as dist -import torch.nn.functional as F -import torchvision -from torchvision import transforms -from torch.utils.data import DataLoader, DistributedSampler - -import deepspeed - -from itwinai.torch.distributed import ( - TorchDistributedStrategy, - DDPDistributedStrategy, - HVDDistributedStrategy, - DSDistributedStrategy, -) -from itwinai.parser import ArgumentParser as ItAIArgumentParser -from itwinai.loggers import EpochTimeTracker - - -def parse_args() -> argparse.Namespace: - """ - Parse CLI args, which can also be loaded from a configuration file - using the --config flag: - - >>> train.py --strategy ddp --config config.yaml - """ - parser = ItAIArgumentParser(description='PyTorch MNIST Example') - - # Distributed ML strategy - parser.add_argument( - "--strategy", "-s", type=str, - choices=['ddp', 'horovod', 'deepspeed'], - default='ddp' - ) - - # IO parsers - parser.add_argument('--data-dir', default='./', - help=('location of the training dataset in the local ' - 'filesystem')) - parser.add_argument('--restart-int', type=int, default=10, - help='restart interval per epoch (default: 10)') - parser.add_argument('--verbose', - action=argparse.BooleanOptionalAction, - help='Print parsed arguments') - - # model parsers - parser.add_argument('--batch-size', type=int, default=64, - help='input batch size for training (default: 64)') - parser.add_argument('--epochs', type=int, default=10, - help='number of epochs to train (default: 10)') - parser.add_argument('--lr', type=float, default=0.01, - help='learning rate (default: 0.01)') - parser.add_argument('--momentum', type=float, default=0.5, - help='momentum in SGD optimizer (default: 0.5)') - parser.add_argument('--shuff', action='store_true', default=False, - help='shuffle dataset (default: False)') - parser.add_argument('--num-classes', type=int, default=1000, - help='number of classes in dataset') - - # debug parsers - parser.add_argument('--testrun', action='store_true', default=False, - help='do a test run with seed (default: False)') - parser.add_argument('--nseed', type=int, default=0, - help='seed integer for reproducibility (default: 0)') - parser.add_argument('--log-int', type=int, default=10, - help='log interval per training') - - # parallel parsers - parser.add_argument('--backend', type=str, default='nccl', - help='backend for parrallelisation (default: nccl)') - parser.add_argument('--nworker', type=int, default=0, - help=('number of workers in DataLoader (default: 0 -' - ' only main)')) - parser.add_argument('--prefetch', type=int, default=2, - help='prefetch data in DataLoader (default: 2)') - parser.add_argument('--no-cuda', action='store_true', default=False, - help='disables GPGPUs') - parser.add_argument('--local_rank', type=int, default=-1, - help='local rank passed from distributed launcher') - - # DeepSpeed - parser = deepspeed.add_config_arguments(parser) - args = parser.parse_args() - - if args.verbose: - args_list = [f"{key}: {val}" for key, val in args.items()] - print("PARSED ARGS:\n", '\n'.join(args_list)) - - return args - - -def train( - model, device, train_loader, optimizer, epoch, - strategy: TorchDistributedStrategy, args -): - """ - Training function, representing an epoch. 
- """ - model.train() - t_list = [] - loss_acc = 0 - gwsize = strategy.dist_gwsize() - if strategy.is_main_worker(): - print("\n") - for batch_idx, (data, target) in enumerate(train_loader): - t = time.perf_counter() - data, target = data.to(device), target.to(device) - optimizer.zero_grad() - output = model(data) - loss = F.nll_loss(output, target) - loss.backward() - optimizer.step() - if batch_idx % args.log_int == 0 and strategy.is_main_worker(): - print( - f'Train epoch: {epoch} ' - f'[{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' - f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\t' - f'Loss: {loss.item():.6f}') - t_list.append(time.perf_counter() - t) - loss_acc += loss.item() - if strategy.is_main_worker(): - print('TIMER: train time', sum(t_list) / len(t_list), 's') - return loss_acc - - -def test(model, device, test_loader, strategy: TorchDistributedStrategy): - """ - Model validation. - """ - model.eval() - test_loss = 0 - correct = 0 - gwsize = strategy.dist_gwsize() - with torch.no_grad(): - for data, target in test_loader: - data, target = data.to(device), target.to(device) - output = model(data) - # sum up batch loss - test_loss += F.nll_loss(output, target, reduction="sum").item() - # get the index of the max log-probability - pred = output.argmax(dim=1, keepdim=True) - correct += pred.eq(target.view_as(pred)).sum().item() - test_loss /= len(test_loader.dataset) - if strategy.is_main_worker(): - print( - f'Test set: average loss: {test_loss:.4f}\t' - f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') - acc_test = 100.0 * correct * gwsize / len(test_loader.dataset) - return acc_test - - -def save_state( - epoch, distrib_model, loss_acc, optimizer, - res_name, is_best, strategy: TorchDistributedStrategy -): - """ - Save training state. - """ - grank = strategy.dist_grank() - rt = time.time() - # find if is_best happened in any worker - if torch.cuda.is_available(): - is_best_m = strategy.par_allgather_obj(is_best) - - if torch.cuda.is_available(): - if any(is_best_m): - # find which rank is_best happened - select first rank if multiple - is_best_rank = np.where(np.array(is_best_m))[0][0] - - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - # write on worker with is_best - if grank == is_best_rank: - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} ' - f'in {time.time()-rt} s') - else: - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_acc': loss_acc, - 'optimizer': optimizer.state_dict()} - - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} in ' - f'{time.time()-rt} s') - - -def seed_worker(worker_id): - """ - Seed dataloader worker. 
- """ - worker_seed = torch.initial_seed() % 2**32 - np.random.seed(worker_seed) - random.seed(worker_seed) - - -if __name__ == "__main__": - - args = parse_args() - - # Instantiate Strategy - if args.strategy == 'ddp': - if (not torch.cuda.is_available() - or not torch.cuda.device_count() > 1): - raise RuntimeError('Resources unavailable') - - strategy = DDPDistributedStrategy(backend=args.backend) - elif args.strategy == 'horovod': - strategy = HVDDistributedStrategy() - elif args.strategy == 'deepspeed': - strategy = DSDistributedStrategy(backend=args.backend) - else: - raise NotImplementedError( - f"Strategy {args.strategy} is not recognized/implemented.") - strategy.init() - - # check CUDA availability - args.cuda = not args.no_cuda and torch.cuda.is_available() - - # limit # of CPU threads to be used per worker - torch.set_num_threads(1) - - # get directory - program_dir = os.getcwd() - - # start the time.time for profiling - st = time.time() - - # deterministic testrun - if args.testrun: - torch.manual_seed(args.nseed) - g = torch.Generator() - g.manual_seed(args.nseed) - - # get job rank info - rank==0 master gpu - if torch.cuda.is_available(): - # local world size - per node - lwsize = strategy.dist_lwsize() if args.cuda else 0 - gwsize = strategy.dist_gwsize() # global world size - per run - grank = strategy.dist_grank() # global rank - assign per run - lrank = strategy.dist_lrank() # local rank - assign per node - else: - gwsize = 1 - grank = 0 - - # some debug - if strategy.is_main_worker(): - print('TIMER: initialise:', time.time()-st, 's') - - # move the model on the GPU assigned to the current process - device = torch.device( - strategy.dist_device() if args.cuda and torch.cuda.is_available() - else 'cpu') - if args.cuda: - torch.cuda.set_device(lrank) - # deterministic testrun - if args.testrun: - torch.cuda.manual_seed(args.nseed) - - # dataset - # Initialize transformations for data augmentation - transform = transforms.Compose([ - transforms.Resize(256), - transforms.RandomHorizontalFlip(), - transforms.RandomVerticalFlip(), - transforms.RandomRotation(degrees=45), - transforms.ColorJitter( - brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) - ]) - - # Load the ImageNet Object Localization Challenge dataset - train_dataset = torchvision.datasets.ImageFolder( - root=args.data_dir, - transform=transform - ) - # test_dataset = ... 
- - # restricts data loading to a subset of the dataset exclusive to the - # current process - args.shuff = args.shuff and not args.testrun - if torch.cuda.is_available(): - train_sampler = DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, shuffle=args.shuff) - # test_sampler = DistributedSampler( - # test_dataset, num_replicas=gwsize, rank=grank, - # shuffle=args.shuff) - # distribute dataset to workers - # persistent workers is not possible for nworker=0 - pers_w = True if args.nworker > 1 else False - - # deterministic testrun - the same dataset each run - kwargs = {'worker_init_fn': seed_worker, - 'generator': g} if args.testrun else {} - - if torch.cuda.is_available(): - train_loader = DataLoader( - train_dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=pers_w, prefetch_factor=args.prefetch, **kwargs - ) - # test_loader = DataLoader( - # test_dataset, batch_size=args.batch_size, - # sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - # persistent_workers=pers_w, prefetch_factor=args.prefetch, - # **kwargs - # ) - else: - train_loader = DataLoader( - train_dataset, batch_size=args.batch_size) - # test_loader = DataLoader( - # test_dataset, batch_size=args.batch_size) - - if strategy.is_main_worker(): - print('TIMER: read and concat data:', time.time()-st, 's') - - # create CNN model: resnet 50, resnet101, resnet152 - model = torchvision.models.resnet152() - model.fc = nn.Linear(2048, args.num_classes) - - # optimizer - optimizer = torch.optim.SGD( - model.parameters(), lr=args.lr, momentum=args.momentum) - - deepspeed_config = dict(train_micro_batch_size_per_gpu=args.batch_size) - # 'config_params' key is ignored if strategy != DSDistributedStrategy - distrib_model, optimizer, _ = strategy.distributed( - model, optimizer, lr_scheduler=None, config_params=deepspeed_config - ) - - # resume state - start_epoch = 1 - best_acc = np.Inf - nnod = os.environ.get('SLURM_NNODES', 'unk') - res_name = f'{args.strategy}-{nnod}N-checkpoint.pth.tar' - if os.path.isfile(res_name): - try: - if torch.cuda.is_available(): - dist.barrier() - # Map model to be loaded to specified single gpu. 
- loc = {'cuda:%d' % 0: 'cuda:%d' % lrank} if args.cuda else { - 'cpu:%d' % 0: 'cpu:%d' % lrank} - checkpoint = torch.load( - program_dir+'/'+res_name, map_location=loc) - else: - checkpoint = torch.load(program_dir+'/'+res_name) - start_epoch = checkpoint['epoch'] - best_acc = checkpoint['best_acc'] - distrib_model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - if torch.cuda.is_available(): - if strategy.is_main_worker(): - print(f'WARNING: restarting from {start_epoch} epoch') - else: - print(f'WARNING: restarting from {start_epoch} epoch') - except Exception: - if torch.cuda.is_available(): - if strategy.is_main_worker(): - print('WARNING: restart file cannot be loaded, ' - 'restarting!') - else: - print('WARNING: restart file cannot be loaded, restarting!') - - if start_epoch > args.epochs: - if torch.cuda.is_available(): - if strategy.is_main_worker(): - print('WARNING: given epochs are less than the one in the ' - 'restart file!\n' - 'WARNING: SYS.EXIT is issued') - - strategy.clean_up() - sys.exit() - else: - print('WARNING: given epochs are less than the one in ' - 'the restart file!\n' - 'WARNING: SYS.EXIT is issued') - sys.exit() - - # start trainin/testing loop - if strategy.is_main_worker(): - print('TIMER: broadcast:', time.time()-st, 's') - print('\nDEBUG: start training') - print('--------------------------------------------------------') - epoch_time_tracker = EpochTimeTracker(series_name=args.strategy) - - et = time.time() - for epoch in range(start_epoch, args.epochs + 1): - lt = time.time() - # training - loss_acc = train( - model=distrib_model, - device=device, - train_loader=train_loader, - optimizer=optimizer, - epoch=epoch, - strategy=strategy, - args=args - ) - - # # testing - # acc_test = test( - # model=distrib_model, - # device=device, - # test_loader=test_loader, - # strategy=strategy - # ) - - # save first epoch timer - if epoch == start_epoch: - first_ep_t = time.time()-lt - - # final epoch - if epoch + 1 == args.epochs: - train_loader.last_epoch = True - # test_loader.last_epoch = True - - if strategy.is_main_worker(): - print('TIMER: epoch time:', time.time()-lt, 's') - epoch_time_tracker.add_epoch_time(epoch-1, time.time()-lt) - # print('DEBUG: accuracy:', acc_test, '%') - - # save state if found a better state - is_best = loss_acc < best_acc - if epoch % args.restart_int == 0: - save_state( - epoch=epoch, - distrib_model=distrib_model, - loss_acc=loss_acc, - optimizer=optimizer, - res_name=res_name, - is_best=is_best, - strategy=strategy - ) - # reset best_acc - best_acc = min(loss_acc, best_acc) - - # finalise - # save final state - save_state( - epoch=epoch, - distrib_model=distrib_model, - loss_acc=loss_acc, - optimizer=optimizer, - res_name=res_name, - is_best=True, - strategy=strategy - ) - - # some debug - if strategy.is_main_worker(): - print('\n--------------------------------------------------------') - print('DEBUG: training results:\n') - print('TIMER: first epoch time:', first_ep_t, ' s') - print('TIMER: last epoch time:', time.time()-lt, ' s') - print('TIMER: average epoch time:', (time.time()-et)/args.epochs, ' s') - print('TIMER: total epoch time:', time.time()-et, ' s') - if epoch > 1: - print('TIMER: total epoch-1 time:', - time.time()-et-first_ep_t, ' s') - print('TIMER: average epoch-1 time:', - (time.time()-et-first_ep_t)/(args.epochs-1), ' s') - # print('DEBUG: last accuracy:', acc_test, '%') - print('DEBUG: memory req:', - int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') \ - if 
args.cuda else 'DEBUG: memory req: - MB' - print('DEBUG: memory summary:\n\n', - torch.cuda.memory_summary(0)) if args.cuda else '' - - if strategy.is_main_worker(): - print(f'TIMER: final time: {time.time()-st} s\n') - nnod = os.environ.get('SLURM_NNODES', 'unk') - epoch_time_tracker.save( - csv_file=f"epochtime_{args.strategy}_{nnod}N.csv") - - print(f" - TRAINING FINISHED") - strategy.clean_up() - sys.exit() From 4dc62be45609334aabafa48332bc41db18055344 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 27 Apr 2024 19:43:25 +0200 Subject: [PATCH 124/171] ADD NonDistributedStrategy and create_dataloader method --- src/itwinai/torch/distributed.py | 437 ++++++++++++++++-- src/itwinai/torch/types.py | 4 + .../torch-scaling-test/itwinai_trainer.py | 28 +- .../torch-tutorial-0-basics/train.py | 32 +- .../torch-tutorial-1-mnist/config.yaml | 1 - .../torch-tutorial-1-mnist/train.py | 158 +++---- 6 files changed, 490 insertions(+), 170 deletions(-) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index 34174346..8947fd42 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -1,5 +1,5 @@ import abc -from typing import Any, List, Optional, Tuple +from typing import Any, List, Optional, Tuple, Union, Iterable from pathlib import Path import json import os @@ -12,18 +12,47 @@ import torch.optim as optim from torch.optim.lr_scheduler import _LRScheduler as LRScheduler from torch.optim.optimizer import Optimizer +from torch.utils.data import Dataset, Sampler, DistributedSampler, DataLoader +from torch.utils.data.dataloader import T_co, _worker_init_fn_t, _collate_fn_t from ..distributed import DistributedStrategy +from .types import UninitializedStrategyError + + +def distributed_resources_available() -> bool: + """Check if the current execution environment + has (enough) GPUs available to allow for distributed ML. + + Returns: + bool: env can support distributed ML. + """ + if torch.cuda.is_available() and torch.cuda.device_count() > 1: + return True + return False class TorchDistributedStrategy(DistributedStrategy): """Abstract class to define the distributed backend methods for PyTorch models. """ + is_distributed: bool = True + _initialized: bool = False + + @property + def is_main_worker(self) -> bool: + """Checks if local worker has global rank equal to zero. + + Returns: + bool: True if main worker. + """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") + return self.global_rank() == 0 + @abc.abstractmethod def init(self) -> None: """Initializes the chosen distributed backend""" - # @abc.abstractmethod # def distributed_engine( # self, model: nn.Module, optimizer: Optimizer, @@ -39,7 +68,7 @@ def distributed( """Setup model, optimizer and scheduler for distributed.""" @abc.abstractmethod - def dist_gwsize(self) -> int: + def global_world_size(self) -> int: """Returns the total number of processes (global world size). Returns: @@ -47,7 +76,7 @@ def dist_gwsize(self) -> int: """ @abc.abstractmethod - def dist_lwsize(self) -> int: + def local_world_size(self) -> int: """Returns the number of local workers available on a node (local world size). Usually it is equal to the number of available GPUs. @@ -57,7 +86,7 @@ def dist_lwsize(self) -> int: """ @abc.abstractmethod - def dist_grank(self) -> int: + def global_rank(self) -> int: """Returns the global rank of the current process. Rank ranges from 0 to world_size. 
@@ -66,28 +95,182 @@ def dist_grank(self) -> int: """ @abc.abstractmethod - def dist_lrank(self) -> int: + def local_rank(self) -> int: """Returns the local rank of the current process. Returns: int: local rank. """ - def is_main_worker(self) -> bool: - """Checks if local worker has global rank equal to zero. - - Returns: - bool: True if main worker. - """ - return self.dist_grank() == 0 - - def dist_device(self) -> str: + def device(self) -> str: """Device used by local worker. Returns: str: torch device in the form 'cuda:N'. """ - return f"cuda:{self.dist_lrank()}" + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") + return f"cuda:{self.local_rank()}" + + def create_dataloader( + self, dataset: Dataset[T_co], batch_size: Optional[int] = 1, + shuffle: Optional[bool] = None, + sampler: Union[Sampler, Iterable, None] = None, + batch_sampler: Union[Sampler[List], Iterable[List], None] = None, + num_workers: int = 0, collate_fn: Optional[_collate_fn_t] = None, + pin_memory: bool = False, drop_last: bool = False, + timeout: float = 0, + worker_init_fn: Optional[_worker_init_fn_t] = None, + multiprocessing_context=None, generator=None, + *, prefetch_factor: Optional[int] = None, + persistent_workers: bool = False, + pin_memory_device: str = "" + ): + """Create a distributed DataLoader by using ``DistributedSampler`` as + random sampler. + + Args: + dataset (Dataset): dataset from which to load the data. + batch_size (int, optional): how many samples per batch to load + (default: ``1``). + shuffle (bool, optional): set to ``True`` to have the data + reshuffled at every epoch (default: ``False``). + sampler (Sampler or Iterable, optional): defines the strategy to + draw + samples from the dataset. Can be any ``Iterable`` with + ``__len__`` + implemented. If specified, :attr:`shuffle` must not be + specified. + batch_sampler (Sampler or Iterable, optional): like + :attr:`sampler`, but + returns a batch of indices at a time. Mutually exclusive with + :attr:`batch_size`, :attr:`shuffle`, :attr:`sampler`, + and :attr:`drop_last`. + num_workers (int, optional): how many subprocesses to use for data + loading. ``0`` means that the data will be loaded in the main + process. (default: ``0``) + collate_fn (Callable, optional): merges a list of samples to form a + mini-batch of Tensor(s). Used when using batched loading from + a map-style dataset. + pin_memory (bool, optional): If ``True``, the data loader will + copy Tensors + into device/CUDA pinned memory before returning them. If your + data elements + are a custom type, or your :attr:`collate_fn` returns a batch + that is a custom type, + see the example below. + drop_last (bool, optional): set to ``True`` to drop the last + incomplete batch, + if the dataset size is not divisible by the batch size. + If ``False`` and + the size of dataset is not divisible by the batch size, then + the last batch + will be smaller. (default: ``False``) + timeout (numeric, optional): if positive, the timeout value for + collecting a batch + from workers. Should always be non-negative. (default: ``0``) + worker_init_fn (Callable, optional): If not ``None``, + this will be called on each + worker subprocess with the worker id (an int in + ``[0, num_workers - 1]``) as + input, after seeding and before data loading. 
+ (default: ``None``) + multiprocessing_context (str or + multiprocessing.context.BaseContext, optional): If + ``None``, the default `multiprocessing context`_ of + your operating system will + be used. (default: ``None``) + generator (torch.Generator, optional): If not ``None``, + this RNG will be used + by RandomSampler to generate random indexes and + multiprocessing to generate + ``base_seed`` for workers. (default: ``None``) + prefetch_factor (int, optional, keyword-only arg): Number of + batches loaded + in advance by each worker. ``2`` means there will be a total of + 2 * num_workers batches prefetched across all workers. + (default value depends + on the set value for num_workers. If value of num_workers=0 + default is ``None``. + Otherwise, if value of ``num_workers > 0`` default is ``2``). + persistent_workers (bool, optional): If ``True``, the data loader + will not shut down + the worker processes after a dataset has been consumed once. + This allows to + maintain the workers `Dataset` instances alive. + (default: ``False``) + pin_memory_device (str, optional): the device to + :attr:`pin_memory` to if ``pin_memory`` is ``True``. + + + .. warning:: If the ``spawn`` start method is used, + :attr:`worker_init_fn` + cannot be an unpicklable object, e.g., a lambda function. + See :ref:`multiprocessing-best-practices` on more + details related to multiprocessing in PyTorch. + + .. warning:: ``len(dataloader)`` heuristic is based on the length of + the sampler used. + When :attr:`dataset` is an + :class:`~torch.utils.data.IterableDataset`, + it instead returns an estimate based on + ``len(dataset) / batch_size``, with proper + rounding depending on :attr:`drop_last`, regardless + of multi-process loading + configurations. This represents the best guess PyTorch + can make because PyTorch + trusts user :attr:`dataset` code in correctly handling + multi-process + loading to avoid duplicate data. + + However, if sharding results in multiple workers having + incomplete last batches, + this estimate can still be inaccurate, because (1) an + otherwise complete batch can + be broken into multiple ones and (2) more than one batch + worth of samples can be + dropped when :attr:`drop_last` is set. Unfortunately, + PyTorch can not detect such cases in general. + + See `Dataset Types`_ for more details on these two + types of datasets and how + :class:`~torch.utils.data.IterableDataset` interacts with + `Multi-process data loading`_. + + .. warning:: See :ref:`reproducibility`, and + :ref:`dataloader-workers-random-seed`, and + :ref:`data-loading-randomness` notes for random + seed related questions. + + .. _multiprocessing context: + https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods + """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") + + if self.is_distributed: + if sampler is not None: + raise RuntimeError( + "User-provided sampler is not supported." 
+ ) + sampler = DistributedSampler( + dataset, num_replicas=self.global_world_size(), + rank=self.global_rank(), + shuffle=shuffle + ) + # shuffle and batch_sampler must be unset + return DataLoader( + dataset=dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, collate_fn=collate_fn, + pin_memory=pin_memory, drop_last=drop_last, timeout=timeout, + worker_init_fn=worker_init_fn, + multiprocessing_context=multiprocessing_context, + generator=generator, prefetch_factor=prefetch_factor, + persistent_workers=persistent_workers, + pin_memory_device=pin_memory_device + ) @abc.abstractmethod def clean_up(self) -> None: @@ -121,12 +304,17 @@ def __init__(self, backend: str) -> None: def init(self) -> None: """Initializes the distributed process group and the distributed package. + + Raises: + RuntimeError: when there are not (enough) GPUs available. """ - if torch.cuda.is_available() and torch.cuda.device_count() > 1: - dist.init_process_group(backend=self.backend) - else: - print("WARNING: trying to run distributed on insufficient" - " resources. Skipping distributed process group setup.") + if not distributed_resources_available(): + raise RuntimeError( + "Trying to run distributed on insufficient resources.") + dist.init_process_group(backend=self.backend) + self._initialized = True + + torch.cuda.device(self.local_rank()) # def distributed_engine( # self, model: nn.Module, optimizer: Optimizer, @@ -158,55 +346,73 @@ def distributed( **kwargs ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: """Setup model, optimizer and scheduler for distributed.""" + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") if torch.cuda.is_available(): # device = self.dist_lrank() - model = model.to(self.dist_device()) + model = model.to(self.device()) dist_model = torch.nn.parallel.DistributedDataParallel( model, - device_ids=[self.dist_device()], - output_device=self.dist_device() + device_ids=[self.device()], + output_device=self.device() ) else: dist_model = model return dist_model, optimizer, lr_scheduler - def dist_gwsize(self) -> int: + def global_world_size(self) -> int: """Returns the total number of processes (global world size). Returns: int: global world size. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") return dist.get_world_size() - def dist_lwsize(self) -> int: + def local_world_size(self) -> int: """Returns the local number of workers available per node, which is usually the number of GPUs available. Returns: int: local world size. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") return torch.cuda.device_count() - def dist_grank(self) -> int: + def global_rank(self) -> int: """Returns the global rank of the current process, where rank ranges from 0 to world_size. Returns: int: global rank. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") return dist.get_rank() - def dist_lrank(self) -> int: + def local_rank(self) -> int: """Returns the local rank of the current process. Returns: int: local rank. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. 
Use the init method.") return dist.get_rank() % torch.cuda.device_count() def clean_up(self) -> None: """Destroys the current process group.""" + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") if torch.cuda.is_available(): dist.barrier() dist.destroy_process_group() @@ -221,7 +427,10 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: Returns: List[Any]: List of gathered objects. """ - res = [None] * self.dist_gwsize() + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") + res = [None] * self.global_world_size() dist.all_gather_object(res, obj) return res @@ -256,7 +465,14 @@ def _load_config(self, ds_config) -> None: def init(self) -> None: """Initializes the distributed process group and the distributed package. + + Raises: + RuntimeError: when there are not (enough) GPUs available. """ + if not distributed_resources_available(): + raise RuntimeError( + "Trying to run distributed on insufficient resources.") + # https://github.com/Lightning-AI/pytorch-lightning/issues/13567 ompi_lrank = os.environ.get('OMPI_COMM_WORLD_LOCAL_RANK') os.environ['OMPI_COMM_WORLD_LOCAL_RANK'] = os.environ.get( @@ -264,6 +480,9 @@ def init(self) -> None: # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization deepspeed.init_distributed(dist_backend=self.backend) + self._initialized = True + + torch.cuda.device(self.local_rank()) def distributed( self, model: nn.Module, optimizer: Optional[Optimizer] = None, @@ -272,6 +491,10 @@ def distributed( **init_kwargs ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: """Setup model, optimizer and scheduler for distributed.""" + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") + if init_kwargs.get("config"): self._load_config(init_kwargs.get("config")) # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization @@ -286,42 +509,57 @@ def distributed( ) return distrib_model, optimizer, lr_scheduler - def dist_gwsize(self) -> int: + def global_world_size(self) -> int: """Returns the total number of processes (global world size). Returns: int: global world size. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") return dist.get_world_size() - def dist_lwsize(self) -> int: + def local_world_size(self) -> int: """Returns the local number of workers available per node, which is usually the number of GPUs available. Returns: int: local world size. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") return torch.cuda.device_count() - def dist_grank(self) -> int: + def global_rank(self) -> int: """Returns the global rank of the current process, where rank ranges from 0 to world_size. Returns: int: global rank. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") return dist.get_rank() - def dist_lrank(self) -> int: + def local_rank(self) -> int: """Returns the local rank of the current process. Returns: int: local rank. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. 
Use the init method.") return dist.get_rank() % torch.cuda.device_count() def clean_up(self) -> None: """Destroys the current process group.""" + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") deepspeed.sys.exit() def par_allgather_obj(self, obj: Any) -> list[Any]: @@ -334,7 +572,10 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: Returns: List[Any]: List of gathered objects. """ - res = [None] * self.dist_gwsize() + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") + res = [None] * self.global_world_size() dist.all_gather_object(res, obj) return res @@ -343,9 +584,18 @@ class HVDDistributedStrategy(TorchDistributedStrategy): """Horovod distributed strategy class.""" def init(self) -> None: - """Initializes the Horovod distributed backend.""" + """Initializes the Horovod distributed backend. + + Raises: + RuntimeError: when there are not (enough) GPUs available. + """ + if not distributed_resources_available(): + raise RuntimeError( + "Trying to run distributed on insufficient resources.") hvd.init() - torch.cuda.set_device(hvd.local_rank()) + self._initialized = True + + torch.cuda.device(self.local_rank()) def distributed( self, model: nn.Module, optimizer: Optional[Optimizer] = None, @@ -353,8 +603,11 @@ def distributed( **optim_kwargs ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: """Setup model, optimizer and scheduler for distributed.""" + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") - model.to(self.dist_device()) + model.to(self.device()) # Scale learning rate # https://github.com/horovod/horovod/issues/1653#issuecomment-574764452 @@ -389,42 +642,57 @@ def _broadcast_params( hvd.broadcast_parameters(model.state_dict(), root_rank=0) hvd.broadcast_optimizer_state(optimizer, root_rank=-0) - def dist_gwsize(self) -> int: + def global_world_size(self) -> int: """Returns the total number of processes (global world size). Returns: int: global world size. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") return hvd.size() - def dist_lwsize(self) -> int: + def local_world_size(self) -> int: """Returns the local number of workers available per node, which is usually the number of GPUs available. Returns: int: local world size. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") return hvd.local_size() - def dist_grank(self) -> int: + def global_rank(self) -> int: """Returns the global rank of the current process, where rank ranges from 0 to world_size. Returns: int: global rank. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") return hvd.rank() - def dist_lrank(self) -> int: + def local_rank(self) -> int: """Returns the local rank of the current process. Returns: int: local rank. """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") return hvd.local_rank() def clean_up(self) -> None: """Shuts Horovod down.""" + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. 
Use the init method.") hvd.shutdown() def par_allgather_obj(self, obj: Any) -> list[Any]: @@ -437,9 +705,96 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: Returns: list: gathered list with size(#worker). """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") return hvd.allgather_object(obj) +class NonDistributedStrategy(TorchDistributedStrategy): + """Dummy class for non-distributed environments.""" + + is_distributed: bool = False + + def init(self) -> None: + """If CUDA is available set CUDA device, and do nothing more.""" + if torch.cuda.is_available(): + torch.cuda.device(self.local_rank()) + self._initialized = True + + def device(self) -> str: + """Device used by local worker. + + Returns: + str: cpu device if CUDA is not available. + """ + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") + if torch.cuda.is_available(): + return super().device() + return "cpu" + + def distributed( + self, model: nn.Module, optimizer: Optional[Optimizer] = None, + lr_scheduler: Optional[LRScheduler] = None, + **kwargs + ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: + """Do nothing and return model, optimizer and scheduler.""" + if not self._initialized: + raise UninitializedStrategyError( + "Strategy has not been initialized. Use the init method.") + if torch.cuda.is_available(): + model = model.cuda() + return model, optimizer, lr_scheduler + + def global_world_size(self) -> int: + """Returns the total number of processes (global world size). + + Returns: + int: global world size. + """ + return 1 + + def local_world_size(self) -> int: + """Returns the local number of workers available per node, + which is usually the number of GPUs available. + + Returns: + int: local world size. + """ + return 1 + + def global_rank(self) -> int: + """Returns the global rank of the current process, where + rank ranges from 0 to world_size. + + Returns: + int: global rank. + """ + return 0 + + def local_rank(self) -> int: + """Returns the local rank of the current process. + + Returns: + int: local rank. + """ + return 0 + + def clean_up(self) -> None: + """Do nothing.""" + + def par_allgather_obj(self, obj: Any) -> list[Any]: + """Raise error as this operation is not available. + + Args: + obj (Any): object in a worker. + """ + raise RuntimeError( + f"{self.__class__.__name__} does not support this operation." + ) + # class TorchDistributedStrategy_old(DistributedStrategy): # """Abstract class to define the distributed backend methods for # PyTorch models. 
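For reference, here is a minimal end-to-end usage sketch of the strategy API introduced above (illustrative only, not part of this patch). The toy dataset and model are placeholders; the calls assume the interface shown in the hunks above (init, create_dataloader, distributed, device, is_main_worker, is_distributed, clean_up) and an external launcher such as torchrun setting up the process-group environment variables.

    import torch
    from torch import nn
    from torch.utils.data import TensorDataset

    from itwinai.torch.distributed import DDPDistributedStrategy

    # The strategy must be initialized before any other call; otherwise the
    # guards above raise UninitializedStrategyError.
    strategy = DDPDistributedStrategy(backend='nccl')
    strategy.init()

    # Placeholder dataset and model.
    train_set = TensorDataset(torch.rand(100, 3), torch.rand(100, 4))
    train_loader = strategy.create_dataloader(
        train_set, batch_size=10, num_workers=1)

    model = nn.Linear(3, 4)
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
    loss_fn = nn.MSELoss()
    model, optimizer, _ = strategy.distributed(
        model, optimizer, lr_scheduler=None)

    for epoch in range(2):
        if strategy.is_distributed:
            # Inform the DistributedSampler installed by create_dataloader
            # that a new epoch started, so shuffling changes across epochs.
            train_loader.sampler.set_epoch(epoch)
        for x, y in train_loader:
            x, y = x.to(strategy.device()), y.to(strategy.device())
            optimizer.zero_grad()
            loss = loss_fn(model(x), y)
            loss.backward()
            optimizer.step()
        if strategy.is_main_worker:
            print(f"Loss [epoch={epoch}]: {loss.item()}")

    strategy.clean_up()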
diff --git a/src/itwinai/torch/types.py b/src/itwinai/torch/types.py index 614462ad..6b42c901 100644 --- a/src/itwinai/torch/types.py +++ b/src/itwinai/torch/types.py @@ -64,3 +64,7 @@ class TorchOptimizer(BaseEnum): """ SGD = 'SGD' ADAM = 'Adam' + + +class UninitializedStrategyError(Exception): + """Error raised when a strategy has not been initialized.""" diff --git a/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py b/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py index 8e81fdfc..d47e9c7b 100644 --- a/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py @@ -119,8 +119,8 @@ def train( model.train() t_list = [] loss_acc = 0 - gwsize = strategy.dist_gwsize() - if strategy.is_main_worker(): + gwsize = strategy.global_world_size() + if strategy.is_main_worker: print("\n") for batch_idx, (data, target) in enumerate(train_loader): t = timer() @@ -130,7 +130,7 @@ def train( loss = F.nll_loss(output, target) loss.backward() optimizer.step() - if (strategy.is_main_worker() and args.log_int > 0 + if (strategy.is_main_worker and args.log_int > 0 and batch_idx % args.log_int == 0): print( f'Train epoch: {epoch} ' @@ -139,7 +139,7 @@ def train( f'Loss: {loss.item():.6f}') t_list.append(timer() - t) loss_acc += loss.item() - if strategy.is_main_worker(): + if strategy.is_main_worker: print('TIMER: train time', sum(t_list) / len(t_list), 's') return loss_acc @@ -194,10 +194,10 @@ def main(): # Get job rank info - rank==0 master gpu if is_distributed: # local world size - per node - lwsize = strategy.dist_lwsize() # local world size - per run - gwsize = strategy.dist_gwsize() # global world size - per run - grank = strategy.dist_grank() # global rank - assign per run - lrank = strategy.dist_lrank() # local rank - assign per node + lwsize = strategy.local_world_size() # local world size - per run + gwsize = strategy.global_world_size() # global world size - per run + grank = strategy.global_rank() # global rank - assign per run + lrank = strategy.local_rank() # local rank - assign per node else: # Use a single worker (either on GPU or CPU) lwsize = 1 @@ -205,7 +205,7 @@ def main(): grank = 0 lrank = 0 - if strategy.is_main_worker(): + if strategy.is_main_worker: print('TIMER: initialise:', timer()-st, 's') print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) print('DEBUG: sys.version:', sys.version) @@ -224,7 +224,7 @@ def main(): # Encapsulate the model on the GPU assigned to the current process device = torch.device( - strategy.dist_device() if use_cuda + strategy.device() if use_cuda else 'cpu') if use_cuda: torch.cuda.set_device(lrank) @@ -266,7 +266,7 @@ def main(): ) # Start training loop - if strategy.is_main_worker(): + if strategy.is_main_worker: print('TIMER: broadcast:', timer()-st, 's') print('\nDEBUG: start training') print('--------------------------------------------------------') @@ -305,11 +305,11 @@ def main(): if epoch + 1 == args.epochs: train_loader.last_epoch = True - if strategy.is_main_worker(): + if strategy.is_main_worker: print('TIMER: epoch time:', timer()-lt, 's') epoch_time_tracker.add_epoch_time(epoch-1, timer()-lt) - if strategy.is_main_worker(): + if strategy.is_main_worker: print('\n--------------------------------------------------------') print('DEBUG: training results:\n') print('TIMER: first epoch time:', first_ep_t, ' s') @@ -330,7 +330,7 @@ def main(): print(f'TIMER: final time: {timer()-st} s\n') time.sleep(1) - print(f" - TRAINING FINISHED") + 
print(f" - TRAINING FINISHED") # Clean-up if is_distributed: diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/train.py b/tutorials/distributed-ml/torch-tutorial-0-basics/train.py index d48e5a3e..e9d96735 100644 --- a/tutorials/distributed-ml/torch-tutorial-0-basics/train.py +++ b/tutorials/distributed-ml/torch-tutorial-0-basics/train.py @@ -8,15 +8,17 @@ import torch from torch import nn -from torch.utils.data import DataLoader, Dataset, DistributedSampler +from torch.utils.data import Dataset import horovod.torch as hvd from itwinai.torch.distributed import ( + distributed_resources_available, TorchDistributedStrategy, DDPDistributedStrategy, HVDDistributedStrategy, DSDistributedStrategy, + NonDistributedStrategy ) @@ -80,18 +82,11 @@ def training_fn( # Data train_set = UniformRndDataset(x_size=3, y_size=4) # Distributed dataloader - train_loader = DataLoader( - train_set, batch_size=10, num_workers=1, - sampler=DistributedSampler( - train_set, - num_replicas=strategy.dist_gwsize(), - rank=strategy.dist_grank(), - shuffle=args.shuffle_dataloader - ) - ) + train_loader = strategy.create_dataloader( + train_set, batch_size=args.batch_size, num_workers=1) # Device allocated for this worker - device = strategy.dist_device() + device = strategy.device() for epoch in range(2): for (x, y) in train_loader: @@ -108,7 +103,7 @@ def training_fn( optim.step() - if strategy.is_main_worker(): + if strategy.is_main_worker: print(f"Loss [epoch={epoch}]: {loss.item()}") print(f"NNLoss [epoch={epoch}]: {loss.item()}") @@ -117,7 +112,7 @@ def training_fn( lr_sched.step() time.sleep(1) - print(f" - TRAINING FINISHED") + print(f" - TRAINING FINISHED") strategy.clean_up() return 123 @@ -127,11 +122,11 @@ def training_fn( args = parse_args() # Instantiate Strategy - if args.strategy == 'ddp': - if (not torch.cuda.is_available() - or not torch.cuda.device_count() > 1): - raise RuntimeError('Resources unavailable') - + if not distributed_resources_available(): + print("WARNING: falling back to non-distributed strategy.") + strategy = NonDistributedStrategy() + distribute_kwargs = {} + elif args.strategy == 'ddp': strategy = DDPDistributedStrategy(backend='nccl') distribute_kwargs = {} elif args.strategy == 'horovod': @@ -149,6 +144,5 @@ def training_fn( else: raise NotImplementedError( f"Strategy {args.strategy} is not recognized/implemented.") - # Launch distributed training training_fn(args, strategy, distribute_kwargs) diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml b/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml index 8067987d..c5ef5bf5 100644 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml +++ b/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml @@ -20,7 +20,6 @@ rnd_seed: 10 # Distributed ML backend: nccl # ignored when using Horovod -no_cuda: False # Horovod: ignored when NOT using Horovod fp16_allreduce: False diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py b/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py index bad6a3a5..9a7ef0c1 100644 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py +++ b/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py @@ -12,17 +12,19 @@ import torch.nn as nn import torch.nn.functional as F from torchvision import datasets, transforms -from torch.utils.data import DataLoader, DistributedSampler, Dataset +from torch.utils.data import Dataset import horovod.torch as hvd import deepspeed from itwinai.torch.distributed import ( + 
distributed_resources_available, TorchDistributedStrategy, DDPDistributedStrategy, HVDDistributedStrategy, DSDistributedStrategy, + NonDistributedStrategy ) from itwinai.parser import ArgumentParser as ItAIArgumentParser from itwinai.torch.reproducibility import ( @@ -60,7 +62,7 @@ def parse_params() -> argparse.Namespace: parser.add_argument('--download-only', action=argparse.BooleanOptionalAction, help='Download dataset and exit') - parser.add_argument('--dataset_replication', type=int, default=100, + parser.add_argument('--dataset-replication', type=int, default=100, help='concatenate MNIST to this factor (default: 100)') parser.add_argument('--shuff', action='store_true', default=False, help='shuffle dataset (default: False)') @@ -87,8 +89,6 @@ def parse_params() -> argparse.Namespace: # Distributed ML parser.add_argument('--backend', type=str, default='nccl', help='backend for parrallelisation (default: nccl)') - parser.add_argument('--no-cuda', action='store_true', default=False, - help='disables GPGPUs') parser.add_argument('--local_rank', type=int, default=-1, help='local rank passed from distributed launcher') @@ -145,8 +145,7 @@ def train( model.train() t_list = [] loss_acc = 0 - gwsize = strategy.dist_gwsize() - if strategy.is_main_worker(): + if strategy.is_main_worker: print("\n") for batch_idx, (data, target) in enumerate(train_loader): t = timer() @@ -156,16 +155,17 @@ def train( loss = F.nll_loss(output, target) loss.backward() optimizer.step() - if (strategy.is_main_worker() and args.log_int > 0 + if (strategy.is_main_worker and args.log_int > 0 and batch_idx % args.log_int == 0): + dl_size = len(train_loader.dataset)//strategy.global_world_size() print( f'Train epoch: {epoch} ' - f'[{batch_idx * len(data)}/{len(train_loader.dataset)/gwsize} ' + f'[{batch_idx * len(data)}/{dl_size} ' f'({100.0 * batch_idx / len(train_loader):.0f}%)]\t\t' f'Loss: {loss.item():.6f}') t_list.append(timer() - t) loss_acc += loss.item() - if strategy.is_main_worker(): + if strategy.is_main_worker: print('TIMER: train time', sum(t_list) / len(t_list), 's') return loss_acc @@ -177,7 +177,6 @@ def test(model, device, test_loader, strategy: TorchDistributedStrategy): model.eval() test_loss = 0 correct = 0 - gwsize = strategy.dist_gwsize() with torch.no_grad(): for data, target in test_loader: data, target = data.to(device), target.to(device) @@ -188,11 +187,15 @@ def test(model, device, test_loader, strategy: TorchDistributedStrategy): pred = output.argmax(dim=1, keepdim=True) correct += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) - if strategy.is_main_worker(): + if strategy.is_main_worker: + dl_size = len(test_loader.dataset)//strategy.global_world_size() print( f'Test set: average loss: {test_loss:.4f}\t' - f'accurate samples: {correct}/{len(test_loader.dataset)/gwsize}') - acc_test = 100.0 * correct * gwsize / len(test_loader.dataset) + f'accurate samples: {correct}/{dl_size}') + acc_test = ( + 100.0 * correct * strategy.global_world_size() + / len(test_loader.dataset) + ) return acc_test @@ -259,7 +262,11 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: sys.exit() # Instantiate Strategy - if args.strategy == 'ddp': + if not distributed_resources_available(): + print("WARNING: falling back to non-distributed strategy.") + strategy = NonDistributedStrategy() + distribute_kwargs = {} + elif args.strategy == 'ddp': if (not torch.cuda.is_available() or not torch.cuda.device_count() > 1): raise RuntimeError('Resources unavailable') 
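Note that the tutorials now gate strategy selection on distributed_resources_available(), imported from itwinai.torch.distributed. Its body is not visible in these hunks; judging from the GPU check it replaces inside the strategies' init() methods earlier in this patch, it presumably reduces to something like the following sketch (hypothetical reconstruction, included only to make the fallback logic easier to follow):

    import torch

    def distributed_resources_available() -> bool:
        # True when more than one GPU is available, i.e. when setting up
        # a distributed strategy makes sense.
        return torch.cuda.is_available() and torch.cuda.device_count() > 1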
@@ -284,13 +291,9 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: else: raise NotImplementedError( f"Strategy {args.strategy} is not recognized/implemented.") - strategy.init() - # Check resources availability - use_cuda = not args.no_cuda and torch.cuda.is_available() - is_distributed = False - if use_cuda and torch.cuda.device_count() > 0: - is_distributed = True + # Initialize strategy + strategy.init() # Start the timer for profiling st = timer() @@ -298,23 +301,10 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: # Set random seed for reproducibility torch_prng = set_seed(args.rnd_seed) - # Get job rank info - rank==0 master gpu - if is_distributed: - # local world size - per node - lwsize = strategy.dist_lwsize() # local world size - per run - gwsize = strategy.dist_gwsize() # global world size - per run - grank = strategy.dist_grank() # global rank - assign per run - lrank = strategy.dist_lrank() # local rank - assign per node - else: - # Use a single worker (either on GPU or CPU) - lwsize = 1 - gwsize = 1 - grank = 0 - lrank = 0 - - if strategy.is_main_worker(): + if strategy.is_main_worker: print('TIMER: initialise:', timer()-st, 's') - print('DEBUG: local ranks:', lwsize, '/ global ranks:', gwsize) + print('DEBUG: local ranks:', strategy.local_world_size(), + '/ global ranks:', strategy.global_world_size()) print('DEBUG: sys.version:', sys.version) print('DEBUG: args.data_dir:', args.data_dir) print('DEBUG: args.log_int:', args.log_int) @@ -327,54 +317,32 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: print('DEBUG: args.shuff:', args.shuff) print('DEBUG: args.rnd_seed:', args.rnd_seed) print('DEBUG: args.backend:', args.backend) - print('DEBUG: args.no_cuda:', args.no_cuda, '\n') # Encapsulate the model on the GPU assigned to the current process device = torch.device( - strategy.dist_device() if use_cuda else 'cpu') - if use_cuda: - torch.cuda.set_device(lrank) + strategy.device() if torch.cuda.is_available() else 'cpu') + if torch.cuda.is_available(): + torch.cuda.set_device(strategy.local_rank()) # Dataset train_dataset, test_dataset = mnist_dataset(args.dataset_replication) + # Distributed dataloaders + train_loader = strategy.create_dataloader( + train_dataset, batch_size=args.batch_size, + num_workers=args.nworker, pin_memory=True, + persistent_workers=(args.nworker > 1), + prefetch_factor=args.prefetch, generator=torch_prng, + worker_init_fn=seed_worker + ) + test_loader = strategy.create_dataloader( + test_dataset, batch_size=args.batch_size, + num_workers=args.nworker, pin_memory=True, + persistent_workers=(args.nworker > 1), + prefetch_factor=args.prefetch, generator=torch_prng, + worker_init_fn=seed_worker + ) - if is_distributed: - # Distributed sampler restricts data loading to a subset of the dataset - # exclusive to the current process. 
- train_sampler = DistributedSampler( - train_dataset, num_replicas=gwsize, rank=grank, - shuffle=(args.shuff and args.rnd_seed is None) - ) - train_loader = DataLoader( - train_dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=(args.nworker > 1), - prefetch_factor=args.prefetch, generator=torch_prng, - worker_init_fn=seed_worker - ) - test_sampler = DistributedSampler( - test_dataset, num_replicas=gwsize, rank=grank, - shuffle=(args.shuff and args.rnd_seed is None) - ) - test_loader = DataLoader( - test_dataset, batch_size=args.batch_size, - sampler=test_sampler, num_workers=args.nworker, pin_memory=True, - persistent_workers=(args.nworker > 1), - prefetch_factor=args.prefetch, generator=torch_prng, - worker_init_fn=seed_worker - ) - - else: - train_loader = DataLoader( - train_dataset, batch_size=args.batch_size, generator=torch_prng, - worker_init_fn=seed_worker - ) - test_loader = DataLoader( - test_dataset, batch_size=args.batch_size, generator=torch_prng, - worker_init_fn=seed_worker - ) - - if strategy.is_main_worker(): + if strategy.is_main_worker: print('TIMER: read and concat data:', timer()-st, 's') # Create CNN model @@ -384,14 +352,13 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: optimizer = torch.optim.SGD( model.parameters(), lr=args.lr, momentum=args.momentum) - # Distributed - if is_distributed: - distrib_model, optimizer, _ = strategy.distributed( - model, optimizer, lr_scheduler=None, **distribute_kwargs - ) + # Distributed model + model, optimizer, _ = strategy.distributed( + model, optimizer, lr_scheduler=None, **distribute_kwargs + ) # Start training and test loop - if strategy.is_main_worker(): + if strategy.is_main_worker: print('TIMER: broadcast:', timer()-st, 's') print('\nDEBUG: start training') print('--------------------------------------------------------') @@ -400,15 +367,15 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: start_epoch = 1 for epoch in range(start_epoch, args.epochs + 1): lt = timer() - if is_distributed: + if strategy.is_distributed: # Inform the sampler that a new epoch started: shuffle # may be needed - train_sampler.set_epoch(epoch) - test_sampler.set_epoch(epoch) + train_loader.sampler.set_epoch(epoch) + test_loader.sampler.set_epoch(epoch) # Training loss_acc = train( - model=distrib_model, + model=model, device=device, train_loader=train_loader, optimizer=optimizer, @@ -419,7 +386,7 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: # Testing acc_test = test( - model=distrib_model, + model=model, device=device, test_loader=test_loader, strategy=strategy @@ -434,11 +401,11 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: train_loader.last_epoch = True test_loader.last_epoch = True - if strategy.is_main_worker(): + if strategy.is_main_worker: print('TIMER: epoch time:', timer()-lt, 's') print('DEBUG: accuracy:', acc_test, '%') - if strategy.is_main_worker(): + if strategy.is_main_worker: print('\n--------------------------------------------------------') print('DEBUG: training results:\n') print('TIMER: first epoch time:', first_ep_t, ' s') @@ -451,18 +418,19 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: print('TIMER: average epoch-1 time:', (timer()-et-first_ep_t)/(args.epochs-1), ' s') print('DEBUG: last accuracy:', acc_test, '%') - if use_cuda: + if torch.cuda.is_available(): print('DEBUG: memory req:', - 
int(torch.cuda.memory_reserved(lrank)/1024/1024), 'MB') + int(torch.cuda.memory_reserved( + strategy.local_rank())/1024/1024), + 'MB') print('DEBUG: memory summary:\n\n', torch.cuda.memory_summary(0)) print(f'TIMER: final time: {timer()-st} s\n') time.sleep(1) - print(f" - TRAINING FINISHED") + print(f" - TRAINING FINISHED") # Clean-up - if is_distributed: - strategy.clean_up() + strategy.clean_up() sys.exit() From c0eea26925e5703452cec5670c504e5a250c1de5 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 27 Apr 2024 19:44:30 +0200 Subject: [PATCH 125/171] CLEANUP older classes --- src/itwinai/torch/distributed.py | 479 ------------------------------- 1 file changed, 479 deletions(-) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index 8947fd42..bbc7033a 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -794,482 +794,3 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: raise RuntimeError( f"{self.__class__.__name__} does not support this operation." ) - -# class TorchDistributedStrategy_old(DistributedStrategy): -# """Abstract class to define the distributed backend methods for -# PyTorch models. -# """ -# @abc.abstractmethod -# def init_backend(self) -> None: -# """Initializes the chosen distributed backend""" - -# @abc.abstractmethod -# def distribute_model(self, model: Any) -> Any: -# """Distributes a machine learning model. - -# Args: -# model (Any): a generic ML model to be distributed. - -# Returns: -# Any: distributed model instance. -# """ - -# @abc.abstractmethod -# def broadcast_params(self, model: Any, optimizer: Any) -> None: -# """Broadcasts variables from root rank to all other processes/ - -# Args: -# model (Any): distributed model. -# optimizer (Any): optimizer. -# """ - -# @abc.abstractmethod -# def distribute_optimizer(self, optimizer: Any, model: Any) -> Any: -# """Distribute optimizer. - -# Args: -# optimizer (Any): optimizer. -# model (Any): distributed model. - -# Returns: -# Any: distributed optimizer. -# """ - -# @abc.abstractmethod -# def dist_gwsize(self) -> int: -# """Returns the total number of processes (global world size). - -# Returns: -# int: global world size. -# """ - -# @abc.abstractmethod -# def dist_lwsize(self) -> int: -# """Returns the number of local workers available on a node -# (local world size). -# Usually it is equal to the number of available GPUs. - -# Returns: -# int: local world size. -# """ - -# @abc.abstractmethod -# def dist_grank(self) -> int: -# """Returns the global rank of the current process. -# Rank ranges from 0 to world_size. - -# Returns: -# int: global rank. -# """ - -# @abc.abstractmethod -# def dist_lrank(self) -> int: -# """Returns the local rank of the current process. - -# Returns: -# int: local rank. -# """ - -# def is_main_worker(self) -> bool: -# """Checks if local worker has global rank equal to zero. - -# Returns: -# bool: True if main worker. -# """ -# return self.dist_grank() == 0 - -# def dist_device(self) -> str: -# """Device used by local worker. - -# Returns: -# str: torch device in the form 'cuda:N'. -# """ -# return f"cuda:{self.dist_lrank()}" - -# @abc.abstractmethod -# def clean_up(self) -> None: -# """Cleans up resources allocated by distributed strategy.""" - -# @abc.abstractmethod -# def par_allgather_obj(self, obj: Any) -> List[Any]: -# """Gathers any object from the whole group in a list -# (to all workers). - -# Args: -# obj (Any): object to gather from all workers. 
- -# Returns: -# List[Any]: list of objects gathered from all workers. -# """ - - -# class DDPDistributedStrategy_old(TorchDistributedStrategy_old): -# """PyTorch DDP distributed strategy class. - -# Args: -# backend (str): Name of the communication backend to employ. -# """ - -# backend: str - -# def __init__(self, backend: str) -> None: -# super().__init__() -# self.backend = backend - -# def init_backend(self) -> None: -# """Initializes the distributed process group and the distributed -# package. -# """ -# if torch.cuda.is_available(): -# dist.init_process_group(backend=self.backend) - -# def distribute_model(self, model: nn.Module) -> nn.Module: -# """Achieves data parallelism by synchronizing the gradients -# across each model replica located in each available -# computing device. - -# Args: -# model (nn.Module): ML model to be distributed. - -# Returns: -# nn.Module: Distributed model replicas across all devices. -# that are to be synchronized. -# """ -# if torch.cuda.is_available(): -# # device = self.dist_lrank() -# model = model.to(self.dist_device()) -# dist_model = torch.nn.parallel.DistributedDataParallel( -# model, -# device_ids=[self.dist_device()], -# output_device=self.dist_device() -# ) -# else: -# dist_model = model - -# return dist_model - -# def broadcast_params( -# self, -# model: nn.Module, -# optimizer: optim.Optimizer -# ) -> None: -# """Do nothing. Only applicable for Horovod. - -# Args: -# model (nn.Module): ML model -# optimizer (optim.Optimizer): Optimizer -# """ -# pass - -# def distribute_optimizer( -# self, -# optimizer: optim.Optimizer, -# model: nn.Module = None -# ) -> optim.Optimizer: -# """Returns the optimizer from argument. - -# Args: -# optimizer (optim.Optimizer): optimizer. -# model (nn.Module): ML model. Unused here. - -# Returns: -# optim.Optimizer: Distributed optimizer. -# """ -# return optimizer - -# def dist_gwsize(self) -> int: -# """Returns the total number of processes (global world size). - -# Returns: -# int: global world size. -# """ -# return dist.get_world_size() - -# def dist_lwsize(self) -> int: -# """Returns the local number of workers available per node, -# which is usually the number of GPUs available. - -# Returns: -# int: local world size. -# """ -# return torch.cuda.device_count() - -# def dist_grank(self) -> int: -# """Returns the global rank of the current process, where -# rank ranges from 0 to world_size. - -# Returns: -# int: global rank. -# """ -# return dist.get_rank() - -# def dist_lrank(self) -> int: -# """Returns the local rank of the current process. - -# Returns: -# int: local rank. -# """ -# return dist.get_rank() % torch.cuda.device_count() - -# def clean_up(self) -> None: -# """Destroys the current process group.""" -# if torch.cuda.is_available(): -# dist.barrier() -# dist.destroy_process_group() - -# def par_allgather_obj(self, obj: Any) -> List[Any]: -# """Gathers any object from the whole group -# in a list (to all workers). - -# Args: -# obj (Any): Object to gather from all workers. - -# Returns: -# List[Any]: List of gathered objects. -# """ -# res = [None] * self.dist_gwsize() -# dist.all_gather_object(res, obj) -# return res - - -# class DSDistributedStrategy_old(TorchDistributedStrategy_old): -# """DeepSpeed distributed strategy class. - -# Args: -# backend (str): Name of the communication backend to employ. -# config (Union[dict, Path, str]): DeepSpeed config. Either a -# dictionary or a path to a JSON file. 
-# """ - -# config: Dict = None -# backend: str - -# def __init__( -# self, -# backend: str, -# config: Union[Dict, Path, str] -# ) -> None: -# super().__init__() -# self.backend = backend -# self._load_config(config) - -# def _load_config(self, ds_config): -# if isinstance(ds_config, (str, Path)): -# with open(ds_config) as fp: -# self.config = json.load(fp) -# elif isinstance(ds_config, dict): -# self.config = ds_config -# else: -# raise ValueError("ds_config is not a dictionary not a path.") - -# def init_backend(self) -> None: -# """Initializes the distributed process group and the distributed -# package. -# """ -# deepspeed.init_distributed(dist_backend=self.backend) - -# def distribute_model(self, model: nn.Module) -> nn.Module: -# """Achieves data parallelism by synchronizing the gradients -# across each model replica located in each available -# computing device. - -# Args: -# model (nn.Module): ML model to be distributed. - -# Returns: -# nn.Module: Distributed model replicas across all devices -# that are to be synchronized. -# """ -# distrib_model, __, __, __ = deepspeed.initialize( -# model=model, -# model_parameters=model.parameters(), -# dist_init_required=True, -# config=self.config -# ) -# return distrib_model - -# def broadcast_params( -# self, model: nn.Module, optimizer: optim.Optimizer -# ) -> None: -# """Only applicable for Horovod. Does nothing. - -# Args: -# model (nn.Module): ML model. -# optimizer (optim.Optimizer): optimizer. -# """ -# pass - -# def distribute_optimizer( -# self, -# optimizer: optim.Optimizer, -# model: nn.Module = None -# ) -> optim.Optimizer: -# """Returns the optimizer from argument. - -# Args: -# optimizer (optim.Optimizer): torch optimizer. -# model (nn.Module): torch neural network. - -# Returns: -# optim.Optimizer: distributed optimizer. -# """ -# return optimizer - -# def dist_gwsize(self) -> int: -# """Returns the total number of processes (global world size). - -# Returns: -# int: global world size. -# """ -# return dist.get_world_size() - -# def dist_lwsize(self) -> int: -# """Returns the local number of workers available per node, -# which is usually the number of GPUs available. - -# Returns: -# int: local world size. -# """ -# return torch.cuda.device_count() - -# def dist_grank(self) -> int: -# """Returns the global rank of the current process, where -# rank ranges from 0 to world_size. - -# Returns: -# int: global rank. -# """ -# return dist.get_rank() - -# def dist_lrank(self) -> int: -# """Returns the local rank of the current process. - -# Returns: -# int: local rank. -# """ -# return dist.get_rank() % torch.cuda.device_count() - -# def clean_up(self) -> None: -# """Destroys the current process group.""" -# deepspeed.sys.exit() - -# def par_allgather_obj(self, obj: Any) -> list[Any]: -# """Gathers any object from the whole group -# in a list (to all workers). - -# Args: -# obj (Any): Object to gather from all workers. - -# Returns: -# List[Any]: List of gathered objects. -# """ -# res = [None] * self.dist_gwsize() -# dist.all_gather_object(res, obj) -# return res - - -# class HVDDistributedStrategy_old(TorchDistributedStrategy_old): -# """Horovod distributed strategy class.""" - -# def init_backend(self) -> None: -# """Initializes the Horovod distributed backend.""" -# hvd.init() - -# def distribute_model(self, model: nn.Module) -> nn.Module: -# """Only applicable for DDP and DeepSpeed. -# For Horovod, returns the same model passed as argument. - -# Args: -# model (nn.Module): ML model to be distributed. 
- -# Returns: -# nn.Module: ML model passed in the argument. -# """ -# return model - -# def broadcast_params( -# self, model: nn.Module, optimizer: optim.Optimizer -# ) -> None: -# """Broadcasts variables from root rank to all other processes. - -# Args: -# model (nn.Module): ML model that is to be broadcasted -# across processes. -# optimizer (optim.Optimizer): Optimizer that is to be broadcasted -# across processes. -# """ -# hvd.broadcast_parameters(model.state_dict(), root_rank=0) -# hvd.broadcast_optimizer_state(optimizer, root_rank=-0) - -# def distribute_optimizer( -# self, -# optimizer: optim.Optimizer, -# model: nn.Module -# ) -> optim.Optimizer: -# """Constructs a DistributedOptimizer, for computing single-process -# gradient values and applying gradient updates after the gradients -# have been combined across all the Horovod ranks. - -# Args: -# optimizer (optim.Optimizer): Optimizer to be distributed. -# model (nn.Module): ML model to be trained. - -# Returns: -# optim.Optimizer: Distributed optimizer across all ranks. -# """ -# distOptimizer = hvd.DistributedOptimizer( -# optimizer, -# named_parameters=model.named_parameters(), -# op=hvd.Average -# ) -# return distOptimizer - -# def dist_gwsize(self) -> int: -# """Returns the total number of processes (global world size). - -# Returns: -# int: global world size. -# """ -# return hvd.size() - -# def dist_lwsize(self) -> int: -# """Returns the local number of workers available per node, -# which is usually the number of GPUs available. - -# Returns: -# int: local world size. -# """ -# return hvd.local_size() - -# def dist_grank(self) -> int: -# """Returns the global rank of the current process, where -# rank ranges from 0 to world_size. - -# Returns: -# int: global rank. -# """ -# return hvd.rank() - -# def dist_lrank(self) -> int: -# """Returns the local rank of the current process. - -# Returns: -# int: local rank. -# """ -# return hvd.local_rank() - -# def clean_up(self) -> None: -# """Shuts Horovod down.""" -# hvd.shutdown() - -# def par_allgather_obj(self, obj: Any) -> list[Any]: -# """Gathers scalar objects across all workers to a -# list with size(#worker), uses horovod communicator - -# Args: -# obj (Any): object in a worker. - -# Returns: -# list: gathered list with size(#worker). -# """ -# return hvd.allgather_object(obj) From 4fd04d5481a2e38aba9cbee2e9007d8b2afeb820 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 27 Apr 2024 20:23:21 +0200 Subject: [PATCH 126/171] Rename strategies --- src/itwinai/torch/distributed.py | 8 ++++---- .../torch-scaling-test/itwinai_trainer.py | 12 ++++++------ .../distributed-ml/torch-tutorial-0-basics/train.py | 12 ++++++------ .../distributed-ml/torch-tutorial-1-mnist/train.py | 12 ++++++------ 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index bbc7033a..63226b41 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -288,8 +288,8 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: """ -class DDPDistributedStrategy(TorchDistributedStrategy): - """PyTorch DDP distributed strategy class. +class TorchDDPStrategy(TorchDistributedStrategy): + """PyTorch ``DistributedDataParallel`` distributed strategy class. Args: backend (str): Name of the communication backend to employ. 
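For downstream code, the rename in this patch is a one-to-one import substitution; the tutorial hunks below apply exactly this change. A hypothetical before/after for user code:

    # Before this patch
    from itwinai.torch.distributed import (
        DDPDistributedStrategy,   # now TorchDDPStrategy
        DSDistributedStrategy,    # now DeepSpeedStrategy
        HVDDistributedStrategy,   # now HorovodStrategy
    )

    # After this patch
    from itwinai.torch.distributed import (
        TorchDDPStrategy,
        DeepSpeedStrategy,
        HorovodStrategy,
    )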
@@ -435,7 +435,7 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: return res -class DSDistributedStrategy(TorchDistributedStrategy): +class DeepSpeedStrategy(TorchDistributedStrategy): """DeepSpeed distributed strategy class. Args: @@ -580,7 +580,7 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: return res -class HVDDistributedStrategy(TorchDistributedStrategy): +class HorovodStrategy(TorchDistributedStrategy): """Horovod distributed strategy class.""" def init(self) -> None: diff --git a/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py b/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py index d47e9c7b..cded83af 100644 --- a/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py +++ b/tutorials/distributed-ml/torch-scaling-test/itwinai_trainer.py @@ -21,9 +21,9 @@ from itwinai.torch.distributed import ( TorchDistributedStrategy, - DDPDistributedStrategy, - HVDDistributedStrategy, - DSDistributedStrategy, + TorchDDPStrategy, + HorovodStrategy, + DeepSpeedStrategy, ) from itwinai.parser import ArgumentParser as ItAIArgumentParser from itwinai.loggers import EpochTimeTracker @@ -154,10 +154,10 @@ def main(): or not torch.cuda.device_count() > 1): raise RuntimeError('Resources unavailable') - strategy = DDPDistributedStrategy(backend=args.backend) + strategy = TorchDDPStrategy(backend=args.backend) distribute_kwargs = {} elif args.strategy == 'horovod': - strategy = HVDDistributedStrategy() + strategy = HorovodStrategy() distribute_kwargs = dict( compression=( hvd.Compression.fp16 if args.fp16_allreduce @@ -167,7 +167,7 @@ def main(): gradient_predivide_factor=args.gradient_predivide_factor ) elif args.strategy == 'deepspeed': - strategy = DSDistributedStrategy(backend=args.backend) + strategy = DeepSpeedStrategy(backend=args.backend) distribute_kwargs = dict( config_params=dict(train_micro_batch_size_per_gpu=args.batch_size) ) diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/train.py b/tutorials/distributed-ml/torch-tutorial-0-basics/train.py index e9d96735..29c0d272 100644 --- a/tutorials/distributed-ml/torch-tutorial-0-basics/train.py +++ b/tutorials/distributed-ml/torch-tutorial-0-basics/train.py @@ -15,9 +15,9 @@ from itwinai.torch.distributed import ( distributed_resources_available, TorchDistributedStrategy, - DDPDistributedStrategy, - HVDDistributedStrategy, - DSDistributedStrategy, + TorchDDPStrategy, + HorovodStrategy, + DeepSpeedStrategy, NonDistributedStrategy ) @@ -127,17 +127,17 @@ def training_fn( strategy = NonDistributedStrategy() distribute_kwargs = {} elif args.strategy == 'ddp': - strategy = DDPDistributedStrategy(backend='nccl') + strategy = TorchDDPStrategy(backend='nccl') distribute_kwargs = {} elif args.strategy == 'horovod': - strategy = HVDDistributedStrategy() + strategy = HorovodStrategy() distribute_kwargs = dict( compression=hvd.Compression.none, op=hvd.Average, gradient_predivide_factor=1.0 ) elif args.strategy == 'deepspeed': - strategy = DSDistributedStrategy(backend='nccl') + strategy = DeepSpeedStrategy(backend='nccl') distribute_kwargs = dict( config_params=dict(train_micro_batch_size_per_gpu=args.batch_size) ) diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py b/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py index 9a7ef0c1..cf56fe76 100644 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py +++ b/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py @@ -21,9 +21,9 @@ from itwinai.torch.distributed import ( distributed_resources_available, 
TorchDistributedStrategy, - DDPDistributedStrategy, - HVDDistributedStrategy, - DSDistributedStrategy, + TorchDDPStrategy, + HorovodStrategy, + DeepSpeedStrategy, NonDistributedStrategy ) from itwinai.parser import ArgumentParser as ItAIArgumentParser @@ -271,10 +271,10 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: or not torch.cuda.device_count() > 1): raise RuntimeError('Resources unavailable') - strategy = DDPDistributedStrategy(backend=args.backend) + strategy = TorchDDPStrategy(backend=args.backend) distribute_kwargs = {} elif args.strategy == 'horovod': - strategy = HVDDistributedStrategy() + strategy = HorovodStrategy() distribute_kwargs = dict( compression=( hvd.Compression.fp16 if args.fp16_allreduce @@ -284,7 +284,7 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: gradient_predivide_factor=args.gradient_predivide_factor ) elif args.strategy == 'deepspeed': - strategy = DSDistributedStrategy(backend=args.backend) + strategy = DeepSpeedStrategy(backend=args.backend) distribute_kwargs = dict( config_params=dict(train_micro_batch_size_per_gpu=args.batch_size) ) From e8172d57bc7501a740001f8de24bc966027265be Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sat, 27 Apr 2024 20:40:13 +0200 Subject: [PATCH 127/171] Simplify structure --- .../torch-tutorial-1-mnist/config.yaml | 2 +- .../torch-tutorial-1-mnist/train.py | 26 ++++++------------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml b/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml index c5ef5bf5..331d6d04 100644 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml +++ b/tutorials/distributed-ml/torch-tutorial-1-mnist/config.yaml @@ -4,7 +4,7 @@ log_int: 10 verbose: True restart_int: 10 download_only: False -dataset_replication: 100 +dataset_replication: 10 shuff: False nworker: 4 # num workers dataloader prefetch: 2 diff --git a/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py b/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py index cf56fe76..809480dd 100644 --- a/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py +++ b/tutorials/distributed-ml/torch-tutorial-1-mnist/train.py @@ -136,7 +136,7 @@ def forward(self, x): def train( - model, device, train_loader, optimizer, epoch, + model, train_loader, optimizer, epoch, strategy: TorchDistributedStrategy, args ): """ @@ -149,7 +149,8 @@ def train( print("\n") for batch_idx, (data, target) in enumerate(train_loader): t = timer() - data, target = data.to(device), target.to(device) + data = data.to(strategy.device()) + target = target.to(strategy.device()) optimizer.zero_grad() output = model(data) loss = F.nll_loss(output, target) @@ -170,7 +171,7 @@ def train( return loss_acc -def test(model, device, test_loader, strategy: TorchDistributedStrategy): +def test(model, test_loader, strategy: TorchDistributedStrategy): """ Model validation. 
""" @@ -179,7 +180,8 @@ def test(model, device, test_loader, strategy: TorchDistributedStrategy): correct = 0 with torch.no_grad(): for data, target in test_loader: - data, target = data.to(device), target.to(device) + data = data.to(strategy.device()) + target = target.to(strategy.device()) output = model(data) # Sum up batch loss test_loss += F.nll_loss(output, target, reduction="sum").item() @@ -267,10 +269,6 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: strategy = NonDistributedStrategy() distribute_kwargs = {} elif args.strategy == 'ddp': - if (not torch.cuda.is_available() - or not torch.cuda.device_count() > 1): - raise RuntimeError('Resources unavailable') - strategy = TorchDDPStrategy(backend=args.backend) distribute_kwargs = {} elif args.strategy == 'horovod': @@ -318,12 +316,6 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: print('DEBUG: args.rnd_seed:', args.rnd_seed) print('DEBUG: args.backend:', args.backend) - # Encapsulate the model on the GPU assigned to the current process - device = torch.device( - strategy.device() if torch.cuda.is_available() else 'cpu') - if torch.cuda.is_available(): - torch.cuda.set_device(strategy.local_rank()) - # Dataset train_dataset, test_dataset = mnist_dataset(args.dataset_replication) # Distributed dataloaders @@ -346,13 +338,13 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: print('TIMER: read and concat data:', timer()-st, 's') # Create CNN model - model = Net().to(device) + model = Net().to(strategy.device()) # Optimizer optimizer = torch.optim.SGD( model.parameters(), lr=args.lr, momentum=args.momentum) - # Distributed model + # Distributed model, optimizer, and scheduler model, optimizer, _ = strategy.distributed( model, optimizer, lr_scheduler=None, **distribute_kwargs ) @@ -376,7 +368,6 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: # Training loss_acc = train( model=model, - device=device, train_loader=train_loader, optimizer=optimizer, epoch=epoch, @@ -387,7 +378,6 @@ def mnist_dataset(dataset_replication: int = 1) -> Tuple[Dataset, Dataset]: # Testing acc_test = test( model=model, - device=device, test_loader=test_loader, strategy=strategy ) From e845c52a179ee79bd2b4660d1978b104392ac0dd Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sun, 28 Apr 2024 18:08:37 +0200 Subject: [PATCH 128/171] ADD draft new torch trainer class --- src/itwinai/torch/distributed.py | 86 +++++---- src/itwinai/torch/trainer.py | 292 ++++++++++++++++++++++++++++++- src/itwinai/torch/types.py | 4 + 3 files changed, 341 insertions(+), 41 deletions(-) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index 63226b41..3bb48647 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -16,7 +16,7 @@ from torch.utils.data.dataloader import T_co, _worker_init_fn_t, _collate_fn_t from ..distributed import DistributedStrategy -from .types import UninitializedStrategyError +from .types import UninitializedStrategyError, DistributedStrategyError def distributed_resources_available() -> bool: @@ -36,7 +36,7 @@ class TorchDistributedStrategy(DistributedStrategy): PyTorch models. """ is_distributed: bool = True - _initialized: bool = False + is_initialized: bool = False @property def is_main_worker(self) -> bool: @@ -45,7 +45,7 @@ def is_main_worker(self) -> bool: Returns: bool: True if main worker. 
""" - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return self.global_rank() == 0 @@ -108,7 +108,7 @@ def device(self) -> str: Returns: str: torch device in the form 'cuda:N'. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return f"cuda:{self.local_rank()}" @@ -246,7 +246,7 @@ def create_dataloader( .. _multiprocessing context: https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") @@ -307,12 +307,16 @@ def init(self) -> None: Raises: RuntimeError: when there are not (enough) GPUs available. + DistributedStrategyError: when trying to initialize a strategy + already initialized. """ if not distributed_resources_available(): raise RuntimeError( "Trying to run distributed on insufficient resources.") + if self.is_initialized: + raise DistributedStrategyError("Strategy was already initialized") dist.init_process_group(backend=self.backend) - self._initialized = True + self.is_initialized = True torch.cuda.device(self.local_rank()) @@ -346,7 +350,7 @@ def distributed( **kwargs ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: """Setup model, optimizer and scheduler for distributed.""" - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") if torch.cuda.is_available(): @@ -368,7 +372,7 @@ def global_world_size(self) -> int: Returns: int: global world size. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return dist.get_world_size() @@ -380,7 +384,7 @@ def local_world_size(self) -> int: Returns: int: local world size. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return torch.cuda.device_count() @@ -392,7 +396,7 @@ def global_rank(self) -> int: Returns: int: global rank. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return dist.get_rank() @@ -403,14 +407,14 @@ def local_rank(self) -> int: Returns: int: local rank. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return dist.get_rank() % torch.cuda.device_count() def clean_up(self) -> None: """Destroys the current process group.""" - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") if torch.cuda.is_available(): @@ -427,7 +431,7 @@ def par_allgather_obj(self, obj: Any) -> List[Any]: Returns: List[Any]: List of gathered objects. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") res = [None] * self.global_world_size() @@ -468,11 +472,16 @@ def init(self) -> None: Raises: RuntimeError: when there are not (enough) GPUs available. + DistributedStrategyError: when trying to initialize a strategy + already initialized. 
""" if not distributed_resources_available(): raise RuntimeError( "Trying to run distributed on insufficient resources.") + if self.is_initialized: + raise DistributedStrategyError("Strategy was already initialized") + # https://github.com/Lightning-AI/pytorch-lightning/issues/13567 ompi_lrank = os.environ.get('OMPI_COMM_WORLD_LOCAL_RANK') os.environ['OMPI_COMM_WORLD_LOCAL_RANK'] = os.environ.get( @@ -480,7 +489,7 @@ def init(self) -> None: # https://deepspeed.readthedocs.io/en/latest/initialize.html#training-initialization deepspeed.init_distributed(dist_backend=self.backend) - self._initialized = True + self.is_initialized = True torch.cuda.device(self.local_rank()) @@ -491,7 +500,7 @@ def distributed( **init_kwargs ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: """Setup model, optimizer and scheduler for distributed.""" - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") @@ -515,7 +524,7 @@ def global_world_size(self) -> int: Returns: int: global world size. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return dist.get_world_size() @@ -527,7 +536,7 @@ def local_world_size(self) -> int: Returns: int: local world size. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return torch.cuda.device_count() @@ -539,7 +548,7 @@ def global_rank(self) -> int: Returns: int: global rank. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return dist.get_rank() @@ -550,14 +559,14 @@ def local_rank(self) -> int: Returns: int: local rank. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return dist.get_rank() % torch.cuda.device_count() def clean_up(self) -> None: """Destroys the current process group.""" - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") deepspeed.sys.exit() @@ -572,7 +581,7 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: Returns: List[Any]: List of gathered objects. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") res = [None] * self.global_world_size() @@ -588,12 +597,16 @@ def init(self) -> None: Raises: RuntimeError: when there are not (enough) GPUs available. + DistributedStrategyError: when trying to initialize a strategy + already initialized. """ if not distributed_resources_available(): raise RuntimeError( "Trying to run distributed on insufficient resources.") + if self.is_initialized: + raise DistributedStrategyError("Strategy was already initialized") hvd.init() - self._initialized = True + self.is_initialized = True torch.cuda.device(self.local_rank()) @@ -603,7 +616,7 @@ def distributed( **optim_kwargs ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: """Setup model, optimizer and scheduler for distributed.""" - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. 
Use the init method.") @@ -648,7 +661,7 @@ def global_world_size(self) -> int: Returns: int: global world size. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return hvd.size() @@ -660,7 +673,7 @@ def local_world_size(self) -> int: Returns: int: local world size. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return hvd.local_size() @@ -672,7 +685,7 @@ def global_rank(self) -> int: Returns: int: global rank. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return hvd.rank() @@ -683,14 +696,14 @@ def local_rank(self) -> int: Returns: int: local rank. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return hvd.local_rank() def clean_up(self) -> None: """Shuts Horovod down.""" - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") hvd.shutdown() @@ -705,7 +718,7 @@ def par_allgather_obj(self, obj: Any) -> list[Any]: Returns: list: gathered list with size(#worker). """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") return hvd.allgather_object(obj) @@ -717,10 +730,17 @@ class NonDistributedStrategy(TorchDistributedStrategy): is_distributed: bool = False def init(self) -> None: - """If CUDA is available set CUDA device, and do nothing more.""" + """If CUDA is available set CUDA device, and do nothing more. + + Raises: + DistributedStrategyError: when trying to initialize a strategy + already initialized. + """ + if self.is_initialized: + raise DistributedStrategyError("Strategy was already initialized") if torch.cuda.is_available(): torch.cuda.device(self.local_rank()) - self._initialized = True + self.is_initialized = True def device(self) -> str: """Device used by local worker. @@ -728,7 +748,7 @@ def device(self) -> str: Returns: str: cpu device if CUDA is not available. """ - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. Use the init method.") if torch.cuda.is_available(): @@ -741,7 +761,7 @@ def distributed( **kwargs ) -> Tuple[nn.Module, Optimizer, Optional[LRScheduler]]: """Do nothing and return model, optimizer and scheduler.""" - if not self._initialized: + if not self.is_initialized: raise UninitializedStrategyError( "Strategy has not been initialized. 
Use the init method.") if torch.cuda.is_available(): diff --git a/src/itwinai/torch/trainer.py b/src/itwinai/torch/trainer.py index f0ad1c03..c083165c 100644 --- a/src/itwinai/torch/trainer.py +++ b/src/itwinai/torch/trainer.py @@ -1,7 +1,7 @@ """Provides training logic for PyTorch models via Trainer classes.""" from typing import ( - Optional, Dict, Union, Tuple, Type, List, Any + Optional, Dict, Union, Tuple, Type, List, Any, Literal ) import time import os @@ -17,8 +17,10 @@ import torch.nn as nn from torch.optim.optimizer import Optimizer +import horovod.torch as hvd + from ..components import Trainer, monitor_exec -from .utils import seed_worker, par_allgather_obj, clear_key +from .utils import par_allgather_obj, clear_key from .types import ( Batch, Loss, LrScheduler, Metric ) @@ -26,12 +28,286 @@ from ..loggers import LogMixin, Logger, ConsoleLogger from ..utils import dynamically_import_class from ..cluster import ClusterEnvironment -# from .distributed import ( -# TorchDistributedStrategy, -# DDPDistributedStrategy, -# DSDistributedStrategy, -# HVDDistributedStrategy -# ) +from .reproducibility import seed_worker, set_seed +from .distributed import ( + TorchDistributedStrategy, + TorchDDPStrategy, + HorovodStrategy, + DeepSpeedStrategy, + NonDistributedStrategy, + distributed_resources_available +) + + +class TorchTrainer(Trainer): + """Trainer class for torch training algorithms. + + Args: + config (Dict): training configuration containing hyperparameters. + epochs (int): number of training epochs. + model (Optional[nn.Module], optional): model to train. + Defaults to None. + strategy (Literal["ddp", "deepspeed", + "horovod"], optional): distributed strategy. + Defaults to 'ddp'. + validation_every (Optional[int], optional): run a validation epoch + every ``validation_every`` epochs. Disabled if None. Defaults to 1. + test_every (Optional[int], optional): run a test epoch + every ``test_every`` epochs. Disabled if None. Defaults to None. + random_seed (Optional[int], optional): set random seed for + reproducibility. If None, the seed is not set. Defaults to None. + logger (Optional[Logger], optional): logger for ML tracking. + Defaults to None. + name (Optional[str], optional): trainer custom name. Defaults to None. + """ + + _strategy: TorchDistributedStrategy = None + + train_dataloader: DataLoader = None + validation_dataloader: DataLoader = None + test_dataloader: DataLoader = None + + model: nn.Module = None + loss: Loss = None + optimizer: Optimizer = None + lr_scheduler: LrScheduler = None + + torch_rng: torch.Generator = None + logger: Logger = None + + def __init__( + self, + config: Dict, + epochs: int, + model: Optional[nn.Module] = None, + strategy: Literal["ddp", "deepspeed", "horovod"] = 'ddp', + validation_every: Optional[int] = 1, + test_every: Optional[int] = None, + random_seed: Optional[int] = None, + logger: Optional[Logger] = None, + name: Optional[str] = None + ) -> None: + super().__init__(name) + self.save_parameters(**self.locals2params(locals())) + + # config is mean to store all hyperparameters, which can very from use + # case to use case + # and include learning_rate, batch_size.... 
+ self.config = config + self.epochs = epochs + self.model = model + self.strategy = strategy + self.validation_every = validation_every + self.test_every = test_every + self.random_seed = random_seed + self.logger = logger + + @property + def strategy(self) -> TorchDistributedStrategy: + return self._strategy + + @strategy.setter + def strategy(self, strategy: Union[str, TorchDistributedStrategy]) -> None: + if isinstance(strategy, TorchDistributedStrategy): + self._strategy = strategy + else: + self._strategy = self._detect_strategy(strategy) + + def _detect_strategy(self, strategy: str) -> TorchDistributedStrategy: + if not distributed_resources_available(): + print("WARNING: falling back to non-distributed strategy.") + dist_str = NonDistributedStrategy() + elif strategy == 'ddp': + dist_str = TorchDDPStrategy(backend='nccl') + elif strategy == 'horovod': + dist_str = HorovodStrategy() + elif strategy == 'deepspeed': + dist_str = DeepSpeedStrategy(backend='nccl') + else: + raise NotImplementedError( + f"Strategy '{strategy}' is not recognized/implemented.") + return dist_str + + def _init_distributed_strategy(self) -> None: + if not self.strategy.is_initialized: + self.strategy.init() + + def create_model_loss_optimizer(self) -> None: + """ + Instantiate a torch model, loss, optimizer, and LR scheduler using the + configuration provided in the Trainer constructor. + Generally a user-define method. + """ + ################################### + # Dear user, this is a method you # + # may be interested to override! # + ################################### + + if self.model is None: + # Model was not passed to the constructor. + # Create a model here + raise ValueError( + "self.model is None! Either pass it to the constructor or " + "override this method." + ) + + # A simple NLLLoss + self.loss = nn.functional.nll_loss + + # TODO: improve robustness of getting from config + self.optimizer = torch.optim.SGD( + self.model.parameters(), + lr=self.config.lr, + momentum=self.config.momentum + ) + # Create self.lr_scheduler if needed + + # IMPORTANT: model, optimizer, and scheduler need to be distributed + + # First, define strategy-wise optional configurations + # TODO: improve robustness of getting from config + if isinstance(self.strategy, DeepSpeedStrategy): + # Batch size definition is not optional for DeepSpeedStrategy! + distribute_kwargs = dict( + config_params=dict( + train_micro_batch_size_per_gpu=self.config.batch_size + ) + ) + elif isinstance(self.strategy, HorovodStrategy): + distribute_kwargs = dict( + compression=( + hvd.Compression.fp16 if self.config.fp16_allreduce + else hvd.Compression.none + ), + op=hvd.Adasum if self.config.use_adasum else hvd.Average, + gradient_predivide_factor=self.config.gradient_predivide_factor + ) + else: + distribute_kwargs = {} + + # Distributed model, optimizer, and scheduler + ( + self.model, + self.optimizer, + self.lr_scheduler + ) = self.strategy.distributed( + self.model, self.optimizer, self.lr_scheduler, **distribute_kwargs + ) + + def create_dataloaders( + self, + train_dataset: Dataset, + validation_dataset: Optional[Dataset] = None, + test_dataset: Optional[Dataset] = None + ) -> None: + """ + Create train, validation and test dataloaders using the + configuration provided in the Trainer constructor. + Generally a user-define method. + + Args: + train_dataset (Dataset): training dataset object. + validation_dataset (Optional[Dataset]): validation dataset object. + Default None. + test_dataset (Optional[Dataset]): test dataset object. 
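A hedged sketch of the per-strategy keyword selection performed in `create_model_loss_optimizer()` above: DeepSpeed needs the micro-batch size, Horovod takes compression/averaging options, and plain DDP needs nothing extra. `build_distribute_kwargs` is a hypothetical helper, and Horovod is imported lazily so the sketch runs even where it is not installed:

```python
from typing import Any, Dict


def build_distribute_kwargs(
    strategy: str, batch_size: int, fp16_allreduce: bool = False
) -> Dict[str, Any]:
    """Select extra kwargs for strategy.distributed() (illustration only)."""
    if strategy == "deepspeed":
        # DeepSpeed requires the micro batch size per GPU
        return dict(config_params=dict(
            train_micro_batch_size_per_gpu=batch_size))
    if strategy == "horovod":
        import horovod.torch as hvd  # assumption: Horovod is installed
        return dict(
            compression=(hvd.Compression.fp16 if fp16_allreduce
                         else hvd.Compression.none),
            op=hvd.Average,
        )
    return {}


print(build_distribute_kwargs("deepspeed", batch_size=64))
```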
+ Default None. + """ + + ################################### + # Dear user, this is a method you # + # may be interested to override! # + ################################### + + # TODO: improve robustness of getting from config + self.train_dataloader = self.strategy.create_dataloader( + dataset=train_dataset, + batch_size=self.config.batch_size, + num_workers=self.config.num_workers, + pin_memory=self.config.pin_memory, + generator=self.torch_rng + ) + if validation_dataset is not None: + self.validation_dataloader = self.strategy.create_dataloader( + dataset=train_dataset, + batch_size=self.config.batch_size, + num_workers=self.config.num_workers, + pin_memory=self.config.pin_memory, + generator=self.torch_rng + ) + if test_dataset is not None: + self.test_dataloader = self.strategy.create_dataloader( + dataset=train_dataset, + batch_size=self.config.batch_size, + num_workers=self.config.num_workers, + pin_memory=self.config.pin_memory, + generator=self.torch_rng + ) + + @monitor_exec + def execute( + self, + train_dataset: Dataset, + validation_dataset: Dataset, + test_dataset: Dataset + ) -> Tuple[Dataset, Dataset, Dataset, Any]: + """Trains a machine learning model. + + Args: + train_dataset (Dataset): training dataset. + validation_dataset (Dataset): validation dataset. + test_dataset (Dataset): test dataset. + + Returns: + Tuple[Dataset, Dataset, Dataset, Any]: training dataset, + validation dataset, test dataset, trained model. + """ + self.torch_rng = set_seed(self.random_seed) + self._init_distributed_strategy() + self.create_dataloaders( + train_dataset=train_dataset, + validation_dataset=validation_dataset, + test_dataset=test_dataset + ) + self.create_model_loss_optimizer() + self.train() + return train_dataset, validation_dataset, test_dataset, self.model + + def _set_epoch_dataloaders(self, epoch: int): + """ + Sets epoch in the distributed sampler of a dataloader when using it. 
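As written, the validation and test loaders in the hunk above are built from `train_dataset`; the presumable intent is for each split to wrap its own dataset. A sketch of that wiring, with `self` standing for the trainer instance and the parameter names taken from the class above:

```python
def create_dataloaders(self, train_dataset, validation_dataset=None,
                       test_dataset=None) -> None:
    """Presumed intent: each split wraps its own dataset (sketch only)."""
    def make(dataset):
        return self.strategy.create_dataloader(
            dataset=dataset,
            batch_size=self.config.batch_size,
            num_workers=self.config.num_workers,
            pin_memory=self.config.pin_memory,
            generator=self.torch_rng,
        )

    self.train_dataloader = make(train_dataset)
    if validation_dataset is not None:
        self.validation_dataloader = make(validation_dataset)
    if test_dataset is not None:
        self.test_dataloader = make(test_dataset)
```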
+ """ + if self.strategy.is_distributed: + self.train_dataloader.sampler.set_epoch(epoch) + if self.validation_dataloader is not None: + self.validation_dataloader.sampler.set_epoch(epoch) + if self.test_dataloader is not None: + self.test_dataloader.sampler.set_epoch(epoch) + + def train(self): + """Main training logic (training loop).""" + # start_time = time.perf_counter() + for epoch in range(self.epochs): + self._set_epoch_dataloaders(epoch) + self.train_epoch(epoch) + if self.validation_every and self.validation_every % epoch == 0: + self.validation_epoch(epoch) + if self.test_every and self.test_every % epoch == 0: + self.test_epoch(epoch) + + def train_epoch(self, epoch: int): + pass + + def validation_epoch(self, epoch: int): + pass + + def test_epoch(self, epoch: int): + pass + + def save_state(self): + return super().save_state() + + def load_state(self): + return super().load_state() def preproc_dataloader(dataloader: DataLoader, gwsize, grank): diff --git a/src/itwinai/torch/types.py b/src/itwinai/torch/types.py index 6b42c901..0b6f88ad 100644 --- a/src/itwinai/torch/types.py +++ b/src/itwinai/torch/types.py @@ -68,3 +68,7 @@ class TorchOptimizer(BaseEnum): class UninitializedStrategyError(Exception): """Error raised when a strategy has not been initialized.""" + + +class DistributedStrategyError(Exception): + """Error raised when a strategy has already been initialized.""" From 6550a0fa25f35d8571cf68bb565bd8a6153cd171 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sun, 28 Apr 2024 18:37:43 +0200 Subject: [PATCH 129/171] UPDATED torch trainer draft --- src/itwinai/torch/trainer.py | 194 +++++++++++++++++++++++++++++++++-- 1 file changed, 188 insertions(+), 6 deletions(-) diff --git a/src/itwinai/torch/trainer.py b/src/itwinai/torch/trainer.py index c083165c..827598d4 100644 --- a/src/itwinai/torch/trainer.py +++ b/src/itwinai/torch/trainer.py @@ -39,7 +39,7 @@ ) -class TorchTrainer(Trainer): +class TorchTrainer(Trainer, LogMixin): """Trainer class for torch training algorithms. Args: @@ -58,6 +58,8 @@ class TorchTrainer(Trainer): reproducibility. If None, the seed is not set. Defaults to None. logger (Optional[Logger], optional): logger for ML tracking. Defaults to None. + log_all_workers (bool, optional): if True, the ``log`` method is + called on all workers in the distributed context. Defaults to False. name (Optional[str], optional): trainer custom name. Defaults to None. 
""" @@ -74,6 +76,10 @@ class TorchTrainer(Trainer): torch_rng: torch.Generator = None logger: Logger = None + train_glob_step: int = 0 + validation_glob_step: int = 0 + test_glob_step: int = 0 + metrics: Dict[str, Metric] def __init__( self, @@ -85,6 +91,8 @@ def __init__( test_every: Optional[int] = None, random_seed: Optional[int] = None, logger: Optional[Logger] = None, + log_all_workers: bool = False, + metrics: Optional[Dict[str, Metric]] = None, name: Optional[str] = None ) -> None: super().__init__(name) @@ -101,6 +109,8 @@ def __init__( self.test_every = test_every self.random_seed = random_seed self.logger = logger + self.log_all_workers = log_all_workers + self.metrics = metrics if metrics is not None else {} @property def strategy(self) -> TorchDistributedStrategy: @@ -113,6 +123,10 @@ def strategy(self, strategy: Union[str, TorchDistributedStrategy]) -> None: else: self._strategy = self._detect_strategy(strategy) + @property + def device(self) -> str: + return self.strategy.device() + def _detect_strategy(self, strategy: str) -> TorchDistributedStrategy: if not distributed_resources_available(): print("WARNING: falling back to non-distributed strategy.") @@ -243,6 +257,11 @@ def create_dataloaders( generator=self.torch_rng ) + def _setup_metrics(self): + """Move metrics to current device.""" + for m_name, metric in self.metrics.items(): + self.metrics[m_name] = metric.to(self.device) + @monitor_exec def execute( self, @@ -263,13 +282,22 @@ def execute( """ self.torch_rng = set_seed(self.random_seed) self._init_distributed_strategy() + self._setup_metrics() + self.create_dataloaders( train_dataset=train_dataset, validation_dataset=validation_dataset, test_dataset=test_dataset ) self.create_model_loss_optimizer() + + if self.strategy.is_main_worker: + self.logger.create_logger_context() + self.train() + + if self.strategy.is_main_worker: + self.logger.destroy_logger_context() return train_dataset, validation_dataset, test_dataset, self.model def _set_epoch_dataloaders(self, epoch: int): @@ -283,6 +311,26 @@ def _set_epoch_dataloaders(self, epoch: int): if self.test_dataloader is not None: self.test_dataloader.sampler.set_epoch(epoch) + def log( + self, + item: Union[Any, List[Any]], + identifier: Union[str, List[str]], + kind: str = 'metric', + step: Optional[int] = None, + batch_idx: Optional[int] = None, + **kwargs + ) -> None: + if self.logger and ( + self.strategy.is_main_worker or self.log_all_workers): + self.logger.log( + item=item, + identifier=identifier, + kind=kind, + step=step, + batch_idx=batch_idx, + **kwargs + ) + def train(self): """Main training logic (training loop).""" # start_time = time.perf_counter() @@ -294,14 +342,148 @@ def train(self): if self.test_every and self.test_every % epoch == 0: self.test_epoch(epoch) - def train_epoch(self, epoch: int): - pass + def compute_metrics( + self, + true: Batch, + pred: Batch, + logger_step: int, + batch_idx: Optional[int], + stage: str = 'train' + ) -> Dict[str, Any]: + """Compute and log metrics. - def validation_epoch(self, epoch: int): - pass + Args: + metrics (Dict[str, Metric]): metrics dict. Can be + ``self.train_metrics`` or ``self.validation_metrics``. + true (Batch): true values. + pred (Batch): predicted values. + logger_step (int): global step to pass to the logger. + stage (str): 'train', 'validation'... + + Returns: + Dict[str, Any]: metric values. 
+ """ + m_values = {} + for m_name, metric in self.metrics.items(): + # metric = metric.to(self.device) + m_val = metric(pred, true).detach().cpu().numpy() + self.log( + item=m_val, + identifier=f'{m_name}_{stage}', + kind='metric', + step=logger_step, + batch_idx=batch_idx + ) + m_values[m_name] = m_val + return m_values + + def training_step( + self, + batch: Batch, + batch_idx: int + ) -> Tuple[Loss, Dict[str, Any]]: + x, y = batch + x, y = x.to(self.device), y.to(self.device) + pred_y = self.model(x) + loss: Loss = self.loss(pred_y, y) + self.log( + item=loss.item(), + identifier='training_loss', + kind='metric', + step=self.train_glob_step, + batch_idx=batch_idx + ) + metrics: Dict[str, Any] = self.compute_metrics( + true=y, + pred=pred_y, + logger_step=self.train_glob_step, + batch_idx=batch_idx, + stage='training' + ) + return loss, metrics + + def validation_step( + self, + batch: Batch, + batch_idx: int + ) -> Tuple[Loss, Dict[str, Any]]: + x, y = batch + x, y = x.to(self.device), y.to(self.device) + with torch.no_grad(): + pred_y = self.model(x) + loss: Loss = self.loss(pred_y, y) + self.log( + item=loss.item(), + identifier='validation_loss', + kind='metric', + step=self.validation_glob_step, + batch_idx=batch_idx + ) + metrics: Dict[str, Any] = self.compute_metrics( + true=y, + pred=pred_y, + logger_step=self.validation_glob_step, + batch_idx=batch_idx, + stage='validation' + ) + return loss, metrics + + def train_epoch(self) -> Loss: + self.model.train() + train_losses = [] + for batch_idx, train_batch in enumerate(self.train_dataloader): + loss, metrics = self.training_step( + batch=train_batch, + batch_idx=batch_idx + ) + # TODO: merge and log batch metrics and loss into epoch metrics + self.optimizer.zero_grad() + loss.backward() + self.optimizer.step() + train_losses.append(loss) + # Important: update counter + self.train_glob_step += 1 + + # Aggregate and log losses + avg_loss = torch.mean(torch.stack(train_losses)).detach().cpu() + self.log( + item=avg_loss.item(), + identifier='training_loss_epoch', + kind='metric', + step=self.train_glob_step, + ) + return avg_loss + + def validation_epoch(self) -> Loss: + if self.validation_dataloader is not None: + self.model.eval() + validation_losses = [] + for batch_idx, val_batch \ + in enumerate(self.validation_dataloader): + # TODO: merge and log batch metrics and loss into epoch metrics + loss, metrics = self.validation_step( + batch=val_batch, + batch_idx=batch_idx + ) + validation_losses.append(loss) + # Important: update counter + self.validation_glob_step += 1 + + # Aggregate and log losses + avg_loss = torch.mean( + torch.stack(validation_losses) + ).detach().cpu() + self.log( + item=avg_loss.item(), + identifier='validation_loss_epoch', + kind='metric', + step=self.validation_glob_step, + ) + return avg_loss def test_epoch(self, epoch: int): - pass + # TODO: implement test epoch + raise NotImplementedError() def save_state(self): return super().save_state() From 94baf1140efa2c98fc177eed5007dd69f4b4e0c7 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Sun, 28 Apr 2024 19:23:44 +0200 Subject: [PATCH 130/171] UPDATE MNIST use case --- src/itwinai/loggers.py | 28 ++++++++++---- src/itwinai/torch/trainer.py | 22 +++++++---- use-cases/mnist/torch/dataloader.py | 2 +- use-cases/mnist/torch/pipeline.yaml | 58 ++++++++++++++--------------- 4 files changed, 65 insertions(+), 45 deletions(-) diff --git a/src/itwinai/loggers.py b/src/itwinai/loggers.py index d5ed0008..aeb850b0 100644 --- a/src/itwinai/loggers.py +++ 
b/src/itwinai/loggers.py @@ -4,7 +4,7 @@ import csv from abc import ABCMeta, abstractmethod from contextlib import contextmanager -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union, Literal import pickle import pathlib @@ -38,12 +38,12 @@ class Logger(LogMixin, metaclass=ABCMeta): """ savedir: str = None supported_types: List[str] # Supported logging 'kinds' - _log_freq: Union[int, str] + _log_freq: Union[int, Literal['epoch', 'batch']] def __init__( self, savedir: str = 'mllogs', - log_freq: Union[int, str] = 'epoch' + log_freq: Union[int, Literal['epoch', 'batch']] = 'epoch' ) -> None: self.savedir = savedir self.log_freq = log_freq @@ -120,7 +120,7 @@ class ConsoleLogger(Logger): def __init__( self, savedir: str = 'mllogs', - log_freq: Union[int, str] = 'epoch' + log_freq: Union[int, Literal['epoch', 'batch']] = 'epoch' ) -> None: savedir = os.path.join(savedir, 'simple-logger') super().__init__(savedir=savedir, log_freq=log_freq) @@ -190,7 +190,7 @@ def __init__( experiment_name: str = BASE_EXP_NAME, tracking_uri: Optional[str] = None, run_description: Optional[str] = None, - log_freq: Union[int, str] = 'epoch' + log_freq: Union[int, Literal['epoch', 'batch']] = 'epoch' ): savedir = os.path.join(savedir, 'mlflow') super().__init__(savedir=savedir, log_freq=log_freq) @@ -317,7 +317,7 @@ def __init__( self, savedir: str = 'mllogs', project_name: str = BASE_EXP_NAME, - log_freq: Union[int, str] = 'epoch' + log_freq: Union[int, Literal['epoch', 'batch']] = 'epoch' ) -> None: savedir = os.path.join(savedir, 'wandb') super().__init__(savedir=savedir, log_freq=log_freq) @@ -376,7 +376,7 @@ class TensorBoardLogger(Logger): def __init__( self, savedir: str = 'mllogs', - log_freq: Union[int, str] = 'epoch' + log_freq: Union[int, Literal['epoch', 'batch']] = 'epoch' ) -> None: savedir = os.path.join(savedir, 'tensorboard') super().__init__(savedir=savedir, log_freq=log_freq) @@ -425,7 +425,7 @@ def __init__( self, loggers: List[Logger] ) -> None: - super().__init__(savedir='/.tmp_mllogs_LoggersCollection', log_freq=0) + super().__init__(savedir='/.tmp_mllogs_LoggersCollection', log_freq=1) self.loggers = loggers def should_log(self, batch_idx: int = None) -> bool: @@ -450,6 +450,18 @@ def log( **kwargs ) + def create_logger_context(self): + for logger in self.loggers: + logger.create_logger_context() + + def destroy_logger_context(self): + for logger in self.loggers: + logger.destroy_logger_context() + + def save_hyperparameters(self, params: Dict[str, Any]) -> None: + for logger in self.loggers: + logger.save_hyperparameters(params=params) + class EpochTimeTracker: def __init__(self, series_name: str, csv_file: str) -> None: diff --git a/src/itwinai/torch/trainer.py b/src/itwinai/torch/trainer.py index 827598d4..e5b8c608 100644 --- a/src/itwinai/torch/trainer.py +++ b/src/itwinai/torch/trainer.py @@ -39,6 +39,11 @@ ) +class Config: + def __init__(self, my_dict): + self.__dict__.update(my_dict) + + class TorchTrainer(Trainer, LogMixin): """Trainer class for torch training algorithms. @@ -60,6 +65,8 @@ class TorchTrainer(Trainer, LogMixin): Defaults to None. log_all_workers (bool, optional): if True, the ``log`` method is called on all workers in the distributed context. Defaults to False. + metrics (Optional[Dict[str, Metric]], optional): map of torchmetrics + metrics. Defaults to None. name (Optional[str], optional): trainer custom name. Defaults to None. 
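A usage sketch of the small `Config` wrapper added to `trainer.py` in this hunk: it exposes a plain dict of hyperparameters as attributes, which is how the trainer reads them (`self.config.batch_size`, `self.config.lr`, ...). The None-tolerant default shown here matches the refinement made a couple of commits later:

```python
class Config:
    def __init__(self, my_dict=None):
        my_dict = my_dict if my_dict is not None else {}
        self.__dict__.update(my_dict)


cfg = Config({"batch_size": 64, "lr": 1e-3, "momentum": 0.9})
print(cfg.batch_size, cfg.lr, cfg.momentum)  # 64 0.001 0.9
```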
""" @@ -101,7 +108,7 @@ def __init__( # config is mean to store all hyperparameters, which can very from use # case to use case # and include learning_rate, batch_size.... - self.config = config + self.config = Config(config) self.epochs = epochs self.model = model self.strategy = strategy @@ -335,12 +342,13 @@ def train(self): """Main training logic (training loop).""" # start_time = time.perf_counter() for epoch in range(self.epochs): + epoch_n = epoch + 1 self._set_epoch_dataloaders(epoch) - self.train_epoch(epoch) - if self.validation_every and self.validation_every % epoch == 0: - self.validation_epoch(epoch) - if self.test_every and self.test_every % epoch == 0: - self.test_epoch(epoch) + self.train_epoch() + if self.validation_every and self.validation_every % epoch_n == 0: + self.validation_epoch() + if self.test_every and self.test_every % epoch_n == 0: + self.test_epoch() def compute_metrics( self, @@ -481,7 +489,7 @@ def validation_epoch(self) -> Loss: ) return avg_loss - def test_epoch(self, epoch: int): + def test_epoch(self): # TODO: implement test epoch raise NotImplementedError() diff --git a/use-cases/mnist/torch/dataloader.py b/use-cases/mnist/torch/dataloader.py index e4243763..a19c647e 100644 --- a/use-cases/mnist/torch/dataloader.py +++ b/use-cases/mnist/torch/dataloader.py @@ -34,7 +34,7 @@ def execute(self) -> Tuple[Dataset, Dataset]: transforms.Normalize((0.1307,), (0.3081,)) ])) print("Train and validation datasets loaded.") - return train_dataset, validation_dataset + return train_dataset, validation_dataset, None class InferenceMNIST(Dataset): diff --git a/use-cases/mnist/torch/pipeline.yaml b/use-cases/mnist/torch/pipeline.yaml index 99f35c73..4b1b04cb 100644 --- a/use-cases/mnist/torch/pipeline.yaml +++ b/use-cases/mnist/torch/pipeline.yaml @@ -8,27 +8,22 @@ pipeline: save_path: .tmp/ training_step: - class_path: itwinai.torch.trainer.TorchTrainerMG + class_path: itwinai.torch.trainer.TorchTrainer init_args: + config: + batch_size: 64 + num_workers: 4 + pin_memory: False + lr: 0.001 + momentum: 0.9 + fp16_allreduce: False + use_adasum: False + gradient_predivide_factor: 1.0 + model: class_path: model.Net - loss: - class_path: torch.nn.NLLLoss - init_args: - reduction: mean - optimizer_class: torch.optim.SGD - optimizer_kwargs: - lr: 0.001 - train_dataloader_kwargs: - batch_size: 32 - pin_memory: True - shuffle: True - validation_dataloader_kwargs: - batch_size: 32 - pin_memory: True - shuffle: False epochs: 2 - train_metrics: + metrics: accuracy: class_path: torchmetrics.classification.MulticlassAccuracy init_args: @@ -41,16 +36,21 @@ pipeline: class_path: torchmetrics.classification.MulticlassRecall init_args: num_classes: 10 - logger: - - class_path: itwinai.loggers.ConsoleLogger - - class_path: itwinai.loggers.MLFlowLogger - init_args: - experiment_name: MNIST classifier - log_freq: batch - strategy: ddp - checkpoint_every: 1 - cluster: - class_path: itwinai.torch.cluster.LocalCluster + logger: + class_path: itwinai.loggers.LoggersCollection init_args: - gpus: '0,1,2' - backend: nccl + loggers: + - class_path: itwinai.loggers.ConsoleLogger + init_args: + log_freq: 100 + - class_path: itwinai.loggers.MLFlowLogger + init_args: + experiment_name: MNIST classifier + log_freq: batch + strategy: ddp + # checkpoint_every: 1 + # cluster: + # class_path: itwinai.torch.cluster.LocalCluster + # init_args: + # gpus: '0,1,2' + # backend: nccl From 1a55be2468894423df472a515b1387be18e044fa Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 11:59:23 +0200 
Subject: [PATCH 131/171] INtegrate new trainer into MNIST use case --- src/itwinai/cli.py | 28 +++++ src/itwinai/loggers.py | 2 +- src/itwinai/torch/trainer.py | 26 +++- use-cases/mnist/torch/Dockerfile | 3 - use-cases/mnist/torch/README.md | 44 +++++-- use-cases/mnist/torch/config.yaml | 99 +++++++++++++++ use-cases/mnist/torch/inference-pipeline.yaml | 22 ---- use-cases/mnist/torch/pipeline.yaml | 56 --------- use-cases/mnist/torch/runall.sh | 39 ++++++ use-cases/mnist/torch/slurm.sh | 116 ++++++++++++++++++ use-cases/mnist/torch/train.py | 44 ------- 11 files changed, 342 insertions(+), 137 deletions(-) create mode 100644 use-cases/mnist/torch/config.yaml delete mode 100644 use-cases/mnist/torch/inference-pipeline.yaml delete mode 100644 use-cases/mnist/torch/pipeline.yaml create mode 100644 use-cases/mnist/torch/runall.sh create mode 100644 use-cases/mnist/torch/slurm.sh delete mode 100644 use-cases/mnist/torch/train.py diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py index 8f16d676..d5535ada 100644 --- a/src/itwinai/cli.py +++ b/src/itwinai/cli.py @@ -173,6 +173,22 @@ def scalability_report( print("Archived logs and plot at: ", archive_name) +def str_to_slice(interval: str) -> slice: + import re + # TODO: add support for slices starting with empty index + # e.g., :20:3 + if not re.match("\d+(:\d+)?(:\d+)?", interval): + raise ValueError( + f"Received invalid interval for slice: '{interval}'" + ) + if ":" in interval: + return slice(*map( + lambda x: int(x.strip()) if x.strip() else None, + interval.split(':') + )) + return int(interval) + + @app.command() def exec_pipeline( config: Annotated[Path, typer.Option( @@ -182,6 +198,11 @@ def exec_pipeline( help=("Key in the configuration file identifying " "the pipeline object to execute.") )] = "pipeline", + steps: Annotated[Optional[str], typer.Option( + help=("Run only some steps of the pipeline. 
Accepted values are " + "indices, python slices (e.g., 0:3 or 2:10:100), and " + "string names of steps.") + )] = None, print_config: Annotated[bool, typer.Option( help=("Print config to be executed after overrides.") )] = False, @@ -207,6 +228,7 @@ def exec_pipeline( # to find the local python files imported from the pipeline file import os import sys + import re sys.path.append(os.path.dirname(config)) sys.path.append(os.getcwd()) @@ -225,6 +247,12 @@ def exec_pipeline( print("#="*50) print() pipeline = parser.parse_pipeline(pipeline_nested_key=pipe_key) + if steps: + if not re.match("\d+(:\d+)?(:\d+)?", steps): + print(f"Looking for step name '{steps}'") + else: + steps = str_to_slice(steps) + pipeline = pipeline[steps] pipeline.execute() diff --git a/src/itwinai/loggers.py b/src/itwinai/loggers.py index aeb850b0..90026037 100644 --- a/src/itwinai/loggers.py +++ b/src/itwinai/loggers.py @@ -203,7 +203,7 @@ def __init__( saved_abs_path = os.path.abspath(self.savedir) self.tracking_uri = pathlib.Path(saved_abs_path).as_uri() # self.tracking_uri = "file://" + self.savedir - print(f'MLFLOW URI: {self.tracking_uri}') + # print(f'MLFLOW URI: {self.tracking_uri}') # TODO: for pytorch lightning: # mlflow.pytorch.autolog() diff --git a/src/itwinai/torch/trainer.py b/src/itwinai/torch/trainer.py index e5b8c608..ad48ec9e 100644 --- a/src/itwinai/torch/trainer.py +++ b/src/itwinai/torch/trainer.py @@ -40,7 +40,8 @@ class Config: - def __init__(self, my_dict): + def __init__(self, my_dict: Optional[Dict] = None): + my_dict = my_dict if my_dict is not None else {} self.__dict__.update(my_dict) @@ -69,6 +70,12 @@ class TorchTrainer(Trainer, LogMixin): metrics. Defaults to None. name (Optional[str], optional): trainer custom name. Defaults to None. """ + # TODO: + # - add checkpointing. + # - extract BaseTorchTrainer and extend it creating a set of trainer + # templates (e.g.. GAN, Classifier, Transformer) allowing scientists + # to reuse ML algos. + # - improve get from configuration object _strategy: TorchDistributedStrategy = None @@ -276,7 +283,8 @@ def execute( validation_dataset: Dataset, test_dataset: Dataset ) -> Tuple[Dataset, Dataset, Dataset, Any]: - """Trains a machine learning model. + """Prepares distributed environment and data structures + for the actual training. Args: train_dataset (Dataset): training dataset. @@ -305,6 +313,7 @@ def execute( if self.strategy.is_main_worker: self.logger.destroy_logger_context() + self.strategy.clean_up() return train_dataset, validation_dataset, test_dataset, self.model def _set_epoch_dataloaders(self, epoch: int): @@ -339,7 +348,18 @@ def log( ) def train(self): - """Main training logic (training loop).""" + """Trains a machine learning model. + Main training loop/logic. + + Args: + train_dataset (Dataset): training dataset. + validation_dataset (Dataset): validation dataset. + test_dataset (Dataset): test dataset. + + Returns: + Tuple[Dataset, Dataset, Dataset, Any]: training dataset, + validation dataset, test dataset, trained model. + """ # start_time = time.perf_counter() for epoch in range(self.epochs): epoch_n = epoch + 1 diff --git a/use-cases/mnist/torch/Dockerfile b/use-cases/mnist/torch/Dockerfile index b4cf3654..dcc75225 100644 --- a/use-cases/mnist/torch/Dockerfile +++ b/use-cases/mnist/torch/Dockerfile @@ -13,6 +13,3 @@ RUN pip install --no-cache-dir . 
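For the `--steps` option added to `exec-pipeline` above, the accepted strings are converted into an int or a Python `slice` that is then used to index the pipeline (`pipeline[steps]`). A simplified restatement of that conversion, without the regex validation shown in the patch:

```python
def str_to_slice(interval: str):
    """Turn '2' into 2 and '0:3' or '2:10:2' into the matching slice."""
    if ":" in interval:
        return slice(*map(lambda x: int(x) if x.strip() else None,
                          interval.split(":")))
    return int(interval)


print(str_to_slice("0:3"))    # slice(0, 3, None)
print(str_to_slice("2:10:2"))  # slice(2, 10, 2)
print(str_to_slice("2"))      # 2
```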
# Add torch MNIST use case COPY use-cases/mnist/torch/* ./ - -# Run inference -CMD [ "python", "train.py", "-p", "inference-pipeline.yaml"] \ No newline at end of file diff --git a/use-cases/mnist/torch/README.md b/use-cases/mnist/torch/README.md index c953671f..e333f14b 100644 --- a/use-cases/mnist/torch/README.md +++ b/use-cases/mnist/torch/README.md @@ -3,10 +3,18 @@ ## Training ```bash -python train.py -p pipeline.yaml [-d] +# Download dataset and exit +itwinai exec-pipeline --config config.yaml --pipe-key training_pipeline --steps dataloading_step + +# Run the whole training pipeline +itwinai exec-pipeline --config config.yaml --pipe-key training_pipeline ``` -Use `-d` flag to run only the fist step in the pipeline. +View training logs on MLFLow server (if activated from the configuration): + +```bash +mlflow ui --backend-store-uri mllogs/mlflow/ +``` ## Inference @@ -30,24 +38,37 @@ Use `-d` flag to run only the fist step in the pipeline. folder containing a CSV file with the predictions as rows. ```bash - python train.py -p inference-pipeline.yaml + itwinai exec-pipeline --config config.yaml --pipe-key inference_pipeline ``` Note the same entry point as for training. -### Docker image +## Docker image Build from project root with ```bash # Local -docker buildx build -t itwinai-mnist-torch-inference -f use-cases/mnist/torch/Dockerfile . +docker buildx build -t itwinai:0.0.1-mnist-torch-0.1 -f use-cases/mnist/torch/Dockerfile . # Ghcr.io -docker buildx build -t ghcr.io/intertwin-eu/itwinai-mnist-torch-inference:0.0.1 -f use-cases/mnist/torch/Dockerfile . -docker push ghcr.io/intertwin-eu/itwinai-mnist-torch-inference:0.0.1 +docker buildx build -t ghcr.io/intertwin-eu/itwinai:0.0.1-mnist-torch-0.1 -f use-cases/mnist/torch/Dockerfile . +docker push ghcr.io/intertwin-eu/itwinai:0.0.1-mnist-torch-0.1 ``` +### Training with Docker container + +```bash +docker run -it --rm --name running-inference \ + -v "$PWD":/usr/data ghcr.io/intertwin-eu/itwinai:0.01-mnist-torch-0.1 \ + /bin/bash -c "itwinai exec-pipeline --print-config \ + --config /usr/src/app/config.yaml \ + --pipe-key training_pipeline \ + -o dataset_root=/usr/data/mnist-dataset " +``` + +### Inference with Docker container + From wherever a sample of MNIST jpg images is available (folder called 'mnist-sample-data/'): @@ -62,7 +83,14 @@ From wherever a sample of MNIST jpg images is available ``` ```bash -docker run -it --rm --name running-inference -v "$PWD":/usr/data ghcr.io/intertwin-eu/itwinai-mnist-torch-inference:0.0.1 +docker run -it --rm --name running-inference \ + -v "$PWD":/usr/data ghcr.io/intertwin-eu/itwinai:0.01-mnist-torch-0.1 \ + /bin/bash -c "itwinai exec-pipeline --print-config \ + --config /usr/src/app/config.yaml \ + --pipe-key inference_pipeline \ + -o test_data_path=/usr/data/mnist-sample-data \ + -o inference_model_mlflow_uri=/usr/src/app/mnist-pre-trained.pth \ + -o predictions_dir=/usr/data/mnist-predictions " ``` This command will store the results in a folder called "mnist-predictions": diff --git a/use-cases/mnist/torch/config.yaml b/use-cases/mnist/torch/config.yaml new file mode 100644 index 00000000..c5d71204 --- /dev/null +++ b/use-cases/mnist/torch/config.yaml @@ -0,0 +1,99 @@ +# General config +dataset_root: .tmp/ +num_classes: 10 +batch_size: 64 +num_workers_dataloader: 4 +pin_memory: False +lr: 0.001 +momentum: 0.9 +fp16_allreduce: False +use_adasum: False +gradient_predivide_factor: 1.0 +epochs: 2 +strategy: ddp +test_data_path: mnist-sample-data +inference_model_mlflow_uri: mnist-pre-trained.pth 
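A Python equivalent of the `itwinai exec-pipeline` commands shown in the README above, assuming `ConfigParser` accepts the same configuration path and the override mapping that the CLI builds from its `-o key=value` flags; the pipeline key matches the `training_pipeline` entry of the new `config.yaml`:

```python
from itwinai.parser import ConfigParser

parser = ConfigParser(
    config="config.yaml",
    override_keys={"dataset_root": ".tmp/", "strategy": "ddp"},
)
pipeline = parser.parse_pipeline(pipeline_nested_key="training_pipeline")
pipeline.execute()
```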
+predictions_dir: mnist-predictions +predictions_file: predictions.csv +class_labels: null + +# Workflows configuration +training_pipeline: + class_path: itwinai.pipeline.Pipeline + init_args: + steps: + dataloading_step: + class_path: dataloader.MNISTDataModuleTorch + init_args: + save_path: ${dataset_root} + + training_step: + class_path: itwinai.torch.trainer.TorchTrainer + init_args: + config: + batch_size: ${batch_size} + num_workers: ${num_workers_dataloader} + pin_memory: ${pin_memory} + lr: ${lr} + momentum: ${momentum} + fp16_allreduce: ${fp16_allreduce} + use_adasum: ${use_adasum} + gradient_predivide_factor: ${gradient_predivide_factor} + + model: + class_path: model.Net + epochs: ${epochs} + metrics: + accuracy: + class_path: torchmetrics.classification.MulticlassAccuracy + init_args: + num_classes: ${num_classes} + precision: + class_path: torchmetrics.classification.MulticlassPrecision + init_args: + num_classes: ${num_classes} + recall: + class_path: torchmetrics.classification.MulticlassRecall + init_args: + num_classes: ${num_classes} + logger: + class_path: itwinai.loggers.LoggersCollection + init_args: + loggers: + - class_path: itwinai.loggers.ConsoleLogger + init_args: + log_freq: 100 + - class_path: itwinai.loggers.MLFlowLogger + init_args: + experiment_name: MNIST classifier + log_freq: batch + strategy: ${strategy} + # checkpoint_every: 1 + # cluster: + # class_path: itwinai.torch.cluster.LocalCluster + # init_args: + # gpus: '0,1,2' + # backend: nccl + +inference_pipeline: + class_path: itwinai.pipeline.Pipeline + init_args: + steps: + - class_path: dataloader.MNISTPredictLoader + init_args: + test_data_path: ${test_data_path} + + - class_path: itwinai.torch.inference.MulticlassTorchPredictor + init_args: + model: + class_path: itwinai.torch.inference.TorchModelLoader + init_args: + model_uri: ${inference_model_mlflow_uri} + test_dataloader_kwargs: + batch_size: ${batch_size} + + - class_path: saver.TorchMNISTLabelSaver + init_args: + save_dir: ${predictions_dir} + predictions_file: ${predictions_file} + class_labels: ${class_labels} \ No newline at end of file diff --git a/use-cases/mnist/torch/inference-pipeline.yaml b/use-cases/mnist/torch/inference-pipeline.yaml deleted file mode 100644 index 5edf6ce9..00000000 --- a/use-cases/mnist/torch/inference-pipeline.yaml +++ /dev/null @@ -1,22 +0,0 @@ -pipeline: - class_path: itwinai.pipeline.Pipeline - init_args: - steps: - - class_path: dataloader.MNISTPredictLoader - init_args: - test_data_path: /usr/data/mnist-sample-data - - - class_path: itwinai.torch.inference.MulticlassTorchPredictor - init_args: - model: - class_path: itwinai.torch.inference.TorchModelLoader - init_args: - model_uri: mnist-pre-trained.pth - test_dataloader_kwargs: - batch_size: 3 - - - class_path: saver.TorchMNISTLabelSaver - init_args: - save_dir: /usr/data/mnist-predictions - predictions_file: predictions.csv - class_labels: null \ No newline at end of file diff --git a/use-cases/mnist/torch/pipeline.yaml b/use-cases/mnist/torch/pipeline.yaml deleted file mode 100644 index 4b1b04cb..00000000 --- a/use-cases/mnist/torch/pipeline.yaml +++ /dev/null @@ -1,56 +0,0 @@ -pipeline: - class_path: itwinai.pipeline.Pipeline - init_args: - steps: - dataloading_step: - class_path: dataloader.MNISTDataModuleTorch - init_args: - save_path: .tmp/ - - training_step: - class_path: itwinai.torch.trainer.TorchTrainer - init_args: - config: - batch_size: 64 - num_workers: 4 - pin_memory: False - lr: 0.001 - momentum: 0.9 - fp16_allreduce: False - use_adasum: 
False - gradient_predivide_factor: 1.0 - - model: - class_path: model.Net - epochs: 2 - metrics: - accuracy: - class_path: torchmetrics.classification.MulticlassAccuracy - init_args: - num_classes: 10 - precision: - class_path: torchmetrics.classification.MulticlassPrecision - init_args: - num_classes: 10 - recall: - class_path: torchmetrics.classification.MulticlassRecall - init_args: - num_classes: 10 - logger: - class_path: itwinai.loggers.LoggersCollection - init_args: - loggers: - - class_path: itwinai.loggers.ConsoleLogger - init_args: - log_freq: 100 - - class_path: itwinai.loggers.MLFlowLogger - init_args: - experiment_name: MNIST classifier - log_freq: batch - strategy: ddp - # checkpoint_every: 1 - # cluster: - # class_path: itwinai.torch.cluster.LocalCluster - # init_args: - # gpus: '0,1,2' - # backend: nccl diff --git a/use-cases/mnist/torch/runall.sh b/use-cases/mnist/torch/runall.sh new file mode 100644 index 00000000..e81ed74d --- /dev/null +++ b/use-cases/mnist/torch/runall.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# Python virtual environment (no conda/micromamba) +PYTHON_VENV="../../../envAI_hdfml" + +# Clear SLURM logs (*.out and *.err files) +rm -rf logs_slurm +mkdir logs_slurm +rm -rf logs_torchrun + +# DDP itwinai +DIST_MODE="ddp" +RUN_NAME="ddp-itwinai" +TRAINING_CMD="$PYTHON_VENV/bin/itwinai exec-pipeline --config config.yaml --pipe-key training_pipeline -o strategy=ddp" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh + +# DeepSpeed itwinai +DIST_MODE="deepspeed" +RUN_NAME="deepspeed-itwinai" +TRAINING_CMD="$PYTHON_VENV/bin/itwinai exec-pipeline --config config.yaml --pipe-key training_pipeline -o strategy=deepspeed" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh + +# Horovod itwinai +DIST_MODE="horovod" +RUN_NAME="horovod-itwinai" +TRAINING_CMD="$PYTHON_VENV/bin/itwinai exec-pipeline --config config.yaml --pipe-key training_pipeline -o strategy=horovod" +sbatch --export=ALL,DIST_MODE="$DIST_MODE",RUN_NAME="$RUN_NAME",TRAINING_CMD="$TRAINING_CMD",PYTHON_VENV="$PYTHON_VENV" \ + --job-name="$RUN_NAME-n$N" \ + --output="logs_slurm/job-$RUN_NAME-n$N.out" \ + --error="logs_slurm/job-$RUN_NAME-n$N.err" \ + slurm.sh \ No newline at end of file diff --git a/use-cases/mnist/torch/slurm.sh b/use-cases/mnist/torch/slurm.sh new file mode 100644 index 00000000..2a2a15d8 --- /dev/null +++ b/use-cases/mnist/torch/slurm.sh @@ -0,0 +1,116 @@ +#!/bin/bash + +# SLURM jobscript for JSC systems + +# Job configuration +#SBATCH --job-name=distributed_training +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=00:30:00 + +# Resources allocation +#SBATCH --partition=batch +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=4 +#SBATCH --cpus-per-gpu=4 +#SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +# Load environment modules +ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py + +# Job info +echo "DEBUG: TIME: $(date)" +sysN="$(uname -n | cut -f2- -d.)" +sysN="${sysN%%[0-9]*}" +echo "Running on system: $sysN" +echo 
"DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +if [ "$DEBUG" = true ] ; then + echo "DEBUG: NCCL_DEBUG=INFO" + export NCCL_DEBUG=INFO +fi +echo + +# Setup env for distributed ML +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_GPU" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_GPU +fi + +# Env vairables check +if [ -z "$DIST_MODE" ]; then + >&2 echo "ERROR: env variable DIST_MODE is not set. Allowed values are 'horovod', 'ddp' or 'deepspeed'" + exit 1 +fi +if [ -z "$RUN_NAME" ]; then + >&2 echo "WARNING: env variable RUN_NAME is not set. It's a way to identify some specific run of an experiment." + RUN_NAME=$DIST_MODE +fi +if [ -z "$TRAINING_CMD" ]; then + >&2 echo "ERROR: env variable TRAINING_CMD is not set. It's the python command to execute." + exit 1 +fi +if [ -z "$PYTHON_VENV" ]; then + >&2 echo "WARNING: env variable PYTHON_VENV is not set. It's the path to a python virtual environment." +else + # Activate Python virtual env + source $PYTHON_VENV/bin/activate +fi + +# Get GPUs info per node +srun --cpu-bind=none --ntasks-per-node=1 bash -c 'echo -e "NODE hostname: $(hostname)\n$(nvidia-smi)\n\n"' + +# Launch training +if [ "$DIST_MODE" == "ddp" ] ; then + echo "DDP training: $TRAINING_CMD" + srun --cpu-bind=none --ntasks-per-node=1 \ + bash -c "torchrun \ + --log_dir='logs_torchrun' \ + --nnodes=$SLURM_NNODES \ + --nproc_per_node=$SLURM_GPUS_PER_NODE \ + --rdzv_id=$SLURM_JOB_ID \ + --rdzv_conf=is_host=\$(((SLURM_NODEID)) && echo 0 || echo 1) \ + --rdzv_backend=c10d \ + --rdzv_endpoint='$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)'i:29500 \ + $TRAINING_CMD" +elif [ "$DIST_MODE" == "deepspeed" ] ; then + echo "DEEPSPEED training: $TRAINING_CMD" + MASTER_ADDR=$(scontrol show hostnames "\$SLURM_JOB_NODELIST" | head -n 1)i + export MASTER_ADDR + export MASTER_PORT=29500 + + srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ + $TRAINING_CMD + + # # Run with deepspeed launcher: set --ntasks-per-node=1 + # # https://www.deepspeed.ai/getting-started/#multi-node-environment-variables + # export NCCL_IB_DISABLE=1 + # export NCCL_SOCKET_IFNAME=eth0 + # nodelist=$(scontrol show hostname $SLURM_NODELIST) + # echo "$nodelist" | sed -e 's/$/ slots=4/' > .hostfile + # # Requires passwordless SSH access among compute node + # srun --cpu-bind=none deepspeed --hostfile=.hostfile $TRAINING_CMD --deepspeed + # rm .hostfile +elif [ "$DIST_MODE" == "horovod" ] ; then + echo "HOROVOD training: $TRAINING_CMD" + srun --cpu-bind=none --ntasks-per-node=$SLURM_GPUS_PER_NODE --cpus-per-task=$SLURM_CPUS_PER_GPU \ + $TRAINING_CMD +else + >&2 echo "ERROR: unrecognized \$DIST_MODE env variable" + exit 1 +fi diff --git a/use-cases/mnist/torch/train.py b/use-cases/mnist/torch/train.py deleted file mode 100644 index 97f53093..00000000 --- a/use-cases/mnist/torch/train.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Training pipeline. To run this script, use the following commands. 
- -On login node: - ->>> micromamba run -p ../../../.venv-pytorch/ \ - python train.py -p pipeline.yaml -d - -On compute nodes: - ->>> micromamba run -p ../../../.venv-pytorch/ \ - python train.py -p pipeline.yaml - -""" - -import argparse - -from itwinai.parser import ConfigParser - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "-p", "--pipeline", type=str, required=True, - help='Configuration file to the pipeline to execute.' - ) - parser.add_argument( - '-d', '--download-only', - action=argparse.BooleanOptionalAction, - default=False, - help=('Whether to download only the dataset and exit execution ' - '(suggested on login nodes of HPC systems)') - ) - args = parser.parse_args() - - # Create parser for the pipeline - pipe_parser = ConfigParser(config=args.pipeline) - pipeline = pipe_parser.parse_pipeline() - - if args.download_only: - print('Downloading datasets and exiting...') - pipeline = pipeline[:1] - - pipeline.execute() From 98079c533240bbe817afd0df0a6e5b8e639b18fd Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:08:25 +0200 Subject: [PATCH 132/171] UPDATE structure: remove unused files and refactor tests --- .github/linters/.jscpd.json | 3 +- .../{workflows-dt.yml => pytest.yml} | 4 +- .github/workflows/test-torch.yml | 17 + src/itwinai/cli.py | 26 +- src/itwinai/cluster.py | 72 -- src/itwinai/components.py | 8 - src/itwinai/loggers.py | 1 - src/itwinai/parser.py | 245 +---- src/itwinai/tensorflow/distributed.py | 16 +- src/itwinai/tensorflow/trainer.py | 27 +- src/itwinai/torch/cluster.py | 225 ----- src/itwinai/torch/engine.py | 276 ------ src/itwinai/torch/inference.py | 3 +- src/itwinai/torch/mlflow.py | 2 + src/itwinai/torch/trainer.py | 908 ++---------------- src/itwinai/torch/utils.py | 84 -- src/itwinai/utils.py | 82 +- tests/components/test_components.py | 5 - tests/test_cli.py | 26 - tests/use-cases/conftest.py | 33 +- tests/use-cases/test_3dgan.py | 45 +- tests/use-cases/test_cyclones.py | 12 +- tests/use-cases/test_mnist.py | 121 ++- tutorials/ml-workflows/basic_components.py | 6 - use-cases/3dgan/trainer.py | 6 - use-cases/cyclones/README.md | 12 + use-cases/cyclones/trainer.py | 6 - use-cases/mnist/tensorflow/pipeline.yaml | 12 +- use-cases/mnist/tensorflow/trainer.py | 6 - use-cases/mnist/torch-lightning/README.md | 17 + .../{pipeline.yaml => config.yaml} | 4 +- use-cases/mnist/torch-lightning/dataloader.py | 2 +- use-cases/mnist/torch-lightning/train.py | 44 - use-cases/mnist/torch-lightning/trainer.py | 40 - use-cases/mnist/torch/Dockerfile | 3 +- 35 files changed, 353 insertions(+), 2046 deletions(-) rename .github/workflows/{workflows-dt.yml => pytest.yml} (88%) create mode 100644 .github/workflows/test-torch.yml delete mode 100644 src/itwinai/cluster.py delete mode 100644 src/itwinai/torch/cluster.py delete mode 100644 src/itwinai/torch/engine.py delete mode 100644 src/itwinai/torch/utils.py delete mode 100644 tests/test_cli.py create mode 100644 use-cases/cyclones/README.md create mode 100644 use-cases/mnist/torch-lightning/README.md rename use-cases/mnist/torch-lightning/{pipeline.yaml => config.yaml} (96%) delete mode 100644 use-cases/mnist/torch-lightning/train.py delete mode 100644 use-cases/mnist/torch-lightning/trainer.py diff --git a/.github/linters/.jscpd.json b/.github/linters/.jscpd.json index 1a035770..8a003c54 100644 --- a/.github/linters/.jscpd.json +++ b/.github/linters/.jscpd.json @@ -1,7 +1,6 @@ { "threshold": 2.0, "ignore": [ - "**/itwinai/loggers.py", - 
"**/itwinai/torch/engine.py" + "**/itwinai/loggers.py" ] } \ No newline at end of file diff --git a/.github/workflows/workflows-dt.yml b/.github/workflows/pytest.yml similarity index 88% rename from .github/workflows/workflows-dt.yml rename to .github/workflows/pytest.yml index 53a72e43..ecee2bc1 100644 --- a/.github/workflows/workflows-dt.yml +++ b/.github/workflows/pytest.yml @@ -1,10 +1,12 @@ --- -name: Test workflows +name: Unit and integration tests on: pull_request: branches: [main, dev] +# TODO: use container and set custom TORCH_ENV and TF_ENV env variables + jobs: test-itwinai: name: Test itwinai with pytest diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml new file mode 100644 index 00000000..834e2941 --- /dev/null +++ b/.github/workflows/test-torch.yml @@ -0,0 +1,17 @@ +name: Test workflows based on torch +on: [push] + +jobs: + my_job: + runs-on: ubuntu-latest + container: + image: pytorch/pytorch:latest # docker://{docker-image-name}:{tag} + options: --volume ${{ github.workspace }}:${{ github.workspace }} + steps: + - name: Run commands in container + run: | + # Now the repository code is available inside the Docker container + # You can use it as needed + cd ${{ github.workspace }} && + # Run your commands here + ls -la diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py index d5535ada..6c27d069 100644 --- a/src/itwinai/cli.py +++ b/src/itwinai/cli.py @@ -16,7 +16,7 @@ import typer -app = typer.Typer() +app = typer.Typer(pretty_exceptions_enable=False) @app.command() @@ -173,22 +173,6 @@ def scalability_report( print("Archived logs and plot at: ", archive_name) -def str_to_slice(interval: str) -> slice: - import re - # TODO: add support for slices starting with empty index - # e.g., :20:3 - if not re.match("\d+(:\d+)?(:\d+)?", interval): - raise ValueError( - f"Received invalid interval for slice: '{interval}'" - ) - if ":" in interval: - return slice(*map( - lambda x: int(x.strip()) if x.strip() else None, - interval.split(':') - )) - return int(interval) - - @app.command() def exec_pipeline( config: Annotated[Path, typer.Option( @@ -229,11 +213,13 @@ def exec_pipeline( import os import sys import re + from .utils import str_to_slice sys.path.append(os.path.dirname(config)) sys.path.append(os.getcwd()) # Parse and execute pipeline from itwinai.parser import ConfigParser + overrides_list = overrides_list if overrides_list is not None else [] overrides = { k: v for k, v in map(lambda x: (x.split('=')[0], x.split('=')[1]), overrides_list) @@ -248,13 +234,17 @@ def exec_pipeline( print() pipeline = parser.parse_pipeline(pipeline_nested_key=pipe_key) if steps: - if not re.match("\d+(:\d+)?(:\d+)?", steps): + if not re.match(r"\d+(:\d+)?(:\d+)?", steps): print(f"Looking for step name '{steps}'") else: steps = str_to_slice(steps) pipeline = pipeline[steps] pipeline.execute() + # Cleanup PYTHONPATH + sys.path.pop() + sys.path.pop() + @app.command() def mlflow_ui( diff --git a/src/itwinai/cluster.py b/src/itwinai/cluster.py deleted file mode 100644 index 7b9f57e0..00000000 --- a/src/itwinai/cluster.py +++ /dev/null @@ -1,72 +0,0 @@ -"""Cluster environments where to run AI workflows.""" - -from __future__ import annotations -from abc import ABCMeta, abstractmethod -import os -from contextlib import contextmanager - - -def setup_for_distributed(is_main): - """ - This function disables printing when not in master process - """ - import builtins as __builtin__ - builtin_print = __builtin__.print - - def print(*args, **kwself): - force = kwself.pop('force', 
False) - if is_main or force: - builtin_print(*args, **kwself) - - __builtin__.print = print - - -def handle_sigusr1(signum, frame): - os.system(f'scontrol requeue {os.getenv("SLURM_JOB_ID")}') - exit() - - -def handle_sigterm(signum, frame): - pass - - -class ClusterEnvironment(metaclass=ABCMeta): - port: int = -1 - ngpus_per_node: int = -1 - global_world_size: int = -1 - global_rank: int = -1 - local_world_size: int = -1 - local_rank: int = -1 - rnd_seed: int = None - distributed: bool = False - # This flag tells whether the user wants to use the GPU(s) - use_cuda: bool = False - - @property - def backend(self) -> str: - return self._backend - - @backend.setter - def backend(self, backend_name: str) -> None: - self._set_backend(backend_name) - - def _set_backend(self, backend_name: str) -> None: - # Override to implement sanitization - self._backend = backend_name - - @abstractmethod - def is_main_worker(self) -> bool: - """Tells if the current process is the main/master process.""" - pass - - @abstractmethod - def is_cuda_available(self) -> bool: - pass - - @abstractmethod - @contextmanager - def init_dist_gpu(self, *args, **kwargs): - pass - - def cleanup_resources(self): - pass diff --git a/src/itwinai/components.py b/src/itwinai/components.py index 1f41bacd..eca2e570 100644 --- a/src/itwinai/components.py +++ b/src/itwinai/components.py @@ -216,14 +216,6 @@ def execute( validation dataset, test dataset, trained model. """ - @abstractmethod - def save_state(self): - pass - - @abstractmethod - def load_state(self): - pass - class Predictor(BaseComponent): """Applies a pre-trained machine learning model to unseen data.""" diff --git a/src/itwinai/loggers.py b/src/itwinai/loggers.py index 90026037..7f86ffcb 100644 --- a/src/itwinai/loggers.py +++ b/src/itwinai/loggers.py @@ -10,7 +10,6 @@ import wandb import mlflow -# import mlflow.keras BASE_EXP_NAME: str = 'unk_experiment' diff --git a/src/itwinai/parser.py b/src/itwinai/parser.py index 0001627b..254e91a9 100644 --- a/src/itwinai/parser.py +++ b/src/itwinai/parser.py @@ -76,14 +76,11 @@ class ConfigParser: >>> init_args: >>> save_path: .tmp/ >>> - >>> - class_path: itwinai.torch.trainer.TorchTrainerMG + >>> - class_path: itwinai.torch.trainer.TorchTrainer >>> init_args: >>> model: >>> class_path: model.Net - >>> loss: - >>> class_path: torch.nn.NLLLoss - >>> init_args: - >>> reduction: mean + >>> >>> from itwinai.parser import ConfigParser >>> >>> parser = ConfigParser( @@ -244,241 +241,3 @@ def __init__( "-c", "--config", action=ActionConfigFile, help="Path to a configuration file in json or yaml format." ) - - -# class ConfigParser2: -# """ -# Deprecated: this pipeline structure does not allow for -# nested pipelines. However, it is more readable and the linking -# from name to step data could be achieved with OmegaConf. This -# could be reused in the future: left as example. - -# Parses a configuration file, merging the steps into -# the pipeline and returning a pipeline object. -# It also provides functionalities for dynamic override -# of fields by means of nested key notation. 
- -# Example: - -# >>> # pipeline.yaml -# >>> pipeline: -# >>> class_path: itwinai.pipeline.Pipeline -# >>> steps: [server, client] -# >>> -# >>> server: -# >>> class_path: mycode.ServerOptions -# >>> init_args: -# >>> host: localhost -# >>> port: 80 -# >>> -# >>> client: -# >>> class_path: mycode.ClientOptions -# >>> init_args: -# >>> url: http://${server.init_args.host}:${server.init_args.port}/ - -# >>> from itwinai.parser import ConfigParser2 -# >>> -# >>> parser = ConfigParser2( -# >>> config='pipeline.yaml', -# >>> override_keys={ -# >>> 'server.init_args.port': 777 -# >>> } -# >>> ) -# >>> pipeline = parser.parse_pipeline() -# >>> print(pipeline) -# >>> print(pipeline.steps) -# >>> print(pipeline.steps['server'].port) -# >>> -# >>> server = parser.parse_step('server') -# >>> print(server) -# >>> print(server.port) -# """ - -# config: Dict -# pipeline: Pipeline - -# def __init__( -# self, -# config: Union[str, Dict], -# override_keys: Optional[Dict[str, Any]] = None -# ) -> None: -# self.config = config -# self.override_keys = override_keys -# if isinstance(self.config, str): -# self.config = load_yaml(self.config) -# self._dynamic_override_keys() -# self._omegaconf_interpolate() - -# def _dynamic_override_keys(self): -# if self.override_keys is not None: -# for key_chain, value in self.override_keys.items(): -# add_replace_field(self.config, key_chain, value) - -# def _omegaconf_interpolate(self) -> None: -# """Performs variable interpolation with OmegaConf on internal -# configuration file. -# """ -# conf = OmegaConf.create(self.config) -# self.config = OmegaConf.to_container(conf, resolve=True) - -# def parse_pipeline( -# self, -# pipeline_nested_key: str = "pipeline", -# verbose: bool = False -# ) -> Pipeline: -# """Merges steps into pipeline and parses it. - -# Args: -# pipeline_nested_key (str, optional): nested key in the -# configuration file identifying the pipeline object. -# Defaults to "pipeline". -# verbose (bool): if True, prints the assembled pipeline -# to console formatted as JSON. - -# Returns: -# Pipeline: instantiated pipeline. 
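Concretely, the merge performed by parse_pipeline amounts to replacing the list of step names with a mapping from name to full step definition. A rough sketch on plain dictionaries, reusing the server/client names from the example above (values are placeholders):

config = {
    'pipeline': {'class_path': 'itwinai.pipeline.Pipeline',
                 'steps': ['server', 'client']},
    'server': {'class_path': 'mycode.ServerOptions', 'init_args': {'port': 80}},
    'client': {'class_path': 'mycode.ClientOptions', 'init_args': {'url': '...'}},
}

pipe_dict = config['pipeline']
step_names = pipe_dict.pop('steps')
pipe_dict.setdefault('init_args', {})['steps'] = {
    name: config[name] for name in step_names
}
# pipe_dict is now a self-contained description that a parser can instantiate.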
-# """ -# pipe_parser = JAPArgumentParser() -# pipe_parser.add_subclass_arguments(Pipeline, pipeline_nested_key) -# pipe_dict = self.config[pipeline_nested_key] - -# # Pop steps list from pipeline dictionary -# steps_list = pipe_dict['steps'] -# del pipe_dict['steps'] - -# # Link steps with respective dictionaries -# if not pipe_dict.get('init_args'): -# pipe_dict['init_args'] = {} -# steps_dict = pipe_dict['init_args']['steps'] = {} -# for step_name in steps_list: -# steps_dict[step_name] = self.config[step_name] -# pipe_dict = {pipeline_nested_key: pipe_dict} - -# if verbose: -# print("Assembled pipeline:") -# print(json.dumps(pipe_dict, indent=4)) - -# # Parse pipeline dict once merged with steps -# conf = pipe_parser.parse_object(pipe_dict) -# pipe = pipe_parser.instantiate_classes(conf) -# self.pipeline = pipe[pipeline_nested_key] -# return self.pipeline - -# def parse_step( -# self, -# step_name: str, -# verbose: bool = False -# ) -> BaseComponent: -# step_dict_config = self.config[step_name] - -# if verbose: -# print(f"STEP '{step_name}' CONFIG:") -# print(json.dumps(step_dict_config, indent=4)) - -# # Wrap config under "step" field and parse it -# step_dict_config = {'step': step_dict_config} -# step_parser = JAPArgumentParser() -# step_parser.add_subclass_arguments(BaseComponent, "step") -# parsed_namespace = step_parser.parse_object(step_dict_config) -# return step_parser.instantiate_classes(parsed_namespace)["step"] - - -# class ItwinaiCLI2: -# """ -# Deprecated: the dynamic override does not work with nested parameters -# and may be confusing. - -# CLI tool for executing a configuration file, with dynamic -# override of fields and variable interpolation with Omegaconf. - -# Example: - -# >>> # train.py -# >>> from itwinai.parser import ItwinaiCLI -# >>> cli = ItwinaiCLI() -# >>> cli.pipeline.execute() - -# >>> # pipeline.yaml -# >>> pipeline: -# >>> class_path: itwinai.pipeline.Pipeline -# >>> steps: [server, client] -# >>> -# >>> server: -# >>> class_path: mycode.ServerOptions -# >>> init_args: -# >>> host: localhost -# >>> port: 80 -# >>> -# >>> client: -# >>> class_path: mycode.ClientOptions -# >>> init_args: -# >>> url: http://${server.init_args.host}:${server.init_args.port}/ - -# From command line: - -# >>> python train.py --config itwinai-conf.yaml --help -# >>> python train.py --config itwinai-conf.yaml -# >>> python train.py --config itwinai-conf.yaml --server.port 8080 -# """ -# _parser: JAPArgumentParser -# _config: Dict -# pipeline: Pipeline - -# def __init__( -# self, -# pipeline_nested_key: str = "pipeline", -# parser_mode: str = "omegaconf" -# ) -> None: -# self.pipeline_nested_key = pipeline_nested_key -# self.parser_mode = parser_mode -# self._init_parser() -# self._parser.add_argument(f"--{self.pipeline_nested_key}", type=dict) -# self._add_steps_arguments() -# self._config = self._parser.parse_args() - -# # Merge steps into pipeline and parse it -# del self._config['config'] -# pipe_parser = ConfigParser2(config=self._config.as_dict()) -# self.pipeline = pipe_parser.parse_pipeline( -# pipeline_nested_key=self.pipeline_nested_key -# ) - -# def _init_parser(self): -# self._parser = JAPArgumentParser(parser_mode=self.parser_mode) -# self._parser.add_argument( -# "-c", "--config", action=ActionConfigFile, -# required=True, -# help="Path to a configuration file in json or yaml format." -# ) - -# def _add_steps_arguments(self): -# """Pre-parses the configuration file, dynamically adding all the -# component classes under 'steps' as arguments of the parser. 
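That dynamic registration relies on importing each step class from its dotted path. A minimal, self-contained version of such a lookup (the real dynamically_import_class lives in itwinai.utils; this sketch only illustrates the idea):

import importlib

def import_class(path: str) -> type:
    # Split 'pkg.module.ClassName' into module path and class name,
    # import the module and fetch the class object from it.
    module_name, _, class_name = path.rpartition('.')
    module = importlib.import_module(module_name)
    return getattr(module, class_name)

DataLoaderCls = import_class('torch.utils.data.DataLoader')
assert DataLoaderCls.__name__ == 'DataLoader'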
-# """ -# if "--config" not in sys.argv: -# raise ValueError( -# "--config parameter has to be specified with a " -# "valid path to a configuration file." -# ) -# config_path = sys.argv.index("--config") + 1 -# config_path = sys.argv[config_path] -# config = load_yaml(config_path) - -# # Add steps to parser -# steps = filter( -# lambda itm: itm[0] != self.pipeline_nested_key, -# config.items() -# ) -# steps = { -# step_name: step_data['class_path'] -# for step_name, step_data in steps -# } - -# for st_nested_key, step_class_str in steps.items(): -# step_class = dynamically_import_class(step_class_str) -# self._add_step_arguments( -# step_class=step_class, nested_key=st_nested_key) - -# def _add_step_arguments(self, step_class, nested_key): -# self._parser.add_subclass_arguments( -# baseclass=step_class, nested_key=nested_key) diff --git a/src/itwinai/tensorflow/distributed.py b/src/itwinai/tensorflow/distributed.py index e6c5f28a..64945ca8 100644 --- a/src/itwinai/tensorflow/distributed.py +++ b/src/itwinai/tensorflow/distributed.py @@ -1,17 +1,23 @@ -import tensorflow as tf import os +import tensorflow as tf +import tensorflow.distribute as dist def get_strategy(): """Strategy for distributed TensorFlow training""" - cluster_resolver = tf.distribute.cluster_resolver.SlurmClusterResolver( + if not os.environ.get('SLURM_JOB_ID'): + # TODO: improve + print('not in SLURM env!') + tf_dist_strategy = dist.MirroredStrategy() + return tf_dist_strategy, tf_dist_strategy.num_replicas_in_sync + cluster_resolver = dist.cluster_resolver.SlurmClusterResolver( port_base=12345) - implementation = tf.distribute.experimental.CommunicationImplementation.NCCL - communication_options = tf.distribute.experimental.CommunicationOptions( + implementation = dist.experimental.CommunicationImplementation.NCCL + communication_options = dist.experimental.CommunicationOptions( implementation=implementation) # declare distribution strategy - tf_dist_strategy = tf.distribute.MultiWorkerMirroredStrategy( + tf_dist_strategy = dist.MultiWorkerMirroredStrategy( cluster_resolver=cluster_resolver, communication_options=communication_options ) diff --git a/src/itwinai/tensorflow/trainer.py b/src/itwinai/tensorflow/trainer.py index d8c40012..51bfb97c 100644 --- a/src/itwinai/tensorflow/trainer.py +++ b/src/itwinai/tensorflow/trainer.py @@ -28,12 +28,19 @@ def instance_from_dict(obj_dict: Any) -> Any: return obj_dict +# TODO: the TF trainer is incomplete: +# - strategy is not received from constructor argument: if not needed, +# remove it +# - dataset is not distributed +# - much commented code that has to be removed or included + + class TensorflowTrainer(Trainer): def __init__( self, epochs, - train_dataset, - validation_dataset, + # train_dataset, + # validation_dataset, batch_size, callbacks, model_dict: Dict, @@ -61,14 +68,14 @@ def __init__( # get total number of workers print("Number of devices: {}".format(n_devices)) # distribute datasets among MirroredStrategy's replicas - dist_train_dataset = ( - tf_dist_strategy.experimental_distribute_dataset( - train_dataset - )) - dist_validation_dataset = ( - tf_dist_strategy.experimental_distribute_dataset( - validation_dataset - )) + # dist_train_dataset = ( + # tf_dist_strategy.experimental_distribute_dataset( + # train_dataset + # )) + # dist_validation_dataset = ( + # tf_dist_strategy.experimental_distribute_dataset( + # validation_dataset + # )) with self.strategy.scope(): # TODO: move loss, optimizer and metrics instantiation under # here diff --git 
a/src/itwinai/torch/cluster.py b/src/itwinai/torch/cluster.py deleted file mode 100644 index aece16e2..00000000 --- a/src/itwinai/torch/cluster.py +++ /dev/null @@ -1,225 +0,0 @@ -"""Cluster environments where to run AI workflows. Partially adapted from: -https://github.com/facebookresearch/detr/blob/master/util/misc.py and -https://github.com/ramyamounir/Template/blob/main/lib/utils/distributed.py -""" - -from __future__ import annotations -from typing import Optional -import os -import signal -import subprocess -from pathlib import Path -from contextlib import contextmanager - -import numpy as np - -import torch -import torch.distributed as dist -import torch.backends.cudnn as cudnn - -from ..cluster import ( - ClusterEnvironment, - setup_for_distributed, - handle_sigusr1, - handle_sigterm -) -from .types import TorchDistributedBackend as BackendT - - -def fix_random_seeds(seed=31): - """ - Fix random seeds. - """ - torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - np.random.seed(seed) - - -class TorchCluster(ClusterEnvironment): - def __init__(self) -> None: - super().__init__() - - def _set_backend(self, backend_name: str) -> None: - if backend_name not in BackendT: - raise ValueError( - "Unrecognized 'backend' field. Allowed values " - f"are: {BackendT.list()}. Received '{backend_name}'") - self._backend = backend_name - - def is_cuda_available(self) -> bool: - return self.use_cuda and torch.cuda.is_available() - - def is_main_worker(self) -> bool: - """Checks if the current process is the main/master process - in the whole job. - """ - return self.global_rank == 0 - - def cleanup_resources(self): - dist.barrier() - dist.destroy_process_group() - - -class LocalCluster(TorchCluster): - """Simple single node cluster with optional access to multiple GPUs.""" - - def __init__( - self, - backend: Optional[str] = None, - gpus: Optional[str] = '', - port: int = 49153, - rnd_seed: Optional[int] = 42 - ) -> None: - """Initialize local cluster for multi-GPU access. - - Args: - backend (Optional[str], optional): supported PyTorch backends. - If None, workload is not distributed. Defaults to None. - gpus (Optional[str], optional): list of visible GPU devices - (e.g., '1,2,3'). If empty string uses all available GPUs. - If None, CPU is used. Defaults to ''. - port (int, optional): TCP port used by the master process. - Defaults to 49153. - rnd_seed (Optional[int], optional): random seed to be setup after - all processes are setup. Defaults to 42. - """ - super().__init__() - self.backend = backend - self.gpus = gpus - self.port = port - self.dist_url = f'tcp://127.0.0.1:{self.port}' - self.rnd_seed = rnd_seed - - if self.gpus != '' and self.gpus is not None: - # Restrict the number of GPUs visible according to user needs - os.environ['CUDA_VISIBLE_DEVICES'] = self.gpus - - self.ngpus_per_node = torch.cuda.device_count() - self.global_rank = 0 - self.global_world_size = self.ngpus_per_node - - print(f"{self.ngpus_per_node} GPUs are available.") - self.distributed = True - # This flag tells whether the user wants to use the GPU(s) - self.use_cuda = ( - self.gpus is not None # GPU is not manually disabled - and torch.cuda.device_count() >= 1 # At least one GPU is selected - ) - if self.backend is None or self.ngpus_per_node <= 1: - print("Distributed has been disabled.") - self.distributed = False - self.dist_url = None - self.global_world_size = 1 - self.global_rank = 0 - if not self.is_cuda_available(): - print("CUDA disabled... 
Running on single CPU.") - self.use_cuda = False - self.distributed = False - self.dist_url = None - self.global_world_size = 1 - self.global_rank = 0 - - # Since single node case - self.local_world_size = self.global_world_size - - @contextmanager - def init_dist_gpu(self, worker_id) -> torch.device: - if self.distributed: - torch.cuda.set_device(worker_id) - self.global_rank += worker_id - # print(f'GLOBAL RANK: {self.global_rank}') - # Since single node case - self.local_rank = self.global_rank - # Simplification: worker ID mapped to GPU ID - self.gpu_id = worker_id - - try: - dist.init_process_group( - backend=self.backend, - init_method=self.dist_url, - world_size=self.global_world_size, - rank=self.global_rank - ) - fix_random_seeds(self.rnd_seed) - torch.cuda.set_device(self.gpu_id) - cudnn.benchmark = True - dist.barrier() - - setup_for_distributed(self.is_main_worker()) - print("SETUP DISTRIBUTED COMPLETE") - yield torch.device('cuda', worker_id) - finally: - self.cleanup_resources() - else: - # Distributed is disabled - # Since single node case - self.global_rank = 0 - self.local_rank = self.global_rank - if self.use_cuda: - torch.cuda.set_device(worker_id) - yield torch.device('cuda', worker_id) - else: - yield torch.device('cpu') - - -class SLURMCluster(TorchCluster): - """SLURM cluster with access to multi-node multi-GPU.""" - - def __init__( - self, - port: int = 49153, - backend: str = 'gloo', - rnd_seed: Optional[int] = 42 - ) -> None: - super().__init__() - self.port = port - self.backend = backend - self.rnd_seed = rnd_seed - if 'SLURM_JOB_ID' not in os.environ: - raise RuntimeError( - "'SLURM_JOB_ID' environment variable is not set. " - "Perhaps you are not running in a slurm cluster?" - ) - - self.ngpus_per_node = torch.cuda.device_count() - - # requeue job on SLURM preemption - signal.signal(signal.SIGUSR1, handle_sigusr1) - signal.signal(signal.SIGTERM, handle_sigterm) - - # find a common host name on all nodes - cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST') - stdout = subprocess.check_output(cmd.split()) - host_name = stdout.decode().splitlines()[0] - self.dist_url = f'tcp://{host_name}:{self.port}' - - # distributed parameters - self.global_rank = int(os.getenv('SLURM_NODEID')) * self.ngpus_per_node - self.global_world_size = int( - os.getenv('SLURM_NNODES')) * self.ngpus_per_node - - @contextmanager - def init_dist_gpu(self): - import submitit - try: - job_env = submitit.JobEnvironment() - self.output_dir = Path( - str(self.output_dir).replace("%j", str(job_env.job_id))) - self.gpu = job_env.local_rank - self.global_rank = job_env.global_rank - - dist.init_process_group( - backend=self.backend, - init_method=self.dist_url, - world_size=self.global_world_size, - rank=self.global_rank - ) - fix_random_seeds(self.rnd_seed) - torch.cuda.set_device(self.gpu) - cudnn.benchmark = True - dist.barrier() - - setup_for_distributed(self.is_main_worker()) - yield - finally: - self.cleanup_resources() diff --git a/src/itwinai/torch/engine.py b/src/itwinai/torch/engine.py deleted file mode 100644 index 7084d6ec..00000000 --- a/src/itwinai/torch/engine.py +++ /dev/null @@ -1,276 +0,0 @@ -""" -Model engine which wraps a torch NN. Still under development. May be removed... 
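The SLURM-based cluster removed above derives the distributed topology from scheduler environment variables. The arithmetic boils down to something like the sketch below (SLURM_LOCALID is an assumption here; the deleted class instead obtains ranks later through submitit's JobEnvironment):

import os
import torch

ngpus_per_node = max(torch.cuda.device_count(), 1)
node_id = int(os.environ.get('SLURM_NODEID', 0))
n_nodes = int(os.environ.get('SLURM_NNODES', 1))
local_rank = int(os.environ.get('SLURM_LOCALID', 0))

# One process per GPU: ranks are laid out node by node.
global_world_size = n_nodes * ngpus_per_node
global_rank = node_id * ngpus_per_node + local_rank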
-""" - -import abc -from typing import Any, Union, Optional, Callable - -from pydantic import BaseModel - -import torch -import torch.nn as nn -import torch.optim as optim -from torch.optim.lr_scheduler import _LRScheduler as LRScheduler -from torch.cuda import amp -from torch import autocast - - -class OptimizerConfig: - def __init__(self, optim_class, **kwargs) -> None: - self.optim_class = optim_class - self.kwargs = kwargs - - def to_optim(self, parameters) -> optim.Optimizer: - return self.optim_class(parameters, **self.kwargs) - - -class LRSchedulerConfig: - def __init__(self, scheduler_class, **kwargs) -> None: - self.scheduler_class = scheduler_class - self.kwargs = kwargs - - def to_scheduler(self, optim) -> LRScheduler: - return self.scheduler_class(optim, **self.kwargs) - - -class ModelEngineConfig(BaseModel): - mixed_precision: bool = False - - -class ModelEngine(abc.ABC): - """Wrapper around ML model, which abstracts from distributed and - mixed-precision models. - """ - - model: nn.Module - _model_parameters: Any - optimizer: optim.Optimizer - lr_scheduler: LRScheduler - # config: ModelEngineConfig - mixed_precision: bool = False - grad_scaler: amp.GradScaler = None - - def __init__( - self, - model: nn.Module, - # model_parameters: Any, - optimizer: Union[optim.Optimizer, OptimizerConfig], - lr_scheduler: Optional[Union[LRScheduler, LRSchedulerConfig]] = None, - mixed_precision: bool = False - # config: Optional[ModelEngineConfig] = None - ) -> None: - super().__init__() - self.model = model - self.optimizer = optimizer - self.lr_scheduler = lr_scheduler - # self._model_parameters = model_parameters - # if isinstance(optimizer, OptimizerConfig): - # self.optimizer = optimizer.to_optim(model_parameters) - # else: - # self.optimizer = optimizer - - # if isinstance(lr_scheduler, LRSchedulerConfig): - # self.lr_scheduler = lr_scheduler.to_scheduler(self.optimizer) - # else: - # self.lr_scheduler = lr_scheduler - - # if not config: - # self.config = ModelEngineConfig() - self.mixed_precision = mixed_precision - if mixed_precision: - self.grad_scaler = amp.GradScaler() - - def __call__(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - # Wrapper of self.forward() - return self.forward(*args, **kwds) - - def forward(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - return self.model(*args, **kwds) - - def train(self, mode: bool = True) -> nn.Module: - """Set model in training mode.""" - self.model.train(mode=mode) - return self.model - - def eval(self) -> nn.Module: - """Set model in inference mode.""" - self.model.eval() - return self.model - - def to(self, device) -> nn.Module: - """Move model to specified device.""" - self.model.to(device) - return self.model - - @abc.abstractmethod - def zero_grad(): - """Set gradients to zero for the optimizer.""" - - @abc.abstractmethod - def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: - """Perform backward pass and return the loss. - - Args: - loss_fn (Callable): computes the loss. - *loss_args: are the arguments to be passed to ``loss_fn``. - - Returns: - torch.Tensor: computed loss. 
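OptimizerConfig and LRSchedulerConfig above defer construction until the model parameters are available. Assuming those two classes are in scope, the intended usage is roughly:

from torch import nn, optim

model = nn.Linear(8, 2)
opt_conf = OptimizerConfig(optim.SGD, lr=0.1, momentum=0.9)
sched_conf = LRSchedulerConfig(optim.lr_scheduler.StepLR, step_size=10)

# Materialize optimizer and scheduler once the parameters exist.
optimizer = opt_conf.to_optim(model.parameters())
scheduler = sched_conf.to_scheduler(optimizer)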
- """ - - @abc.abstractmethod - def optimizer_step(self): - """Perform optimizer step.""" - - @abc.abstractmethod - def lr_scheduler_step(self): - """Perform lr scheduler step, if present.""" - # This should be incorporated in the optim step: - # https://deepspeed.readthedocs.io/en/latest/schedulers.html - # scheduler is updated automatically at each training step - - @abc.abstractmethod - def save_checkpoint(self): - """Save checkpoint to persistent storage.""" - - -class DDPModelEngine(ModelEngine): - """Model engine for torch DDP distributed strategy.""" - - def forward(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. - with autocast(device_type='cuda', dtype=torch.float16): - return self.model(*args, **kwds) - else: - return self.model(*args, **kwds) - - def zero_grad(self): - """Set gradients to zero for the optimizer.""" - self.optimizer.zero_grad() - - def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: - """Perform backward pass and return the loss. - - Args: - loss_fn (Callable): computes the loss. - *loss_args: are the arguments to be passed to ``loss_fn``. - - Returns: - torch.Tensor: computed loss. - """ - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. - with autocast(device_type='cuda', dtype=torch.float16): - loss = loss_fn(*loss_args) - - # Scales loss. Calls backward() on scaled loss to create scaled - # gradients. - # Backward passes under autocast are not recommended. - # Backward ops run in the same dtype autocast chose for - # corresponding forward ops. - loss = self.grad_scaler.scale(loss) - else: - loss = loss_fn(*loss_args) - loss.backward() - return loss - - def optimizer_step(self): - """Perform optimizer step.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training - # scaler.step() first unscales the gradients of the optimizer's - # assigned params. - # If these gradients do not contain infs or NaNs, optimizer.step() - # is then called, - # otherwise, optimizer.step() is skipped. - self.grad_scaler.step(self.optimizer) - - # Updates the scale for next iteration. - self.grad_scaler.update() - else: - self.optimizer.step() - - def lr_scheduler_step(self): - """Perform lr scheduler step, if present.""" - if self.lr_scheduler: - self.lr_scheduler.step() - - def save_checkpoint(self): - """Save checkpoint to persistent storage.""" - raise NotImplementedError - - -class DSModelEngine(ModelEngine): - """Model engine for DeeSpeed distributed strategy.""" - - def forward(self, *args: Any, **kwds: Any) -> Any: - """Performs the forward operation.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. - with autocast(device_type='cuda', dtype=torch.float16): - return self.model(*args, **kwds) - else: - return self.model(*args, **kwds) - - def zero_grad(self): - """Set gradients to zero for the optimizer.""" - self.optimizer.zero_grad() - - def backward(self, loss_fn: Callable, *loss_args) -> torch.Tensor: - """Perform backward pass and return the loss. - - Args: - loss_fn (Callable): computes the loss. - *loss_args: are the arguments to be passed to ``loss_fn``. - - Returns: - torch.Tensor: computed loss. 
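The mixed-precision branches above follow the standard GradScaler/autocast recipe from the PyTorch AMP notes. Stripped of the engine wrapper, a single step looks roughly like this (assumes a CUDA device is available):

import torch
from torch import nn

device = 'cuda'  # the float16 autocast path shown here assumes a GPU
model = nn.Linear(16, 1).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.MSELoss()
scaler = torch.cuda.amp.GradScaler()

x = torch.randn(32, 16, device=device)
y = torch.randn(32, 1, device=device)

optimizer.zero_grad()
with torch.autocast(device_type=device, dtype=torch.float16):
    loss = loss_fn(model(x), y)
scaler.scale(loss).backward()  # backward on the scaled loss
scaler.step(optimizer)         # unscales grads; skips the step on inf/NaN
scaler.update()                # adapts the scale factor for the next iteration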
- """ - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html - # Runs the forward pass with autocasting. - with autocast(device_type='cuda', dtype=torch.float16): - loss = loss_fn(*loss_args) - - # Scales loss. Calls backward() on scaled loss to create scaled - # gradients. - # Backward passes under autocast are not recommended. - # Backward ops run in the same dtype autocast chose for - # corresponding forward ops. - loss = self.grad_scaler.scale(loss) - else: - loss = loss_fn(*loss_args) - loss.backward() - return loss - - def optimizer_step(self): - """Perform optimizer step.""" - if self.mixed_precision: - # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training - # scaler.step() first unscales the gradients of the optimizer's - # assigned params. - # If these gradients do not contain infs or NaNs, optimizer.step() - # is then called, - # otherwise, optimizer.step() is skipped. - self.grad_scaler.step(self.optimizer) - - # Updates the scale for next iteration. - self.grad_scaler.update() - else: - self.optimizer.step() - - def lr_scheduler_step(self): - """Perform lr scheduler step, if present.""" - if self.lr_scheduler: - self.lr_scheduler.step() - - def save_checkpoint(self): - """Save checkpoint to persistent storage.""" - raise NotImplementedError diff --git a/src/itwinai/torch/inference.py b/src/itwinai/torch/inference.py index 02882f06..bb9af300 100644 --- a/src/itwinai/torch/inference.py +++ b/src/itwinai/torch/inference.py @@ -6,8 +6,7 @@ from torch import nn from torch.utils.data import DataLoader, Dataset -from ..utils import dynamically_import_class -from .utils import clear_key +from ..utils import dynamically_import_class, clear_key from ..components import Predictor, monitor_exec from .types import TorchDistributedStrategy as StrategyT from .types import Metric, Batch diff --git a/src/itwinai/torch/mlflow.py b/src/itwinai/torch/mlflow.py index 18a014ff..8bc854d4 100644 --- a/src/itwinai/torch/mlflow.py +++ b/src/itwinai/torch/mlflow.py @@ -16,6 +16,8 @@ def _get_mlflow_logger_conf(pl_config: Dict) -> Optional[Dict]: Optional[Dict]: if present, MLFLowLogger constructor arguments (under 'init_args' key). 
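This helper returns the MLFlowLogger 'init_args' when such a logger is configured, and the guard added just below makes it return None when the Lightning config defines no logger at all. Two illustrative input shapes (the MLFlowLogger entry is a hypothetical example, not taken from this repository):

# No logger configured: with the new guard the helper simply returns None.
pl_config_no_logger = {'trainer': {'max_epochs': 2}}

# One or more loggers configured as a list: the helper scans the list for an
# MLFlowLogger entry and returns its 'init_args'.
pl_config_with_logger = {
    'trainer': {
        'max_epochs': 2,
        'logger': [{
            'class_path': 'lightning.pytorch.loggers.MLFlowLogger',
            'init_args': {'experiment_name': 'my_exp', 'save_dir': 'mlruns'},
        }],
    }
}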
""" + if not pl_config['trainer'].get('logger'): + return None if isinstance(pl_config['trainer']['logger'], list): # If multiple loggers are provided for logger_conf in pl_config['trainer']['logger']: diff --git a/src/itwinai/torch/trainer.py b/src/itwinai/torch/trainer.py index ad48ec9e..4e7a108f 100644 --- a/src/itwinai/torch/trainer.py +++ b/src/itwinai/torch/trainer.py @@ -1,15 +1,12 @@ """Provides training logic for PyTorch models via Trainer classes.""" from typing import ( - Optional, Dict, Union, Tuple, Type, List, Any, Literal + Optional, Dict, Union, Tuple, List, Any, Literal ) -import time import os import sys -import numpy as np import torch -import torch.multiprocessing as mp from torch.utils.data import DataLoader, Dataset from torch.utils.data.distributed import DistributedSampler import torch.distributed as dist @@ -17,17 +14,16 @@ import torch.nn as nn from torch.optim.optimizer import Optimizer +import lightning as L +from lightning.pytorch.cli import LightningCLI + import horovod.torch as hvd from ..components import Trainer, monitor_exec -from .utils import par_allgather_obj, clear_key from .types import ( Batch, Loss, LrScheduler, Metric ) -from .types import TorchDistributedStrategy as StrategyT -from ..loggers import LogMixin, Logger, ConsoleLogger -from ..utils import dynamically_import_class -from ..cluster import ClusterEnvironment +from ..loggers import LogMixin, Logger from .reproducibility import seed_worker, set_seed from .distributed import ( TorchDistributedStrategy, @@ -37,6 +33,11 @@ NonDistributedStrategy, distributed_resources_available ) +from ..utils import load_yaml +from .mlflow import ( + init_lightning_mlflow, + teardown_lightning_mlflow +) class Config: @@ -513,11 +514,54 @@ def test_epoch(self): # TODO: implement test epoch raise NotImplementedError() - def save_state(self): - return super().save_state() - def load_state(self): - return super().load_state() +class TorchLightningTrainer(Trainer): + """Generic trainer for torch Lightning workflows. + + Args: + config (Union[Dict, str]): (path to a) Lightning configuration + https://pytorch-lightning.readthedocs.io/en/1.6.5/common/lightning_cli.html + mlflow_saved_model (str, optional): name of the model created in + MLFlow. Defaults to 'my_model'. + """ + + def __init__( + self, + config: Union[Dict, str], + mlflow_saved_model: str = 'my_model' + ): + self.save_parameters(**self.locals2params(locals())) + super().__init__() + if isinstance(config, str) and os.path.isfile(config): + # Load from YAML + config = load_yaml(config) + self.conf = config + self.mlflow_saved_model = mlflow_saved_model + + @monitor_exec + def execute(self) -> Any: + init_lightning_mlflow( + self.conf, + tmp_dir='/tmp', + registered_model_name=self.mlflow_saved_model + ) + old_argv = sys.argv + sys.argv = ['some_script_placeholder.py'] + cli = LightningCLI( + args=self.conf, + model_class=L.LightningModule, + datamodule_class=L.LightningDataModule, + run=False, + save_config_kwargs={ + "overwrite": True, + "config_filename": "pl-training.yml", + }, + subclass_mode_model=True, + subclass_mode_data=True, + ) + sys.argv = old_argv + cli.trainer.fit(cli.model, datamodule=cli.datamodule) + teardown_lightning_mlflow() def preproc_dataloader(dataloader: DataLoader, gwsize, grank): @@ -593,841 +637,3 @@ def dist_train( dist.barrier() dist.destroy_process_group() return dist_train - - -class TorchTrainerMG(Trainer, LogMixin): - """ - Torch trainer for optionally distributed data-parallel (DDP) workload. - Multi-GPU distribution. 
- - Args: - model (nn.Module): neural network instance. - loss (Loss): torch loss function instance. - optimizer_class (str): path to optimizer class - (e.g., 'torch.optim.SGD') - optimizer_kwargs (Optional[Dict], optional): optimizer constructor - arguments (except from parameters). Defaults to None. - lr_scheduler_class (Optional[str], optional): path to learning - rate scheduler class. Defaults to None. - lr_scheduler_kwargs (Optional[Dict], optional): constructor arguments - of the learning rate scheduler, except for the optimizer. - Defaults to None. - train_dataloader_class (str, optional): train dataloader class path. - Defaults to 'torch.utils.data.DataLoader'. - train_dataloader_kwargs (Optional[Dict], optional): constructor - arguments of the train dataloader, except for the dataset - instance. Defaults to None. - validation_dataloader_class (str, optional): validation dataloader - class path. Defaults to 'torch.utils.data.DataLoader'. - validation_dataloader_kwargs (Optional[Dict], optional): constructor - arguments of the validation dataloader, except for the dataset - instance. If None, it replicates `train_dataloader_kwargs`. - Defaults to None. - epochs (int, optional): number of training epochs. Defaults to 1. - strategy (Optional[TorchDistributedStrategy], optional): distributed - strategy. Defaults to StrategyT.NONE.value. - backend (TorchDistributedBackend, optional): computing backend. - Defaults to BackendT.NCCL.value. - shuffle_dataset (bool, optional): whether shuffle dataset before - sampling batches from dataloader. Defaults to False. - use_cuda (bool, optional): whether to use GPU. Defaults to True. - benchrun (bool, optional): sets up a debug run. Defaults to False. - testrun (bool, optional): deterministic training seeding everything. - Defaults to False. - seed (Optional[int], optional): random seed. Defaults to None. - logger (Optional[List[Logger]], optional): logger. Defaults to None. - checkpoint_every (int, optional): how often (epochs) to checkpoint the - best model. Defaults to 10. - cluster (Optional[ClusterEnvironment], optional): cluster environment - object describing the context in which the trainer is executed. - Defaults to None. - train_metrics (Optional[Dict[str, Metric]], optional): - list of metrics computed in the training step on the predictions. - It's a dictionary with the form - ``{'metric_unique_name': CallableMetric}``. Defaults to None. - validation_metrics (Optional[Dict[str, Metric]], optional): same - as ``training_metrics``. If not given, it mirrors the training - metrics. Defaults to None. - - Raises: - RuntimeError: When trying to use DDP without CUDA support. - NotImplementedError: when trying to use a strategy different from the - ones provided by TorchDistributedStrategy. 
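Putting the argument list above together, a heavily simplified instantiation of this (now removed) trainer could have looked as follows. The strategy string, the LocalCluster arguments and the datasets are illustrative assumptions, not values taken from the patch:

import torch
from torch import nn
from torch.utils.data import TensorDataset

train_set = TensorDataset(torch.randn(100, 3), torch.randn(100, 4))
val_set = TensorDataset(torch.randn(20, 3), torch.randn(20, 4))

trainer = TorchTrainerMG(
    model=nn.Linear(3, 4),
    loss=nn.MSELoss(),
    optimizer_class='torch.optim.SGD',
    optimizer_kwargs={'lr': 1e-3},
    epochs=2,
    strategy='ddp',                                    # assumed to match StrategyT.DDP.value
    cluster=LocalCluster(backend='nccl', gpus='0,1'),  # the LocalCluster removed above
)
trainer.execute(train_set, val_set)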
- """ - - model: nn.Module = None - loss: Loss = None - optimizer: Optimizer = None - lr_scheduler = None - _strategy: StrategyT = StrategyT.NONE.value - train_dataset: Dataset - validation_dataset: Dataset - train_dataloader: DataLoader = None - validation_dataloader: DataLoader = None - epoch_idx: int = 0 - train_glob_step: int = 0 - validation_glob_step: int = 0 - train_metrics: Dict[str, Metric] - validation_metrics: Dict[str, Metric] - - def __init__( - self, - model: nn.Module, - loss: Loss, - optimizer_class: str, - optimizer_kwargs: Optional[Dict] = None, - lr_scheduler_class: Optional[str] = None, - lr_scheduler_kwargs: Optional[Dict] = None, - train_dataloader_class: str = 'torch.utils.data.DataLoader', - train_dataloader_kwargs: Optional[Dict] = None, - validation_dataloader_class: str = 'torch.utils.data.DataLoader', - validation_dataloader_kwargs: Optional[Dict] = None, - epochs: int = 1, - strategy: str = StrategyT.NONE.value, - benchrun: bool = False, - testrun: bool = False, - seed: Optional[int] = None, - logger: Optional[List[Logger]] = None, - checkpoint_every: int = 10, - cluster: Optional[ClusterEnvironment] = None, - train_metrics: Optional[Dict[str, Metric]] = None, - validation_metrics: Optional[Dict[str, Metric]] = None - ) -> None: - """Sets up the distributed backend and loggers. - Makes the model a DDP model. - """ - super().__init__() - self.save_parameters(**self.locals2params(locals())) - self.model = model - self.loss = loss - self.epochs = epochs - self.testrun = testrun - self.seed = seed - self.strategy = strategy - self.benchrun = benchrun - self.cluster = cluster - # Checkpoint every n epochs - self.checkpoint_every = checkpoint_every - - # Train and validation dataloaders - self.train_dataloader_class = dynamically_import_class( - train_dataloader_class - ) - self.validation_dataloader_class = dynamically_import_class( - validation_dataloader_class - ) - train_dataloader_kwargs = ( - train_dataloader_kwargs - if train_dataloader_kwargs is not None else {} - ) - self.train_dataloader_kwargs = clear_key( - train_dataloader_kwargs, 'train_dataloader_kwargs', 'dataset' - ) - # If validation_dataloader_kwargs is not given, - # copy train_dataloader_kwargs - validation_dataloader_kwargs = ( - validation_dataloader_kwargs if validation_dataloader_kwargs - is not None else train_dataloader_kwargs - ) - self.validation_dataloader_kwargs = clear_key( - validation_dataloader_kwargs, 'validation_dataloader_kwargs', - 'dataset' - ) - - # Optimizer and scheduler - optim_class = dynamically_import_class(optimizer_class) - optimizer_kwargs = ( - optimizer_kwargs if optimizer_kwargs is not None else {} - ) - optimizer_kwargs = clear_key( - optimizer_kwargs, 'optimizer_kwargs', 'parameters' - ) - self.optimizer: Optimizer = optim_class( - self.model.parameters(), **optimizer_kwargs - ) - if lr_scheduler_class is not None: - scheduler_class = dynamically_import_class(lr_scheduler_class) - lr_scheduler_kwargs = ( - lr_scheduler_kwargs if lr_scheduler_kwargs is not None else {} - ) - lr_scheduler_kwargs = clear_key( - lr_scheduler_kwargs, 'lr_scheduler_kwargs', 'optimizer' - ) - self.lr_scheduler: LrScheduler = scheduler_class( - self.optimizer, **lr_scheduler_kwargs - ) - - # Loggers - self.logger = logger if logger is not None else ConsoleLogger() - - # Metrics - self.train_metrics = ( - {} if train_metrics is None else train_metrics - ) - self.validation_metrics = ( - self.train_metrics if validation_metrics is None - else validation_metrics - ) - - @property - def 
strategy(self) -> Optional[str]: - return self._strategy - - @strategy.setter - def strategy(self, strategy_name) -> None: - if strategy_name not in StrategyT: - raise ValueError( - "Unrecognized 'strategy' field. Allowed values " - f"are: {StrategyT.list()}. Received '{strategy_name}'") - self._strategy = strategy_name - - @property - def global_step(self) -> int: - return self.train_glob_step + self.validation_glob_step - - def set_seed(self, seed: Optional[int] = None): - """Deterministic operations for reproducibility. - Sets the random seed. - - Args: - seed (Optional[int], optional): if not None, overrides - `self.seed`. Defaults to None. - """ - seed = seed if seed is not None else self.seed - np.random.seed(seed) - self.torch_rng = torch.Generator() - if seed is not None: - torch.manual_seed(seed) - self.torch_rng.manual_seed(seed) - if self.cluster.is_cuda_available(): - torch.cuda.manual_seed(seed) - - @monitor_exec - def execute( - self, - train_dataset: Dataset, - validation_dataset: Dataset, - model: nn.Module = None, - optimizer: Optimizer = None, - lr_scheduler: LrScheduler = None, - ) -> Any: - self.train_dataset = train_dataset - self.validation_dataset = validation_dataset - - # Update parameters passed for "interactive" use - if model is not None: - self.model = model - if optimizer is not None: - self.optimizer = optimizer - if lr_scheduler is not None: - self.lr_scheduler = lr_scheduler - - # Start training - if self.cluster.distributed: - # Make training distributed - result = mp.spawn(self._train, nprocs=self.cluster.ngpus_per_node) - else: - result = self._train(0) - - # Return value compliant with Executable.execute format - return result - - def _train( - self, - worker_id: int - ): - # Each worker has a different deterministic seed - # Here, 'worker' = replica of the training function - worker_seed = ( - self.seed + worker_id if self.seed is not None else self.seed - ) - self.set_seed(worker_seed) - - # Instantiate dataloaders - self.train_dataloader = self._instantiate_dataloader( - dataloader_class=self.train_dataloader_class, - dataset=self.train_dataset, - init_kwargs=self.train_dataloader_kwargs - ) - if self.validation_dataset is not None: - self.validation_dataloader = self._instantiate_dataloader( - dataloader_class=self.validation_dataloader_class, - dataset=self.validation_dataset, - init_kwargs=self.validation_dataloader_kwargs - ) - - # Launch actual training: - - # Single worker case - if not self.cluster.distributed: - with self.cluster.init_dist_gpu(worker_id) as device: - self.device: torch.device = device - self.model = self.model.to(self.device) - self.setup_logger() - self._setup_metrics() - try: - train_result = self.train() - except Exception as exc: - print(exc) - raise exc - finally: - print("INFO: Training ended") - self.destroy_logger() - train_result = None - return train_result - - # Init / connect to distributed backend - with self.cluster.init_dist_gpu(worker_id) as device: - self.device: torch.device = device - self._distribute_model() - self.setup_logger() - self._setup_metrics() - try: - train_result = self.train() - except Exception as exc: - print(exc) - raise exc - finally: - print("INFO: Training ended") - self.destroy_logger() - train_result = None - return train_result - - def _instantiate_dataloader( - self, - dataloader_class: Type, - dataset: Dataset, - init_kwargs: Dict - ) -> DataLoader: - """Make dataloader distributed if using distributed training strategy. 
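The dataloader handling below boils down to swapping the default sampler for a DistributedSampler built from the cluster's world size and rank. A self-contained sketch (rank and world size are given explicitly, so no process group is needed to run it):

import torch
from torch.utils.data import DataLoader, DistributedSampler, TensorDataset

dataset = TensorDataset(torch.randn(100, 3), torch.randn(100, 1))
sampler = DistributedSampler(dataset, num_replicas=4, rank=0, shuffle=True)
# 'sampler' and 'shuffle' are mutually exclusive, so shuffle is left out below.
loader = DataLoader(dataset, batch_size=10, sampler=sampler)

sampler.set_epoch(0)   # reshuffle consistently across replicas at each epoch
print(len(loader))     # -> 3: this replica sees 25 of the 100 samples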
- - Args: - dataloader_class (Type): some torch DataLoader type. - dataset (Dataset): torch dataset instance. - init_kwargs (Dict): constructor args. - """ - init_kwargs['generator'] = init_kwargs.get( - 'generator', self.torch_rng - ) - init_kwargs['worker_init_fn'] = init_kwargs.get( - 'worker_init_fn', seed_worker - ) - - if self.strategy == StrategyT.DDP.value and self.cluster.distributed: - sampler = DistributedSampler( - dataset=dataset, - num_replicas=self.cluster.global_world_size, - rank=self.cluster.global_rank, - shuffle=init_kwargs.get( - 'shuffle', False - ) - ) - # Overwrite existing sampler, if given. - # TODO: improve using wrapper: - # https://discuss.pytorch.org/t/how-to-use-my-own-sampler-when-i-already-use-distributedsampler/62143?page=2 - init_kwargs['sampler'] = sampler - if init_kwargs.get('shuffle') is not None: - # sampler option is mutually exclusive with shuffle - del init_kwargs['shuffle'] - - return dataloader_class(dataset, **init_kwargs) - - def _setup_metrics(self): - for m_name, metric in self.train_metrics.items(): - self.train_metrics[m_name] = metric.to(self.device) - for m_name, metric in self.validation_metrics.items(): - self.validation_metrics[m_name] = metric.to(self.device) - - def _distribute_model(self): - if self.cluster.distributed: - # Distribute model - self.model = self.model.to(self.device) - if self.strategy == StrategyT.NONE.value: - print( - "WARNING: A GPU cluster is available but no distributed " - "strategy was given... Falling back to single worker...") - if not self.cluster.is_main_worker(): - # Use only GPU:0 for single worker - sys.exit(0) - elif self.strategy == StrategyT.DDP.value: - self.model = DDP( - self.model, - device_ids=[self.device.index], - output_device=self.device - ) - else: - raise NotImplementedError("Only DDP strategy is implemented.") - else: - raise RuntimeError( - "Trying to distribute a model when a " - "distributed cluster is not available." - ) - - def setup_logger(self): - if self.cluster.is_main_worker(): - # Only setup loggers on main worker - if isinstance(self.logger, list): - for logger in self.logger: - logger.create_logger_context() - elif isinstance(self.logger, Logger): - self.logger.create_logger_context() - else: - raise TypeError( - "Unrecognized self.logger. Allowed types are 'list' and " - f"'Logger'. Received {type(self.logger)}" - ) - else: - self.logger = [] - - def destroy_logger(self): - if self.cluster.is_main_worker(): - if isinstance(self.logger, list): - for logger in self.logger: - logger.destroy_logger_context() - elif isinstance(self.logger, Logger): - self.logger.destroy_logger_context() - else: - raise TypeError( - "Unrecognized self.logger. Allowed types are 'list' and " - f"'Logger'. Received {type(self.logger)}" - ) - - def log( - self, - item: Union[Any, List[Any]], - identifier: Union[str, List[str]], - kind: str = 'metric', - step: Optional[int] = None, - batch_idx: Optional[int] = None, - every_worker: bool = False, - **kwargs - ) -> None: - if self.cluster.is_main_worker() or every_worker: - # Only log on main worker if not specified otherwise - if isinstance(self.logger, list): - for logger in self.logger: - logger.log( - item=item, - identifier=identifier, - kind=kind, - step=step, - batch_idx=batch_idx, - **kwargs - ) - elif isinstance(self.logger, Logger): - self.logger.log( - item=item, - identifier=identifier, - kind=kind, - step=step, - batch_idx=batch_idx, - **kwargs - ) - else: - raise TypeError( - "Unrecognized self.logger. 
Allowed types are 'list' and " - f"'Logger'. Received {type(self.logger)}" - ) - - def compute_metrics( - self, - metrics: Dict[str, Metric], - true: Batch, - pred: Batch, - logger_step: int, - batch_idx: Optional[int], - stage: str = 'train' - ) -> Dict[str, Any]: - """Compute and log metrics. - - Args: - metrics (Dict[str, Metric]): metrics dict. Can be - ``self.train_metrics`` or ``self.validation_metrics``. - true (Batch): true values. - pred (Batch): predicted values. - logger_step (int): global step to pass to the logger. - stage (str): 'train', 'validation'... - - Returns: - Dict[str, Any]: metric values. - """ - m_values = {} - for m_name, metric in metrics.items(): - # metric = metric.to(self.device) - m_val = metric(pred, true).detach().cpu().numpy() - self.log( - item=m_val, - identifier=f'{m_name}_{stage}', - kind='metric', - step=logger_step, - batch_idx=batch_idx - ) - m_values[m_name] = m_val - return m_values - - def training_step( - self, - batch: Batch, - batch_idx: int - ) -> Tuple[Loss, Dict[str, Any]]: - x, y = batch - x, y = x.to(self.device), y.to(self.device) - pred_y = self.model(x) - loss: Loss = self.loss(pred_y, y) - self.log( - item=loss.item(), - identifier='training_loss', - kind='metric', - step=self.train_glob_step, - batch_idx=batch_idx - ) - metrics: Dict[str, Any] = self.compute_metrics( - metrics=self.train_metrics, - true=y, - pred=pred_y, - logger_step=self.train_glob_step, - batch_idx=batch_idx, - stage='training' - ) - return loss, metrics - - def validation_step( - self, - batch: Batch, - batch_idx: int - ) -> Tuple[Loss, Dict[str, Any]]: - x, y = batch - x, y = x.to(self.device), y.to(self.device) - pred_y = self.model(x) - loss: Loss = self.loss(pred_y, y) - self.log( - item=loss.item(), - identifier='validation_loss', - kind='metric', - step=self.validation_glob_step, - batch_idx=batch_idx - ) - metrics: Dict[str, Any] = self.compute_metrics( - metrics=self.validation_metrics, - true=y, - pred=pred_y, - logger_step=self.validation_glob_step, - batch_idx=batch_idx, - stage='validation' - ) - return loss, metrics - - def training_epoch(self) -> Loss: - self.model.train() - train_losses = [] - for batch_idx, train_batch in enumerate(self.train_dataloader): - loss, metrics = self.training_step( - batch=train_batch, - batch_idx=batch_idx - ) - # TODO: merge and log batch metrics and loss into epoch metrics - self.optimizer.zero_grad() - loss.backward() - self.optimizer.step() - train_losses.append(loss) - # Important: update counter - self.train_glob_step += 1 - - # Aggregate and log losses - avg_loss = torch.mean(torch.stack(train_losses)).detach().cpu() - self.log( - item=avg_loss.item(), - identifier='training_loss_epoch', - kind='metric', - step=self.train_glob_step, - ) - return avg_loss - - def validation_epoch(self) -> Loss: - if self.validation_dataloader is not None: - self.model.eval() - validation_losses = [] - for batch_idx, val_batch \ - in enumerate(self.validation_dataloader): - # TODO: merge and log batch metrics and loss into epoch metrics - loss, metrics = self.validation_step( - batch=val_batch, - batch_idx=batch_idx - ) - validation_losses.append(loss) - # Important: update counter - self.validation_glob_step += 1 - - # Aggregate and log losses - avg_loss = torch.mean( - torch.stack(validation_losses) - ).detach().cpu() - self.log( - item=avg_loss.item(), - identifier='validation_loss_epoch', - kind='metric', - step=self.validation_glob_step, - ) - return avg_loss - - def train(self): - - if self.optimizer is None: - raise 
ValueError("Undefined optimizer!") - - if self.loss is None: - raise ValueError("Undefined loss function!") - - st = time.time() - - # Resume state - self.start_epoch = 1 - self.best_loss = np.Inf - self.load_state() - - # start training/testing loop - if self.cluster.is_main_worker(): - print(f'TIMER: broadcast: {time.time()-st}s') - print('DEBUG: start training') - print('-'*56) - - ############################## - # Start training: run epochs # - ############################## - - et = time.time() - for self.epoch_idx in range(self.start_epoch, self.epochs + 1): - lt = time.time() - - ####################################################### - # Perform one training epoch and one validation epoch # - ####################################################### - - if self.benchrun and self.epoch_idx == self.epochs: - # TODO: move profiler into cluster environment - # profiling (done on last epoch - slower!) - with torch.autograd.profiler.profile( - use_cuda=self.cluster.is_cuda_available(), - profile_memory=True - ) as prof: - train_loss = self.training_epoch() - else: - train_loss = self.training_epoch() - val_loss = self.validation_epoch() - - ##################################### - # Save checkpoint if model improved # - ##################################### - - ref_loss = val_loss if val_loss is not None else train_loss - is_best = ref_loss < self.best_loss - if (self.epoch_idx % self.checkpoint_every == 0 - and not self.benchrun): - self.save_state( - loss_val=ref_loss, - is_best=is_best - ) - self.best_loss = min(ref_loss, self.best_loss) - - ########################### - # End of epoch operations # - ########################### - - # save first epoch timer - if self.epoch_idx == self.start_epoch: - first_ep_t = time.time()-lt - - # Final epoch - if self.epoch_idx + 1 == self.epochs: - self.train_dataloader.last_epoch = True - self.validation_dataloader.last_epoch = True - - if self.cluster.is_main_worker(): - print(f'TIMER: epoch time: {time.time()-lt}s') - if self.benchrun and self.epoch_idx == self.epochs: - print('-'*56) - print('benchmark of last epoch:') - what1 = ( - 'cuda' if self.cluster.is_cuda_available() else 'cpu' - ) - print( - prof.key_averages().table( - sort_by='self_'+str(what1)+'_time_total' - ) - ) - - ########################## - # Training has completed # - ########################## - - # save final state - if not self.benchrun: - self.save_state( - loss_val=ref_loss, - is_best=is_best - ) - if self.cluster.is_cuda_available() and self.cluster.distributed: - dist.barrier() - - ######################## - # Print training stats # - ######################## - - if self.cluster.is_main_worker(): - print('-'*56) - print('training results:') - print(f'TIMER: first epoch time: {first_ep_t}s') - print(f'TIMER: last epoch time: {time.time()-lt}s') - print( - f'TIMER: average epoch time: {(time.time()-et)/self.epochs}s') - print(f'TIMER: total epoch time: {time.time()-et}s') - if self.epoch_idx > 1: - print( - f'TIMER: total epoch-1 time: {time.time()-et-first_ep_t}s' - ) - print( - 'TIMER: average epoch-1 time: ' - f'{(time.time()-et-first_ep_t)/(self.epochs-1)}s') - if self.benchrun: - print( - f'TIMER: total epoch-2 time: {lt-first_ep_t}s') - print('TIMER: average epoch-2 time: ' - f'{(lt-first_ep_t)/(self.epochs-2)}s') - mem = int(torch.cuda.memory_reserved( - self.cluster.local_rank)/1024/1024) - print( - f'memory req: {mem} MB' - if self.cluster.is_cuda_available() - and self.cluster.distributed else 'memory req: - MB' - ) - if self.cluster.is_cuda_available(): - 
print( - f'memory summary:\n {torch.cuda.memory_summary(0)}') - - if self.cluster.is_main_worker(): - print(f'TIMER: final time: {time.time()-st} s') - - def save_state(self, loss_val: Any, is_best: bool): - """Save training state.""" - res_name = 'checkpoint.pth.tar' - rt = time.time() - - if (self.cluster.is_cuda_available() and self.cluster.distributed): - # find if is_best happened in any worker - is_best_m = par_allgather_obj( - is_best, self.cluster.global_world_size - ) - if any(is_best_m): - # TODO: is this strategy really good? Checkpointing when - # at least one worker improves the loss on their local - # data split is prone to overfitting, especially when - # the dataset in unbalanced! - - # find which rank is_best happened - select first rank - # if multiple - best_rank = np.where(np.array(is_best_m))[0][0] - if self.cluster.global_rank == best_rank: - self._save_sate( - epoch=self.epoch_idx+1, - loss_val=loss_val, - save_path=res_name - ) - print( - f'DEBUG: state in {self.cluster.global_rank} is ' - f'saved on epoch:{self.epoch_idx} ' - f'in {time.time()-rt} s') - else: - self._save_sate( - epoch=self.epoch_idx+1, - loss_val=loss_val, - save_path=res_name - ) - print( - f'DEBUG: state in {self.cluster.global_rank} ' - f'is saved on epoch:{self.epoch_idx} in {time.time()-rt} s') - - def _save_sate( - self, - epoch: int, - loss_val: Any, - save_path: str - ): - """Save state on disk.""" - sched = ( - self.lr_scheduler.state_dict() - if self.lr_scheduler is not None else None - ) - state = { - 'epoch': epoch, - 'state_dict': self.model.state_dict(), - 'best_loss': loss_val, - 'optimizer': self.optimizer.state_dict(), - 'lr_scheduler': sched - } - self.log( - item=state, - identifier=save_path, - kind='torch', - epoch_step=self.epoch_idx, - batch_step=0 - ) - - def load_state(self): - """Load training state.""" - res_name = 'checkpoint.pth.tar' - if os.path.isfile(res_name) and not self.benchrun: - try: - if (self.cluster.is_cuda_available() - and self.cluster.distributed): - dist.barrier() - # Map model to be loaded to specified single gpu. 
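The checkpoint written above is an ordinary dictionary serialized with torch.save, and restoring it mirrors the load_state logic that follows. A minimal round trip with a throwaway model (keys and filename follow the code above):

import torch
from torch import nn

model = nn.Linear(3, 4)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

state = {
    'epoch': 5,
    'state_dict': model.state_dict(),
    'best_loss': 0.123,          # placeholder value
    'optimizer': optimizer.state_dict(),
    'lr_scheduler': None,
}
torch.save(state, 'checkpoint.pth.tar')

checkpoint = torch.load('checkpoint.pth.tar', map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch']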
- # loc = ( - # {'cuda:%d' % 0: 'cuda:%d' % self.cluster.local_rank} - # if self.cluster.is_cuda_available() - # else {'cpu:%d' % 0: 'cpu:%d' % self.cluster.local_rank} - # ) - # checkpoint = torch.load(res_name, map_location=loc) - checkpoint = torch.load( - res_name, map_location=self.device - ) - else: - checkpoint = torch.load(res_name, map_location='cpu') - self.start_epoch = checkpoint['epoch'] - self.best_loss = checkpoint['best_loss'] - self.model.load_state_dict(checkpoint['state_dict']) - self.optimizer.load_state_dict(checkpoint['optimizer']) - if self.lr_scheduler is not None: - self.lr_scheduler.load_state_dict( - checkpoint['lr_scheduler'] - ) - if self.cluster.is_cuda_available(): - if self.cluster.is_main_worker(): - print( - f'WARNING: restarting from {self.start_epoch} ' - 'epoch') - else: - print( - f'WARNING: restarting from {self.start_epoch} epoch') - except Exception: - if self.cluster.is_cuda_available(): - if self.cluster.is_main_worker(): - print( - 'restart file cannot be loaded, restarting!') - else: - print( - 'WARNING: restart file cannot be loaded, restarting!') - - if self.start_epoch >= self.epochs + 1: - if self.cluster.is_cuda_available() and self.cluster.distributed: - if self.cluster.is_main_worker(): - print( - 'WARNING: given epochs are less than the ' - 'one in the restart file!') - print('WARNING: SYS.EXIT is issued') - sys.exit() - else: - print( - 'WARNING: given epochs are less than the ' - 'one in the restart file!') - print('WARNING: SYS.EXIT is issued') - sys.exit() diff --git a/src/itwinai/torch/utils.py b/src/itwinai/torch/utils.py deleted file mode 100644 index 99bcd246..00000000 --- a/src/itwinai/torch/utils.py +++ /dev/null @@ -1,84 +0,0 @@ -from typing import Hashable, Dict -import time -import numpy as np -import random - -import torch -import torch.distributed as dist - - -def save_state( - epoch, distrib_model, loss_val, optimizer, res_name, grank, gwsize, - is_best, distributed: bool = True -): - """Save training state""" - rt = time.time() - # find if is_best happened in any worker - if torch.cuda.is_available() and distributed: - is_best_m = par_allgather_obj(is_best, gwsize) - - if torch.cuda.is_available() and distributed: - if any(is_best_m): - # find which rank is_best happened - select first rank if multiple - is_best_rank = np.where(np.array(is_best_m))[0][0] - - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_loss': loss_val, - 'optimizer': optimizer.state_dict()} - - # write on worker with is_best - if grank == is_best_rank: - torch.save(state, './'+res_name) - print(f'DEBUG: state in {grank} is saved on ' - f'epoch:{epoch} in {time.time()-rt} s') - else: - # collect state - state = {'epoch': epoch + 1, - 'state_dict': distrib_model.state_dict(), - 'best_loss': loss_val, - 'optimizer': optimizer.state_dict()} - - torch.save(state, './'+res_name) - print( - f'DEBUG: state in {grank} is saved on epoch:{epoch} ' - f'in {time.time()-rt} s') - - -def seed_worker(worker_id): - """deterministic dataloader""" - worker_seed = torch.initial_seed() % 2**32 - np.random.seed(worker_seed) - random.seed(worker_seed) - - -def par_allgather_obj(obj, gwsize): - """gathers any object from the whole group in a list (to all workers)""" - res = [None]*gwsize - dist.all_gather_object(res, obj, group=None) - # print(f'ALLGATHER: {res}') - return res - - -def clear_key( - my_dict: Dict, - dict_name: str, - key: Hashable, - complain: bool = True -) -> Dict: - """Remove key from dictionary if present 
and complain. - - Args: - my_dict (Dict): Dictionary. - dict_name (str): name of the dictionary. - key (Hashable): Key to remove. - """ - if key in my_dict: - if complain: - print( - f"Field '{key}' should not be present " - f"in dictionary '{dict_name}'" - ) - del my_dict[key] - return my_dict diff --git a/src/itwinai/utils.py b/src/itwinai/utils.py index 52279aeb..280de5d3 100644 --- a/src/itwinai/utils.py +++ b/src/itwinai/utils.py @@ -1,14 +1,11 @@ """ Utilities for itwinai package. """ -from typing import Dict, Type, Callable, Tuple -import os +from typing import Dict, Type, Callable, Tuple, Hashable import sys import inspect from collections.abc import MutableMapping import yaml -from omegaconf import OmegaConf -from omegaconf.dictconfig import DictConfig def load_yaml(path: str) -> Dict: @@ -32,32 +29,6 @@ def load_yaml(path: str) -> Dict: return loaded_config -def load_yaml_with_deps(path: str) -> DictConfig: - """ - Load YAML file with OmegaConf and merge it with its dependencies - specified in the `conf-dependencies` field. - Assume that the dependencies live in the same folder of the - YAML file which is importing them. - - Args: - path (str): path to YAML file. - - Raises: - exc: yaml.YAMLError for loading/parsing errors. - - Returns: - DictConfig: nested representation of parsed YAML file. - """ - yaml_conf = load_yaml(path) - use_case_dir = os.path.dirname(path) - deps = [] - if yaml_conf.get("conf-dependencies"): - for dependency in yaml_conf["conf-dependencies"]: - deps.append(load_yaml(os.path.join(use_case_dir, dependency))) - - return OmegaConf.merge(yaml_conf, *deps) - - def dynamically_import_class(name: str) -> Type: """ Dynamically import class by module path. @@ -115,18 +86,6 @@ def flatten_dict( return dict(items) -# Parse (part of) YAML loaded in memory -def parse_pipe_config(yaml_file, parser): - with open(yaml_file, "r", encoding="utf-8") as f: - try: - config = yaml.safe_load(f) - except yaml.YAMLError as exc: - print(exc) - raise exc - - return parser.parse_object(config) - - class SignatureInspector: """Provides the functionalities to inspect the signature of a function or a method. @@ -181,3 +140,42 @@ def max_params_num(self) -> int: if self.has_kwargs or self.has_varargs: return self.INFTY return len(self.func_params) + + +def str_to_slice(interval: str) -> slice: + import re + # TODO: add support for slices starting with empty index + # e.g., :20:3 + if not re.match(r"\d+(:\d+)?(:\d+)?", interval): + raise ValueError( + f"Received invalid interval for slice: '{interval}'" + ) + if ":" in interval: + return slice(*map( + lambda x: int(x.strip()) if x.strip() else None, + interval.split(':') + )) + return int(interval) + + +def clear_key( + my_dict: Dict, + dict_name: str, + key: Hashable, + complain: bool = True +) -> Dict: + """Remove key from dictionary if present and complain. + + Args: + my_dict (Dict): Dictionary. + dict_name (str): name of the dictionary. + key (Hashable): Key to remove. + """ + if key in my_dict: + if complain: + print( + f"Field '{key}' should not be present " + f"in dictionary '{dict_name}'" + ) + del my_dict[key] + return my_dict diff --git a/tests/components/test_components.py b/tests/components/test_components.py index 3ec55453..890188d7 100644 --- a/tests/components/test_components.py +++ b/tests/components/test_components.py @@ -74,11 +74,6 @@ class MyTrainer(Trainer): def execute(self): ... - def save_state(self): - ... - - def load_state(self): - ... 
comp = MyTrainer() with pytest.raises(SerializationError) as exc_info: dict_serializ = comp.to_dict() diff --git a/tests/test_cli.py b/tests/test_cli.py deleted file mode 100644 index 26b57cb0..00000000 --- a/tests/test_cli.py +++ /dev/null @@ -1,26 +0,0 @@ -""" -Test itwinai CLI. -""" - -import subprocess -import pytest - - -@pytest.mark.skip(reason="cli deprecated") -def test_datasets_viz(): - """ - Test visualization of use case's dataset registry. - """ - USE_CASE = "use-cases/mnist/" - subprocess.run( - f"itwinai datasets --use-case {USE_CASE}".split(), check=True) - - -@pytest.mark.skip(reason="cli deprecated") -def test_workflows_viz(): - """ - Test visualization of use case's workflows. - """ - USE_CASE = "./use-cases/mnist/" - subprocess.run( - f"itwinai workflows --use-case {USE_CASE}".split(), check=True) diff --git a/tests/use-cases/conftest.py b/tests/use-cases/conftest.py index d080e0a8..eccdc208 100644 --- a/tests/use-cases/conftest.py +++ b/tests/use-cases/conftest.py @@ -3,8 +3,6 @@ import pytest import subprocess -pytest.TORCH_PREFIX = './.venv-pytorch' -pytest.TF_PREFIX = './.venv-tf' FNAMES = [ 'pipeline.yaml', @@ -12,6 +10,34 @@ ] +@pytest.fixture +def torch_env() -> str: + """ + Return absolute path to torch virtual environment parsing it + from environment variables, if provided, otherwise fall back + to ``./.venv-pytorch``. + """ + if os.environ.get('TORCH_ENV') is None: + env_p = './.venv-pytorch' + else: + env_p = os.environ.get('TORCH_ENV') + return os.path.join(os.getcwd(), env_p) + + +@pytest.fixture +def tf_env() -> str: + """ + Return absolute path to tensorflow virtual environment parsing it + from environment variables, if provided, otherwise fall back + to ``./.venv-tf``. + """ + if os.environ.get('TF_ENV') is None: + env_p = './.venv-tf' + else: + env_p = os.environ.get('TF_ENV') + return os.path.join(os.getcwd(), env_p) + + @pytest.fixture def check_folder_structure() -> Callable: """ @@ -31,7 +57,6 @@ def install_requirements() -> Callable: def _install_reqs(root: str, env_prefix: str): req_path = os.path.join(root, 'requirements.txt') if os.path.isfile(req_path): - cmd = (f"micromamba run -p {env_prefix} " - f"pip install -r {req_path}") + cmd = f"{env_prefix}/bin/pip install -r {req_path}" subprocess.run(cmd.split(), check=True) return _install_reqs diff --git a/tests/use-cases/test_3dgan.py b/tests/use-cases/test_3dgan.py index c57e21ff..c40c584c 100644 --- a/tests/use-cases/test_3dgan.py +++ b/tests/use-cases/test_3dgan.py @@ -3,7 +3,7 @@ """ import pytest import subprocess -# from itwinai.utils import dynamically_import_class +import os CERN_PATH = "use-cases/3dgan" CKPT_PATH = "3dgan-inference.pth" @@ -12,48 +12,57 @@ @pytest.fixture(scope="module") def fake_model_checkpoint() -> None: """ - Create a dummy model checkpoint for inference. + Create a dummy model checkpoint for inference + under ``CERN_PATH`` location. 
""" import sys import torch - sys.path.append(CERN_PATH) + curr_path = os.getcwd() + os.chdir(CERN_PATH) + sys.path.append(os.getcwd()) + from model import ThreeDGAN - # ThreeDGAN = dynamically_import_class('model.ThreeDGAN') net = ThreeDGAN() torch.save(net, CKPT_PATH) + sys.path.pop(sys.path.index(os.getcwd())) + os.chdir(curr_path) + +@pytest.mark.skip("deprecated") def test_structure_3dgan(check_folder_structure): """Test 3DGAN folder structure.""" check_folder_structure(CERN_PATH) @pytest.mark.functional -def test_3dgan_train(install_requirements): +def test_3dgan_train(torch_env, install_requirements): """ Test 3DGAN torch lightning trainer by running it end-to-end. """ - install_requirements(CERN_PATH, pytest.TORCH_PREFIX) - # cmd = (f"micromamba run -p {pytest.TORCH_PREFIX} python " - # f"{CERN_PATH}/train.py -p {CERN_PATH}/pipeline.yaml") + install_requirements(CERN_PATH, torch_env) trainer_params = "pipeline.init_args.steps.training_step.init_args" - cmd = (f"micromamba run -p {pytest.TORCH_PREFIX} itwinai exec-pipeline " - f"--config {CERN_PATH}/pipeline.yaml " + cmd = (f"{torch_env}/bin/itwinai exec-pipeline " + f"--config pipeline.yaml " f'-o {trainer_params}.config.trainer.accelerator=cpu ' f'-o {trainer_params}.config.trainer.strategy=auto ' ) - subprocess.run(cmd.split(), check=True) + subprocess.run(cmd.split(), check=True, cwd=CERN_PATH) @pytest.mark.functional -def test_3dgan_inference(install_requirements, fake_model_checkpoint): +def test_3dgan_inference( + torch_env, + install_requirements, + fake_model_checkpoint +): """ Test 3DGAN torch lightning trainer by running it end-to-end. """ - install_requirements(CERN_PATH, pytest.TORCH_PREFIX) - # cmd = (f"micromamba run -p {pytest.TORCH_PREFIX} python " + install_requirements(CERN_PATH, torch_env) + # cmd = (f"micromamba run -p {torch_env} python " # f"{CERN_PATH}/train.py -p {CERN_PATH}/pipeline.yaml") - # cmd = (f"micromamba run -p {pytest.TORCH_PREFIX} itwinai exec-pipeline " + # cmd = (f"micromamba run -p {torch_env} itwinai exec-pipeline " # f"--config {CERN_PATH}/inference-pipeline.yaml") getter_params = "pipeline.init_args.steps.dataloading_step.init_args" @@ -62,8 +71,8 @@ def test_3dgan_inference(install_requirements, fake_model_checkpoint): data_params = trainer_params + ".config.data.init_args" saver_params = "pipeline.init_args.steps.saver_step.init_args" cmd = ( - 'itwinai exec-pipeline ' - '--config use-cases/3dgan/inference-pipeline.yaml ' + f'{torch_env}/bin/itwinai exec-pipeline ' + '--config inference-pipeline.yaml ' f'-o {getter_params}.data_path=exp_data ' f'-o {trainer_params}.model.init_args.model_uri={CKPT_PATH} ' f'-o {trainer_params}.config.trainer.accelerator=cpu ' @@ -72,4 +81,4 @@ def test_3dgan_inference(install_requirements, fake_model_checkpoint): f'-o {data_params}.datapath=exp_data/*/*.h5 ' f'-o {saver_params}.save_dir=3dgan-generated-data ' ) - subprocess.run(cmd.split(), check=True) + subprocess.run(cmd.split(), check=True, cwd=CERN_PATH) diff --git a/tests/use-cases/test_cyclones.py b/tests/use-cases/test_cyclones.py index 1a5ebb3f..6b262d45 100644 --- a/tests/use-cases/test_cyclones.py +++ b/tests/use-cases/test_cyclones.py @@ -11,6 +11,7 @@ CYCLONES_PATH = "use-cases/cyclones" +@pytest.mark.skip("deprecated") def test_structure_cyclones(check_folder_structure): """Test cyclones folder structure.""" check_folder_structure(CYCLONES_PATH) @@ -18,11 +19,12 @@ def test_structure_cyclones(check_folder_structure): @pytest.mark.functional @pytest.mark.memory_heavy -def 
test_cyclones_train_tf(install_requirements): +def test_cyclones_train_tf(tf_env, install_requirements): """ Test Cyclones tensorflow trainer by running it end-to-end. """ - install_requirements(CYCLONES_PATH, pytest.TF_PREFIX) - cmd = (f"micromamba run -p {pytest.TF_PREFIX} python " - f"{CYCLONES_PATH}/train.py -p {CYCLONES_PATH}/pipeline.yaml") - subprocess.run(cmd.split(), check=True) + # TODO: create a small sample dataset for tests only + install_requirements(CYCLONES_PATH, tf_env) + cmd = (f"{tf_env}/bin/python train.py " + f"-p pipeline.yaml") + subprocess.run(cmd.split(), check=True, cwd=CYCLONES_PATH) diff --git a/tests/use-cases/test_mnist.py b/tests/use-cases/test_mnist.py index d32aab1c..fad3a110 100644 --- a/tests/use-cases/test_mnist.py +++ b/tests/use-cases/test_mnist.py @@ -6,73 +6,134 @@ """ import pytest +import os +import sys import subprocess +# from itwinai.cli import exec_pipeline TORCH_PATH = "use-cases/mnist/torch" LIGHTNING_PATH = "use-cases/mnist/torch-lightning" TF_PATH = "use-cases/mnist/tensorflow" +def mnist_torch_inference_files( + root: str = '.', + samples_path: str = 'mnist-sample-data/', + model_name: str = 'mnist-pre-trained.pth' +): + """Create sample dataset and fake model to test mnist + inference workflow. Assumes to be run from + the use case folder. + + Args: + root (str, optional): where to create the files. + Defaults to '.'. + """ + from dataloader import InferenceMNIST + sample = os.path.join(root, samples_path) + InferenceMNIST.generate_jpg_sample(sample, 10) + + import torch + from model import Net + dummy_nn = Net() + mdl_ckpt = os.path.join(root, model_name) + torch.save(dummy_nn, mdl_ckpt) + + +@pytest.mark.skip(reason="structure changed") def test_structure_mnist_torch(check_folder_structure): """Test MNIST folder structure for torch native trainer.""" check_folder_structure(TORCH_PATH) +@pytest.mark.skip(reason="structure changed") def test_structure_mnist_lightning(check_folder_structure): """Test MNIST folder structure for torch lightning trainer.""" check_folder_structure(LIGHTNING_PATH) +@pytest.mark.skip(reason="structure changed") def test_structure_mnist_tf(check_folder_structure): """Test MNIST folder structure for tensorflow trainer.""" check_folder_structure(TF_PATH) @pytest.mark.functional -def test_mnist_train_torch(install_requirements): +def test_mnist_train_torch(torch_env, install_requirements): """ Test MNIST torch native trainer by running it end-to-end. + + To set the torch env path set the ``TORCH_ENV`` env variable: + + >>> export TORCH_ENV="my_env" """ - install_requirements(TORCH_PATH, pytest.TORCH_PREFIX) - cmd = (f"micromamba run -p {pytest.TORCH_PREFIX} python " - f"{TORCH_PATH}/train.py -p {TORCH_PATH}/pipeline.yaml") - subprocess.run(cmd.split(), check=True) + install_requirements(TORCH_PATH, torch_env) + cmd = (f"{torch_env}/bin/itwinai exec-pipeline " + f"--config config.yaml --pipe-key training_pipeline") + subprocess.run(cmd.split(), check=True, cwd=TORCH_PATH) @pytest.mark.functional -def test_mnist_train_lightning(install_requirements): +def test_mnist_inference_torch(torch_env, install_requirements): """ - Test MNIST torch lightning trainer by running it end-to-end. + Test MNIST torch native inference by running it end-to-end. 
+ + To set the torch env path set the ``TORCH_ENV`` env variable: + + >>> export TORCH_ENV="my_env" """ - install_requirements(TORCH_PATH, pytest.TORCH_PREFIX) - cmd = (f"micromamba run -p {pytest.TORCH_PREFIX} python " - f"{LIGHTNING_PATH}/train.py -p {LIGHTNING_PATH}/pipeline.yaml") - subprocess.run(cmd.split(), check=True) + install_requirements(TORCH_PATH, torch_env) + + samples_path: str = 'mnist-sample-data/' + model_name: str = 'mnist-pre-trained.pth' + root_path = os.getcwd() + os.chdir(TORCH_PATH) + # sys.path.append(os.path.join(os.getcwd(), TORCH_PATH)) + sys.path.append(os.getcwd()) + try: + mnist_torch_inference_files( + samples_path=samples_path, + model_name=model_name + ) + # exec_pipeline( + # config='config.yaml', + # pipe_key='inference_pipeline', + # overrides_list=[ + # f"predictions_dir={samples_path}", + # f"inference_model_mlflow_uri={model_name}" + # ] + # ) + except Exception as e: + raise e + finally: + os.chdir(root_path) + sys.path.pop(sys.path.index(os.getcwd())) + cmd = (f"{torch_env}/bin/itwinai exec-pipeline " + f"--config config.yaml --pipe-key inference_pipeline") + subprocess.run(cmd.split(), check=True, cwd=TORCH_PATH) @pytest.mark.functional -def test_mnist_train_tf(install_requirements): +def test_mnist_train_torch_lightning(torch_env, install_requirements): """ - Test MNIST tensorflow trainer by running it end-to-end. + Test MNIST torch lightning trainer by running it end-to-end. + + To set the torch env path set the ``TORCH_ENV`` env variable: + + >>> export TORCH_ENV="my_env" """ - install_requirements(TF_PATH, pytest.TF_PREFIX) - cmd = (f"micromamba run -p {pytest.TF_PREFIX} python " - f"{TF_PATH}/train.py -p {TF_PATH}/pipeline.yaml") - subprocess.run(cmd.split(), check=True) + install_requirements(TORCH_PATH, torch_env) + cmd = (f"{torch_env}/bin/itwinai exec-pipeline " + f"--config config.yaml --pipe-key training_pipeline") + subprocess.run(cmd.split(), check=True, cwd=LIGHTNING_PATH) -@pytest.mark.skip(reason="workflow changed. Left as example") -@pytest.mark.integration -def test_mnist_train_legacy(): +@pytest.mark.functional +def test_mnist_train_tf(tf_env, install_requirements): """ - Test MNIST training workflow(s) by running it end-to-end. + Test MNIST tensorflow trainer by running it end-to-end. 
""" - workflows = [ - "./use-cases/mnist/torch/workflows/training-workflow.yml", - "./use-cases/mnist/tensorflow/workflows/training-workflow.yml", - ] - - for workflow in workflows: - cmd = f"micromamba run -p ./.venv python run-workflow.py -f {workflow}" - subprocess.run(cmd.split(), check=True) - subprocess.run(cmd.split() + ["--cwl"], check=True) + install_requirements(TF_PATH, tf_env) + cmd = (f"{tf_env}/bin/itwinai exec-pipeline " + f"--config pipeline.yaml --pipe-key pipeline") + subprocess.run(cmd.split(), check=True, cwd=TF_PATH) diff --git a/tutorials/ml-workflows/basic_components.py b/tutorials/ml-workflows/basic_components.py index 49e74180..1fca03d8 100644 --- a/tutorials/ml-workflows/basic_components.py +++ b/tutorials/ml-workflows/basic_components.py @@ -70,12 +70,6 @@ def execute( """ return train_set, vaild_set, test_set, "my_trained_model" - def save_state(self): - return super().save_state() - - def load_state(self): - return super().load_state() - class MySaver(Saver): @monitor_exec diff --git a/use-cases/3dgan/trainer.py b/use-cases/3dgan/trainer.py index 3bb5a1fd..8e022bc9 100644 --- a/use-cases/3dgan/trainer.py +++ b/use-cases/3dgan/trainer.py @@ -52,12 +52,6 @@ def execute(self) -> Any: cli.trainer.fit(cli.model, datamodule=cli.datamodule) teardown_lightning_mlflow() - def save_state(self): - return super().save_state() - - def load_state(self): - return super().load_state() - class LightningModelLoader(TorchModelLoader): """Loads a torch lightning model from somewhere. diff --git a/use-cases/cyclones/README.md b/use-cases/cyclones/README.md new file mode 100644 index 00000000..6b504fb0 --- /dev/null +++ b/use-cases/cyclones/README.md @@ -0,0 +1,12 @@ +# Tropical cyclone detection + +## Dataset + +If the automatic download from python does not work, try from the command line from +within the virtual environment: + +```bash +gdown https://drive.google.com/drive/folders/1TnmujO4T-8_j4bCxqNe5HEw9njJIIBQD -O data/tmp_data/trainval --folder +``` + +For more info visit the [gdown](https://github.com/wkentaro/gdown) repository. 
diff --git a/use-cases/cyclones/trainer.py b/use-cases/cyclones/trainer.py index 1c47819b..054f772b 100644 --- a/use-cases/cyclones/trainer.py +++ b/use-cases/cyclones/trainer.py @@ -155,9 +155,3 @@ def setup_config(self, config: Dict) -> None: if self.model_backup: self.best_model_name = join(self.model_backup, "best_model.h5") self.last_model_name = join(self.run_dir, "last_model.h5") - - def load_state(self): - return super().load_state() - - def save_state(self): - return super().save_state() diff --git a/use-cases/mnist/tensorflow/pipeline.yaml b/use-cases/mnist/tensorflow/pipeline.yaml index 9fced327..314f78b1 100644 --- a/use-cases/mnist/tensorflow/pipeline.yaml +++ b/use-cases/mnist/tensorflow/pipeline.yaml @@ -32,9 +32,9 @@ pipeline: strategy: class_path: tensorflow.python.distribute.mirrored_strategy.MirroredStrategy - logger: - - class_path: itwinai.loggers.ConsoleLogger - - class_path: itwinai.loggers.MLFlowLogger - init_args: - experiment_name: MNIST classifier - log_freq: batch + # logger: + # - class_path: itwinai.loggers.ConsoleLogger + # - class_path: itwinai.loggers.MLFlowLogger + # init_args: + # experiment_name: MNIST classifier + # log_freq: batch diff --git a/use-cases/mnist/tensorflow/trainer.py b/use-cases/mnist/tensorflow/trainer.py index 17ef19a5..435f79f4 100644 --- a/use-cases/mnist/tensorflow/trainer.py +++ b/use-cases/mnist/tensorflow/trainer.py @@ -35,9 +35,3 @@ def __init__( @monitor_exec def execute(self, train_dataset, validation_dataset) -> Any: return super().execute(train_dataset, validation_dataset) - - def load_state(self): - return super().load_state() - - def save_state(self): - return super().save_state() diff --git a/use-cases/mnist/torch-lightning/README.md b/use-cases/mnist/torch-lightning/README.md new file mode 100644 index 00000000..bd769c70 --- /dev/null +++ b/use-cases/mnist/torch-lightning/README.md @@ -0,0 +1,17 @@ +# Torch Lightning example on MNIST dataset + +## Training + +```bash +# Download dataset and exit: only run first step in the pipeline (index=0) +itwinai exec-pipeline --config config.yaml --pipe-key training_pipeline --steps 0 + +# Run the whole training pipeline +itwinai exec-pipeline --config config.yaml --pipe-key training_pipeline +``` + +View training logs on MLFLow server (if activated from the configuration): + +```bash +mlflow ui --backend-store-uri mllogs/mlflow/ +``` diff --git a/use-cases/mnist/torch-lightning/pipeline.yaml b/use-cases/mnist/torch-lightning/config.yaml similarity index 96% rename from use-cases/mnist/torch-lightning/pipeline.yaml rename to use-cases/mnist/torch-lightning/config.yaml index cf754b2f..23fde03d 100644 --- a/use-cases/mnist/torch-lightning/pipeline.yaml +++ b/use-cases/mnist/torch-lightning/config.yaml @@ -1,4 +1,4 @@ -pipeline: +training_pipeline: class_path: itwinai.pipeline.Pipeline init_args: steps: @@ -6,7 +6,7 @@ pipeline: init_args: data_path: data/ - - class_path: trainer.LightningMNISTTrainer + - class_path: itwinai.torch.trainer.TorchLightningTrainer #trainer.LightningMNISTTrainer init_args: # Pytorch lightning config for training config: diff --git a/use-cases/mnist/torch-lightning/dataloader.py b/use-cases/mnist/torch-lightning/dataloader.py index 1f062fe5..b7e8d46e 100644 --- a/use-cases/mnist/torch-lightning/dataloader.py +++ b/use-cases/mnist/torch-lightning/dataloader.py @@ -31,7 +31,7 @@ def execute(self) -> None: self._downloader.setup(stage='predict') -class MNISTDataModule(L.LightningModule): +class MNISTDataModule(L.LightningDataModule): def __init__( self, 
data_path: str, diff --git a/use-cases/mnist/torch-lightning/train.py b/use-cases/mnist/torch-lightning/train.py deleted file mode 100644 index 97f53093..00000000 --- a/use-cases/mnist/torch-lightning/train.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Training pipeline. To run this script, use the following commands. - -On login node: - ->>> micromamba run -p ../../../.venv-pytorch/ \ - python train.py -p pipeline.yaml -d - -On compute nodes: - ->>> micromamba run -p ../../../.venv-pytorch/ \ - python train.py -p pipeline.yaml - -""" - -import argparse - -from itwinai.parser import ConfigParser - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "-p", "--pipeline", type=str, required=True, - help='Configuration file to the pipeline to execute.' - ) - parser.add_argument( - '-d', '--download-only', - action=argparse.BooleanOptionalAction, - default=False, - help=('Whether to download only the dataset and exit execution ' - '(suggested on login nodes of HPC systems)') - ) - args = parser.parse_args() - - # Create parser for the pipeline - pipe_parser = ConfigParser(config=args.pipeline) - pipeline = pipe_parser.parse_pipeline() - - if args.download_only: - print('Downloading datasets and exiting...') - pipeline = pipeline[:1] - - pipeline.execute() diff --git a/use-cases/mnist/torch-lightning/trainer.py b/use-cases/mnist/torch-lightning/trainer.py deleted file mode 100644 index 128cf5c6..00000000 --- a/use-cases/mnist/torch-lightning/trainer.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -from typing import Union, Dict, Any - -from itwinai.components import Trainer, monitor_exec -from itwinai.torch.models.mnist import MNISTModel -from dataloader import MNISTDataModule -from lightning.pytorch.cli import LightningCLI -from utils import load_yaml - - -class LightningMNISTTrainer(Trainer): - def __init__(self, config: Union[Dict, str]): - super().__init__() - self.save_parameters(**self.locals2params(locals())) - if isinstance(config, str) and os.path.isfile(config): - # Load from YAML - config = load_yaml(config) - self.conf = config - - @monitor_exec - def execute(self) -> Any: - cli = LightningCLI( - args=self.conf, - model_class=MNISTModel, - datamodule_class=MNISTDataModule, - run=False, - save_config_kwargs={ - "overwrite": True, - "config_filename": "pl-training.yml", - }, - subclass_mode_model=True, - subclass_mode_data=True, - ) - cli.trainer.fit(cli.model, datamodule=cli.datamodule) - - def save_state(self): - return super().save_state() - - def load_state(self): - return super().load_state() diff --git a/use-cases/mnist/torch/Dockerfile b/use-cases/mnist/torch/Dockerfile index dcc75225..5b96feb5 100644 --- a/use-cases/mnist/torch/Dockerfile +++ b/use-cases/mnist/torch/Dockerfile @@ -1,4 +1,5 @@ -FROM python:3.9.12 +# FROM python:3.9 +FROM nvcr.io/nvidia/pytorch:23.09-py3 WORKDIR /usr/src/app From 4dd16d029bb9b86058f9ec7de66a3d8422d1435c Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:11:13 +0200 Subject: [PATCH 133/171] Tmp disable unused tests --- .github/workflows/check-links.yml | 2 +- .github/workflows/lint.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml index c7d0737b..7253ac26 100644 --- a/.github/workflows/check-links.yml +++ b/.github/workflows/check-links.yml @@ -2,7 +2,7 @@ name: Check links on: - push: + # push: pull_request: jobs: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 8eca0a3c..bb241d0d 
100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,7 +2,7 @@ name: Lint on: - push: + # push: pull_request: jobs: From 9c03dfe0d0efc4c3a5fe592091e3ff05ff835a9c Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:12:45 +0200 Subject: [PATCH 134/171] Update action --- .github/workflows/test-torch.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 834e2941..617acdf2 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -14,4 +14,6 @@ jobs: # You can use it as needed cd ${{ github.workspace }} && # Run your commands here - ls -la + ls -la && + pwd + && echo ${{ github.workspace }} From ea0c9b23c1a4203de88967635a0455fe1d77e7bc Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:18:09 +0200 Subject: [PATCH 135/171] Update action --- .github/workflows/test-torch.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 617acdf2..bea8e67e 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -8,6 +8,11 @@ jobs: image: pytorch/pytorch:latest # docker://{docker-image-name}:{tag} options: --volume ${{ github.workspace }}:${{ github.workspace }} steps: + - name: Cache Docker image layers + uses: actions/cache@v2 + with: + path: /var/lib/docker/image + key: ${{ runner.os }}-docker-${{ hashFiles('**/Dockerfile') }} - name: Run commands in container run: | # Now the repository code is available inside the Docker container @@ -15,5 +20,5 @@ jobs: cd ${{ github.workspace }} && # Run your commands here ls -la && - pwd - && echo ${{ github.workspace }} + pwd && + echo ${{ github.workspace }} From 70252ed7af4094af30b6c77128546975f6812b65 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:26:23 +0200 Subject: [PATCH 136/171] Update action --- .github/workflows/test-torch.yml | 4 +++- tests/use-cases/test_mnist.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index bea8e67e..26ecaf97 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -19,6 +19,8 @@ jobs: # You can use it as needed cd ${{ github.workspace }} && # Run your commands here - ls -la && + ls -la . && + ls -la ${{ github.workspace }} && + ls -la ${{ github.workspace }}/.. && pwd && echo ${{ github.workspace }} diff --git a/tests/use-cases/test_mnist.py b/tests/use-cases/test_mnist.py index fad3a110..0f0de865 100644 --- a/tests/use-cases/test_mnist.py +++ b/tests/use-cases/test_mnist.py @@ -29,6 +29,7 @@ def mnist_torch_inference_files( root (str, optional): where to create the files. Defaults to '.'. 
""" + sys.path.append(os.getcwd()) from dataloader import InferenceMNIST sample = os.path.join(root, samples_path) InferenceMNIST.generate_jpg_sample(sample, 10) @@ -39,6 +40,8 @@ def mnist_torch_inference_files( mdl_ckpt = os.path.join(root, model_name) torch.save(dummy_nn, mdl_ckpt) + sys.path.pop() + @pytest.mark.skip(reason="structure changed") def test_structure_mnist_torch(check_folder_structure): @@ -89,7 +92,7 @@ def test_mnist_inference_torch(torch_env, install_requirements): root_path = os.getcwd() os.chdir(TORCH_PATH) # sys.path.append(os.path.join(os.getcwd(), TORCH_PATH)) - sys.path.append(os.getcwd()) + # sys.path.append(os.getcwd()) try: mnist_torch_inference_files( samples_path=samples_path, @@ -107,7 +110,7 @@ def test_mnist_inference_torch(torch_env, install_requirements): raise e finally: os.chdir(root_path) - sys.path.pop(sys.path.index(os.getcwd())) + # sys.path.pop(sys.path.index(os.getcwd())) cmd = (f"{torch_env}/bin/itwinai exec-pipeline " f"--config config.yaml --pipe-key inference_pipeline") subprocess.run(cmd.split(), check=True, cwd=TORCH_PATH) From b2100543cdc892e36c219b3d5ad8f84c126eecca Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:37:05 +0200 Subject: [PATCH 137/171] Update action --- .github/workflows/test-torch.yml | 2 +- tests/use-cases/test_mnist.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 26ecaf97..d3858066 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -5,7 +5,7 @@ jobs: my_job: runs-on: ubuntu-latest container: - image: pytorch/pytorch:latest # docker://{docker-image-name}:{tag} + image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} options: --volume ${{ github.workspace }}:${{ github.workspace }} steps: - name: Cache Docker image layers diff --git a/tests/use-cases/test_mnist.py b/tests/use-cases/test_mnist.py index 0f0de865..9c2f2ace 100644 --- a/tests/use-cases/test_mnist.py +++ b/tests/use-cases/test_mnist.py @@ -29,7 +29,7 @@ def mnist_torch_inference_files( root (str, optional): where to create the files. Defaults to '.'. """ - sys.path.append(os.getcwd()) + sys.path = [os.getcwd()] + sys.path from dataloader import InferenceMNIST sample = os.path.join(root, samples_path) InferenceMNIST.generate_jpg_sample(sample, 10) @@ -40,7 +40,7 @@ def mnist_torch_inference_files( mdl_ckpt = os.path.join(root, model_name) torch.save(dummy_nn, mdl_ckpt) - sys.path.pop() + sys.path = sys.path[1:] @pytest.mark.skip(reason="structure changed") From 772c776226c1e2662b27ac53751d95ae642ce687 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:38:02 +0200 Subject: [PATCH 138/171] Update action --- .github/workflows/test-torch.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index d3858066..8d84c400 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -23,4 +23,5 @@ jobs: ls -la ${{ github.workspace }} && ls -la ${{ github.workspace }}/.. && pwd && - echo ${{ github.workspace }} + echo ${{ github.workspace }} && + pip install . 
From 6f99f886cfb3f6c53f0b4d0bff37d354730de05a Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:41:31 +0200 Subject: [PATCH 139/171] Update action --- .github/workflows/test-torch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 8d84c400..fd733276 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest container: image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} - options: --volume ${{ github.workspace }}:${{ github.workspace }} + volumes: ${{ github.workspace }}:${{ github.workspace }} steps: - name: Cache Docker image layers uses: actions/cache@v2 From a32b506a9f661b34e8846dc32254a86011a05f0b Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:42:29 +0200 Subject: [PATCH 140/171] Update action --- .github/workflows/test-torch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index fd733276..46f261d5 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest container: image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} - volumes: ${{ github.workspace }}:${{ github.workspace }} + volumes: "${{ github.workspace }}:${{ github.workspace }}" steps: - name: Cache Docker image layers uses: actions/cache@v2 From 32327d777d4e0912631050f84d4839d4e9de9bce Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:43:06 +0200 Subject: [PATCH 141/171] Update action --- .github/workflows/test-torch.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 46f261d5..2cbc91ce 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -6,7 +6,8 @@ jobs: runs-on: ubuntu-latest container: image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} - volumes: "${{ github.workspace }}:${{ github.workspace }}" + volumes: + - "${{ github.workspace }}:${{ github.workspace }}" steps: - name: Cache Docker image layers uses: actions/cache@v2 From 298f6863863a891f45fc0619080dd6d2afbcec0f Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:48:23 +0200 Subject: [PATCH 142/171] Update action --- .github/workflows/test-torch.yml | 11 +++++++++++ tests/use-cases/test_mnist.py | 1 + 2 files changed, 12 insertions(+) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 2cbc91ce..ceebe81a 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -2,6 +2,17 @@ name: Test workflows based on torch on: [push] jobs: + + my_job1: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install micromamba + shell: bash -l {0} + run: ls -la ${{ github.workspace }} + my_job: runs-on: ubuntu-latest container: diff --git a/tests/use-cases/test_mnist.py b/tests/use-cases/test_mnist.py index 9c2f2ace..a9996cc7 100644 --- a/tests/use-cases/test_mnist.py +++ b/tests/use-cases/test_mnist.py @@ -30,6 +30,7 @@ def mnist_torch_inference_files( Defaults to '.'. 
""" sys.path = [os.getcwd()] + sys.path + raise ValueError(sys.path) from dataloader import InferenceMNIST sample = os.path.join(root, samples_path) InferenceMNIST.generate_jpg_sample(sample, 10) From fefca94cd8571080bdcdd16a13b7f3221f3998d5 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:50:44 +0200 Subject: [PATCH 143/171] Update action --- .github/workflows/test-torch.yml | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index ceebe81a..724a217a 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -9,7 +9,7 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install micromamba + - name: Check workspace shell: bash -l {0} run: ls -la ${{ github.workspace }} @@ -18,7 +18,7 @@ jobs: container: image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} volumes: - - "${{ github.workspace }}:${{ github.workspace }}" + - ${{ github.workspace }}:/repo steps: - name: Cache Docker image layers uses: actions/cache@v2 @@ -26,14 +26,15 @@ jobs: path: /var/lib/docker/image key: ${{ runner.os }}-docker-${{ hashFiles('**/Dockerfile') }} - name: Run commands in container - run: | - # Now the repository code is available inside the Docker container - # You can use it as needed - cd ${{ github.workspace }} && - # Run your commands here - ls -la . && - ls -la ${{ github.workspace }} && - ls -la ${{ github.workspace }}/.. && - pwd && - echo ${{ github.workspace }} && - pip install . + run: ls -la /repo + # run: | + # # Now the repository code is available inside the Docker container + # # You can use it as needed + # cd ${{ github.workspace }} && + # # Run your commands here + # ls -la . && + # ls -la ${{ github.workspace }} && + # ls -la ${{ github.workspace }}/.. && + # pwd && + # echo ${{ github.workspace }} && + # pip install . 
From 50a7e1b9002bed07da1b4c70274dd75f513f41ca Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:52:59 +0200 Subject: [PATCH 144/171] Update action --- .github/workflows/test-torch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 724a217a..5a419eea 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -18,7 +18,7 @@ jobs: container: image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} volumes: - - ${{ github.workspace }}:/repo + - ${{ github.workspace }}/use-cases:/repo steps: - name: Cache Docker image layers uses: actions/cache@v2 From 1d04f6ba3b5452466db4c6fabfab2ba2fce4623a Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:55:12 +0200 Subject: [PATCH 145/171] Update action --- .github/workflows/test-torch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 5a419eea..3c881598 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -18,7 +18,7 @@ jobs: container: image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} volumes: - - ${{ github.workspace }}/use-cases:/repo + - /home/runner/work/itwinai/itwinai/use-cases:/repo steps: - name: Cache Docker image layers uses: actions/cache@v2 From f5b473180e8750dcc4cc2f3f531f1bbed608b9e4 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:56:59 +0200 Subject: [PATCH 146/171] Update action --- .github/workflows/test-torch.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 3c881598..b878f795 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -26,8 +26,11 @@ jobs: path: /var/lib/docker/image key: ${{ runner.os }}-docker-${{ hashFiles('**/Dockerfile') }} - name: Run commands in container - run: ls -la /repo - # run: | + # run: ls -la /repo + run: | + whoami && + pwd && + ls $(pwd) # # Now the repository code is available inside the Docker container # # You can use it as needed # cd ${{ github.workspace }} && From 41e7c61d178bf1b31791b5a975b050630f958a0f Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:58:09 +0200 Subject: [PATCH 147/171] Update action --- .github/workflows/test-torch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index b878f795..9c34f3fd 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -18,7 +18,7 @@ jobs: container: image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} volumes: - - /home/runner/work/itwinai/itwinai/use-cases:/repo + - ${{ github.workspace }}:/repo steps: - name: Cache Docker image layers uses: actions/cache@v2 From 975e1605bd31813a0ccf7f7d0f3c7ec23dcc1ad7 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 20:59:41 +0200 Subject: [PATCH 148/171] Update action --- .github/workflows/test-torch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 9c34f3fd..0f81df94 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -18,7 +18,7 @@ jobs: container: image: python:3.9 #pytorch/pytorch:latest # 
docker://{docker-image-name}:{tag} volumes: - - ${{ github.workspace }}:/repo + - /home/runner/work/itwinai/itwinai:/repo steps: - name: Cache Docker image layers uses: actions/cache@v2 From 281489efa5955bd61c1a765b16e4e4d5c673bd96 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 21:00:52 +0200 Subject: [PATCH 149/171] Update action --- .github/workflows/test-torch.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 0f81df94..001b1593 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -18,7 +18,7 @@ jobs: container: image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} volumes: - - /home/runner/work/itwinai/itwinai:/repo + - /home/runner/work/itwinai/itwinai/use-cases:/repo steps: - name: Cache Docker image layers uses: actions/cache@v2 @@ -30,7 +30,7 @@ jobs: run: | whoami && pwd && - ls $(pwd) + ls -la $(pwd) # # Now the repository code is available inside the Docker container # # You can use it as needed # cd ${{ github.workspace }} && From 1e7a7e6a57d2eff3635c48d678425efc90eb9fd3 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 21:02:22 +0200 Subject: [PATCH 150/171] Update action --- .github/workflows/test-torch.yml | 2 +- tests/use-cases/test_mnist.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 001b1593..f9b9a6ce 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -18,7 +18,7 @@ jobs: container: image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} volumes: - - /home/runner/work/itwinai/itwinai/use-cases:/repo + - /home/runner/work/itwinai/itwinai/:/repo steps: - name: Cache Docker image layers uses: actions/cache@v2 diff --git a/tests/use-cases/test_mnist.py b/tests/use-cases/test_mnist.py index a9996cc7..e426af3e 100644 --- a/tests/use-cases/test_mnist.py +++ b/tests/use-cases/test_mnist.py @@ -30,12 +30,13 @@ def mnist_torch_inference_files( Defaults to '.'. 
""" sys.path = [os.getcwd()] + sys.path - raise ValueError(sys.path) + from dataloader import InferenceMNIST sample = os.path.join(root, samples_path) InferenceMNIST.generate_jpg_sample(sample, 10) import torch + raise ValueError(sys.path) from model import Net dummy_nn = Net() mdl_ckpt = os.path.join(root, model_name) From 37b6eb79e88306ea67014c5e57f4a0994acf31b0 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 21:07:50 +0200 Subject: [PATCH 151/171] Update action --- .github/workflows/test-torch.yml | 12 ++++++------ tests/use-cases/test_mnist.py | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index f9b9a6ce..b2859134 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -18,13 +18,13 @@ jobs: container: image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} volumes: - - /home/runner/work/itwinai/itwinai/:/repo + - ${{ github.workspace }}:/repo steps: - - name: Cache Docker image layers - uses: actions/cache@v2 - with: - path: /var/lib/docker/image - key: ${{ runner.os }}-docker-${{ hashFiles('**/Dockerfile') }} + # - name: Cache Docker image layers + # uses: actions/cache@v2 + # with: + # path: /var/lib/docker/image + # key: ${{ runner.os }}-docker-${{ hashFiles('**/Dockerfile') }} - name: Run commands in container # run: ls -la /repo run: | diff --git a/tests/use-cases/test_mnist.py b/tests/use-cases/test_mnist.py index e426af3e..a3f42c29 100644 --- a/tests/use-cases/test_mnist.py +++ b/tests/use-cases/test_mnist.py @@ -36,7 +36,6 @@ def mnist_torch_inference_files( InferenceMNIST.generate_jpg_sample(sample, 10) import torch - raise ValueError(sys.path) from model import Net dummy_nn = Net() mdl_ckpt = os.path.join(root, model_name) From d8cf85ae1e6315e2af143bfcd7e8058b6308cca8 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 21:08:47 +0200 Subject: [PATCH 152/171] Update action --- .github/workflows/test-torch.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index b2859134..8c623178 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -30,7 +30,8 @@ jobs: run: | whoami && pwd && - ls -la $(pwd) + ls -la $(pwd) && + ls -la /repo # # Now the repository code is available inside the Docker container # # You can use it as needed # cd ${{ github.workspace }} && From bc8ef40e185226006c5dc07b81292743b654317f Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 21:10:51 +0200 Subject: [PATCH 153/171] Update action --- .github/workflows/test-torch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 8c623178..dc6be46b 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -16,7 +16,7 @@ jobs: my_job: runs-on: ubuntu-latest container: - image: python:3.9 #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} + image: ubuntu:latest #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} volumes: - ${{ github.workspace }}:/repo steps: From 105fdcedc4484774db973a2f0ece482030398834 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 21:12:26 +0200 Subject: [PATCH 154/171] Update action --- .github/workflows/test-torch.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test-torch.yml 
b/.github/workflows/test-torch.yml index dc6be46b..49091524 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -17,8 +17,7 @@ jobs: runs-on: ubuntu-latest container: image: ubuntu:latest #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} - volumes: - - ${{ github.workspace }}:/repo + options: --volume ${{ github.workspace }}:/repo steps: # - name: Cache Docker image layers # uses: actions/cache@v2 From ff191409f0838b2e4fd881b4be981c178592ee3c Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 21:14:12 +0200 Subject: [PATCH 155/171] Update action --- .github/workflows/test-torch.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 49091524..882b1683 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -17,7 +17,11 @@ jobs: runs-on: ubuntu-latest container: image: ubuntu:latest #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} - options: --volume ${{ github.workspace }}:/repo + volumes: + - ${{ github.workspace }}/src:/repo/src + - ${{ github.workspace }}/use-cases:/repo/use-cases + - ${{ github.workspace }}/tests:/repo/tests + - ${{ github.workspace }}/pyproject.toml:/repo/pyproject.toml steps: # - name: Cache Docker image layers # uses: actions/cache@v2 From 90523afa09914de45ce5f3bfda1d42706428f354 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 21:22:24 +0200 Subject: [PATCH 156/171] Update action --- .github/workflows/test-torch.yml | 55 +++++++++++++++----------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 882b1683..74700729 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -2,46 +2,43 @@ name: Test workflows based on torch on: [push] jobs: - - my_job1: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Check workspace - shell: bash -l {0} - run: ls -la ${{ github.workspace }} - - my_job: + torch_tests: runs-on: ubuntu-latest container: - image: ubuntu:latest #pytorch/pytorch:latest # docker://{docker-image-name}:{tag} + image: pytorch/pytorch:latest volumes: - ${{ github.workspace }}/src:/repo/src - ${{ github.workspace }}/use-cases:/repo/use-cases - ${{ github.workspace }}/tests:/repo/tests + - ${{ github.workspace }}/env-files:/repo/env-files - ${{ github.workspace }}/pyproject.toml:/repo/pyproject.toml + - ${{ github.workspace }}/Makefile:/repo/Makefile steps: # - name: Cache Docker image layers # uses: actions/cache@v2 # with: # path: /var/lib/docker/image # key: ${{ runner.os }}-docker-${{ hashFiles('**/Dockerfile') }} - - name: Run commands in container - # run: ls -la /repo - run: | - whoami && - pwd && - ls -la $(pwd) && - ls -la /repo - # # Now the repository code is available inside the Docker container - # # You can use it as needed - # cd ${{ github.workspace }} && - # # Run your commands here - # ls -la . && - # ls -la ${{ github.workspace }} && - # ls -la ${{ github.workspace }}/.. && + - name: Install environment + run: pip install --no-cache .[dev] + + - name: Run tests + run: which pip + + - name: Run tests + run: pytest -v tests/ + # run: | + # whoami && # pwd && - # echo ${{ github.workspace }} && - # pip install . 
+ # ls -la $(pwd) && + # ls -la /repo + # # # Now the repository code is available inside the Docker container + # # # You can use it as needed + # # cd ${{ github.workspace }} && + # # # Run your commands here + # # ls -la . && + # # ls -la ${{ github.workspace }} && + # # ls -la ${{ github.workspace }}/.. && + # # pwd && + # # echo ${{ github.workspace }} && + # # pip install . From 8bd8769d292625c24786f651e3e9135707331d4a Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 21:26:59 +0200 Subject: [PATCH 157/171] Update action --- .github/workflows/test-torch.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 74700729..57b81e07 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -3,6 +3,7 @@ on: [push] jobs: torch_tests: + name: Run pytest for torch runs-on: ubuntu-latest container: image: pytorch/pytorch:latest @@ -14,13 +15,12 @@ jobs: - ${{ github.workspace }}/pyproject.toml:/repo/pyproject.toml - ${{ github.workspace }}/Makefile:/repo/Makefile steps: - # - name: Cache Docker image layers - # uses: actions/cache@v2 - # with: - # path: /var/lib/docker/image - # key: ${{ runner.os }}-docker-${{ hashFiles('**/Dockerfile') }} - name: Install environment - run: pip install --no-cache .[dev] + run: | + ls -la $(pwd) && + ls -la /repo && + cd /repo && + pip install --no-cache .[dev] - name: Run tests run: which pip From 7f59e5c439125992041a4a040b8611360206586a Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 21:29:58 +0200 Subject: [PATCH 158/171] Update action --- .github/workflows/test-torch.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 57b81e07..9423ebcb 100644 --- a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -20,10 +20,14 @@ jobs: ls -la $(pwd) && ls -la /repo && cd /repo && - pip install --no-cache .[dev] + pip install --no-cache /repo[dev] - name: Run tests - run: which pip + run: | + which pip && + which pip3 && + which python && + which python3 - name: Run tests run: pytest -v tests/ From 8fb77ece2a09ae5b50162c21250a042bc5afee85 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Mon, 29 Apr 2024 21:48:02 +0200 Subject: [PATCH 159/171] FIX failing inference --- .github/workflows/check-links.yml | 2 +- .github/workflows/lint.yml | 2 +- .github/workflows/test-torch.yml | 4 +- tests/use-cases/test_3dgan.py | 32 ++-------- tests/use-cases/test_mnist.py | 59 ++----------------- use-cases/3dgan/create_inference_sample.py | 10 ++++ .../mnist/torch/create_inference_sample.py | 34 +++++++++++ 7 files changed, 60 insertions(+), 83 deletions(-) create mode 100644 use-cases/3dgan/create_inference_sample.py create mode 100644 use-cases/mnist/torch/create_inference_sample.py diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml index 7253ac26..c7d0737b 100644 --- a/.github/workflows/check-links.yml +++ b/.github/workflows/check-links.yml @@ -2,7 +2,7 @@ name: Check links on: - # push: + push: pull_request: jobs: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index bb241d0d..8eca0a3c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,7 +2,7 @@ name: Lint on: - # push: + push: pull_request: jobs: diff --git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml index 9423ebcb..8ca772f7 100644 --- 
a/.github/workflows/test-torch.yml +++ b/.github/workflows/test-torch.yml @@ -1,5 +1,7 @@ name: Test workflows based on torch -on: [push] +on: null + +# https://docs.github.com/en/actions/using-jobs/running-jobs-in-a-container jobs: torch_tests: diff --git a/tests/use-cases/test_3dgan.py b/tests/use-cases/test_3dgan.py index c40c584c..3ec84d48 100644 --- a/tests/use-cases/test_3dgan.py +++ b/tests/use-cases/test_3dgan.py @@ -3,32 +3,11 @@ """ import pytest import subprocess -import os CERN_PATH = "use-cases/3dgan" CKPT_PATH = "3dgan-inference.pth" -@pytest.fixture(scope="module") -def fake_model_checkpoint() -> None: - """ - Create a dummy model checkpoint for inference - under ``CERN_PATH`` location. - """ - import sys - import torch - curr_path = os.getcwd() - os.chdir(CERN_PATH) - sys.path.append(os.getcwd()) - - from model import ThreeDGAN - net = ThreeDGAN() - torch.save(net, CKPT_PATH) - - sys.path.pop(sys.path.index(os.getcwd())) - os.chdir(curr_path) - - @pytest.mark.skip("deprecated") def test_structure_3dgan(check_folder_structure): """Test 3DGAN folder structure.""" @@ -54,17 +33,18 @@ def test_3dgan_train(torch_env, install_requirements): def test_3dgan_inference( torch_env, install_requirements, - fake_model_checkpoint + # fake_model_checkpoint ): """ Test 3DGAN torch lightning trainer by running it end-to-end. """ install_requirements(CERN_PATH, torch_env) - # cmd = (f"micromamba run -p {torch_env} python " - # f"{CERN_PATH}/train.py -p {CERN_PATH}/pipeline.yaml") - # cmd = (f"micromamba run -p {torch_env} itwinai exec-pipeline " - # f"--config {CERN_PATH}/inference-pipeline.yaml") + # Create fake inference dataset and checkpoint + cmd = f"{torch_env}/bin/python create_inference_sample.py" + subprocess.run(cmd.split(), check=True, cwd=CERN_PATH) + + # Test inference getter_params = "pipeline.init_args.steps.dataloading_step.init_args" trainer_params = "pipeline.init_args.steps.inference_step.init_args" logger_params = trainer_params + ".config.trainer.logger.init_args" diff --git a/tests/use-cases/test_mnist.py b/tests/use-cases/test_mnist.py index a3f42c29..b39eb1a8 100644 --- a/tests/use-cases/test_mnist.py +++ b/tests/use-cases/test_mnist.py @@ -6,8 +6,6 @@ """ import pytest -import os -import sys import subprocess # from itwinai.cli import exec_pipeline @@ -16,34 +14,6 @@ TF_PATH = "use-cases/mnist/tensorflow" -def mnist_torch_inference_files( - root: str = '.', - samples_path: str = 'mnist-sample-data/', - model_name: str = 'mnist-pre-trained.pth' -): - """Create sample dataset and fake model to test mnist - inference workflow. Assumes to be run from - the use case folder. - - Args: - root (str, optional): where to create the files. - Defaults to '.'. 
- """ - sys.path = [os.getcwd()] + sys.path - - from dataloader import InferenceMNIST - sample = os.path.join(root, samples_path) - InferenceMNIST.generate_jpg_sample(sample, 10) - - import torch - from model import Net - dummy_nn = Net() - mdl_ckpt = os.path.join(root, model_name) - torch.save(dummy_nn, mdl_ckpt) - - sys.path = sys.path[1:] - - @pytest.mark.skip(reason="structure changed") def test_structure_mnist_torch(check_folder_structure): """Test MNIST folder structure for torch native trainer.""" @@ -88,30 +58,11 @@ def test_mnist_inference_torch(torch_env, install_requirements): """ install_requirements(TORCH_PATH, torch_env) - samples_path: str = 'mnist-sample-data/' - model_name: str = 'mnist-pre-trained.pth' - root_path = os.getcwd() - os.chdir(TORCH_PATH) - # sys.path.append(os.path.join(os.getcwd(), TORCH_PATH)) - # sys.path.append(os.getcwd()) - try: - mnist_torch_inference_files( - samples_path=samples_path, - model_name=model_name - ) - # exec_pipeline( - # config='config.yaml', - # pipe_key='inference_pipeline', - # overrides_list=[ - # f"predictions_dir={samples_path}", - # f"inference_model_mlflow_uri={model_name}" - # ] - # ) - except Exception as e: - raise e - finally: - os.chdir(root_path) - # sys.path.pop(sys.path.index(os.getcwd())) + # Create fake inference dataset and checkpoint + cmd = f"{torch_env}/bin/python create_inference_sample.py" + subprocess.run(cmd.split(), check=True, cwd=TORCH_PATH) + + # Test inference cmd = (f"{torch_env}/bin/itwinai exec-pipeline " f"--config config.yaml --pipe-key inference_pipeline") subprocess.run(cmd.split(), check=True, cwd=TORCH_PATH) diff --git a/use-cases/3dgan/create_inference_sample.py b/use-cases/3dgan/create_inference_sample.py new file mode 100644 index 00000000..366bc672 --- /dev/null +++ b/use-cases/3dgan/create_inference_sample.py @@ -0,0 +1,10 @@ +"""Create a simple inference dataset sample and a checkpoint.""" + +import torch + +CKPT_PATH = "3dgan-inference.pth" + +if __name__ == "__main__": + from model import ThreeDGAN + net = ThreeDGAN() + torch.save(net, CKPT_PATH) diff --git a/use-cases/mnist/torch/create_inference_sample.py b/use-cases/mnist/torch/create_inference_sample.py new file mode 100644 index 00000000..2b03f610 --- /dev/null +++ b/use-cases/mnist/torch/create_inference_sample.py @@ -0,0 +1,34 @@ +"""Create a simple inference dataset sample and a checkpoint.""" + +import torch +import os + +from model import Net +from dataloader import InferenceMNIST + + +def mnist_torch_inference_files( + root: str = '.', + samples_path: str = 'mnist-sample-data/', + model_name: str = 'mnist-pre-trained.pth' +): + """Create sample dataset and fake model to test mnist + inference workflow. Assumes to be run from + the use case folder. + + Args: + root (str, optional): where to create the files. + Defaults to '.'. 
+ """ + + sample = os.path.join(root, samples_path) + InferenceMNIST.generate_jpg_sample(sample, 10) + + # Fake checkpoint + dummy_nn = Net() + mdl_ckpt = os.path.join(root, model_name) + torch.save(dummy_nn, mdl_ckpt) + + +if __name__ == "__main__": + mnist_torch_inference_files() From 0e9d090e62cce18c1db3921c0d4beb6d309061ac Mon Sep 17 00:00:00 2001 From: Matteo Bunino <48362942+matbun@users.noreply.github.com> Date: Tue, 30 Apr 2024 12:00:49 +0200 Subject: [PATCH 160/171] Functiona tests (#133) * UPDATE tests * FIX errors --- README.md | 30 ++++++++++++- tests/use-cases/conftest.py | 24 +++++++++- tests/use-cases/test_3dgan.py | 26 +++++++---- tests/use-cases/test_cyclones.py | 11 +++-- tests/use-cases/test_mnist.py | 42 +++++++++++------- tfrecords/.DS_Store | Bin 0 -> 6148 bytes use-cases/3dgan/create_inference_sample.py | 21 +++++++-- use-cases/3dgan/dataloader.py | 3 +- use-cases/cyclones/dataloader.py | 1 + .../mnist/torch/create_inference_sample.py | 10 ++++- 10 files changed, 131 insertions(+), 37 deletions(-) create mode 100644 tfrecords/.DS_Store diff --git a/README.md b/README.md index dc9a60dc..ce8b6684 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,35 @@ pip install -e .[dev] #### Test with `pytest` -To run tests on itwinai package: +Do this only if you are a developer wanting to test your code with pytest. + +First, you need to create virtual environments both for torch and tensorflow. +For instance, you can use: + +```bash +make torch-cpu +make make tf-2.13-cpu +``` + +To select the name of the torch and tf environments you can set the following +environment variables, which allow to run the tests in environments with +custom names which are different from `.venv-pytorch` and `.venv-tf`. + +```bash +export TORCH_ENV="my_torch_env" +export TF_ENV="my_tf_env" +``` + +Functional tests (marked with `pytest.mark.functional`) will be executed under +`/tmp/pytest` location to guarantee they are run in a clean environment. + +To run functional tests use: + +```bash +pytest -v tests/ -m "functional" +``` + +To run all tests on itwinai package: ```bash # Activate env diff --git a/tests/use-cases/conftest.py b/tests/use-cases/conftest.py index eccdc208..69229db6 100644 --- a/tests/use-cases/conftest.py +++ b/tests/use-cases/conftest.py @@ -2,6 +2,8 @@ from typing import Callable import pytest import subprocess +import random +import string FNAMES = [ @@ -10,6 +12,24 @@ ] +def rnd_string(len: int = 26): + return ''.join(random.sample(string.ascii_lowercase, len)) + + +@pytest.fixture +def tmp_test_dir(): + root = '/tmp/pytest' + os.makedirs(root, exist_ok=True) + test_dir = os.path.join(root, rnd_string()) + while os.path.exists(test_dir): + test_dir = os.path.join(root, rnd_string()) + os.makedirs(test_dir, exist_ok=True) + + yield test_dir + + # Optional: remove dir here... 
+ + @pytest.fixture def torch_env() -> str: """ @@ -21,7 +41,7 @@ def torch_env() -> str: env_p = './.venv-pytorch' else: env_p = os.environ.get('TORCH_ENV') - return os.path.join(os.getcwd(), env_p) + return os.path.abspath(env_p) @pytest.fixture @@ -35,7 +55,7 @@ def tf_env() -> str: env_p = './.venv-tf' else: env_p = os.environ.get('TF_ENV') - return os.path.join(os.getcwd(), env_p) + return os.path.abspath(env_p) @pytest.fixture diff --git a/tests/use-cases/test_3dgan.py b/tests/use-cases/test_3dgan.py index 3ec84d48..9d19d1f3 100644 --- a/tests/use-cases/test_3dgan.py +++ b/tests/use-cases/test_3dgan.py @@ -3,9 +3,10 @@ """ import pytest import subprocess +import os CERN_PATH = "use-cases/3dgan" -CKPT_PATH = "3dgan-inference.pth" +CKPT_NAME = "3dgan-inference.pth" @pytest.mark.skip("deprecated") @@ -15,23 +16,25 @@ def test_structure_3dgan(check_folder_structure): @pytest.mark.functional -def test_3dgan_train(torch_env, install_requirements): +def test_3dgan_train(torch_env, tmp_test_dir, install_requirements): """ Test 3DGAN torch lightning trainer by running it end-to-end. """ install_requirements(CERN_PATH, torch_env) + conf = os.path.join(os.path.abspath(CERN_PATH), 'pipeline.yaml') trainer_params = "pipeline.init_args.steps.training_step.init_args" cmd = (f"{torch_env}/bin/itwinai exec-pipeline " - f"--config pipeline.yaml " + f"--config {conf} " f'-o {trainer_params}.config.trainer.accelerator=cpu ' f'-o {trainer_params}.config.trainer.strategy=auto ' ) - subprocess.run(cmd.split(), check=True, cwd=CERN_PATH) + subprocess.run(cmd.split(), check=True, cwd=tmp_test_dir) @pytest.mark.functional def test_3dgan_inference( torch_env, + tmp_test_dir, install_requirements, # fake_model_checkpoint ): @@ -41,10 +44,15 @@ def test_3dgan_inference( install_requirements(CERN_PATH, torch_env) # Create fake inference dataset and checkpoint - cmd = f"{torch_env}/bin/python create_inference_sample.py" - subprocess.run(cmd.split(), check=True, cwd=CERN_PATH) + exec = os.path.join(os.path.abspath(CERN_PATH), + 'create_inference_sample.py') + cmd = (f"{torch_env}/bin/python {exec} " + f"--root {tmp_test_dir} " + f"--ckpt-name {CKPT_NAME}") + subprocess.run(cmd.split(), check=True, cwd=tmp_test_dir) # Test inference + conf = os.path.join(os.path.abspath(CERN_PATH), 'inference-pipeline.yaml') getter_params = "pipeline.init_args.steps.dataloading_step.init_args" trainer_params = "pipeline.init_args.steps.inference_step.init_args" logger_params = trainer_params + ".config.trainer.logger.init_args" @@ -52,10 +60,10 @@ def test_3dgan_inference( saver_params = "pipeline.init_args.steps.saver_step.init_args" cmd = ( f'{torch_env}/bin/itwinai exec-pipeline ' - '--config inference-pipeline.yaml ' + f'--config {conf} ' f'-o {getter_params}.data_path=exp_data ' - f'-o {trainer_params}.model.init_args.model_uri={CKPT_PATH} ' - f'-o {trainer_params}.config.trainer.accelerator=cpu ' + f'-o {trainer_params}.model.init_args.model_uri={CKPT_NAME} ' + f'-o {trainer_params}.config.trainer.accelerator=auto ' f'-o {trainer_params}.config.trainer.strategy=auto ' f'-o {logger_params}.save_dir=ml_logs/mlflow_logs ' f'-o {data_params}.datapath=exp_data/*/*.h5 ' diff --git a/tests/use-cases/test_cyclones.py b/tests/use-cases/test_cyclones.py index 6b262d45..d6a1ea2c 100644 --- a/tests/use-cases/test_cyclones.py +++ b/tests/use-cases/test_cyclones.py @@ -7,6 +7,7 @@ import pytest import subprocess +import os CYCLONES_PATH = "use-cases/cyclones" @@ -19,12 +20,14 @@ def test_structure_cyclones(check_folder_structure): 
@pytest.mark.functional @pytest.mark.memory_heavy -def test_cyclones_train_tf(tf_env, install_requirements): +def test_cyclones_train_tf(tf_env, tmp_test_dir, install_requirements): """ Test Cyclones tensorflow trainer by running it end-to-end. """ # TODO: create a small sample dataset for tests only install_requirements(CYCLONES_PATH, tf_env) - cmd = (f"{tf_env}/bin/python train.py " - f"-p pipeline.yaml") - subprocess.run(cmd.split(), check=True, cwd=CYCLONES_PATH) + pipe = os.path.join(os.path.abspath(CYCLONES_PATH), 'pipeline.yaml') + train = os.path.join(os.path.abspath(CYCLONES_PATH), 'train.py') + cmd = (f"{tf_env}/bin/python {train} " + f"-p {pipe}") + subprocess.run(cmd.split(), check=True, cwd=tmp_test_dir) diff --git a/tests/use-cases/test_mnist.py b/tests/use-cases/test_mnist.py index b39eb1a8..1f18a8e6 100644 --- a/tests/use-cases/test_mnist.py +++ b/tests/use-cases/test_mnist.py @@ -7,6 +7,7 @@ import pytest import subprocess +import os # from itwinai.cli import exec_pipeline TORCH_PATH = "use-cases/mnist/torch" @@ -33,7 +34,7 @@ def test_structure_mnist_tf(check_folder_structure): @pytest.mark.functional -def test_mnist_train_torch(torch_env, install_requirements): +def test_mnist_train_torch(torch_env, tmp_test_dir, install_requirements): """ Test MNIST torch native trainer by running it end-to-end. @@ -42,13 +43,14 @@ def test_mnist_train_torch(torch_env, install_requirements): >>> export TORCH_ENV="my_env" """ install_requirements(TORCH_PATH, torch_env) + conf = os.path.join(os.path.abspath(TORCH_PATH), 'config.yaml') cmd = (f"{torch_env}/bin/itwinai exec-pipeline " - f"--config config.yaml --pipe-key training_pipeline") - subprocess.run(cmd.split(), check=True, cwd=TORCH_PATH) + f"--config {conf} --pipe-key training_pipeline") + subprocess.run(cmd.split(), check=True, cwd=tmp_test_dir) @pytest.mark.functional -def test_mnist_inference_torch(torch_env, install_requirements): +def test_mnist_inference_torch(torch_env, tmp_test_dir, install_requirements): """ Test MNIST torch native inference by running it end-to-end. @@ -59,17 +61,25 @@ def test_mnist_inference_torch(torch_env, install_requirements): install_requirements(TORCH_PATH, torch_env) # Create fake inference dataset and checkpoint - cmd = f"{torch_env}/bin/python create_inference_sample.py" - subprocess.run(cmd.split(), check=True, cwd=TORCH_PATH) + exec = os.path.join(os.path.abspath(TORCH_PATH), + 'create_inference_sample.py') + cmd = (f"{torch_env}/bin/python {exec} " + f"--root {tmp_test_dir}") + subprocess.run(cmd.split(), check=True, cwd=tmp_test_dir) # Test inference + conf = os.path.join(os.path.abspath(TORCH_PATH), 'config.yaml') cmd = (f"{torch_env}/bin/itwinai exec-pipeline " - f"--config config.yaml --pipe-key inference_pipeline") - subprocess.run(cmd.split(), check=True, cwd=TORCH_PATH) + f"--config {conf} --pipe-key inference_pipeline") + subprocess.run(cmd.split(), check=True, cwd=tmp_test_dir) @pytest.mark.functional -def test_mnist_train_torch_lightning(torch_env, install_requirements): +def test_mnist_train_torch_lightning( + torch_env, + tmp_test_dir, + install_requirements +): """ Test MNIST torch lightning trainer by running it end-to-end. 
@@ -77,18 +87,20 @@ def test_mnist_train_torch_lightning(torch_env, install_requirements): >>> export TORCH_ENV="my_env" """ - install_requirements(TORCH_PATH, torch_env) + install_requirements(LIGHTNING_PATH, torch_env) + conf = os.path.join(os.path.abspath(LIGHTNING_PATH), 'config.yaml') cmd = (f"{torch_env}/bin/itwinai exec-pipeline " - f"--config config.yaml --pipe-key training_pipeline") - subprocess.run(cmd.split(), check=True, cwd=LIGHTNING_PATH) + f"--config {conf} --pipe-key training_pipeline") + subprocess.run(cmd.split(), check=True, cwd=tmp_test_dir) @pytest.mark.functional -def test_mnist_train_tf(tf_env, install_requirements): +def test_mnist_train_tf(tf_env, tmp_test_dir, install_requirements): """ Test MNIST tensorflow trainer by running it end-to-end. """ install_requirements(TF_PATH, tf_env) + conf = os.path.join(os.path.abspath(TF_PATH), 'pipeline.yaml') cmd = (f"{tf_env}/bin/itwinai exec-pipeline " - f"--config pipeline.yaml --pipe-key pipeline") - subprocess.run(cmd.split(), check=True, cwd=TF_PATH) + f"--config {conf} --pipe-key pipeline") + subprocess.run(cmd.split(), check=True, cwd=tmp_test_dir) diff --git a/tfrecords/.DS_Store b/tfrecords/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..c1aed8745f8d4bbf03ec376068d57fd643ed8428 GIT binary patch literal 6148 zcmeHK%}N6?5Kh`^Q$#7E&||=B!CI>zUe>j~fGc`XS$Ex{i`z|Ux7|Z2?pfc%XYd6) z_yRtUlb?#R6}%`aGcfsPlbKBOB_tg}2(^d71|d~K2m_T^s6aDA56vStX&>@_jc)NZ?<3XCBNSF`G9d2<4l2@$E;3ES?{aKao z|H0?=|HmNwLI#k5f5iYTwVk#NOEPb3ZgKLh<)9a!Qlwv|@ht=l^%R4ze2QzJN}!)h X1JE* None: if not exists(join(root_dir, self.data_path)): gdown.download_folder( url=self.data_url, quiet=False, + verify=False, output=join(root_dir, self.data_path) ) diff --git a/use-cases/mnist/torch/create_inference_sample.py b/use-cases/mnist/torch/create_inference_sample.py index 2b03f610..1c588c48 100644 --- a/use-cases/mnist/torch/create_inference_sample.py +++ b/use-cases/mnist/torch/create_inference_sample.py @@ -2,6 +2,7 @@ import torch import os +import argparse from model import Net from dataloader import InferenceMNIST @@ -31,4 +32,11 @@ def mnist_torch_inference_files( if __name__ == "__main__": - mnist_torch_inference_files() + parser = argparse.ArgumentParser() + parser.add_argument("--root", type=str, default='.') + parser.add_argument("--samples-path", type=str, + default='mnist-sample-data') + parser.add_argument("--model-name", type=str, + default='mnist-pre-trained.pth') + args = parser.parse_args() + mnist_torch_inference_files(**vars(args)) From c82fbef5333bfed4dbf7411cc1db255156092be1 Mon Sep 17 00:00:00 2001 From: Matteo Bunino Date: Tue, 30 Apr 2024 12:01:42 +0200 Subject: [PATCH 161/171] CLEANUP --- tfrecords/.DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tfrecords/.DS_Store diff --git a/tfrecords/.DS_Store b/tfrecords/.DS_Store deleted file mode 100644 index c1aed8745f8d4bbf03ec376068d57fd643ed8428..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}N6?5Kh`^Q$#7E&||=B!CI>zUe>j~fGc`XS$Ex{i`z|Ux7|Z2?pfc%XYd6) z_yRtUlb?#R6}%`aGcfsPlbKBOB_tg}2(^d71|d~K2m_T^s6aDA56vStX&>@_jc)NZ?<3XCBNSF`G9d2<4l2@$E;3ES?{aKao z|H0?=|HmNwLI#k5f5iYTwVk#NOEPb3ZgKLh<)9a!Qlwv|@ht=l^%R4ze2QzJN}!)h X1JE* Date: Tue, 30 Apr 2024 12:02:43 +0200 Subject: [PATCH 162/171] Remove unused workflow --- .github/workflows/test-torch.yml | 50 -------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 .github/workflows/test-torch.yml diff 
--git a/.github/workflows/test-torch.yml b/.github/workflows/test-torch.yml deleted file mode 100644 index 8ca772f7..00000000 --- a/.github/workflows/test-torch.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Test workflows based on torch -on: null - -# https://docs.github.com/en/actions/using-jobs/running-jobs-in-a-container - -jobs: - torch_tests: - name: Run pytest for torch - runs-on: ubuntu-latest - container: - image: pytorch/pytorch:latest - volumes: - - ${{ github.workspace }}/src:/repo/src - - ${{ github.workspace }}/use-cases:/repo/use-cases - - ${{ github.workspace }}/tests:/repo/tests - - ${{ github.workspace }}/env-files:/repo/env-files - - ${{ github.workspace }}/pyproject.toml:/repo/pyproject.toml - - ${{ github.workspace }}/Makefile:/repo/Makefile - steps: - - name: Install environment - run: | - ls -la $(pwd) && - ls -la /repo && - cd /repo && - pip install --no-cache /repo[dev] - - - name: Run tests - run: | - which pip && - which pip3 && - which python && - which python3 - - - name: Run tests - run: pytest -v tests/ - # run: | - # whoami && - # pwd && - # ls -la $(pwd) && - # ls -la /repo - # # # Now the repository code is available inside the Docker container - # # # You can use it as needed - # # cd ${{ github.workspace }} && - # # # Run your commands here - # # ls -la . && - # # ls -la ${{ github.workspace }} && - # # ls -la ${{ github.workspace }}/.. && - # # pwd && - # # echo ${{ github.workspace }} && - # # pip install . From 52005b90dcb3517fdc01596ca83b17549a943c75 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Thu, 2 May 2024 11:48:06 +0200 Subject: [PATCH 163/171] Fixes to TF new version errors --- env-files/tensorflow/createEnvJSCTF.sh | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/env-files/tensorflow/createEnvJSCTF.sh b/env-files/tensorflow/createEnvJSCTF.sh index 377940d4..25b3273d 100644 --- a/env-files/tensorflow/createEnvJSCTF.sh +++ b/env-files/tensorflow/createEnvJSCTF.sh @@ -14,16 +14,7 @@ echo "system:${sysN}" echo cont1=false -if [ "$sysN" = 'deepv' ] ; then - ml use "$OTHERSTAGES" - ml Stages/2022 GCC OpenMPI cuDNN NCCL Python CMake - cont1=true -elif [ "$sysN" = 'juwels' ] ; then - ml Stages/2022 GCC ParaStationMPI Python CMake NCCL libaio cuDNN - cont1=true -elif [ "$sysN" = 'hdfml' ] ; then - #ml Stages/2022 GCC OpenMPI Python NCCL cuDNN libaio CMake - #ml Stages/2023 NVHPC/23.1 ParaStationMPI/5.8.0-1-mt NCCL/default-CUDA-11.7 cuDNN/8.6.0.163-CUDA-11.7 Python CMake +if [ "$sysN" = 'hdfml' ] ; then ml Stages/2024 GCC/12.3.0 OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py CMake cuDNN/8.9.5.29-CUDA-12 cont1=true else @@ -108,4 +99,10 @@ fi pip install --upgrade pip pip install -e .[dev] +# install legacy version of keras (2.16) +# Since TF 2.16, keras updated to 3.3, +# which leads to an error when more than 1 node is used +# https://keras.io/getting_started/ +pip3 install tf_keras + # eof From 23b2aa01963ef2b47c7d08d39a51e5a6f2add59a Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Thu, 2 May 2024 11:50:29 +0200 Subject: [PATCH 164/171] Fixes to TF new version errors --- src/itwinai/tensorflow/distributed.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/itwinai/tensorflow/distributed.py b/src/itwinai/tensorflow/distributed.py index 64945ca8..322ba940 100644 --- a/src/itwinai/tensorflow/distributed.py +++ b/src/itwinai/tensorflow/distributed.py @@ -1,4 +1,5 @@ import os +os.environ["TF_USE_LEGACY_KERAS"] = 
"1" import tensorflow as tf import tensorflow.distribute as dist From d9c0cd42bc74ba98defcef79b9861c053a028b91 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Thu, 2 May 2024 11:52:43 +0200 Subject: [PATCH 165/171] Fixes to TF new version errors --- tutorials/distributed-ml/tf-tutorial-0-basics/train.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/train.py b/tutorials/distributed-ml/tf-tutorial-0-basics/train.py index ee29bca5..75a043aa 100644 --- a/tutorials/distributed-ml/tf-tutorial-0-basics/train.py +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/train.py @@ -5,11 +5,15 @@ >>> sbatch tfmirrored_slurm.sh """ +import os +# Using legacy (2.16) version of Keras + # Latest version with TF (2.16) installs Keras 3.3 + # which returns an error for multi-node execution + os.environ["TF_USE_LEGACY_KERAS"] = "1" from typing import Any import argparse import tensorflow as tf from tensorflow import keras -import os from itwinai.tensorflow.distributed import get_strategy From 5f23fba48e649573cb5bce391ae91b206b92f1ce Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Thu, 2 May 2024 11:54:45 +0200 Subject: [PATCH 166/171] Fixes to TF new version errors --- .../distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh b/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh index e1c8d54b..f19e744b 100644 --- a/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh @@ -27,8 +27,8 @@ unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY ml --force purge ml Stages/2024 GCC/12.3.0 OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py CMake cuDNN/8.9.5.29-CUDA-12 -# set env -source /p/project/intertwin/rakesh/T6.5-AI-and-ML/dist_trainer/TF_runs/testAI_hdfml/bin/activate +# set env - change to location of your environment +source itwinai/envAItf_hdfml/bin/activate # sleep a sec sleep 1 From d582cfdc46e548b69b75279031f566141d579a7b Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Thu, 2 May 2024 16:33:07 +0200 Subject: [PATCH 167/171] Update distributed.py --- src/itwinai/tensorflow/distributed.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/itwinai/tensorflow/distributed.py b/src/itwinai/tensorflow/distributed.py index 322ba940..64945ca8 100644 --- a/src/itwinai/tensorflow/distributed.py +++ b/src/itwinai/tensorflow/distributed.py @@ -1,5 +1,4 @@ import os -os.environ["TF_USE_LEGACY_KERAS"] = "1" import tensorflow as tf import tensorflow.distribute as dist From b802845bd4063945c602c3a8f5aef6967bc9bdb9 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Thu, 2 May 2024 16:34:07 +0200 Subject: [PATCH 168/171] Update tfmirrored_slurm.sh --- .../distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh b/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh index f19e744b..e7721868 100644 --- a/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/tfmirrored_slurm.sh @@ -30,6 +30,11 @@ ml Stages/2024 GCC/12.3.0 OpenMPI CUDA/12 MPI-settings/CUDA 
Python HDF5 PnetCDF # set env - change to location of your environment source itwinai/envAItf_hdfml/bin/activate +# Using legacy (2.16) version of Keras +# Latest version with TF (2.16) installs Keras 3.3 +# which returns an error for multi-node execution +export TF_USE_LEGACY_KERAS=1 + # sleep a sec sleep 1 From efea5f93ce63f66533b7ddb040f9e4ffb129f234 Mon Sep 17 00:00:00 2001 From: r-sarma <126173968+r-sarma@users.noreply.github.com> Date: Thu, 2 May 2024 16:34:23 +0200 Subject: [PATCH 169/171] Update train.py --- tutorials/distributed-ml/tf-tutorial-0-basics/train.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tutorials/distributed-ml/tf-tutorial-0-basics/train.py b/tutorials/distributed-ml/tf-tutorial-0-basics/train.py index 75a043aa..4c0afd53 100644 --- a/tutorials/distributed-ml/tf-tutorial-0-basics/train.py +++ b/tutorials/distributed-ml/tf-tutorial-0-basics/train.py @@ -6,10 +6,6 @@ """ import os -# Using legacy (2.16) version of Keras - # Latest version with TF (2.16) installs Keras 3.3 - # which returns an error for multi-node execution - os.environ["TF_USE_LEGACY_KERAS"] = "1" from typing import Any import argparse import tensorflow as tf From a69ea8bd7e7cf19dd9a810970797ee178277c283 Mon Sep 17 00:00:00 2001 From: r-sarma Date: Thu, 2 May 2024 18:41:52 +0200 Subject: [PATCH 170/171] TF updates --- .vscode/settings.json | 5 +- .../tf-scaling-test-jube/README.md | 44 +++++ .../tf-scaling-test-jube/bench_plot.ipynb | 170 +++++++++++++++++ .../tf-scaling-test-jube/general_jobsys.xml | 140 ++++++++++++++ .../tf-scaling-test-jube/jube_ddp.sh | 72 ++++++++ .../tf-scaling-test-jube/train.py | 171 ++++++++++++++++++ .../tfmirrored_slurm.sh | 70 +++++++ .../tf-tutorial-1-imagenet/train.py | 171 ++++++++++++++++++ 8 files changed, 842 insertions(+), 1 deletion(-) create mode 100644 tutorials/distributed-ml/tf-scaling-test-jube/README.md create mode 100644 tutorials/distributed-ml/tf-scaling-test-jube/bench_plot.ipynb create mode 100644 tutorials/distributed-ml/tf-scaling-test-jube/general_jobsys.xml create mode 100644 tutorials/distributed-ml/tf-scaling-test-jube/jube_ddp.sh create mode 100644 tutorials/distributed-ml/tf-scaling-test-jube/train.py create mode 100644 tutorials/distributed-ml/tf-tutorial-1-imagenet/tfmirrored_slurm.sh create mode 100644 tutorials/distributed-ml/tf-tutorial-1-imagenet/train.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 08d06d81..32e0cccb 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -59,5 +59,8 @@ "tests" ], "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": [ + "./src/itwinai" + ] } \ No newline at end of file diff --git a/tutorials/distributed-ml/tf-scaling-test-jube/README.md b/tutorials/distributed-ml/tf-scaling-test-jube/README.md new file mode 100644 index 00000000..bc2cab1c --- /dev/null +++ b/tutorials/distributed-ml/tf-scaling-test-jube/README.md @@ -0,0 +1,44 @@ +# Benchmarking tutorial using JUBE + +Benchmarking of itwinai can also be performed with the JUBE Benchmarking Environment from JSC. +The JUBE benchmarking tool is already setup in the environment files provided under `env-files`. 
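Before sourcing the environment and launching a run, it can help to confirm that the `jube` command itself is reachable from your shell; the check below is an assumption about your setup rather than part of the documented workflow:

```bash
# Confirm the JUBE CLI is available (assumes it was installed with the Python env)
which jube
jube --help
```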
+ +## Source the environment + +Find the location of your environment file along with the module load commands, such as: + +```bash +ml Stages/2024 GCC/12.3.0 OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py CMake cuDNN/8.9.5.29-CUDA-12 +source envAI_hdfml/bin/activate +``` + +## Run benchmark + +The benchmarks are defined in the `general_jobsys.xml` file. +One can specify the configurations in terms of parameters such as the number of nodes. +The benchmark can be simply launched with the command: + +```bash +jube run general_jobsys.xml +``` + +## Monitor status of benchmark run + +The status of the run can be monitored with: + +```bash +jube continue bench_run --id last +``` + +## Check results of the benchmark run + +The results can be viewed with: + +```bash +jube result -a bench_run --id last +``` + +This will create `result-csv.dat` file in the `results` folder. + +The scaling and efficiency plots can be generated with the `bench_plot.ipynb` file +which takes the `result-csv.dat` file as input. diff --git a/tutorials/distributed-ml/tf-scaling-test-jube/bench_plot.ipynb b/tutorials/distributed-ml/tf-scaling-test-jube/bench_plot.ipynb new file mode 100644 index 00000000..fda6cd13 --- /dev/null +++ b/tutorials/distributed-ml/tf-scaling-test-jube/bench_plot.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plot benchmark results of itwinai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os, pandas as pd, matplotlib.pyplot as plt, numpy as np\n", + "%matplotlib inline\n", + "pd.options.display.max_columns = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "plt.rcParams['figure.figsize'] = [12, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "df = pd.read_csv('result-csv.dat',header=0)\n", + "df.rename(columns=lambda x: x.split('[')[0], inplace=True)\n", + "\n", + "# gpus\n", + "df[\"NGPUs\"] = df[\"Nnodes\"]*4\n", + "\n", + "# speedup\n", + "df[\"Speedup - ideal\"] = df[\"Nnodes\"].astype(float)\n", + "df[\"Speedup\"] = df[\"Naet\"].iloc[0] / df[\"Naet\"]\n", + "\n", + "# efficiency\n", + "df[\"Threadscaled Sim. Time / s\"] = df[\"Naet\"] * df[\"Nnodes\"] * df[\"Nworkers\"]\n", + "df[\"Efficiency\"] = df[\"Threadscaled Sim. Time / s\"].iloc[0] / df[\"Threadscaled Sim. 
Time / s\"]\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overview" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ax = df.pivot_table(index=[\"NGPUs\"], columns=[\"Nworkers\"], values=\"Naet\").plot(kind=\"bar\", title=\"Runtime behaviour\");\n", + "ax.set_ylabel(\"Epoch Time / s\");\n", + "ax_abs = ax\n", + "for p in ax.patches:\n", + " ax.annotate(\"{:.2f} s\".format(p.get_height()), (p.get_x() + p.get_width()/1.33, p.get_height() * 1.01), \\\n", + " color=\"dimgray\", horizontalalignment=\"center\", verticalalignment=\"bottom\", rotation=\"vertical\")\n", + "pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Scaling Behaviour" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ax = df.pivot_table(index=[\"NGPUs\"], columns=[\"Nworkers\"], values=\"Speedup\").plot(style=\"*-\", \\\n", + " loglog=False, title=\"Scaling behaviour\", color=\"r\", legend=False);\n", + "ax.plot(df[\"NGPUs\"].values,df[\"Speedup - ideal\"].values,ls='dashed',lw=1.0,c='k',label=\"ideal\")\n", + "\n", + "ax.legend(ncol=1, title=\"(Nworkers)\")\n", + "ax.set_xticks(df[\"NGPUs\"].values)\n", + "ax.set_yticks(df[\"Speedup - ideal\"].values)\n", + "ax.set_ylabel(r'Speedup')\n", + "ax.set_xlim((0,np.amax(df[\"NGPUs\"].values+1)))\n", + "ax.set_ylim((0,np.amax(df[\"Speedup - ideal\"].values+1)))\n", + "\n", + "pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Runtime Efficiencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ax = df.pivot_table(index=[\"NGPUs\"], columns=[\"Nworkers\"], values=\"Efficiency\").plot(kind=\"bar\", \\\n", + " legend=False, title=\"Runtime efficiency\")\n", + "ax.legend(ncol=1, title=\"(Ntasks, Ncells)\",loc=4)\n", + "ax.set_ylabel(\"Efficiency\");\n", + "for p, abs in zip(ax.patches, ax_abs.patches):\n", + " ax.annotate(\"{:.2f}\".format(p.get_height()), (p.get_x() + p.get_width()/1.33, p.get_height() * 1.01), \\\n", + " color=\"dimgray\", horizontalalignment=\"center\", verticalalignment=\"bottom\", rotation=\"vertical\")\n", + " ax.annotate(\"Abs: {:.1f} s\".format(abs.get_height()), (p.get_x() + p.get_width()/1.33, p.get_height() * 0.95), \\\n", + " color=\"white\", horizontalalignment=\"center\", verticalalignment=\"top\", rotation=\"vertical\")\n", + "ax.plot(df[\"NGPUs\"].values-8,df[\"Speedup - ideal\"].values*0+1,ls='dashed',lw=1.0,c='r',label=\"ideal\")\n", + "pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EOF" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tutorials/distributed-ml/tf-scaling-test-jube/general_jobsys.xml b/tutorials/distributed-ml/tf-scaling-test-jube/general_jobsys.xml new file mode 100644 index 00000000..6f981f57 --- /dev/null +++ b/tutorials/distributed-ml/tf-scaling-test-jube/general_jobsys.xml @@ -0,0 +1,140 @@ + + + + General 
benchmark script + + + + + 1,2,4,8 + + 8 + + train.py + + + + + if [ -f /etc/FZJ/systemname ]; then cat /etc/FZJ/systemname | tr -d "\n"; else uname -n | head -c 3; fi + sbatch + $iterNO + $iterNW + ready + jube_ddp.sh + + { "hdfml": 4, + }["${systemname}"] + + intertwin + + 04:00:00 + + { "hdfml": "batch", + }["${systemname}"] + + + 00:10:00 + + { "hdfml": "batch", + }["${systemname}"] + + + + + { + "hdfml": "ml ml Stages/2024 GCC/12.3.0 OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py CMake cuDNN/8.9.5.29-CUDA-12", + }["${systemname}"] + + source /p/project/intertwin/rakesh/repo_push/itwinai/envAItf_hdfml/bin/activate + { + "hdfml": "export CUDA_VISIBLE_DEVICES=0,1,2,3" + }["${systemname}"] + + + + + + $job_file + $script + + + + + + + + + + + + + + + + + + + + + paramset + executeset + envirset + files,sub_job + echo "nID: $jube_wp_id" + + $submit_cmd $job_file + + + + + + ${jube_wp_id} + ${nodes} + ${nnw} + \s*TIMER: total epoch time:\s+$jube_pat_wrd\s* + \s*TIMER: average epoch time:\s+$jube_pat_wrd\s* + ${avgEpochT} + + + + + pattern + + stdout + job.out + + + + + + analyse + + ID + Nnodes + Nworkers + calcTime + avgEpochT + Naet + memoryGPU +
+
+ + + + analyse + + ID + Nnodes + Nworkers + calcTime + avgEpochT + Naet + memoryGPU +
+
+ +
+
+ + + diff --git a/tutorials/distributed-ml/tf-scaling-test-jube/jube_ddp.sh b/tutorials/distributed-ml/tf-scaling-test-jube/jube_ddp.sh new file mode 100644 index 00000000..adafae78 --- /dev/null +++ b/tutorials/distributed-ml/tf-scaling-test-jube/jube_ddp.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=JUBE_DDP +#SBATCH --account=#ACC# +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=#TIMELIM# + +# configure node and process count on the CM +#SBATCH --partition=#QUEUE# +#SBATCH --nodes=#NODES# +#SBATCH --cpus-per-task=#NW# +#SBATCH --gpus-per-node=#NGPU# +#SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +set -x +unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY + +# set modules +ml --force purge +ml Stages/2024 GCC/12.3.0 OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py CMake cuDNN/8.9.5.29-CUDA-12 + +# set env +source /p/project/intertwin/rakesh/repo_push/itwinai/envAItf_hdfml/bin/activate + +# Using legacy (2.16) version of Keras +# Latest version with TF (2.16) installs Keras 3.3 +# which returns an error for multi-node execution +export TF_USE_LEGACY_KERAS=1 + +# sleep a sec +sleep 1 + +# job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +echo "DEBUG: SLURM_NODELIST: $SLURM_NODELIST" +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +dataDir='/p/scratch/intertwin/datasets/imagenet/' + +COMMAND="train.py" + +EXEC="$COMMAND \ + --data_dir $dataDir" + +srun python -u $EXEC + + +#eof diff --git a/tutorials/distributed-ml/tf-scaling-test-jube/train.py b/tutorials/distributed-ml/tf-scaling-test-jube/train.py new file mode 100644 index 00000000..4bd4ff58 --- /dev/null +++ b/tutorials/distributed-ml/tf-scaling-test-jube/train.py @@ -0,0 +1,171 @@ +""" + Show how to use TensorFlow MultiWorkerMirroredStrategy on itwinai. 
+ for an Imagenet dataset + with SLURM: + >>> sbatch tfmirrored_slurm.sh + + """ +import argparse +import sys +from timeit import default_timer as timer + +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras.layers import Dense, GlobalAveragePooling2D +from tensorflow.keras.models import Model + +from itwinai.tensorflow.distributed import get_strategy + + +def parse_args(): + """ + Parse args + """ + parser = argparse.ArgumentParser(description='TensorFlow ImageNet') + + parser.add_argument( + "--strategy", "-s", type=str, + choices=['mirrored'], + default='mirrored' + ) + parser.add_argument( + "--data_dir", type=str, + default='./' + ) + parser.add_argument( + "--batch_size", type=int, + default=128 + ) + parser.add_argument( + "--epochs", type=int, + default=3 + ) + + args = parser.parse_args() + return args + + +def deserialization_fn(serialized_fn): + """Imagenet data processing + + Args: + serialized_example (Any): Input function + + Returns: + Any: Images and associated labels + """ + parsed_example = tf.io.parse_single_example( + serialized_fn, + features={ + 'image/encoded': tf.io.FixedLenFeature([], tf.string), + 'image/class/label': tf.io.FixedLenFeature([], tf.int64), + } + ) + image = tf.image.decode_jpeg(parsed_example['image/encoded'], channels=3) + image = tf.image.resize(image, (224, 224)) + label = tf.cast(parsed_example['image/class/label'], tf.int64) - 1 + return image, label + + +def tf_records_loader(files_path, shuffle=False): + """tf_records dataset reader + + Args: + files_path (String): Path to location of data + shuffle (bool, optional): If dataset should be shuffled. + Defaults to False. + + Returns: + tf.data.Dataset: Returns dataset to be trained + """ + datasets = tf.data.Dataset.from_tensor_slices(files_path) + datasets = datasets.shuffle(len(files_path)) if shuffle else datasets + datasets = datasets.flat_map(tf.data.TFRecordDataset) + datasets = datasets.map( + deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE) + return datasets + + +def main(): + args = parse_args() + + input_shape = (224, 224, 3) + num_classes = 1000 + + if args.strategy == 'mirrored': + strategy = get_strategy()[0] + else: + raise NotImplementedError( + f"Strategy {args.strategy} is not recognized/implemented.") + + with strategy.scope(): + base_model = keras.applications.ResNet50( + weights=None, + input_shape=input_shape, + include_top=False, + ) + + x = base_model.output + x = GlobalAveragePooling2D()(x) + x = Dense(1024, activation='relu')(x) + predictions = Dense(num_classes, activation='softmax')(x) + + model = Model(inputs=base_model.input, outputs=predictions) + + model.compile(loss=keras.losses.sparse_categorical_crossentropy, + optimizer=keras.optimizers.Adam(), + metrics=['accuracy'] + ) + + # scale batch size with number of workers + batch_size = args.batch_size * get_strategy()[1] + + dir_imagenet = args.data_dir+'imagenet-1K-tfrecords' + train_shard_suffix = 'train-*-of-01024' + test_shard_suffix = 'validation-*-of-00128' + + train_set_path = sorted( + tf.io.gfile.glob(dir_imagenet + f'/{train_shard_suffix}') + ) + test_set_path = sorted( + tf.io.gfile.glob(dir_imagenet + f'/{test_shard_suffix}') + ) + + train_dataset = tf_records_loader(train_set_path, shuffle=True) + test_dataset = tf_records_loader(test_set_path) + + train_dataset = train_dataset.batch( + batch_size).prefetch(tf.data.experimental.AUTOTUNE) + test_dataset = test_dataset.batch( + batch_size).prefetch(tf.data.experimental.AUTOTUNE) + + # distribute datasets among mirrored 
replicas + dist_train = strategy.experimental_distribute_dataset( + train_dataset + ) + dist_test = strategy.experimental_distribute_dataset( + test_dataset + ) + + # TODO: add callbacks to evaluate per epoch time + et = timer() + + # trains the model + model.fit(dist_train, epochs=args.epochs, steps_per_epoch=2000, verbose=10) + + print('TIMER: total epoch time:', + timer() - et, ' s') + print('TIMER: average epoch time:', + (timer() - et) / (args.epochs), ' s') + + test_scores = model.evaluate(dist_test, steps=100, verbose=5) + + print('Test loss:', test_scores[0]) + print('Test accuracy:', test_scores[1]) + + +if __name__ == "__main__": + main() + sys.exit() + +# eof diff --git a/tutorials/distributed-ml/tf-tutorial-1-imagenet/tfmirrored_slurm.sh b/tutorials/distributed-ml/tf-tutorial-1-imagenet/tfmirrored_slurm.sh new file mode 100644 index 00000000..7c886f14 --- /dev/null +++ b/tutorials/distributed-ml/tf-tutorial-1-imagenet/tfmirrored_slurm.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +# general configuration of the job +#SBATCH --job-name=TFTest +#SBATCH --account=intertwin +#SBATCH --mail-user= +#SBATCH --mail-type=ALL +#SBATCH --output=job.out +#SBATCH --error=job.err +#SBATCH --time=01:00:00 + +# configure node and process count on the CM +#SBATCH --partition=batch +#SBATCH --nodes=4 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gpus-per-node=4 +#SBATCH --exclusive + +# gres options have to be disabled for deepv +#SBATCH --gres=gpu:4 + +set -x +unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY + +# set modules +ml --force purge +ml Stages/2024 GCC/12.3.0 OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py CMake cuDNN/8.9.5.29-CUDA-12 + +# set env +source /p/project/intertwin/rakesh/repo_push/itwinai/envAItf_hdfml/bin/activate + +# Using legacy (2.16) version of Keras +# Latest version with TF (2.16) installs Keras 3.3 +# which returns an error for multi-node execution +export TF_USE_LEGACY_KERAS=1 + +# sleep a sec +sleep 1 + +# job info +echo "DEBUG: TIME: $(date)" +echo "DEBUG: EXECUTE: $EXEC" +echo "DEBUG: SLURM_SUBMIT_DIR: $SLURM_SUBMIT_DIR" +echo "DEBUG: SLURM_JOB_ID: $SLURM_JOB_ID" +echo "DEBUG: SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" +echo "DEBUG: SLURM_NNODES: $SLURM_NNODES" +echo "DEBUG: SLURM_NTASKS: $SLURM_NTASKS" +echo "DEBUG: SLURM_TASKS_PER_NODE: $SLURM_TASKS_PER_NODE" +echo "DEBUG: SLURM_SUBMIT_HOST: $SLURM_SUBMIT_HOST" +echo "DEBUG: SLURMD_NODENAME: $SLURMD_NODENAME" +echo "DEBUG: CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES" +echo "DEBUG: SLURM_NODELIST: $SLURM_NODELIST" +echo + +# set comm +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export OMP_NUM_THREADS=1 +if [ "$SLURM_CPUS_PER_TASK" -gt 0 ] ; then + export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +fi + +dataDir='/p/scratch/intertwin/datasets/imagenet/' + +COMMAND="train.py" + +EXEC="$COMMAND \ + --data_dir $dataDir" + +srun python -u $EXEC diff --git a/tutorials/distributed-ml/tf-tutorial-1-imagenet/train.py b/tutorials/distributed-ml/tf-tutorial-1-imagenet/train.py new file mode 100644 index 00000000..4bd4ff58 --- /dev/null +++ b/tutorials/distributed-ml/tf-tutorial-1-imagenet/train.py @@ -0,0 +1,171 @@ +""" + Show how to use TensorFlow MultiWorkerMirroredStrategy on itwinai. 
+ for an Imagenet dataset + with SLURM: + >>> sbatch tfmirrored_slurm.sh + + """ +import argparse +import sys +from timeit import default_timer as timer + +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras.layers import Dense, GlobalAveragePooling2D +from tensorflow.keras.models import Model + +from itwinai.tensorflow.distributed import get_strategy + + +def parse_args(): + """ + Parse args + """ + parser = argparse.ArgumentParser(description='TensorFlow ImageNet') + + parser.add_argument( + "--strategy", "-s", type=str, + choices=['mirrored'], + default='mirrored' + ) + parser.add_argument( + "--data_dir", type=str, + default='./' + ) + parser.add_argument( + "--batch_size", type=int, + default=128 + ) + parser.add_argument( + "--epochs", type=int, + default=3 + ) + + args = parser.parse_args() + return args + + +def deserialization_fn(serialized_fn): + """Imagenet data processing + + Args: + serialized_example (Any): Input function + + Returns: + Any: Images and associated labels + """ + parsed_example = tf.io.parse_single_example( + serialized_fn, + features={ + 'image/encoded': tf.io.FixedLenFeature([], tf.string), + 'image/class/label': tf.io.FixedLenFeature([], tf.int64), + } + ) + image = tf.image.decode_jpeg(parsed_example['image/encoded'], channels=3) + image = tf.image.resize(image, (224, 224)) + label = tf.cast(parsed_example['image/class/label'], tf.int64) - 1 + return image, label + + +def tf_records_loader(files_path, shuffle=False): + """tf_records dataset reader + + Args: + files_path (String): Path to location of data + shuffle (bool, optional): If dataset should be shuffled. + Defaults to False. + + Returns: + tf.data.Dataset: Returns dataset to be trained + """ + datasets = tf.data.Dataset.from_tensor_slices(files_path) + datasets = datasets.shuffle(len(files_path)) if shuffle else datasets + datasets = datasets.flat_map(tf.data.TFRecordDataset) + datasets = datasets.map( + deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE) + return datasets + + +def main(): + args = parse_args() + + input_shape = (224, 224, 3) + num_classes = 1000 + + if args.strategy == 'mirrored': + strategy = get_strategy()[0] + else: + raise NotImplementedError( + f"Strategy {args.strategy} is not recognized/implemented.") + + with strategy.scope(): + base_model = keras.applications.ResNet50( + weights=None, + input_shape=input_shape, + include_top=False, + ) + + x = base_model.output + x = GlobalAveragePooling2D()(x) + x = Dense(1024, activation='relu')(x) + predictions = Dense(num_classes, activation='softmax')(x) + + model = Model(inputs=base_model.input, outputs=predictions) + + model.compile(loss=keras.losses.sparse_categorical_crossentropy, + optimizer=keras.optimizers.Adam(), + metrics=['accuracy'] + ) + + # scale batch size with number of workers + batch_size = args.batch_size * get_strategy()[1] + + dir_imagenet = args.data_dir+'imagenet-1K-tfrecords' + train_shard_suffix = 'train-*-of-01024' + test_shard_suffix = 'validation-*-of-00128' + + train_set_path = sorted( + tf.io.gfile.glob(dir_imagenet + f'/{train_shard_suffix}') + ) + test_set_path = sorted( + tf.io.gfile.glob(dir_imagenet + f'/{test_shard_suffix}') + ) + + train_dataset = tf_records_loader(train_set_path, shuffle=True) + test_dataset = tf_records_loader(test_set_path) + + train_dataset = train_dataset.batch( + batch_size).prefetch(tf.data.experimental.AUTOTUNE) + test_dataset = test_dataset.batch( + batch_size).prefetch(tf.data.experimental.AUTOTUNE) + + # distribute datasets among mirrored 
replicas
+    dist_train = strategy.experimental_distribute_dataset(
+        train_dataset
+    )
+    dist_test = strategy.experimental_distribute_dataset(
+        test_dataset
+    )
+
+    # TODO: add callbacks to evaluate per epoch time
+    et = timer()
+
+    # trains the model
+    model.fit(dist_train, epochs=args.epochs, steps_per_epoch=2000, verbose=10)
+
+    print('TIMER: total epoch time:',
+          timer() - et, ' s')
+    print('TIMER: average epoch time:',
+          (timer() - et) / (args.epochs), ' s')
+
+    test_scores = model.evaluate(dist_test, steps=100, verbose=5)
+
+    print('Test loss:', test_scores[0])
+    print('Test accuracy:', test_scores[1])
+
+
+if __name__ == "__main__":
+    main()
+    sys.exit()
+
+# eof

From 71f79cb522045e1d928a7c9c9daf0209621c39f8 Mon Sep 17 00:00:00 2001
From: r-sarma
Date: Thu, 2 May 2024 18:45:16 +0200
Subject: [PATCH 171/171] Add README

---
 .../tf-tutorial-1-imagenet/README.md          | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 tutorials/distributed-ml/tf-tutorial-1-imagenet/README.md

diff --git a/tutorials/distributed-ml/tf-tutorial-1-imagenet/README.md b/tutorials/distributed-ml/tf-tutorial-1-imagenet/README.md
new file mode 100644
index 00000000..c2c49595
--- /dev/null
+++ b/tutorials/distributed-ml/tf-tutorial-1-imagenet/README.md
@@ -0,0 +1,20 @@
+# Tutorial: distributed strategies for Tensorflow
+
+In this tutorial we show how to use Tensorflow `MultiWorkerMirroredStrategy`.
+Note that the environment is tested on the HDFML system at JSC.
+For other systems, the module versions might need to change accordingly.
+Other strategies will be updated here.
+
+First, from the root of this repository, build the environment containing
+Tensorflow. You can *try* with:
+
+```bash
+# Creates a Python venv called envAItf_hdfml
+make tf-gpu-jsc
+```
+
+If you want to distribute the code in `train.py`, run from terminal:
+
+```bash
+sbatch tfmirrored_slurm.sh
+```
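After submission, the job can be followed with standard SLURM commands; `job.out` and `job.err` are the file names set by `--output` and `--error` in `tfmirrored_slurm.sh`, while the commands themselves are generic SLURM usage and not specific to this tutorial:

```bash
# Check the state of the submitted job in the queue
squeue -u $USER

# Follow the output and error files configured in the SLURM script
tail -f job.out job.err
```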