Apply isort and black reformatting
Signed-off-by: dimapihtar <[email protected]>
dimapihtar committed Jan 20, 2025
1 parent bfca05e commit af84c4e
Showing 5 changed files with 27 additions and 30 deletions.
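
Note: the changes below are mechanical. A rough way to reproduce this kind of transformation programmatically, assuming the standard isort and black Python APIs (the exact line length and profile NeMo uses are not shown in this commit), is:

import black
import isort

src = "from typing import TYPE_CHECKING, Callable, Dict, Literal, Optional, Union, Any\n"

# isort sorts the imported names; black then normalizes layout within the line limit.
sorted_src = isort.code(src, profile="black", line_length=119)
formatted_src = black.format_str(sorted_src, mode=black.Mode(line_length=119))
print(formatted_src)
# With these (assumed) settings this prints the sorted form seen in the diff below:
# from typing import TYPE_CHECKING, Any, Callable, Dict, Literal, Optional, Union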
2 changes: 1 addition & 1 deletion nemo/collections/llm/bert/loss.py
@@ -250,4 +250,4 @@ def average_losses_across_data_parallel_group(losses):
group=parallel_state.get_data_parallel_group()
)

return averaged_losses
return averaged_losses
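
For context, a helper like the one ending in this hunk is commonly written as follows with Megatron Core primitives; this is a sketch assuming an initialized torch.distributed and Megatron parallel state, not the file's exact body:

import torch
from megatron.core import parallel_state


def average_losses_across_data_parallel_group(losses):
    """Reduce and average scalar loss tensors across the data-parallel group."""
    averaged_losses = torch.cat([loss.clone().detach().view(1) for loss in losses])
    torch.distributed.all_reduce(
        averaged_losses,
        group=parallel_state.get_data_parallel_group(),
    )
    averaged_losses = averaged_losses / torch.distributed.get_world_size(
        group=parallel_state.get_data_parallel_group()
    )

    return averaged_losses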
16 changes: 6 additions & 10 deletions nemo/collections/llm/gpt/model/base.py
@@ -13,20 +13,20 @@
# limitations under the License.

from dataclasses import dataclass
from typing import TYPE_CHECKING, Callable, Dict, Literal, Optional, Union, Any
from typing import TYPE_CHECKING, Any, Callable, Dict, Literal, Optional, Union

import lightning.pytorch as L
import torch
import torch.distributed
from megatron.core.fusions.fused_layer_norm import FusedLayerNorm
from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import GPTInferenceWrapper
from megatron.core.inference.model_inference_wrappers.inference_wrapper_config import InferenceWrapperConfig
from megatron.core.models.gpt.gpt_model import GPTModel as MCoreGPTModel
from megatron.core.optimizer import OptimizerConfig
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.fusions.fused_layer_norm import FusedLayerNorm
from megatron.core.transformer.cuda_graphs import CudaGraphManager
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_block import TransformerBlockSubmodules, get_num_layers_to_build
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import BaseTransformerLayer
from megatron.core.transformer.utils import make_sharded_tensors_for_checkpoint
from torch import nn
@@ -482,9 +482,7 @@ def __init__(
device: str = 'cuda',
**kwargs,
) -> None:
assert (
HAVE_TE
), "AutocastTransformerLayer requires Megatron Core and Transformer Engine to be installed."
assert HAVE_TE, "AutocastTransformerLayer requires Megatron Core and Transformer Engine to be installed."

transformer_layer_args = {
"hidden_size": hidden_size,
@@ -577,9 +575,7 @@ def forward(

class TETransformerLayerAutocast(AutocastTransformerLayer, BaseTransformerLayer):
def __init__(self, config, layer_number=1, hidden_dropout=None):
assert (
HAVE_TE
), "TETransformerLayerAutocast requires Megatron Core and Transformer Engine to be installed."
assert HAVE_TE, "TETransformerLayerAutocast requires Megatron Core and Transformer Engine to be installed."

self.config = config
self.is_first_microbatch = True
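
Aside: the HAVE_TE flag asserted in both hunks above is typically defined by an import guard along these lines (an illustrative sketch, not this file's exact code):

try:
    import transformer_engine  # noqa: F401  # probe for Transformer Engine

    HAVE_TE = True
except (ImportError, ModuleNotFoundError):
    HAVE_TE = False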
23 changes: 13 additions & 10 deletions nemo/collections/llm/gpt/model/gemma2.py
@@ -12,28 +12,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import math
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Annotated, Callable, Optional, Union

import torch
import math

from megatron.core import parallel_state, tensor_parallel
from megatron.core.tensor_parallel import ColumnParallelLinear
from megatron.core.extensions.transformer_engine import TENorm, TERowParallelLinear, TELayerNormColumnParallelLinear
from megatron.core.extensions.transformer_engine import TELayerNormColumnParallelLinear, TENorm, TERowParallelLinear
from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.transformer import ModuleSpec, TransformerLayer, TransformerLayerSubmodules, MegatronModule, TransformerConfig
from megatron.core.fusions.fused_softmax import FusedScaleMaskSoftmax
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.tensor_parallel import ColumnParallelLinear
from megatron.core.transformer import (
MegatronModule,
ModuleSpec,
TransformerConfig,
TransformerLayer,
TransformerLayerSubmodules,
)
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.fusions.fused_softmax import FusedScaleMaskSoftmax
from megatron.core.transformer.utils import attention_mask_func
from megatron.core.utils import divide

from torch import nn
from torch import Tensor
from torch import Tensor, nn

from nemo.collections.llm.fn.activation import openai_gelu
from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel
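
The parenthesized megatron.core.transformer import in the hunk above is the standard isort/black wrapping for a from-import that exceeds the line limit; a small sketch (assumed isort API and settings):

import isort

long_import = (
    "from megatron.core.transformer import ModuleSpec, TransformerLayer, "
    "TransformerLayerSubmodules, MegatronModule, TransformerConfig\n"
)
# Sorts the names and, because the statement is over the limit, rewraps it into the
# one-name-per-line parenthesized form with a trailing comma.
print(isort.code(long_import, profile="black", line_length=119))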
2 changes: 1 addition & 1 deletion nemo/collections/llm/quantization/quantizer.py
@@ -369,4 +369,4 @@ def torch_dtype_from_precision(precision: Union[int, str]) -> torch.dtype:
elif precision in [32, '32', '32-true']:
return torch.float32
else:
raise ValueError(f"Could not parse the precision of `{precision}` to a valid torch.dtype")
raise ValueError(f"Could not parse the precision of `{precision}` to a valid torch.dtype")
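
Usage sketch for the helper ending in this hunk; the import path is assumed from the file name, and only the 32-bit branch is visible here:

from nemo.collections.llm.quantization.quantizer import torch_dtype_from_precision

print(torch_dtype_from_precision("32-true"))  # torch.float32 -- the branch shown above
try:
    torch_dtype_from_precision("int4")  # assumed to fall through to the ValueError branch
except ValueError as err:
    print(err)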
14 changes: 6 additions & 8 deletions nemo/collections/llm/quantization/utils.py
@@ -13,17 +13,9 @@
# limitations under the License.

from pathlib import Path

from typing import Optional

import torch

from nemo import lightning as nl
from nemo.collections import llm
from nemo.collections.llm.inference.base import _setup_trainer_and_restore_model
from nemo.lightning.ckpt_utils import ckpt_to_context_subdir
from nemo.utils import logging

from megatron.core.extensions.transformer_engine import TEDotProductAttention, TENorm
from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.models.gpt.gpt_layer_specs import _get_mlp_module_spec
@@ -34,6 +26,12 @@
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules

from nemo import lightning as nl
from nemo.collections import llm
from nemo.collections.llm.inference.base import _setup_trainer_and_restore_model
from nemo.lightning.ckpt_utils import ckpt_to_context_subdir
from nemo.utils import logging


def get_modelopt_decoder_type(model: llm.GPTModel) -> str:
"""Infers the modelopt decoder type from GPTModel subclass."""
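
For illustration only, the kind of dispatch the docstring above describes might look like this; the class-name checks and returned strings are assumptions, not the function's real mapping:

def get_modelopt_decoder_type_sketch(model) -> str:
    """Infer a modelopt decoder type string from the model's class name (illustrative)."""
    name = type(model).__name__
    if "Llama" in name:
        return "llama"
    if "Gemma" in name:
        return "gemma"
    return "gptnext"  # assumed fallback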
