From dcf81f5d636a0fe23e0f8733d9a8ef105273297b Mon Sep 17 00:00:00 2001
From: Ali Taghibakhshi
Date: Mon, 16 Dec 2024 07:47:49 -0800
Subject: [PATCH] minor fix

---
 nemo/collections/llm/gpt/model/hyena.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/nemo/collections/llm/gpt/model/hyena.py b/nemo/collections/llm/gpt/model/hyena.py
index b0441a125dc05..a6a5bae1f1ccf 100644
--- a/nemo/collections/llm/gpt/model/hyena.py
+++ b/nemo/collections/llm/gpt/model/hyena.py
@@ -99,12 +99,17 @@ class HyenaConfig(TransformerConfig, io.IOMixin):
     remove_activation_post_first_layer: bool = True
     cross_entropy_loss_fusion: bool = True
     tp_comm_overlap: bool = True
+    bias_activation_fusion: bool = True
+    bias_dropout_add_fusion: bool = True

     def __post_init__(self):
         super().__post_init__()
         self.hyena_no_weight_decay_cond_fn = hyena_no_weight_decay_cond if self.hyena_filter_no_wd else None

     def configure_model(self, tokenizer) -> "MCoreHyenaModel":
+
+        self.bias_activation_fusion = False if self.remove_activation_post_first_layer else self.bias_activation_fusion
+
         model = MCoreHyenaModel(
             self,
             hyena_stack_spec=hyena_stack_spec,
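
For readers skimming the patch, here is a minimal standalone sketch of the behavior it adds. The SimpleHyenaConfig class below is a hypothetical stand-in for NeMo's HyenaConfig, not the real class; it only mirrors the two new flags and the added line in configure_model, which forces bias_activation_fusion off whenever remove_activation_post_first_layer is enabled.

    # Hypothetical stand-in for nemo.collections.llm.gpt.model.hyena.HyenaConfig,
    # illustrating only the logic touched by this patch.
    from dataclasses import dataclass

    @dataclass
    class SimpleHyenaConfig:
        remove_activation_post_first_layer: bool = True
        bias_activation_fusion: bool = True    # new flag added by the patch
        bias_dropout_add_fusion: bool = True   # new flag added by the patch

        def configure_model(self) -> "SimpleHyenaConfig":
            # Mirrors the added line: when the post-first-layer activation is
            # removed, bias/activation fusion is forced off before the model is built.
            self.bias_activation_fusion = (
                False if self.remove_activation_post_first_layer else self.bias_activation_fusion
            )
            return self

    cfg = SimpleHyenaConfig().configure_model()
    print(cfg.bias_activation_fusion)  # False, since remove_activation_post_first_layer is True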