[Forkless SparseML Transformers] [Feature Branch] Setting Up The modification module #2046

Closed

dbogunowicz wants to merge 21 commits into main from feature/damian/forkless_transformer_feature

Commits (21)
c278130  initial commit (dbogunowicz, Feb 8, 2024)
2114066  add omitted files (dbogunowicz, Feb 8, 2024)
8b51955  more edits (dbogunowicz, Feb 8, 2024)
8bd369d  [Forkless SparseML Transformers] Updating SparseML to be compatible w… (dbogunowicz, Feb 26, 2024)
80c185c  [Forkless SparseML Transformers] [Feature Branch] Modding `Mistral` (… (dbogunowicz, Feb 26, 2024)
7f146b4  [Forkless SparseML Transformers] [Feature Branch] Modding `OPT` (#2051) (dbogunowicz, Feb 26, 2024)
1717fa8  consolidate tests (dbogunowicz, Feb 28, 2024)
a97a60f  small improvements according to the PR comments (dbogunowicz, Feb 28, 2024)
ac3d460  beautification (dbogunowicz, Feb 28, 2024)
8e40c20  fix failing export tests (dbogunowicz, Feb 28, 2024)
113d97f  pin pytest version to run tests in GHA (Mar 4, 2024)
5196960  fix tests and add modify_model functionality to non-llms models (Mar 4, 2024)
366ad56  swap recursion for iteration (swap_module function) (Mar 4, 2024)
7afff08  working on transformers tests (fixing out of space error) 1 (Mar 4, 2024)
fcb04a6  Add a checker for transformers version (Mar 11, 2024)
c2570ec  Add few missing pieces that got omitted during the refactor (dbogunowicz, Mar 11, 2024)
ad92492  bring back Sara's commit (somehow got lost during the rebasing) (Mar 11, 2024)
b36e0be  PR review changes (Mar 11, 2024)
4f80c88  Merge branch 'main' into feature/damian/forkless_transformer_feature (dbogunowicz, Mar 11, 2024)
b337b8f  fix tests (Mar 11, 2024)
aba8da0  Merge branch 'main' into feature/damian/forkless_transformer_feature (dbogunowicz, Mar 13, 2024)
@@ -77,7 +77,7 @@ With the models downloaded, we will set up the Hugging Face `tokenizer`, `config
We instantiate these classes by passing the local path to the directory containing the `pytorch_model.bin`, `tokenizer.json`, and `config.json` files from the SparseZoo download.

```python
from sparseml.transformers.utils import SparseAutoModel
from sparseml.transformers import SparseAutoModel
from transformers import AutoModelForSequenceClassification, AutoConfig, AutoTokenizer

NUM_LABELS = 2
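# A minimal sketch of how the classes imported above are typically instantiated,
# assuming the SparseZoo download was unpacked to MODEL_PATH (hypothetical path)
# containing pytorch_model.bin, config.json, and tokenizer.json; NUM_LABELS is
# defined in the snippet above.
MODEL_PATH = "./model_download"

config = AutoConfig.from_pretrained(MODEL_PATH, num_labels=NUM_LABELS)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH, config=config)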
3 changes: 1 addition & 2 deletions setup.py
@@ -73,8 +73,7 @@
"opencv-python<=4.6.0.66",
]
_transformers_deps = _pytorch_deps + [
f"{'nm-transformers' if is_release else 'nm-transformers-nightly'}"
f"~={version_nm_deps}",
"transformers<4.37",
"datasets<=2.14.6",
"dvc",
"scikit-learn",
3 changes: 1 addition & 2 deletions src/sparseml/evaluation/integrations/perplexity.py
@@ -14,8 +14,7 @@

from typing import List, Optional, Union

from sparseml.transformers.utils.sparse_model import SparseAutoModelForCausalLM
from sparseml.transformers.utils.sparse_tokenizer import SparseAutoTokenizer
from sparseml.transformers import SparseAutoModelForCausalLM, SparseAutoTokenizer


try:
@@ -11,7 +11,7 @@ quantization_modifiers:
ignore:
- LlamaRotaryEmbedding
- LlamaRMSNorm
- SiLUActivation
- SiLU
- model.layers.0.mlp.down_proj
- model.layers.1.mlp.down_proj
- model.layers.2.mlp.down_proj
@@ -30,7 +30,7 @@
ScheduledModifier,
ScheduledUpdateModifier,
)
from sparseml.pytorch.utils import get_layer, get_prunable_layers, replace_layer
from sparseml.pytorch.utils import get_layer, get_prunable_layers, swap_modules
from sparseml.pytorch.utils.logger import BaseLogger
from sparseml.sparsification import SparsificationTypes
from sparseml.utils import ALL_PRUNABLE_TOKEN, ALL_TOKEN, validate_str_iterable
@@ -219,11 +219,11 @@ def _check_update_pruning(self, module: Module, epoch: float, steps_per_epoch: i
epoch >= self.start_epoch or self.start_epoch == -1
):
for name in list(self._layer_modules.keys()):
self._layer_modules[name] = replace_layer(module, name, Identity())
self._layer_modules[name] = swap_modules(module, name, Identity())
self._layers_replaced = True

if self._layers_replaced and (epoch >= self.end_epoch and self.end_epoch != -1):
for name, replaced in self._layer_modules.items():
replace_layer(module, name, replaced)
swap_modules(module, name, replaced)
self._layer_modules[name] = None
self._layers_replaced = False
62 changes: 36 additions & 26 deletions src/sparseml/pytorch/utils/helpers.py
@@ -85,12 +85,12 @@
"tensor_sample",
"mask_difference",
"get_layer",
"replace_layer",
"get_terminal_layers",
"get_conv_layers",
"get_linear_layers",
"get_prunable_layers",
"get_quantizable_layers",
"swap_modules",
"get_named_layers_and_params_by_regex",
"any_str_or_regex_matches_param_name",
"NamedLayerParam",
@@ -725,31 +725,6 @@ def get_layer(name: str, module: Module) -> Module:
return layer


def replace_layer(
module: Module,
name: str,
replace: Module,
) -> Module:
"""
General function to replace a layer in a module with the given new one.

:param module: the module to replace the layer in
:param name: the name of the layer to replace the activation for
:param replace: the module to replace the layer with
:return: the original layer that was replaced
"""
parent = module
sections = name.split(".")

for sec in sections[:-1]:
parent = parent.__getattr__(sec)

cur = parent.__getattr__(sections[-1])
parent.__setattr__(sections[-1], replace)

return cur


def get_terminal_layers(module: Module) -> Dict[str, Module]:
"""
:param module: the module to grab all terminal layers for
@@ -1248,3 +1223,38 @@ def _exe_input(_, inp, out):
for h in handles:
h.remove()
return order


def swap_modules(
module: torch.nn.Module, submodule_name: str, submodule_to_replace: torch.nn.Module
) -> torch.nn.Module:
"""
Iteratively unfold the submodules of the module according to the submodule_name
to eventually replace the leaf submodule (accessed from the module through the
submodule_name) with the submodule_to_replace.

E.g.
```
swap_modules(module=Model,
submodule_name="layers.0.sublayer",
submodule_to_replace=ReplaceModule
)
```
will iteratively traverse the submodules 'layers' -> '0' and finally
replace 'sublayer' with ReplaceModule.

:param module: the module containing the submodule to replace
:param submodule_name: the dot-separated name of the submodule to replace
:param submodule_to_replace: the new submodule to swap in
:return: the original submodule that was replaced
"""
parent = module
sections = submodule_name.split(".")

for sec in sections[:-1]:
parent = parent.__getattr__(sec)

cur = parent.__getattr__(sections[-1])
parent.__setattr__(sections[-1], submodule_to_replace)

return cur
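A quick usage sketch of the new `swap_modules` helper, assuming it is imported from `sparseml.pytorch.utils` (as the pruning modifier above now does); the toy model and submodule names are illustrative only:

```python
import torch

from sparseml.pytorch.utils import swap_modules

# Toy model: the nested Sequential at index "1" holds a ReLU at index "0"
model = torch.nn.Sequential(
    torch.nn.Linear(8, 8),
    torch.nn.Sequential(torch.nn.ReLU(), torch.nn.Linear(8, 2)),
)

# Swap the nested ReLU ("1.0") for Identity; the original ReLU is returned,
# so it can later be swapped back in (as the pruning modifier does at end_epoch)
original = swap_modules(model, "1.0", torch.nn.Identity())
assert isinstance(original, torch.nn.ReLU)
assert isinstance(model[1][0], torch.nn.Identity)
```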
40 changes: 8 additions & 32 deletions src/sparseml/transformers/__init__.py
@@ -17,45 +17,21 @@
"""

# flake8: noqa

import logging as _logging

from sparseml.analytics import sparseml_analytics as _analytics
from sparseml.transformers.base import check_transformers_install


try:
import datasets as _datasets
import transformers as _transformers
except ImportError:
raise ImportError("Please install sparseml[transformers] to use this pathway")


check_transformers_install()
_analytics.send_event("python__transformers__init")


_LOGGER = _logging.getLogger(__name__)


def _check_transformers_install():
# check for NM integration in transformers version
import transformers as _transformers

if not getattr(_transformers, "NM_INTEGRATED", False):
message = (
"****************************************************************\n"
"WARNING: It appears that the Neural Magic fork of Transformers is not installed!\n"
"This is CRITICAL for the proper application of quantization in SparseML flows.\n\n"
"To resolve this, please run: `pip uninstall transformers;pip install nm-transformers`\n"
"Failing to do so is UNSUPPORTED and may significantly affect model performance.\n"
"****************************************************************"
)
_LOGGER.warning(message)


_check_transformers_install()

# isort: skip_file
# (import order matters for circular import avoidance)
from .utils import *
from .sparsification import (
SparseAutoModel,
SparseAutoModelForCausalLM,
SparseAutoConfig,
SparseAutoTokenizer,
)
from .export import *
from .finetune import *
29 changes: 29 additions & 0 deletions src/sparseml/transformers/base.py
@@ -0,0 +1,29 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging


_LOGGER = logging.getLogger(__name__)


def check_transformers_install():
try:
import transformers # noqa F401
except ImportError as transformers_err:
_LOGGER.warning(
"transformers dependency is not installed. "
"To install, run `pip sparseml[transformers]`"
)
raise transformers_err
3 changes: 1 addition & 2 deletions src/sparseml/transformers/export.py
@@ -88,9 +88,8 @@
from sparseml.pytorch.opset import TORCH_DEFAULT_ONNX_OPSET
from sparseml.pytorch.optim import ScheduledModifierManager
from sparseml.pytorch.utils import export_onnx
from sparseml.transformers import SparseAutoTokenizer
from sparseml.transformers import SparseAutoModel, SparseAutoTokenizer
from sparseml.transformers.sparsification import Trainer
from sparseml.transformers.utils import SparseAutoModel
from sparsezoo.utils.onnx import EXTERNAL_ONNX_DATA_NAME


5 changes: 0 additions & 5 deletions src/sparseml/transformers/finetune/callbacks.py
@@ -131,11 +131,6 @@ def disable_amp(self, epoch: float):

:param epoch: epoch to disable from
"""
if not self.on_begin_called:
# disable if training loops haven't started so we don't load
# the empty scaler state dict and instead disable it from the start
self.trainer.use_cuda_amp = False

if hasattr(self.trainer, "scaler"):
self.trainer.scaler._enabled = False

6 changes: 0 additions & 6 deletions src/sparseml/transformers/finetune/session_mixin.py
@@ -388,13 +388,7 @@ def evaluate(self, *args, **kwargs):
"""
self.initialize_structure()

# Always evaluate w/ fp32 to be closer to DeepSparse
use_cuda_amp = self.use_cuda_amp
if not self.args.fp16_full_eval and not self.args.bf16_full_eval:
self.use_cuda_amp = False

output = super().evaluate(*args, **kwargs)
self.use_cuda_amp = use_cuda_amp
self.finalize_session()

return output
5 changes: 4 additions & 1 deletion src/sparseml/transformers/finetune/text_generation.py
@@ -40,7 +40,10 @@
from sparseml.transformers.finetune.runner import StageRunner
from sparseml.transformers.finetune.trainer import Trainer
from sparseml.transformers.finetune.training_args import TrainingArguments
from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src
from sparseml.transformers.sparsification.sparse_model import (
SparseAutoModel,
get_shared_tokenizer_src,
)
from sparseml.transformers.utils.helpers import detect_last_checkpoint


4 changes: 0 additions & 4 deletions src/sparseml/transformers/finetune/trainer.py
@@ -91,10 +91,6 @@ def save_optimizer_and_scheduler(self, output_dir: Optional[str] = None):
os.path.join(output_dir, "scheduler.pt"),
)
reissue_pt_warnings(caught_warnings)
if self.use_cuda_amp:
torch.save(
self.scaler.state_dict(), os.path.join(output_dir, "scaler.pt")
)

def _save_checkpoint(self, model, trial, metrics=None):
# Call into the save checkpoint by HF Transformers, which saves the
8 changes: 6 additions & 2 deletions src/sparseml/transformers/masked_language_modeling.py
@@ -54,8 +54,12 @@
from transformers.utils.versions import require_version

from sparseml.pytorch.utils.distributed import record
from sparseml.transformers.sparsification import Trainer, TrainingArguments
from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src
from sparseml.transformers.sparsification import (
SparseAutoModel,
Trainer,
TrainingArguments,
)
from sparseml.transformers.sparsification.sparse_model import get_shared_tokenizer_src


metadata_args = [
13 changes: 13 additions & 0 deletions src/sparseml/transformers/modify/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
3 changes: 2 additions & 1 deletion src/sparseml/transformers/question_answering.py
@@ -47,10 +47,11 @@
from sparseml.pytorch.utils.distributed import record
from sparseml.transformers.sparsification import (
QuestionAnsweringTrainer,
SparseAutoModel,
TrainingArguments,
postprocess_qa_predictions,
)
from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src
from sparseml.transformers.sparsification.sparse_model import get_shared_tokenizer_src


# You can also adapt this script on your own question answering task.
3 changes: 3 additions & 0 deletions src/sparseml/transformers/sparsification/__init__.py
@@ -20,5 +20,8 @@
# flake8: noqa

from .question_answering import *
from .sparse_config import *
from .sparse_model import *
from .sparse_tokenizer import *
from .trainer import *
from .training_args import *
21 changes: 21 additions & 0 deletions src/sparseml/transformers/sparsification/modification/__init__.py
@@ -0,0 +1,21 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa
from .modify_model import modify_model
from .modifying_bert import *
from .modifying_distilbert import *
from .modifying_llama import *
from .modifying_mistral import *
from .modifying_mobilebert import *
from .modifying_opt import *
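The body of `modify_model` is not part of this diff, so the exact call signature below is an assumption; the sketch supposes it accepts an already-loaded `torch.nn.Module` and returns it with SparseML-specific submodule replacements applied for the architectures registered above:

```python
# Hedged sketch: modify_model's signature is assumed, not shown in this diff
from sparseml.transformers import SparseAutoModelForCausalLM
from sparseml.transformers.sparsification.modification import modify_model

model = SparseAutoModelForCausalLM.from_pretrained(
    "path/to/llama-checkpoint"  # hypothetical local checkpoint
)

# Assumed behavior: registered architectures (bert, distilbert, llama, mistral,
# mobilebert, opt) get their relevant submodules swapped for SparseML-aware ones
model = modify_model(model)
```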