[Forkless SparseML Transformers] [Feature Branch] Setting Up The modification module #2046

Closed

dbogunowicz wants to merge 21 commits into main from feature/damian/forkless_transformer_feature

Commits (21)
c278130  initial commit (dbogunowicz, Feb 8, 2024)
2114066  add omitted files (dbogunowicz, Feb 8, 2024)
8b51955  more edits (dbogunowicz, Feb 8, 2024)
8bd369d  [Forkless SparseML Transformers] Updating SparseML to be compatible w… (dbogunowicz, Feb 26, 2024)
80c185c  [Forkless SparseML Transformers] [Feature Branch] Modding `Mistral` (… (dbogunowicz, Feb 26, 2024)
7f146b4  [Forkless SparseML Transformers] [Feature Branch] Modding `OPT` (#2051) (dbogunowicz, Feb 26, 2024)
1717fa8  consolidate tests (dbogunowicz, Feb 28, 2024)
a97a60f  small improvements according to the PR comments (dbogunowicz, Feb 28, 2024)
ac3d460  beautification (dbogunowicz, Feb 28, 2024)
8e40c20  fix failing export tests (dbogunowicz, Feb 28, 2024)
113d97f  pin pytest version to run tests in GHA (Mar 4, 2024)
5196960  fix tests and add modify_model functionality to non-llms models (Mar 4, 2024)
366ad56  swap recursion for iteration (swap_module function) (Mar 4, 2024)
7afff08  working on transformers tests (fixing out of space error) 1 (Mar 4, 2024)
fcb04a6  Add a checker for transformers version (Mar 11, 2024)
c2570ec  Add few missing pieces that got omitted during the refactor (dbogunowicz, Mar 11, 2024)
ad92492  bring back Sara's commit (somehow got lost during the rebasing) (Mar 11, 2024)
b36e0be  PR review changes (Mar 11, 2024)
4f80c88  Merge branch 'main' into feature/damian/forkless_transformer_feature (dbogunowicz, Mar 11, 2024)
b337b8f  fix tests (Mar 11, 2024)
aba8da0  Merge branch 'main' into feature/damian/forkless_transformer_feature (dbogunowicz, Mar 13, 2024)
@@ -77,7 +77,7 @@ With the models downloaded, we will set up the Hugging Face `tokenizer`, `config
We instantiate these classes by passing the local path to the directory containing the `pytorch_model.bin`, `tokenizer.json`, and `config.json` files from the SparseZoo download.

```python
from sparseml.transformers.utils import SparseAutoModel
from sparseml.transformers import SparseAutoModel
from transformers import AutoModelForSequenceClassification, AutoConfig, AutoTokenizer

NUM_LABELS = 2
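# A minimal sketch of how the classes imported above are typically instantiated,
# assuming the SparseZoo download was unpacked to MODEL_PATH (hypothetical path)
# containing pytorch_model.bin, config.json, and tokenizer.json; NUM_LABELS is
# defined in the snippet above.
MODEL_PATH = "./model_download"

config = AutoConfig.from_pretrained(MODEL_PATH, num_labels=NUM_LABELS)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH, config=config)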
3 changes: 1 addition & 2 deletions setup.py
@@ -73,8 +73,7 @@
"opencv-python<=4.6.0.66",
]
_transformers_deps = _pytorch_deps + [
f"{'nm-transformers' if is_release else 'nm-transformers-nightly'}"
f"~={version_nm_deps}",
"transformers<4.37",
"datasets<=2.14.6",
"dvc",
"scikit-learn",
3 changes: 1 addition & 2 deletions src/sparseml/evaluation/integrations/perplexity.py
@@ -14,8 +14,7 @@

from typing import List, Optional, Union

from sparseml.transformers.utils.sparse_model import SparseAutoModelForCausalLM
from sparseml.transformers.utils.sparse_tokenizer import SparseAutoTokenizer
from sparseml.transformers import SparseAutoModelForCausalLM, SparseAutoTokenizer


try:
@@ -11,7 +11,7 @@ quantization_modifiers:
ignore:
- LlamaRotaryEmbedding
- LlamaRMSNorm
- SiLUActivation
- SiLU
- model.layers.0.mlp.down_proj
- model.layers.1.mlp.down_proj
- model.layers.2.mlp.down_proj
@@ -30,7 +30,7 @@
ScheduledModifier,
ScheduledUpdateModifier,
)
from sparseml.pytorch.utils import get_layer, get_prunable_layers, replace_layer
from sparseml.pytorch.utils import get_layer, get_prunable_layers, swap_modules
from sparseml.pytorch.utils.logger import BaseLogger
from sparseml.sparsification import SparsificationTypes
from sparseml.utils import ALL_PRUNABLE_TOKEN, ALL_TOKEN, validate_str_iterable
@@ -219,11 +219,11 @@ def _check_update_pruning(self, module: Module, epoch: float, steps_per_epoch: i
epoch >= self.start_epoch or self.start_epoch == -1
):
for name in list(self._layer_modules.keys()):
self._layer_modules[name] = replace_layer(module, name, Identity())
self._layer_modules[name] = swap_modules(module, name, Identity())
self._layers_replaced = True

if self._layers_replaced and (epoch >= self.end_epoch and self.end_epoch != -1):
for name, replaced in self._layer_modules.items():
replace_layer(module, name, replaced)
swap_modules(module, name, replaced)
self._layer_modules[name] = None
self._layers_replaced = False
62 changes: 36 additions & 26 deletions src/sparseml/pytorch/utils/helpers.py
@@ -85,12 +85,12 @@
"tensor_sample",
"mask_difference",
"get_layer",
"replace_layer",
"get_terminal_layers",
"get_conv_layers",
"get_linear_layers",
"get_prunable_layers",
"get_quantizable_layers",
"swap_modules",
"get_named_layers_and_params_by_regex",
"any_str_or_regex_matches_param_name",
"NamedLayerParam",
@@ -725,31 +725,6 @@ def get_layer(name: str, module: Module) -> Module:
return layer


def replace_layer(
module: Module,
name: str,
replace: Module,
) -> Module:
"""
General function to replace a layer in a module with the given new one.

:param module: the module to replace the layer in
:param name: the name of the layer to replace the activation for
:param replace: the module to replace the layer with
:return: the original layer that was replaced
"""
parent = module
sections = name.split(".")

for sec in sections[:-1]:
parent = parent.__getattr__(sec)

cur = parent.__getattr__(sections[-1])
parent.__setattr__(sections[-1], replace)

return cur


def get_terminal_layers(module: Module) -> Dict[str, Module]:
"""
:param module: the module to grab all terminal layers for
@@ -1248,3 +1223,38 @@ def _exe_input(_, inp, out):
for h in handles:
h.remove()
return order


def swap_modules(
module: torch.nn.Module, submodule_name: str, submodule_to_replace: torch.nn.Module
) -> torch.nn.Module:
"""
Iteratively unfold the submodules of the module according to the submodule_name
to eventually replace the leaf submodule (accessed from the module through the
submodule_name) with the submodule_to_replace.

E.g.
```
swap_modules(module=Model,
submodule_name="layers.0.sublayer",
submodule_to_replace=ReplaceModule
)
```
will iteratively traverse the submodules 'layers' -> '0' and finally
replace 'sublayer' with ReplaceModule.

:param module: the module containing the submodule to replace
:param submodule_name: the dot-separated name of the submodule to replace
:param submodule_to_replace: the new submodule to swap in
:return: the original submodule that was replaced
"""
parent = module
sections = submodule_name.split(".")

for sec in sections[:-1]:
parent = parent.__getattr__(sec)

cur = parent.__getattr__(sections[-1])
parent.__setattr__(sections[-1], submodule_to_replace)

return cur
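A quick usage sketch of the new `swap_modules` helper, assuming it is imported from `sparseml.pytorch.utils` (as the pruning modifier above now does); the toy model and submodule names are illustrative only:

```python
import torch

from sparseml.pytorch.utils import swap_modules

# Toy model: the nested Sequential at index "1" holds a ReLU at index "0"
model = torch.nn.Sequential(
    torch.nn.Linear(8, 8),
    torch.nn.Sequential(torch.nn.ReLU(), torch.nn.Linear(8, 2)),
)

# Swap the nested ReLU ("1.0") for Identity; the original ReLU is returned,
# so it can later be swapped back in (as the pruning modifier does at end_epoch)
original = swap_modules(model, "1.0", torch.nn.Identity())
assert isinstance(original, torch.nn.ReLU)
assert isinstance(model[1][0], torch.nn.Identity)
```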
40 changes: 8 additions & 32 deletions src/sparseml/transformers/__init__.py
@@ -17,45 +17,21 @@
"""

# flake8: noqa

import logging as _logging

from sparseml.analytics import sparseml_analytics as _analytics
from sparseml.transformers.base import check_transformers_install


try:
import datasets as _datasets
import transformers as _transformers
except ImportError:
raise ImportError("Please install sparseml[transformers] to use this pathway")


check_transformers_install()
_analytics.send_event("python__transformers__init")


_LOGGER = _logging.getLogger(__name__)


def _check_transformers_install():
# check for NM integration in transformers version
import transformers as _transformers

if not getattr(_transformers, "NM_INTEGRATED", False):
message = (
"****************************************************************\n"
"WARNING: It appears that the Neural Magic fork of Transformers is not installed!\n"
"This is CRITICAL for the proper application of quantization in SparseML flows.\n\n"
"To resolve this, please run: `pip uninstall transformers;pip install nm-transformers`\n"
"Failing to do so is UNSUPPORTED and may significantly affect model performance.\n"
"****************************************************************"
)
_LOGGER.warning(message)


_check_transformers_install()

# isort: skip_file
# (import order matters for circular import avoidance)
from .utils import *
from .sparsification import (
SparseAutoModel,
SparseAutoModelForCausalLM,
SparseAutoConfig,
SparseAutoTokenizer,
)
from .export import *
from .finetune import *
29 changes: 29 additions & 0 deletions src/sparseml/transformers/base.py
@@ -0,0 +1,29 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging


_LOGGER = logging.getLogger(__name__)


def check_transformers_install():
try:
import transformers # noqa F401
except ImportError as transformers_err:
_LOGGER.warning(
"transformers dependency is not installed. "
"To install, run `pip sparseml[transformers]`"
)
raise transformers_err
3 changes: 1 addition & 2 deletions src/sparseml/transformers/export.py
@@ -88,9 +88,8 @@
from sparseml.pytorch.opset import TORCH_DEFAULT_ONNX_OPSET
from sparseml.pytorch.optim import ScheduledModifierManager
from sparseml.pytorch.utils import export_onnx
from sparseml.transformers import SparseAutoTokenizer
from sparseml.transformers import SparseAutoModel, SparseAutoTokenizer
from sparseml.transformers.sparsification import Trainer
from sparseml.transformers.utils import SparseAutoModel
from sparsezoo.utils.onnx import EXTERNAL_ONNX_DATA_NAME


5 changes: 0 additions & 5 deletions src/sparseml/transformers/finetune/callbacks.py
@@ -131,11 +131,6 @@ def disable_amp(self, epoch: float):

:param epoch: epoch to disable from
"""
if not self.on_begin_called:
# disable if training loops haven't started so we don't load
# the empty scaler state dict and instead disable it from the start
self.trainer.use_cuda_amp = False

if hasattr(self.trainer, "scaler"):
self.trainer.scaler._enabled = False

6 changes: 0 additions & 6 deletions src/sparseml/transformers/finetune/session_mixin.py
@@ -388,13 +388,7 @@ def evaluate(self, *args, **kwargs):
"""
self.initialize_structure()

# Always evaluate w/ fp32 to be closer to DeepSparse
use_cuda_amp = self.use_cuda_amp
if not self.args.fp16_full_eval and not self.args.bf16_full_eval:
self.use_cuda_amp = False

output = super().evaluate(*args, **kwargs)
self.use_cuda_amp = use_cuda_amp
self.finalize_session()

return output
5 changes: 4 additions & 1 deletion src/sparseml/transformers/finetune/text_generation.py
@@ -40,7 +40,10 @@
from sparseml.transformers.finetune.runner import StageRunner
from sparseml.transformers.finetune.trainer import Trainer
from sparseml.transformers.finetune.training_args import TrainingArguments
from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src
from sparseml.transformers.sparsification.sparse_model import (
SparseAutoModel,
get_shared_tokenizer_src,
)
from sparseml.transformers.utils.helpers import detect_last_checkpoint


4 changes: 0 additions & 4 deletions src/sparseml/transformers/finetune/trainer.py
@@ -91,10 +91,6 @@ def save_optimizer_and_scheduler(self, output_dir: Optional[str] = None):
os.path.join(output_dir, "scheduler.pt"),
)
reissue_pt_warnings(caught_warnings)
if self.use_cuda_amp:
torch.save(
self.scaler.state_dict(), os.path.join(output_dir, "scaler.pt")
)

def _save_checkpoint(self, model, trial, metrics=None):
# Call into the save checkpoint by HF Transformers, which saves the
8 changes: 6 additions & 2 deletions src/sparseml/transformers/masked_language_modeling.py
@@ -54,8 +54,12 @@
from transformers.utils.versions import require_version

from sparseml.pytorch.utils.distributed import record
from sparseml.transformers.sparsification import Trainer, TrainingArguments
from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src
from sparseml.transformers.sparsification import (
SparseAutoModel,
Trainer,
TrainingArguments,
)
from sparseml.transformers.sparsification.sparse_model import get_shared_tokenizer_src


metadata_args = [
13 changes: 13 additions & 0 deletions src/sparseml/transformers/modify/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
3 changes: 2 additions & 1 deletion src/sparseml/transformers/question_answering.py
@@ -47,10 +47,11 @@
from sparseml.pytorch.utils.distributed import record
from sparseml.transformers.sparsification import (
QuestionAnsweringTrainer,
SparseAutoModel,
TrainingArguments,
postprocess_qa_predictions,
)
from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src
from sparseml.transformers.sparsification.sparse_model import get_shared_tokenizer_src


# You can also adapt this script on your own question answering task.
3 changes: 3 additions & 0 deletions src/sparseml/transformers/sparsification/__init__.py
@@ -20,5 +20,8 @@
# flake8: noqa

from .question_answering import *
from .sparse_config import *
from .sparse_model import *
from .sparse_tokenizer import *
from .trainer import *
from .training_args import *
21 changes: 21 additions & 0 deletions src/sparseml/transformers/sparsification/modification/__init__.py
@@ -0,0 +1,21 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa
from .modify_model import modify_model
from .modifying_bert import *
from .modifying_distilbert import *
from .modifying_llama import *
from .modifying_mistral import *
from .modifying_mobilebert import *
from .modifying_opt import *
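The body of `modify_model` is not part of this diff, so the exact call signature below is an assumption; the sketch supposes it accepts an already-loaded `torch.nn.Module` and returns it with SparseML-specific submodule replacements applied for the architectures registered above:

```python
# Hedged sketch: modify_model's signature is assumed, not shown in this diff
from sparseml.transformers import SparseAutoModelForCausalLM
from sparseml.transformers.sparsification.modification import modify_model

model = SparseAutoModelForCausalLM.from_pretrained(
    "path/to/llama-checkpoint"  # hypothetical local checkpoint
)

# Assumed behavior: registered architectures (bert, distilbert, llama, mistral,
# mobilebert, opt) get their relevant submodules swapped for SparseML-aware ones
model = modify_model(model)
```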