[Forkless SparseML Transformers] [Feature Branch] Setting Up The modification module #2046

Closed · wants to merge 21 commits

Changes from 1 commit

Commits (21)
c278130
initial commit
dbogunowicz Feb 8, 2024
2114066
add omitted files
dbogunowicz Feb 8, 2024
8b51955
more edits
dbogunowicz Feb 8, 2024
8bd369d
[Forkless SparseML Transformers] Updating SparseML to be compatible w…
dbogunowicz Feb 26, 2024
80c185c
[Forkless SparseML Transformers] [Feature Branch] Modding `Mistral` (…
dbogunowicz Feb 26, 2024
7f146b4
[Forkless SparseML Transformers] [Feature Branch] Modding `OPT` (#2051)
dbogunowicz Feb 26, 2024
1717fa8
consolidate tests
dbogunowicz Feb 28, 2024
a97a60f
small improvements according to the PR comments
dbogunowicz Feb 28, 2024
ac3d460
beautification
dbogunowicz Feb 28, 2024
8e40c20
fix failing export tests
dbogunowicz Feb 28, 2024
113d97f
pin pytest version to run tests in GHA
Mar 4, 2024
5196960
fix tests and add modify_model functionality to non-llms models
Mar 4, 2024
366ad56
swap recursion for iteration (swap_module function)
Mar 4, 2024
7afff08
working on transformers tests (fixing out of space error) 1
Mar 4, 2024
fcb04a6
Add a checker for transformers version
Mar 11, 2024
c2570ec
Add few missing pieces that got omitted during the refactor
dbogunowicz Mar 11, 2024
ad92492
bring back Sara's commit (somehow got lost during the rebasing)
Mar 11, 2024
b36e0be
PR review changes
Mar 11, 2024
4f80c88
Merge branch 'main' into feature/damian/forkless_transformer_feature
dbogunowicz Mar 11, 2024
b337b8f
fix tests
Mar 11, 2024
aba8da0
Merge branch 'main' into feature/damian/forkless_transformer_feature
dbogunowicz Mar 13, 2024
100 changes: 64 additions & 36 deletions tests/sparseml/transformers/sparsification/modification/conftest.py
@@ -12,16 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from copy import deepcopy
+
 import pytest
 from transformers import AutoConfig, AutoModel
 
 from accelerate import init_empty_weights
-from sparseml.transformers import (
-    SparseAutoConfig,
-    SparseAutoModel,
-    SparseAutoModelForCausalLM,
-)
-from sparsezoo import Model
+from sparseml.transformers import SparseAutoConfig, SparseAutoModelForCausalLM
+from sparseml.transformers.sparsification.modification import modify_model
 
 
 @pytest.fixture
@@ -55,36 +53,6 @@ def llama_zoo_model():
     return model
 
 
-@pytest.fixture
-def distilbert_zoo_model(tmp_path):
-    stub = "zoo:distilbert-squad_wikipedia_bookcorpus-pruned80.4block_quantized"
-    model_path = Model(stub, tmp_path).training.path
-    model = SparseAutoModel.question_answering_from_pretrained(
-        model_path, model_type="model"
-    )
-    return model
-
-
-@pytest.fixture
-def mobilebert_zoo_model(tmp_path):
-    stub = "zoo:mobilebert-squad_wikipedia_bookcorpus-14layer_pruned50.4block_quantized"
-    model_path = Model(stub, tmp_path).training.path
-    model = SparseAutoModel.question_answering_from_pretrained(
-        model_path, model_type="model"
-    )
-    return model
-
-
-@pytest.fixture
-def bert_zoo_model(tmp_path):
-    stub = "zoo:bert-base-squad_wikipedia_bookcorpus-pruned95.obs_quantized"
-    model_path = Model(stub, tmp_path).training.path
-    model = SparseAutoModel.question_answering_from_pretrained(
-        model_path, model_type="model"
-    )
-    return model
-
-
 @pytest.fixture
 def bert_model():
     config = AutoConfig.from_pretrained("bert-base-uncased")
@@ -133,3 +101,63 @@ def opt_model():
     with init_empty_weights():
         model = AutoModel.from_config(config)
     return model
+
+
+@pytest.fixture
+def helpers():
+    return Helpers
+
+
+class Helpers:
+    @staticmethod
+    def check_model_modified(
+        original_model_, module_to_replace, func_to_validate_replacement
+    ):
+        num_attn_blocks = original_model_.config.num_hidden_layers
+
+        original_model = deepcopy(original_model_)
+        modified_model = modify_model(original_model_)
+
+        modified_modules_original_model = [
+            module
+            for module in original_model.modules()
+            if func_to_validate_replacement(module)
+            and isinstance(module, module_to_replace)
+        ]
+
+        modified_modules_modified_model = [
+            module
+            for module in modified_model.modules()
+            if func_to_validate_replacement(module)
+            and isinstance(module, module_to_replace)
+        ]
+
+        original_modules_original_model = [
+            module
+            for module in original_model.modules()
+            if not func_to_validate_replacement(module)
+            and isinstance(module, module_to_replace)
+        ]
+
+        original_modules_modified_model = [
+            module
+            for module in modified_model.modules()
+            if not func_to_validate_replacement(module)
+            and isinstance(module, module_to_replace)
+        ]
+
+        # make sure that the original model has no modified modules
+        # and that the modified model has no original modules
+        assert (
+            len(modified_modules_original_model)
+            == len(original_modules_modified_model)
+            == 0
+        )
+        # make sure that the original model has N original modules
+        # and that the modified model has N modified modules
+        # where N is the number of transformer's attention blocks
+        assert (
+            len(modified_modules_modified_model)
+            == len(original_modules_original_model)
+            == num_attn_blocks
+        )
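
Note: check_model_modified validates the module swap that modify_model performs. The commit "swap recursion for iteration (swap_module function)" indicates the swap walks the module tree iteratively; the snippet below is a minimal sketch of such an iterative swap, assuming a dotted-path traversal. It is an illustration, not SparseML's actual implementation.

import torch.nn as nn


def swap_module(
    model: nn.Module, submodule_name: str, replacement: nn.Module
) -> nn.Module:
    # Hypothetical sketch: walk the dotted submodule path iteratively
    # (no recursion) to reach the parent module, then replace the child
    # in place so the swap is visible through model.modules().
    *parent_names, child_name = submodule_name.split(".")
    parent = model
    for name in parent_names:
        parent = getattr(parent, name)
    setattr(parent, child_name, replacement)
    return model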
@@ -33,7 +33,6 @@ def __init__(self):
 
 
 def test_modify_model_without_actual_modification(model):
-
     is_modified = copy(model.modified)
     model = modify_model(model)
     assert model.modified == is_modified == False  # noqa E712
@@ -12,70 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from copy import deepcopy
-
-from sparseml.transformers.sparsification.modification import modify_model
+from transformers.models.bert.modeling_bert import BertSelfAttention
 
 
-def test_modifying_bert(bert_model):
+def test_modifying_bert(bert_model, helpers):
     from sparseml.transformers.sparsification.modification.modifying_bert import (  # noqa F401
         modify,
     )
 
-    num_attn_blocks = bert_model.config.num_hidden_layers
-
-    # keep the original model for comparison
-    bert_ = deepcopy(bert_model)
-    bert = modify_model(bert_model)
-
-    # check how many modified "BertSelfAttention" modules are in the original
-    # model (should be 0, as the model is not modified yet)
-    modified_modules_original_model = [
-        module
-        for module in bert_.modules()
-        if _is_bert_attention_modified(module)
-        and module.__class__.__name__ == "BertSelfAttention"
-    ]
-    # check how many modified "BertSelfAttention" modules are
-    # in the modified model (should be num_attn_blocks, as the
-    # model is modified, and has num_attn_blocks attention blocks)
-    modified_modules_modified_model = [
-        module
-        for module in bert.modules()
-        if _is_bert_attention_modified(module)
-        and module.__class__.__name__ == "BertSelfAttention"
-    ]
-    # check how many original "BertSelfAttention"
-    # modules are in the original
-    # model (should be num_attn_blocks, as the model is
-    # not modified yet, and has num_attn_blocks attention blocks)
-    original_modules_original_model = [
-        module
-        for module in bert_.modules()
-        if not _is_bert_attention_modified(module)
-        and module.__class__.__name__ == "BertSelfAttention"
-    ]
-    # check how many original "BertSelfAttention"
-    # modules are in the modified
-    # model (should be 0, as the model is
-    # modified, and should not contain any original
-    # "BertSelfAttention" modules)
-    original_modules_modified_model = [
-        module
-        for module in bert.modules()
-        if not _is_bert_attention_modified(module)
-        and module.__class__.__name__ == "BertSelfAttention"
-    ]
-
-    assert (
-        len(modified_modules_original_model)
-        == len(original_modules_modified_model)
-        == 0
-    )
-    assert (
-        len(modified_modules_modified_model)
-        == len(original_modules_original_model)
-        == num_attn_blocks
+    helpers.check_model_modified(
+        bert_model,
+        module_to_replace=BertSelfAttention,
+        func_to_validate_replacement=_is_bert_attention_modified,
     )


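Note: the modify import marked "# noqa F401" is an import for side effects: importing modifying_bert registers its modify function so that modify_model can dispatch on the model's class, and models with no registered modifier pass through unchanged (as test_modify_model_without_actual_modification asserts above). Below is a minimal sketch of such a decorator-based registry, with illustrative names only; SparseML's actual registry may differ.

from typing import Callable, Dict

# Hypothetical registry mapping a model class name to its modify() function.
_MODIFIERS: Dict[str, Callable] = {}


def register(model_class_name: str):
    # Decorator that records a modify() function for a given model class.
    def decorator(func: Callable) -> Callable:
        _MODIFIERS[model_class_name] = func
        return func

    return decorator


def modify_model(model):
    # Dispatch on the model's class name; unregistered models are
    # returned unchanged.
    func = _MODIFIERS.get(model.__class__.__name__)
    return func(model) if func is not None else model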
@@ -12,105 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from copy import deepcopy
+from transformers.models.distilbert.modeling_distilbert import MultiHeadSelfAttention
 
-import pytest
-from transformers import AutoConfig, AutoModel
-
-from accelerate import init_empty_weights
-from sparseml.pytorch.optim.manager import ScheduledModifierManager
-from sparseml.transformers.sparsification.modification import modify_model
-
-
-@pytest.fixture
-def distilbert_recipe():
-    return """version: 1.1.0
-    stage_test:
-      stage_test_modifiers:
-        - !QuantizationModifier
-          exclude_module_types: ['QATMatMul']"""
-
-
-@pytest.fixture
-def distilbert_model():
-    config = AutoConfig.from_pretrained("distilbert/distilbert-base-uncased")
-    with init_empty_weights():
-        model = AutoModel.from_config(config)
-    return model
 
 
-def test_modifying_distilbert(distilbert_model):
+def test_modifying_distilbert(distilbert_model, helpers):
     from sparseml.transformers.sparsification.modification.modifying_distilbert import (  # noqa F401
         modify,
     )
 
-    num_attn_blocks = distilbert_model.config.num_hidden_layers
-
-    # keep the original model for comparison
-    distilbert_ = deepcopy(distilbert_model)
-    distilbert = modify_model(distilbert_model)
-
-    # check how many modified "MultiHeadSelfAttention" modules are in the original
-    # model (should be 0, as the model is not modified yet)
-    modified_modules_original_model = [
-        module
-        for module in distilbert_.modules()
-        if _is_distilbert_attention_modified(module)
-        and module.__class__.__name__ == "MultiHeadSelfAttention"
-    ]
-    # check how many modified "MultiHeadSelfAttention" modules are
-    # in the modified model (should be num_attn_blocks, as the
-    # model is modified, and has num_attn_blocks attention blocks)
-    modified_modules_modified_model = [
-        module
-        for module in distilbert.modules()
-        if _is_distilbert_attention_modified(module)
-        and module.__class__.__name__ == "MultiHeadSelfAttention"
-    ]
-    # check how many original "MultiHeadSelfAttention"
-    # modules are in the original
-    # model (should be num_attn_blocks, as the model is
-    # not modified yet, and has num_attn_blocks attention blocks)
-    original_modules_original_model = [
-        module
-        for module in distilbert_.modules()
-        if not _is_distilbert_attention_modified(module)
-        and module.__class__.__name__ == "MultiHeadSelfAttention"
-    ]
-    # check how many original "MultiHeadSelfAttention"
-    # modules are in the modified
-    # model (should be 0, as the model is
-    # modified, and should not contain any original
-    # "MultiHeadSelfAttention" modules)
-    original_modules_modified_model = [
-        module
-        for module in distilbert.modules()
-        if not _is_distilbert_attention_modified(module)
-        and module.__class__.__name__ == "MultiHeadSelfAttention"
-    ]
-
-    assert (
-        len(modified_modules_original_model)
-        == len(original_modules_modified_model)
-        == 0
-    )
-    assert (
-        len(modified_modules_modified_model)
-        == len(original_modules_original_model)
-        == num_attn_blocks
-    )
-
-
-def test_apply_recipe(distilbert_recipe, distilbert_zoo_model):
-    from sparseml.transformers.sparsification.modification.modifying_distilbert import (  # noqa F401
-        modify,
+    helpers.check_model_modified(
+        distilbert_model,
+        module_to_replace=MultiHeadSelfAttention,
+        func_to_validate_replacement=_is_distilbert_attention_modified,
     )
-
-    manager = ScheduledModifierManager.from_yaml(distilbert_recipe)
-    distilbert_zoo_model.train()
-    manager.apply_structure(distilbert_zoo_model)
-    assert True
 
 
 def _is_distilbert_attention_modified(module):
     # only the modified "MultiHeadSelfAttention" modules have the
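
Note: the definition of _is_distilbert_attention_modified is truncated in this view. Judging from its trailing comment, such checkers test for something that only the swapped-in attention class carries. A hypothetical example of the pattern (the attribute name is invented for illustration; the real checker's condition is not shown here):

def _is_attention_modified(module) -> bool:
    # Hypothetical: a modified attention module carries a marker
    # attribute that the stock transformers implementation lacks.
    return hasattr(module, "attention_scores_matmul")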