Merge branch 'main' into sa/marlin_24
Sara Adkins committed Jun 7, 2024
2 parents add8fbe + 3cd9a8c commit bddcb09
Showing 3 changed files with 129 additions and 14 deletions.
2 changes: 1 addition & 1 deletion examples/llama7b_w4a16_quantization.ipynb
@@ -153,7 +153,7 @@
"metadata": {},
"outputs": [],
"source": [
"model.save_pretrained(\"/network/sadkins/llama1.1b_W4A16_channel_packed\", save_compressed=True)"
"model.save_pretrained(\"llama1.1b_W4A16_channel_packed\", save_compressed=True)"
]
}
],
101 changes: 101 additions & 0 deletions src/sparseml/transformers/compression/helpers.py
@@ -0,0 +1,101 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Optional

import torch
from tqdm import tqdm

from sparseml.pytorch.utils import get_linear_layers


__all__ = [
"tensor_follows_mask_structure",
"infer_sparsity_structure_from_stage_modifiers",
"infer_sparsity_structure_from_model",
]


def tensor_follows_mask_structure(tensor, mask: str = "2:4") -> bool:
"""
:param tensor: tensor to check
:param mask: mask structure to check for, in the format "n:m"
:return: True if the tensor follows the mask structure, False otherwise.
        Note: some weights can incidentally be zero, so we check for
        at least n zeros in each chunk of size m
"""

n, m = tuple(map(int, mask.split(":")))
# Reshape the tensor into chunks of size m
tensor = tensor.view(-1, m)

# Count the number of zeros in each chunk
zero_counts = (tensor == 0).sum(dim=1)

    # Check that the number of zeros in each chunk is at least n.
    # A greater-than-or-equal comparison is needed because some weights
    # can incidentally be zero
return torch.all(zero_counts >= n).item()


def infer_sparsity_structure_from_stage_modifiers(
stage_modifiers: List["StageModifier"], # noqa E501
) -> Optional[str]:
"""
Determines the sparsity structure, if any exists, given the
list of stage modifiers
:param stage_modifiers: non-empty list of stage modifiers
:return: sparsity structure as a string or None
"""
for stage in stage_modifiers:
if stage.applied:
for modifier in stage.modifiers:
if hasattr(modifier, "mask_structure"):
sparsity_structure = modifier.mask_structure
return sparsity_structure
return None


def infer_sparsity_structure_from_model(model: torch.nn.Module) -> Optional[str]:
"""
Determines the sparsity structure, if any exists, given the model
:param model: model to check for sparsity structure
:return: sparsity structure as a string or None
"""

# check for the common sparsity structures
structures = {"2:4"}
for sparsity_structure in structures:
linear_modules = get_linear_layers(model)
linear_modules_with_sparsity_structure = [
tensor_follows_mask_structure(layer.weight)
for layer in tqdm(
linear_modules.values(),
desc="Checking whether model follows "
f"{sparsity_structure} sparsity structure",
)
]
        # if the majority of the linear modules follow the sparsity structure
        # we can assume that the model follows the sparsity structure
        # (taking into account that some Linear layers, such as the output
        # head, might not be sparse)
if (
sum(linear_modules_with_sparsity_structure)
> len(linear_modules_with_sparsity_structure) * 0.8
):
return sparsity_structure

return None
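
For reference, a minimal usage sketch of the new helpers (illustrative only, not part of this commit; the toy tensor and model below are assumptions):

import torch
from sparseml.transformers.compression.helpers import (
    infer_sparsity_structure_from_model,
    tensor_follows_mask_structure,
)

# every chunk of 4 values holds at least 2 zeros, so the 2:4 check passes
weight = torch.tensor([0.0, 1.5, 0.0, -0.2, 0.3, 0.0, 0.0, 0.7])
assert tensor_follows_mask_structure(weight, mask="2:4")

# a fully dense weight fails the check
assert not tensor_follows_mask_structure(torch.ones(8), mask="2:4")

# the model-level helper scans every Linear layer and returns "2:4" only if
# more than 80% of them follow the structure, otherwise None
toy_model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8))
print(infer_sparsity_structure_from_model(toy_model))  # None for a dense model
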
40 changes: 27 additions & 13 deletions src/sparseml/transformers/compression/sparsity_config.py
@@ -21,6 +21,10 @@
from compressed_tensors import CompressionFormat, SparsityCompressionConfig
from compressed_tensors.quantization.utils import is_model_quantized
from sparseml.pytorch.utils import ModuleSparsificationInfo
+from sparseml.transformers.compression.helpers import (
+    infer_sparsity_structure_from_model,
+    infer_sparsity_structure_from_stage_modifiers,
+)


class SparsityConfigMetadata:
@@ -47,26 +51,34 @@ def infer_global_sparsity(
return global_sparsity

@staticmethod
-    def infer_sparsity_structure() -> str:
+    def infer_sparsity_structure(model: Optional[Module] = None) -> str:
"""
-        Determines what sparsity structure, if any, was applied in the currently active
-        sparse session
+        Determines what sparsity structure, if any, was applied.
+        First, there is an attempt to deduce the sparsity structure
+        from the currently active sparse session.
+        If that fails, the sparsity structure is inferred from the
+        model (if provided).
+        Finally, if both fail, the sparsity structure is set to
+        "unstructured".
:return: sparsity structure as a string
"""
+        sparsity_structure = None

current_session = sparseml.active_session()
stage_modifiers = current_session.lifecycle.modifiers
sparsity_structure = "unstructured"
if stage_modifiers:
sparsity_structure = infer_sparsity_structure_from_stage_modifiers(
stage_modifiers
)

-        # check for applied pruning modifiers
-        for stage in stage_modifiers:
-            if stage.applied:
-                for modifier in stage.modifiers:
-                    if hasattr(modifier, "mask_structure"):
-                        sparsity_structure = modifier.mask_structure
-                        break
+        if model and sparsity_structure is None:
+            sparsity_structure = infer_sparsity_structure_from_model(model)

-        return sparsity_structure
+        return sparsity_structure or "unstructured"

@staticmethod
def from_pretrained(
@@ -91,7 +103,9 @@ def from_pretrained(
if global_sparsity < 0.05:
return None

-        sparsity_structure = SparsityConfigMetadata.infer_sparsity_structure()
+        sparsity_structure = SparsityConfigMetadata.infer_sparsity_structure(
+            model=model
+        )
if is_model_quantized(model):
# compressing a sparse quantized model is not supported yet
format = CompressionFormat.dense.value
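
A sketch of the updated lookup order in infer_sparsity_structure (illustrative only, not part of this commit; the toy model is an assumption standing in for a real Transformer):

import torch
from sparseml.transformers.compression.sparsity_config import SparsityConfigMetadata

toy_model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.Linear(16, 16))

# 1) try the stage modifiers of the active sparse session
# 2) otherwise fall back to inspecting the model's Linear weights (new here)
# 3) default to "unstructured" if neither source yields a structure
structure = SparsityConfigMetadata.infer_sparsity_structure(model=toy_model)
print(structure)  # "unstructured" for this dense toy model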
