diff --git a/sparsimony/pruners/unstructured.py b/sparsimony/pruners/unstructured.py
index 4ba40b7..342340f 100644
--- a/sparsimony/pruners/unstructured.py
+++ b/sparsimony/pruners/unstructured.py
@@ -5,6 +5,7 @@
 from sparsimony.pruners.base import BasePruner, BaseGrower
 
+_EPS = 0.001
 
 class UnstructuredRandomPruner(BasePruner):
     """Pruning method that randomly prunes tensor."""
@@ -29,7 +30,7 @@ def calculate_mask(
         """
         n_drop = int(mask.sum() * prune_ratio)
         scores = torch.where(
-            mask == 1, torch.abs(torch.rand_like(mask)), torch.zeros_like(mask)
+            mask == 1, torch.abs(torch.rand_like(mask) + _EPS), torch.zeros_like(mask)
         )
         if dist.is_initialized():
             dist.all_reduce(scores, dist.ReduceOp.AVG, async_op=False)
@@ -75,7 +76,7 @@ def calculate_mask(
         n_grow = cls.get_n_grow(sparsity, mask)
         scores = torch.where(
             mask == 0,
-            torch.abs(torch.rand_like(mask) + 0.1),  # small eps for avoiding 0s
+            torch.abs(torch.rand_like(mask) + _EPS),  # small eps for avoiding 0s
             torch.zeros_like(mask),
         )
         if dist.is_initialized():
@@ -97,12 +98,12 @@ def calculate_mask(
     ) -> torch.Tensor:
         if grads is None:
             # Randomly grow
-            grads = torch.rand_like(mask)
+            grads = torch.rand_like(mask) + _EPS
         n_grow = cls.get_n_grow(sparsity, mask)
 
         # Set scores of active params to 0
         scores = torch.where(
-            mask == 0, torch.abs(grads), torch.full_like(grads, -1)
+            mask == 0, torch.abs(grads) + _EPS, torch.full_like(grads, -1)
         )
         if dist.is_initialized():
             dist.all_reduce(scores, dist.ReduceOp.AVG, async_op=False)
diff --git a/tests/sparsimony/test_dst_mixin.py b/tests/sparsimony/test_dst_mixin.py
index 6d56732..9b7dfd6 100644
--- a/tests/sparsimony/test_dst_mixin.py
+++ b/tests/sparsimony/test_dst_mixin.py
@@ -13,12 +13,14 @@
         (5, 5),  # 5x5 mask and initial sparsity of 20%
         # (32, 3, 3),  # 32x3x3 mask and initial sparsity of 90%  # TODO: Conv
         (768, 3072),  # 768x3072 mask and initial sparsity of 99%
+        # (768, 670091),
     ],
     ids=[
         "10x10",
         "5x5",
         # "32x3x3",
         "768x3072",
+        # "768x670091",
     ],
 )
 def model(request):
@@ -38,7 +40,7 @@ def id_fn(sparsity):
 
 
 @pytest.mark.parametrize(
-    "sparsity", [0.0, 0.1, 0.5, 0.75, 0.9, 0.99], ids=id_fn
+    "sparsity", [0.0, 0.1, 0.5, 0.75, 0.83, 0.9, 0.99], ids=id_fn
 )
 def test_zero_inactive_param_momentum_buffers_sgd(model, sparsity):
     # Create a mock Linear layer and optimizer
@@ -76,7 +78,7 @@ def test_zero_inactive_param_momentum_buffers_sgd(model, sparsity):
 
 
 @pytest.mark.parametrize(
-    "sparsity", [0.0, 0.1, 0.5, 0.75, 0.9, 0.99], ids=id_fn
+    "sparsity", [0.0, 0.1, 0.5, 0.75, 0.83, 0.9, 0.99], ids=id_fn
 )
 def test_zero_inactive_param_momentum_buffers_adamw(model, sparsity):
     optimizer = optim.AdamW(model.parameters(), lr=0.1)
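
Note on the _EPS change (commentary, not part of the patch): torch.rand_like samples from [0, 1), so a candidate's random score can land on exactly 0.0 and become indistinguishable from the sentinel zero scores assigned to the other side of the mask; the previous hard-coded 0.1 guarded against this in the grower only. The sketch below is a minimal standalone illustration, assuming the same score construction as the grower above, showing the tie and how the offset breaks it:

import torch

_EPS = 0.001

mask = torch.tensor([0.0, 1.0, 0.0, 1.0])  # 1 = active weight, 0 = grow candidate
rand = torch.zeros_like(mask)  # contrived worst case: every draw is exactly 0.0

# Without the offset, candidate scores tie with the active sentinels at 0,
# so topk may "grow" positions that are already active.
scores_no_eps = torch.where(mask == 0, torch.abs(rand), torch.zeros_like(mask))
# With the offset, every candidate is strictly positive and always outranks
# the sentinel zeros.
scores_eps = torch.where(mask == 0, torch.abs(rand + _EPS), torch.zeros_like(mask))

print(torch.topk(scores_no_eps, k=2).indices)  # arbitrary among the 4-way tie
print(torch.topk(scores_eps, k=2).indices)     # only candidate positions 0 and 2

Any strictly positive shift suffices to break the tie; the patch simply standardizes on one shared small constant in place of the earlier one-off 0.1.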