Skip to content

Commit

Permalink
RF RunningMean
Browse files Browse the repository at this point in the history
  • Loading branch information
albertz committed Jan 3, 2025
1 parent ed5db53 commit 0dd1b3f
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions returnn/frontend/reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"reduce_argmin",
"reduce_argmax",
"reduce_out",
"RunningMean",
"top_k",
]

Expand Down Expand Up @@ -181,6 +182,57 @@ def reduce_out(
return out


class RunningMean(rf.Module):
    """
    Running mean, using exponential moving average, using the formula::

        # E.g. for some input [B,T,F], reduce to [F], when the mean vector is [F].
        new_value = reduce_mean(new_value, axis=[d for d in x.dims if d not in mean.dims])
        new_mean = alpha * new_value + (1 - alpha) * old_mean
                 = old_mean + alpha * (new_value - old_mean)  # more numerically stable

    (Like the TF :class:`AccumulateMeanLayer`.)
    (Similar is also the running mean in :class:`BatchNorm`.)
    """

    def __init__(
        self,
        in_dim: Union[Dim, Sequence[Dim]],
        *,
        alpha: float,
        dtype: Optional[str] = None,
        is_prob_distribution: Optional[bool] = None,
    ):
        """
        :param in_dim: the dim of the mean vector, or the shape.
        :param alpha: factor for new_value. 0.0 means no update, 1.0 means always the new value.
            Also called momentum. E.g. 0.1 is a common value, or less, like 0.001.
        :param dtype: the dtype of the mean vector
        :param is_prob_distribution: if True, will initialize the mean vector uniformly,
            i.e. with 1/N where N is the total number of entries of the mean tensor.
        """
        super().__init__()
        self.in_dim = in_dim
        # Normalize to a tuple of Dims, whether a single Dim or a sequence was given.
        self.shape = (in_dim,) if isinstance(in_dim, Dim) else tuple(in_dim)
        assert all(isinstance(d, Dim) for d in self.shape)
        self.alpha = alpha
        self.is_prob_distribution = is_prob_distribution
        # auxiliary=True: not trained by the optimizer; updated manually in __call__.
        self.mean = rf.Parameter(self.shape, dtype=dtype, auxiliary=True)
        if is_prob_distribution:
            # Uniform initialization over all entries. Using the product over all dims
            # (instead of in_dim.dimension) also supports in_dim given as a sequence.
            num_entries = 1
            for d in self.shape:
                assert d.dimension is not None  # need static dims for uniform init
                num_entries *= d.dimension
            self.mean.initial = 1.0 / num_entries

    def __call__(self, x: Tensor) -> Tensor:
        """
        :param x: shape [..., F], i.e. it must contain all dims of the mean,
            and may have further dims (e.g. batch, time) which are reduced (averaged) away.
        :return: the updated running mean, shape [F]
        """
        # The mean's dims must be a subset of x.dims; any remaining dims of x are reduced below.
        # (Note: the inverted check `d in self.shape for d in x.dims` would reject the
        #  documented [B,T,F] input and render the reduce_mean below a no-op.)
        assert all(d in x.dims for d in self.shape)
        x_ = rf.reduce_mean(x, axis=[d for d in x.dims if d not in self.shape])
        # In-place update of the auxiliary parameter: mean += alpha * (new - mean).
        self.mean.assign_add(self.alpha * (x_ - self.mean))
        return self.mean


# noinspection PyShadowingBuiltins
def top_k(
source: Tensor,
Expand Down

0 comments on commit 0dd1b3f

Please sign in to comment.