Merge pull request #5 from NorskRegnesentral/binary_segmentation

Add Binary Segmentation type algorithms

Tveten authored Jan 1, 2024
2 parents bfe5031 + 6be521f commit 623eac9

Showing 21 changed files with 1,367 additions and 267 deletions.
2 changes: 1 addition & 1 deletion build_tools/make_release.py
@@ -146,7 +146,7 @@ def __init__(self, branch):
    def action(self, context):
        """Carry out action."""
        self.instruct(
-            f"Make sure you're on: {self.branch}, you're local "
+            f"Make sure you're on: {self.branch}, your local "
            f"branch is up-to-date, and all new changes are merged "
            f"in."
        )
30 changes: 30 additions & 0 deletions interactive/explore_circular_binseg.py
@@ -0,0 +1,30 @@
import plotly.express as px
from streamchange.utils import Profiler

from skchange.anomaly_detectors.circular_binseg import (
    CircularBinarySegmentation,
    make_anomaly_intervals,
)
from skchange.datasets.generate import teeth

df = teeth(n_segments=3, mean=10, segment_length=20, p=1, random_state=7)
detector = CircularBinarySegmentation(
score="mean", growth_factor=1.5, min_segment_length=10
)
anomalies = detector.fit_predict(df)

df.plot(kind="line", backend="plotly")

px.scatter(detector.scores, x="argmax_anomaly_start", y="score")

# Test anomaly intervals
anomaly_intervals = make_anomaly_intervals(0, 5, 2)

# Profiling
n = int(1e5)
df = teeth(n_segments=1, mean=0, segment_length=n, p=1)
detector = CircularBinarySegmentation("mean", growth_factor=1.5)
profiler = Profiler()
profiler.start()
detector.fit_predict(df)
profiler.stop()
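
For orientation: with score="mean", CircularBinarySegmentation rates each candidate interval by comparing its mean against the mean of the surrounding data, in the spirit of circular binary segmentation. Below is a minimal sketch of such a score — illustrative only, with a made-up name circular_mean_score; skchange's actual numba-compiled "mean" score differs in detail.

import numpy as np

def circular_mean_score(x: np.ndarray, start: int, end: int) -> float:
    """Score the interval [start, end) against the rest of the window.

    Larger values indicate stronger evidence that the interval is anomalous.
    """
    inside = x[start:end]
    outside = np.concatenate([x[:start], x[end:]])
    n_in, n_out = len(inside), len(outside)
    if n_in == 0 or n_out == 0:
        return 0.0
    # Normalized two-sample mean difference.
    diff = inside.mean() - outside.mean()
    return abs(diff) * np.sqrt(n_in * n_out / (n_in + n_out))

x = np.concatenate([np.zeros(20), np.full(20, 10.0), np.zeros(20)])
print(circular_mean_score(x, 20, 40))  # maximal at the true anomalous interval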
33 changes: 33 additions & 0 deletions interactive/explore_seeded_binseg.py
@@ -0,0 +1,33 @@
import plotly.express as px
from streamchange.utils import Profiler

from skchange.change_detectors.seeded_binseg import SeededBinarySegmentation
from skchange.datasets.generate import teeth

df = teeth(n_segments=2, mean=10, segment_length=20, p=1, random_state=7)
detector = SeededBinarySegmentation(score="mean", growth_factor=2)
detector.fit_predict(df)

df.plot(kind="line", backend="plotly")

px.scatter(detector.scores, x="maximizer", y="score", hover_data=["start", "end"])


# Profiling
n = int(1e6)
df = teeth(n_segments=1, mean=0, segment_length=n, p=1)
detector = SeededBinarySegmentation("mean", growth_factor=1.5, min_segment_length=10)
profiler = Profiler()
profiler.start()
detector.fit_predict(df)
profiler.stop()


# Test tuning
df_train = teeth(n_segments=1, mean=0, segment_length=10000, p=1, random_state=9)
df_test = teeth(n_segments=10, mean=5, segment_length=1000, p=1, random_state=5)
detector = SeededBinarySegmentation(
score="mean", threshold_scale=None, min_segment_length=10
)
detector.fit(df_train)
changepoints = detector.predict(df_test)
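
Here growth_factor controls how fast the seeded interval lengths grow from one scale to the next. A rough sketch of seeded-interval generation in the style of Kovács et al.'s seeded binary segmentation — the helper seeded_intervals is hypothetical, not skchange's exact grid:

import math

def seeded_intervals(n: int, min_length: int = 10, growth_factor: float = 1.5):
    """Seeded intervals: per scale, fixed-length intervals laid out with
    roughly 50% overlap, with lengths growing geometrically by growth_factor.

    Assumes growth_factor > 1 so the loop terminates.
    """
    intervals = []
    length = min_length
    while length <= n:
        step = max(1, length // 2)  # ~half-overlapping placement
        for start in range(0, n - length + 1, step):
            intervals.append((start, start + length))
        length = math.ceil(length * growth_factor)
    return intervals

print(seeded_intervals(100, min_length=25, growth_factor=2.0))

Each seeded interval is then scored for a single changepoint and the strongest candidates above the threshold are kept; with threshold_scale=None, as in the tuning example above, the threshold appears to be tuned during fit, which would be why the example fits on anomaly-free training data first.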
37 changes: 20 additions & 17 deletions skchange/anomaly_detectors/capa.py
@@ -9,13 +9,11 @@
from numba import njit
from sktime.annotation.base import BaseSeriesAnnotator

-from skchange.anomaly_detectors.mvcapa import (
-    check_capa_input,
-    dense_capa_penalty,
-    run_base_capa,
-)
+from skchange.anomaly_detectors.mvcapa import dense_capa_penalty, run_base_capa
from skchange.anomaly_detectors.utils import format_anomaly_output
from skchange.costs.saving_factory import saving_factory
+from skchange.utils.validation.data import check_data
+from skchange.utils.validation.parameters import check_larger_than


@njit
@@ -127,10 +125,10 @@ def __init__(

        self.saving_func, self.saving_init_func = saving_factory(self.saving)

-        if self.min_segment_length < 2:
-            raise ValueError("min_segment_length must be at least 2.")
-        if self.max_segment_length < self.min_segment_length:
-            raise ValueError("max_segment_length must be at least min_segment_length.")
+        check_larger_than(0, collective_penalty_scale, "collective_penalty_scale")
+        check_larger_than(0, point_penalty_scale, "point_penalty_scale")
+        check_larger_than(2, min_segment_length, "min_segment_length")
+        check_larger_than(min_segment_length, max_segment_length, "max_segment_length")

    def _get_penalty_components(self, X: pd.DataFrame) -> Tuple[np.ndarray, float]:
        # TODO: Add penalty tuning.
@@ -174,15 +172,17 @@ def _fit(self, X: pd.DataFrame, Y: Optional[pd.DataFrame] = None):
        ------------
        creates fitted model (attributes ending in "_")
        """
-        X = check_capa_input(X, self.min_segment_length)
+        X = check_data(
+            X,
+            min_length=self.min_segment_length,
+            min_length_name="min_segment_length",
+        )
        self.collective_penalty_, self.point_penalty_ = self._get_penalty_components(X)
        return self

    def _predict(self, X: Union[pd.DataFrame, pd.Series]) -> pd.Series:
        """Create annotations on test/deployment data.

        core logic

        Parameters
        ----------
        X : pd.DataFrame - data to annotate, time series
@@ -192,7 +192,7 @@ def _predict(self, X: Union[pd.DataFrame, pd.Series]) -> pd.Series:
        Y : pd.Series - annotations for sequence X
            exact format depends on annotation type
        """
-        X = check_capa_input(X, self.min_segment_length)
+        X = check_data(
+            X,
+            min_length=self.min_segment_length,
+            min_length_name="min_segment_length",
+        )
        opt_savings, self.collective_anomalies, self.point_anomalies = run_capa(
            X.values,
            self.saving_func,
@@ -202,15 +206,14 @@ def _predict(self, X: Union[pd.DataFrame, pd.Series]) -> pd.Series:
            self.min_segment_length,
            self.max_segment_length,
        )
-        self.scores = np.diff(opt_savings, prepend=0.0)
+        self.scores = pd.Series(opt_savings, index=X.index, name="score")
        anomalies = format_anomaly_output(
            self.fmt,
            self.labels,
-            X.shape[0],
+            X.index,
            self.collective_anomalies,
            self.point_anomalies if not self.ignore_point_anomalies else None,
-            X.index,
-            self.scores,
+            scores=self.scores,
        )
        return anomalies

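The new validation helpers are only visible here through their call sites. As a rough guide to the contract they appear to enforce — a sketch under assumptions inferred from the calls above, not skchange's actual implementation in skchange.utils.validation:

import pandas as pd

def check_larger_than(min_value, value, name: str):
    """Raise if value is below min_value (contract inferred from the call sites)."""
    if value < min_value:
        raise ValueError(f"{name} must be at least {min_value}, got {value}.")
    return value

def check_data(X, min_length: int, min_length_name: str = "min_length") -> pd.DataFrame:
    """Coerce X to a DataFrame and require at least min_length samples."""
    if isinstance(X, pd.Series):
        X = X.to_frame()
    if len(X) < min_length:
        raise ValueError(
            f"X must have at least {min_length} samples "
            f"({min_length_name}={min_length}), got {len(X)}."
        )
    return X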
