Merge pull request #241 from splunk/feature/risk-observable-matching
Enabling risk/observable matching
pyth0n1c authored Sep 3, 2024
2 parents b498bd7 + 6af5d57 commit 0862bc6
Showing 5 changed files with 134 additions and 112 deletions.
@@ -374,12 +374,6 @@ def execute(self):
return

try:
# NOTE: (THIS CODE HAS MOVED) we handle skipping entire detections differently than
# we do skipping individual test cases; we skip entire detections by excluding
# them to an entirely separate queue, while we skip individual test cases via the
# BaseTest.skip() method, such as when we are skipping all integration tests (see
# DetectionBuilder.skipIntegrationTests)
# TODO: are we skipping by production status elsewhere?
detection = self.sync_obj.inputQueue.pop()
self.sync_obj.currentTestingQueue[self.get_name()] = detection
except IndexError:
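
For context, the worker loop this hunk touches hands detections from a shared input queue to per-worker tracking; the sketch below shows that handoff pattern in isolation (class and attribute names are illustrative, not contentctl's actual testing infrastructure):

# Minimal sketch of the queue handoff above: pop a detection from a shared
# input queue, record it in a per-worker "currently testing" map, and treat
# an empty queue (IndexError) as the signal that this worker is done.
class Worker:
    def __init__(self, name: str, input_queue: list, currently_testing: dict):
        self.name = name                            # worker identifier
        self.input_queue = input_queue              # shared list of detections to test
        self.currently_testing = currently_testing  # worker name -> detection in flight

    def take_next(self):
        try:
            detection = self.input_queue.pop()      # raises IndexError when exhausted
        except IndexError:
            return None                             # nothing left to test
        self.currently_testing[self.name] = detection
        return detection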
@@ -322,12 +322,13 @@ def nes_fields(self) -> Optional[str]:
@property
def providing_technologies(self) -> List[ProvidingTechnology]:
return ProvidingTechnology.getProvidingTechFromSearch(self.search)


# TODO (#247): Refactor the risk property of detection_abstract
@computed_field
@property
def risk(self) -> list[dict[str, Any]]:
risk_objects: list[dict[str, str | int]] = []
# TODO (#246): "User Name" type should map to a "user" risk object and not "other"
risk_object_user_types = {'user', 'username', 'email address'}
risk_object_system_types = {'device', 'endpoint', 'hostname', 'ip address'}
process_threat_object_types = {'process name', 'process'}
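
The sets above bucket observable types into risk object categories; a standalone illustration of that bucketing (simplified, not the real risk property, which builds full risk-object dictionaries from the Observable model) might look like:

# Illustrative bucketing of observable types into risk object types using the
# same sets as the property above. If the real property lowercases the type
# before the membership check, a type of "User Name" becomes "user name",
# which is not in the user set and falls through to "other" -- which would
# explain the behavior TODO (#246) calls out.
risk_object_user_types = {'user', 'username', 'email address'}
risk_object_system_types = {'device', 'endpoint', 'hostname', 'ip address'}

def classify_risk_object_type(observable_type: str) -> str:
    normalized = observable_type.lower()
    if normalized in risk_object_user_types:
        return "user"
    if normalized in risk_object_system_types:
        return "system"
    return "other"

print(classify_risk_object_type("Hostname"))   # system
print(classify_risk_object_type("User Name"))  # other (see TODO #246)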
63 changes: 35 additions & 28 deletions contentctl/objects/correlation_search.py
@@ -575,10 +575,11 @@ def get_risk_events(self, force_update: bool = False) -> list[RiskEvent]:
self.logger.debug(f"Using cached risk events ({len(self._risk_events)} total).")
return self._risk_events

# TODO (#248): Refactor risk/notable querying to pin to a single savedsearch ID
# Search for all risk events from a single scheduled search (indicated by orig_sid)
query = (
f'search index=risk search_name="{self.name}" [search index=risk search '
f'search_name="{self.name}" | head 1 | fields orig_sid] | tojson'
f'search_name="{self.name}" | tail 1 | fields orig_sid] | tojson'
)
result_iterator = self._search(query)
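
To make the template concrete, here is what that f-string renders to for a hypothetical detection name (the real value comes from self.name). The subsearch selects a single orig_sid so the outer search only returns risk events from one scheduled run; this commit switches that subsearch from `head 1` to `tail 1`.

# Hypothetical rendering of the risk-event query for a detection named
# "ESCU - Example Detection - Rule" (the name is an example, not from the repo).
name = "ESCU - Example Detection - Rule"
query = (
    f'search index=risk search_name="{name}" [search index=risk search '
    f'search_name="{name}" | tail 1 | fields orig_sid] | tojson'
)
print(query)
# search index=risk search_name="ESCU - Example Detection - Rule" [search index=risk search search_name="ESCU - Example Detection - Rule" | tail 1 | fields orig_sid] | tojson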

@@ -643,7 +644,7 @@ def get_notable_events(self, force_update: bool = False) -> list[NotableEvent]:
# Search for all notable events from a single scheduled search (indicated by orig_sid)
query = (
f'search index=notable search_name="{self.name}" [search index=notable search '
f'search_name="{self.name}" | head 1 | fields orig_sid] | tojson'
f'search_name="{self.name}" | tail 1 | fields orig_sid] | tojson'
)
result_iterator = self._search(query)

@@ -686,15 +687,17 @@ def validate_risk_events(self) -> None:
check the risks/notables
:returns: an IntegrationTestResult on failure; None on success
"""
# TODO (PEX-433): Re-enable this check once we have refined the logic and reduced the false
# positive rate in risk/obseravble matching
# Create a mapping of the relevant observables to counters
# observables = CorrelationSearch._get_relevant_observables(self.detection.tags.observable)
# observable_counts: dict[str, int] = {str(x): 0 for x in observables}
# if len(observables) != len(observable_counts):
# raise ClientError(
# f"At least two observables in '{self.detection.name}' have the same name."
# )
observables = CorrelationSearch._get_relevant_observables(self.detection.tags.observable)
observable_counts: dict[str, int] = {str(x): 0 for x in observables}

# NOTE: we intentionally want this to be an error state and not a failure state, as
# ultimately this validation should be handled during the build process
if len(observables) != len(observable_counts):
raise ClientError(
f"At least two observables in '{self.detection.name}' have the same name; "
"each observable for a detection should be unique."
)

# Get the risk events; note that we use the cached risk events, expecting they were
# saved by a prior call to risk_event_exists
@@ -710,25 +713,29 @@
)
event.validate_against_detection(self.detection)

# TODO (PEX-433): Re-enable this check once we have refined the logic and reduced the
# false positive rate in risk/obseravble matching
# Update observable count based on match
# matched_observable = event.get_matched_observable(self.detection.tags.observable)
# self.logger.debug(
# f"Matched risk event ({event.risk_object}, {event.risk_object_type}) to observable "
# f"({matched_observable.name}, {matched_observable.type}, {matched_observable.role})"
# )
# observable_counts[str(matched_observable)] += 1

# TODO (PEX-433): test my new contentctl logic against an old ESCU build; my logic should
# detect the faulty attacker events -> this was the issue from the 4.28/4.27 release;
# recreate by testing against one of those old builds w/ the bad config
# TODO (PEX-433): Re-enable this check once we have refined the logic and reduced the false
# positive
# rate in risk/obseravble matching
# TODO (PEX-433): I foresee issues here if for example a parent and child process share a
# name (matched observable could be either) -> these issues are confirmed to exist, e.g.
# `Windows Steal Authentication Certificates Export Certificate`
matched_observable = event.get_matched_observable(self.detection.tags.observable)
self.logger.debug(
f"Matched risk event (object={event.risk_object}, type={event.risk_object_type}) "
f"to observable (name={matched_observable.name}, type={matched_observable.type}, "
f"role={matched_observable.role}) using the source field "
f"'{event.source_field_name}'"
)
observable_counts[str(matched_observable)] += 1

# Report any observables which did not have at least one match to a risk event
for observable in observables:
self.logger.debug(
f"Matched observable (name={observable.name}, type={observable.type}, "
f"role={observable.role}) to {observable_counts[str(observable)]} risk events."
)
if observable_counts[str(observable)] == 0:
raise ValidationFailed(
f"Observable (name={observable.name}, type={observable.type}, "
f"role={observable.role}) was not matched to any risk events."
)

# TODO (#250): Re-enable and refactor code that validates the specific risk counts
# Validate risk events in aggregate; we should have an equal amount of risk events for each
# relevant observable, and the total count should match the total number of events
# individual_count: Optional[int] = None
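
Taken together, the validation this commit enables is: count matches per observable, pair every risk event with exactly one observable, and fail if any observable never matched. Below is a condensed, self-contained sketch of that flow, using simplified stand-ins for the real Observable/RiskEvent models and a deliberately naive name-equality match rule:

# Condensed sketch of the risk/observable matching flow enabled above.
# The dataclasses and the match rule (risk_object == observable.name) are
# simplifications of contentctl's real models and matching logic.
from dataclasses import dataclass

@dataclass(frozen=True)
class Observable:
    name: str
    type: str
    role: str

@dataclass
class RiskEvent:
    risk_object: str
    risk_object_type: str

class ValidationFailed(Exception):
    pass

def validate_risk_events(observables: list[Observable], events: list[RiskEvent]) -> None:
    counts = {str(o): 0 for o in observables}
    if len(counts) != len(observables):
        # duplicate observables are an error state, not a test failure
        raise ValueError("at least two observables share a name")
    for event in events:
        matched = next((o for o in observables if o.name == event.risk_object), None)
        if matched is None:
            raise ValidationFailed(f"risk event '{event.risk_object}' matched no observable")
        counts[str(matched)] += 1
    # every relevant observable must be matched by at least one risk event
    for o in observables:
        if counts[str(o)] == 0:
            raise ValidationFailed(f"observable '{o.name}' was not matched to any risk events")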
2 changes: 2 additions & 0 deletions contentctl/objects/detection_tags.py
@@ -52,6 +52,8 @@ def risk_score(self) -> int:

mitre_attack_id: List[MITRE_ATTACK_ID_TYPE] = []
nist: list[NistCategory] = []

# TODO (#249): Add pydantic validator to ensure observables are unique within a detection
observable: List[Observable] = []
message: str = Field(...)
product: list[SecurityContentProductName] = Field(..., min_length=1)
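
TODO (#249) asks for a validator enforcing observable uniqueness; one possible shape for it, assuming pydantic v2 and an Observable model with a name field (hypothetical, not the actual contentctl implementation), is:

# Hypothetical field_validator for TODO (#249): reject detections whose
# observables share a name. Model shapes are simplified for illustration.
from pydantic import BaseModel, field_validator

class Observable(BaseModel):
    name: str
    type: str

class DetectionTags(BaseModel):
    observable: list[Observable] = []

    @field_validator("observable")
    @classmethod
    def observables_must_be_unique(cls, value: list[Observable]) -> list[Observable]:
        names = [o.name for o in value]
        duplicates = {n for n in names if names.count(n) > 1}
        if duplicates:
            raise ValueError(f"duplicate observable names: {sorted(duplicates)}")
        return value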
