diff --git a/docs/providers/documentation/pagerduty-provider.mdx b/docs/providers/documentation/pagerduty-provider.mdx index c3037b645..6a246d059 100644 --- a/docs/providers/documentation/pagerduty-provider.mdx +++ b/docs/providers/documentation/pagerduty-provider.mdx @@ -30,6 +30,7 @@ To connect Keep to PagerDuty: - **Routing Key**: Use for event posting via the PagerDuty Events API. - **API Key**: Use for incident creation and management through the PagerDuty Incidents API. +- **Service Id** (Optional): If provided, Keep operates within the service's scope. - **OAuth2**: Token management handled automatically by Keep. diff --git a/keep/api/models/alert.py b/keep/api/models/alert.py index dd05673c4..e0e0954ec 100644 --- a/keep/api/models/alert.py +++ b/keep/api/models/alert.py @@ -517,6 +517,7 @@ def from_db_incident(cls, db_incident: "Incident"): assignee=db_incident.assignee, services=db_incident.affected_services or [], rule_fingerprint=db_incident.rule_fingerprint, + fingerprint=db_incident.fingerprint, same_incident_in_the_past_id=db_incident.same_incident_in_the_past_id, merged_into_incident_id=db_incident.merged_into_incident_id, merged_by=db_incident.merged_by, @@ -550,6 +551,7 @@ def to_db_incident(self) -> "Incident": is_predicted=self.is_predicted, is_confirmed=self.is_confirmed, rule_fingerprint=self.rule_fingerprint, + fingerprint=self.fingerprint, same_incident_in_the_past_id=self.same_incident_in_the_past_id, merged_into_incident_id=self.merged_into_incident_id, merged_by=self.merged_by, diff --git a/keep/providers/pagerduty_provider/pagerduty_provider.py b/keep/providers/pagerduty_provider/pagerduty_provider.py index 5e9ea7e2a..1217ee1d7 100644 --- a/keep/providers/pagerduty_provider/pagerduty_provider.py +++ b/keep/providers/pagerduty_provider/pagerduty_provider.py @@ -60,6 +60,24 @@ class PagerdutyProviderAuthConfig: default="", ) + service_id: str | None = dataclasses.field( + metadata={ + "required": False, + "description": "Service Id (if 
@staticmethod
def _get_incident_id(incident_id: str) -> uuid.UUID:
    """
    Create a deterministic UUID from the incident id.

    The id is hashed with MD5 and the 32-character hex digest is used as
    the UUID value, so the same incident id always yields the same UUID.
    MD5 is used here only to derive an identifier, not for security.

    Args:
        incident_id (str): The original incident id

    Returns:
        uuid.UUID: The UUID derived from the incident id
    """
    md5 = hashlib.md5()
    md5.update(incident_id.encode("utf-8"))
    return uuid.UUID(md5.hexdigest())


@staticmethod
def _format_incident(
    event: dict, provider_instance: "BaseProvider" = None
) -> IncidentDto | list[IncidentDto]:
    """
    Convert a PagerDuty incident webhook payload into an IncidentDto.

    Args:
        event (dict): The webhook payload; the incident is read from
            ``event["event"]["data"]``. The payload is not modified.
        provider_instance (BaseProvider): Unused by this implementation.

    Returns:
        IncidentDto: The incident, with ``fingerprint`` set to the
            original PagerDuty incident id.

    Raises:
        KeyError: If the payload has no ``event`` or ``data`` key.
        ValueError: If ``created_at`` is not a valid ISO 8601 timestamp.
    """
    data = event["event"]["data"]

    # This will be the same for the same incident
    original_incident_id = data.get("id", "ping")
    incident_id = PagerdutyProvider._get_incident_id(original_incident_id)

    status = PagerdutyProvider.INCIDENT_STATUS_MAP.get(
        data.get("status", "firing"), IncidentStatus.FIRING
    )

    # Nested objects may be present but null, so ``or {}`` is needed in
    # addition to the ``.get`` default.
    priority_summary = (data.get("priority") or {}).get("summary", "P4")
    severity = PagerdutyProvider.INCIDENT_SEVERITIES_MAP.get(
        priority_summary, IncidentSeverity.INFO
    )
    # Read (rather than pop) the service so the caller's payload is left intact
    service = (data.get("service") or {}).get("summary", "unknown")
    alerts_count = (data.get("alert_counts") or {}).get("all", 0)

    created_at = data.get("created_at")
    if created_at:
        # ``fromisoformat`` only accepts a trailing "Z" from Python 3.11 on;
        # normalise it to an explicit UTC offset so older runtimes parse it too.
        if created_at.endswith("Z"):
            created_at = f"{created_at[:-1]}+00:00"
        created_at = datetime.datetime.fromisoformat(created_at)
    else:
        created_at = datetime.datetime.now(tz=datetime.timezone.utc)

    return IncidentDto(
        id=incident_id,
        creation_time=created_at,
        user_generated_name=f'PD-{data.get("title", "unknown")}-{original_incident_id}',
        status=status,
        severity=severity,
        alert_sources=["pagerduty"],
        alerts_count=alerts_count,
        services=[service],
        is_predicted=False,
        is_confirmed=True,
        # This is the reference to the incident in PagerDuty
        fingerprint=original_incident_id,
    )
[self.authentication_config.service_id] response = requests.get( url=url, headers=self.__get_headers(), - params={ - "include[]": include, - "offset": offset, - "limit": 100, - }, + params=params, ) response.raise_for_status() response = response.json() @@ -696,7 +785,7 @@ def __get_all_incidents_or_alerts(self, incident_id: str = None): paginated_response.extend(response.get(resource, [])) self.logger.info("Fetched incidents or alerts", extra={"offset": offset}) # No more results - if response.get("more", False) == False: + if not response.get("more", False): self.logger.info("No more incidents or alerts") break self.logger.info(