SplunkPy cache incidents by window (demisto#32857)
* Added solution

* Added RNs

* Updated docker image

* Added docstrings

* Bump pack from version SplunkPy to 3.1.20.

* Added logs and comments

---------

Co-authored-by: Content Bot <[email protected]>
anas-yousef and Content Bot authored Feb 28, 2024
1 parent 0a8742f commit fbb1d57
Showing 5 changed files with 124 additions and 21 deletions.
57 changes: 38 additions & 19 deletions Packs/SplunkPy/Integrations/SplunkPy/SplunkPy.py
@@ -230,26 +230,44 @@ def extensive_log(message):
        demisto.debug(message)


-def remove_old_incident_ids(last_run_fetched_ids, current_epoch_time, occurred_look_behind):
-    """Remove the IDs of all the incidents that were found longer ago than twice the look-behind time frame,
-    to stop our IDs dict from becoming too large.
+def remove_irrelevant_incident_ids(last_run_fetched_ids: dict[str, dict[str, str]], window_start_time: str,
+                                   window_end_time: str) -> dict[str, Any]:
+    """Remove the IDs of fetched incidents that are no longer in the fetch window, to prevent the
+    last run object from becoming too large.

     Args:
-        last_run_fetched_ids (list): All the event IDs that weren't out of date in the last run, plus all the new
-            event IDs from events fetched in this run.
-        current_epoch_time (int): The current time in epoch.
-        occurred_look_behind (int): The maximum look-behind time (parameter, as defined by the user).
+        last_run_fetched_ids (dict[str, dict[str, str]]): The IDs of the incidents that were fetched in previous runs.
+        window_start_time (str): The window start time.
+        window_end_time (str): The window end time.

     Returns:
-        new_last_run_fetched_ids (list): The updated list of IDs, without old IDs.
+        dict[str, Any]: The updated dict of IDs, without irrelevant IDs.
     """
-    new_last_run_fetched_ids = {}
-    for inc_id, addition_time in list(last_run_fetched_ids.items()):
-        max_look_behind_in_seconds = occurred_look_behind * 60
-        deletion_threshold_in_seconds = max_look_behind_in_seconds * 2
-        if current_epoch_time - addition_time < deletion_threshold_in_seconds:
-            new_last_run_fetched_ids[inc_id] = addition_time
-
+    new_last_run_fetched_ids: dict[str, dict[str, str]] = {}
+    window_start_datetime = datetime.strptime(window_start_time, SPLUNK_TIME_FORMAT)
+    demisto.debug(f'Beginning to filter irrelevant IDs with respect to window {window_start_time} - {window_end_time}')
+    for incident_id, incident_occurred_time in last_run_fetched_ids.items():
+        # The handling of cached IDs is split in two because the cache format changed:
+        # the first implementation removed IDs from the cache even when they were still relevant,
+        # while the current implementation only removes IDs that fall outside the fetch window.
+        extensive_log(f'[SplunkPy] Checking if {incident_id} is relevant to the fetch window')
+        if isinstance(incident_occurred_time, dict):
+            # New-format entries hold the occurred time at which the incident was seen, and are
+            # deleted from the last fetched IDs once they fall outside the fetch window.
+            incident_start_datetime = datetime.strptime(incident_occurred_time.get('occurred_time', ''), SPLUNK_TIME_FORMAT)
+            if incident_start_datetime >= window_start_datetime:
+                # Keep the incident, since it is still in the fetch window
+                extensive_log(f'[SplunkPy] Keeping {incident_id} as part of the last fetched IDs. {incident_start_datetime=}')
+                new_last_run_fetched_ids[incident_id] = incident_occurred_time
+            else:
+                extensive_log(f'[SplunkPy] Removing {incident_id} from the last fetched IDs')
+        else:
+            # Old-format entries (before version 3.1.20) held the epoch time of their appearance;
+            # migrate them to the new format, with an occurred time equal to the end of the window.
+            extensive_log(f'[SplunkPy] {incident_id} was saved using the old implementation, keeping')
+            new_last_run_fetched_ids[incident_id] = {'occurred_time': window_end_time}
     return new_last_run_fetched_ids
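For reference, the pruning logic above can be exercised on its own. The following is a minimal sketch, not the integration code itself: it assumes SPLUNK_TIME_FORMAT is '%Y-%m-%dT%H:%M:%S' (the format the timestamps in the tests below follow) and uses an invented sample cache covering both the new dict format and the pre-3.1.20 epoch format.

from datetime import datetime
from typing import Any

SPLUNK_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S'  # assumption, mirrors the timestamps used in the tests


def remove_irrelevant_incident_ids(last_run_fetched_ids: dict[str, Any],
                                   window_start_time: str, window_end_time: str) -> dict[str, Any]:
    """Keep only the cached IDs whose occurred time is still inside the fetch window."""
    window_start = datetime.strptime(window_start_time, SPLUNK_TIME_FORMAT)
    kept: dict[str, Any] = {}
    for incident_id, value in last_run_fetched_ids.items():
        if isinstance(value, dict):  # new format: {'occurred_time': ...}
            if datetime.strptime(value['occurred_time'], SPLUNK_TIME_FORMAT) >= window_start:
                kept[incident_id] = value
        else:  # old epoch format: migrate, stamped with the window end
            kept[incident_id] = {'occurred_time': window_end_time}
    return kept


cache = {
    '1': {'occurred_time': '2024-02-12T09:59:59'},  # before the window start -> pruned
    '2': {'occurred_time': '2024-02-12T10:00:00'},  # exactly on the window start -> kept
    '3': 1700497516,                                # old epoch format -> migrated
}
print(remove_irrelevant_incident_ids(cache, '2024-02-12T10:00:00', '2024-02-19T10:00:00'))
# {'2': {'occurred_time': '2024-02-12T10:00:00'}, '3': {'occurred_time': '2024-02-19T10:00:00'}}

The >= makes the window start inclusive, which the tests below pin down explicitly.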


@@ -349,7 +367,7 @@ def fetch_notables(service: client.Service, mapper: UserMappingObject, comment_t
     latest_time = last_run_latest_time or now
     kwargs_oneshot = build_fetch_kwargs(params, occured_start_time, latest_time, search_offset)
     fetch_query = build_fetch_query(params)
-    last_run_fetched_ids = last_run_data.get('found_incidents_ids', {})
+    last_run_fetched_ids: dict[str, Any] = last_run_data.get('found_incidents_ids', {})
     if late_indexed_pagination := last_run_data.get('late_indexed_pagination'):
         # This is for handling the case when events get indexed late and inserted into pages
         # that we have already gone through
@@ -389,12 +407,13 @@ def fetch_notables(service: client.Service, mapper: UserMappingObject, comment_t
             num_of_dropped += 1
             extensive_log(f'[SplunkPy] - Dropped incident {incident_id} due to duplication.')

-    current_epoch_time = int(time.time())
     extensive_log(f'[SplunkPy] Size of last_run_fetched_ids before adding new IDs: {len(last_run_fetched_ids)}')
     for incident_id in incident_ids_to_add:
-        last_run_fetched_ids[incident_id] = current_epoch_time
+        last_run_fetched_ids[incident_id] = {'occurred_time': occured_start_time}
     extensive_log(f'[SplunkPy] Size of last_run_fetched_ids after adding new IDs: {len(last_run_fetched_ids)}')
-    last_run_fetched_ids = remove_old_incident_ids(last_run_fetched_ids, current_epoch_time, occurred_look_behind)
+
+    # Remove the cached IDs that are no longer inside the fetch window
+    last_run_fetched_ids = remove_irrelevant_incident_ids(last_run_fetched_ids, occured_start_time, latest_time)
     extensive_log('[SplunkPy] Size of last_run_fetched_ids after '
                   f'removing old IDs: {len(last_run_fetched_ids)}')
     extensive_log(f'[SplunkPy] SplunkPy - incidents fetched on last run = {last_run_fetched_ids}')
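Stamping new IDs with the window start rather than the wall-clock epoch ties each cache entry's lifetime to the query window that can still return it, which is what prevents the double-fetch described in the release notes. Below is an illustrative simulation of three fetch rounds, reusing the remove_irrelevant_incident_ids sketch above; the fetch_round helper and the round times are invented for the example and are not the integration's actual fetch loop.

def fetch_round(cache: dict, fetched_ids: list[str], window_start: str, window_end: str):
    """One simulated fetch: dedupe against the cache, stamp new IDs, then prune."""
    created = [i for i in fetched_ids if i not in cache]      # duplicates are dropped here
    for incident_id in created:
        cache[incident_id] = {'occurred_time': window_start}  # stamped with the window start
    return remove_irrelevant_incident_ids(cache, window_start, window_end), created


cache: dict = {}
# Round 1: window [10:00, 11:00] returns notables A and B -> both are created.
cache, created = fetch_round(cache, ['A', 'B'], '2024-02-12T10:00:00', '2024-02-12T11:00:00')
assert created == ['A', 'B']

# Round 2: the look-behind keeps the same window start; B comes back alongside a new C.
cache, created = fetch_round(cache, ['B', 'C'], '2024-02-12T10:00:00', '2024-02-12T11:30:00')
assert created == ['C']  # B is recognized as a duplicate, no second incident is opened

# Round 3: the window start moves past 10:00, so A, B and C age out of the cache.
cache, created = fetch_round(cache, [], '2024-02-12T11:00:00', '2024-02-12T12:00:00')
assert cache == {}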
2 changes: 1 addition & 1 deletion Packs/SplunkPy/Integrations/SplunkPy/SplunkPy.yml
@@ -666,7 +666,7 @@ script:
   - contextPath: Splunk.UserMapping.SplunkUser
     description: Splunk user mapping.
     type: String
-  dockerimage: demisto/splunksdk-py3:1.0.0.86438
+  dockerimage: demisto/splunksdk-py3:1.0.0.87498
   isfetch: true
   ismappable: true
   isremotesyncin: true
77 changes: 77 additions & 0 deletions Packs/SplunkPy/Integrations/SplunkPy/SplunkPy_test.py
@@ -574,6 +574,83 @@ def __init__(self):
         assert output == expected_output


+class TestFetchRemovingIrrelevantIncidents:
+
+    notable1 = {'status': '5', 'event_id': '3'}
+    notable2 = {'status': '6', 'event_id': '4'}
+
+    # To mock the service.jobs.oneshot() call in the fetch_notables function, we need to create
+    # the following two classes
+    class Jobs:
+        def __init__(self):
+            self.oneshot = lambda x, **kwargs: TestFetchRemovingIrrelevantIncidents.notable1
+
+    class Service:
+        def __init__(self):
+            self.jobs = TestFetchRemovingIrrelevantIncidents.Jobs()
+
+    def test_backwards_compatible(self, mocker: MockerFixture):
+        """
+        Given
+        - Incident IDs that were fetched in the last fetch round, stored with the epoch time of their occurrence
+        When
+        - Fetching notables
+        Then
+        - Make sure that the last fetched IDs now hold the start of the fetch window, and not the epoch time
+        """
+        from SplunkPy import UserMappingObject
+
+        mocker.patch.object(demisto, 'setLastRun')
+        mock_last_run = {'time': '2024-02-12T10:00:00', 'latest_time': '2024-02-19T10:00:00',
+                         'found_incidents_ids': {'1': 1700497516}}
+        mock_params = {'fetchQuery': '`notable` is cool', 'fetch_limit': 2}
+        mocker.patch('demistomock.getLastRun', return_value=mock_last_run)
+        mocker.patch('demistomock.params', return_value=mock_params)
+        mocker.patch('splunklib.results.JSONResultsReader', return_value=[self.notable1,
+                                                                          self.notable2])
+        service = self.Service()
+        set_last_run_mocker = mocker.patch('demistomock.setLastRun')
+        mapper = UserMappingObject(service, False)
+        splunk.fetch_incidents(service, mapper, 'from_xsoar', 'from_splunk')
+        last_fetched_ids = set_last_run_mocker.call_args_list[0][0][0]['found_incidents_ids']
+        assert last_fetched_ids == {'1': {'occurred_time': '2024-02-19T10:00:00'},
+                                    '3': {'occurred_time': '2024-02-12T10:00:00'},
+                                    '4': {'occurred_time': '2024-02-12T10:00:00'}}
+
+    def test_remove_irrelevant_fetched_incident_ids(self, mocker: MockerFixture):
+        """
+        Given
+        - Incident IDs that were fetched in the last fetch round
+        When
+        - Fetching notables
+        Then
+        - Make sure that the fetched IDs that are no longer in the fetch window are removed
+        """
+        from SplunkPy import UserMappingObject
+
+        mocker.patch.object(demisto, 'setLastRun')
+        mock_last_run = {'time': '2024-02-12T10:00:00', 'latest_time': '2024-02-19T10:00:00',
+                         'found_incidents_ids': {'1': {'occurred_time': '2024-02-12T09:59:59'},
+                                                 '2': {'occurred_time': '2024-02-12T10:00:00'}}}
+        mock_params = {'fetchQuery': '`notable` is cool', 'fetch_limit': 2}
+        mocker.patch('demistomock.getLastRun', return_value=mock_last_run)
+        mocker.patch('demistomock.params', return_value=mock_params)
+        mocker.patch('splunklib.results.JSONResultsReader', return_value=[self.notable1,
+                                                                          self.notable2])
+        service = self.Service()
+        set_last_run_mocker = mocker.patch('demistomock.setLastRun')
+        mapper = UserMappingObject(service, False)
+        splunk.fetch_incidents(service, mapper, 'from_xsoar', 'from_splunk')
+        last_fetched_ids = set_last_run_mocker.call_args_list[0][0][0]['found_incidents_ids']
+        assert last_fetched_ids == {'2': {'occurred_time': '2024-02-12T10:00:00'},
+                                    '3': {'occurred_time': '2024-02-12T10:00:00'},
+                                    '4': {'occurred_time': '2024-02-12T10:00:00'}}
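The second test pins down the boundary semantics: an entry survives the prune when its occurred time is greater than or equal to the window start, so '2024-02-12T10:00:00' is kept while '2024-02-12T09:59:59' is removed. A two-line check of that comparison (the time format, again, is the assumed '%Y-%m-%dT%H:%M:%S'):

from datetime import datetime

fmt = '%Y-%m-%dT%H:%M:%S'  # assumed SPLUNK_TIME_FORMAT
window_start = datetime.strptime('2024-02-12T10:00:00', fmt)
print(datetime.strptime('2024-02-12T09:59:59', fmt) >= window_start)  # False -> ID '1' is pruned
print(datetime.strptime('2024-02-12T10:00:00', fmt) >= window_start)  # True  -> ID '2' is kept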


class TestFetchForLateIndexedEvents:
    notable1 = {'status': '5', 'event_id': 'id_1'}
    notable2 = {'status': '6', 'event_id': 'id_2'}
7 changes: 7 additions & 0 deletions Packs/SplunkPy/ReleaseNotes/3_1_20.md
@@ -0,0 +1,7 @@
+
+#### Integrations
+
+##### SplunkPy
+
+- Fixed an issue where the integration would fetch the same incident twice.
+- Updated the Docker image to *demisto/splunksdk-py3:1.0.0.87498*.
2 changes: 1 addition & 1 deletion Packs/SplunkPy/pack_metadata.json
@@ -2,7 +2,7 @@
     "name": "Splunk",
     "description": "Run queries on Splunk servers.",
     "support": "xsoar",
-    "currentVersion": "3.1.19",
+    "currentVersion": "3.1.20",
     "author": "Cortex XSOAR",
     "url": "https://www.paloaltonetworks.com/cortex",
     "email": "",