Skip to content

Commit

Permalink
Merge branch 'master' into fix-feature-extraction-data
Browse files Browse the repository at this point in the history
  • Loading branch information
jkppr authored Nov 13, 2023
2 parents beaa7d9 + d492c87 commit 9a78e6f
Show file tree
Hide file tree
Showing 4 changed files with 246 additions and 26 deletions.
10 changes: 5 additions & 5 deletions docs/developers/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ Switch to:
And execute the single test

```shell
! nosetests timesketch/lib/emojis_test.py -v
! python3 -m pytest timesketch/lib/emojis_test.py -v
```

Or all in one:

```bash
$ sudo docker exec -it $CONTAINER_ID nosetests /usr/local/src/timesketch/timesketch/lib/emojis_test.py -v
$ sudo docker exec -it $CONTAINER_ID python3 -m pytest /usr/local/src/timesketch/timesketch/lib/emojis_test.py -v
```

## Writing unittests
Expand All @@ -71,7 +71,7 @@ breakpoint()
And then within the docker container execute

```shell
! nosetests /usr/local/src/timesketchtimesketch/lib/emojis_test.py -s -pdb
! python3 -m pytest /usr/local/src/timesketch/timesketch/lib/emojis_test.py -s -pdb
```

## end2end tests
Expand Down Expand Up @@ -104,8 +104,8 @@ The following example is for changing / adding tests to `client_test.py`
```shell
$ export CONTAINER_ID="$(sudo -E docker container list -f name=e2e_timesketch -q)"
$ docker exec -it $CONTAINER_ID /bin/bash
! rm /usr/local/lib/python3.8/dist-packages/end_to_end_tests/client_test.py
! ln -s /usr/local/src/timesketch/end_to_end_tests/client_test.py /usr/local/lib/python3.8/dist-packages/end_to_end_tests/client_test.py
! rm /usr/local/lib/python3.10/dist-packages/end_to_end_tests/client_test.py
! ln -s /usr/local/src/timesketch/end_to_end_tests/client_test.py /usr/local/lib/python3.10/dist-packages/end_to_end_tests/client_test.py
```

From now on you can edit the `client_test.py` file outside of the docker instance and run it again with
Expand Down
39 changes: 32 additions & 7 deletions end_to_end_tests/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,27 +61,35 @@ def __init__(self):
self._counter = collections.Counter()
self._imported_files = []

def import_timeline(self, filename, index_name=None):
def import_timeline(self, filename, index_name=None, sketch=None):
"""Import a Plaso, CSV or JSONL file.
Args:
filename (str): Filename of the file to be imported.
index_name (str): The OpenSearch index to store the documents in.
sketch (Sketch): Optional sketch object to add the timeline to.
if no sketch is provided, the default sketch is used.
Raises:
TimeoutError if import takes too long.
"""
if not sketch:
sketch = self.sketch
if filename in self._imported_files:
return
file_path = os.path.join(TEST_DATA_DIR, filename)
if not index_name:
index_name = uuid.uuid4().hex

with importer.ImportStreamer() as streamer:
streamer.set_sketch(self.sketch)
streamer.set_sketch(sketch)
streamer.set_timeline_name(file_path)
streamer.set_index_name(index_name)
streamer.set_provider("e2e test interface")
streamer.add_file(file_path)
timeline = streamer.timeline
if not timeline:
print("Error creating timeline, please try again.")

# Poll the timeline status and wait for the timeline to be ready
max_time_seconds = 600 # Timeout after 10min
Expand All @@ -91,8 +99,18 @@ def import_timeline(self, filename, index_name=None):
while True:
if retry_count >= max_retries:
raise TimeoutError
_ = timeline.lazyload_data(refresh_cache=True)
status = timeline.status

try:
if not timeline:
print("Error no timeline yet, trying to get the new one")
timeline = streamer.timeline
_ = timeline.lazyload_data(refresh_cache=True)
status = timeline.status
except AttributeError:
# The timeline is not ready yet, so we need to wait
retry_count += 1
time.sleep(sleep_time_seconds)
continue

if not timeline.index:
retry_count += 1
Expand All @@ -101,7 +119,9 @@ def import_timeline(self, filename, index_name=None):

if status == "fail" or timeline.index.status == "fail":
if retry_count > 3:
raise RuntimeError("Unable to import timeline.")
raise RuntimeError(
f"Unable to import timeline {timeline.index.id}."
)

if status == "ready" and timeline.index.status == "ready":
break
Expand Down Expand Up @@ -133,7 +153,8 @@ def import_directly_to_opensearch(self, filename, index_name):
raise ValueError("File [{0:s}] does not exist.".format(file_path))

es = opensearchpy.OpenSearch(
[{"host": OPENSEARCH_HOST, "port": OPENSEARCH_PORT}], http_compress=True
[{"host": OPENSEARCH_HOST, "port": OPENSEARCH_PORT}],
http_compress=True,
)

df = pd.read_csv(file_path, on_bad_lines="warn")
Expand All @@ -143,7 +164,11 @@ def import_directly_to_opensearch(self, filename, index_name):
def _pandas_to_opensearch(data_frame):
for _, row in data_frame.iterrows():
row.dropna(inplace=True)
yield {"_index": index_name, "_type": "_doc", "_source": row.to_dict()}
yield {
"_index": index_name,
"_type": "_doc",
"_source": row.to_dict(),
}

if os.path.isfile(OPENSEARCH_MAPPINGS_FILE):
mappings = {}
Expand Down
203 changes: 203 additions & 0 deletions end_to_end_tests/upload_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""End to end tests of Timesketch upload functionality."""
import os
import random

from timesketch_api_client import search
from . import interface
from . import manager

Expand All @@ -27,5 +30,205 @@ def test_invalid_index_name(self):
with self.assertions.assertRaises(RuntimeError):
self.import_timeline("evtx.plaso", index_name="/invalid/index/name")

def test_normal_upload_json(self):
    """Test the upload of a json file with a few events."""
    # A random suffix keeps sketch and index names unique between runs.
    suffix = random.randint(0, 10000)
    sketch = self.api.create_sketch(name=f"test_normal_upload_json {suffix}")
    self.sketch = sketch

    file_path = (
        "/usr/local/src/timesketch/end_to_end_tests/test_data/sigma_events.jsonl"
    )
    self.import_timeline(file_path, index_name=suffix, sketch=sketch)

    # The importer uses the file path as the timeline name; verify the
    # timeline metadata once the upload has completed.
    timeline = sketch.list_timelines()[0]
    self.assertions.assertEqual(timeline.name, file_path)
    self.assertions.assertEqual(timeline.index.name, str(suffix))
    self.assertions.assertEqual(timeline.index.status, "ready")

    # All four events from the test data file must be searchable.
    events = sketch.explore("*", as_pandas=True)
    self.assertions.assertEqual(len(events), 4)

def test_large_upload_jsonl(self):
    """Test uploading a timeline with a lot of events as jsonl.

    Writes a temporary file with a large number of events, uploads it
    to Timesketch, and then checks that the number of events in the
    resulting timeline is correct.
    """
    # A random suffix keeps sketch and index names unique between runs.
    suffix = random.randint(0, 10000)
    sketch = self.api.create_sketch(name=f"test_large_upload_json {suffix}")
    self.sketch = sketch

    file_path = "/tmp/large.jsonl"
    event_count = 4123

    # Generate the events lazily and write them out in one call.
    with open(file_path, "w", encoding="utf-8") as file_object:
        file_object.writelines(
            f'{{"message":"Count {idx} {suffix}","timestamp":"123456789","datetime":"2015-07-24T19:01:01+00:00","timestamp_desc":"Write time","data_type":"foobarjson"}}\n'  # pylint: disable=line-too-long
            for idx in range(event_count)
        )

    self.import_timeline("/tmp/large.jsonl", index_name=suffix, sketch=sketch)
    os.remove(file_path)

    # The importer uses the file path as the timeline name.
    timeline = sketch.list_timelines()[0]
    self.assertions.assertEqual(timeline.name, file_path)
    self.assertions.assertEqual(timeline.index.name, str(suffix))
    self.assertions.assertEqual(timeline.index.status, "ready")

    search_obj = search.Search(sketch)
    search_obj.query_string = "data_type:foobarjson"
    search_obj.commit()
    self.assertions.assertEqual(len(search_obj.table), event_count)

    # Cross-check the event count via the explore endpoint as well.
    events = sketch.explore("data_type:foobarjson", as_pandas=True)
    self.assertions.assertEqual(len(events), event_count)

def test_very_large_upload_jsonl(self):
    """Test uploading a timeline with over 50k events as jsonl.

    Writes a temporary file, uploads it to Timesketch, and checks the
    event count both with the default query cap and with an increased
    max_entries limit.
    """
    # A random suffix keeps sketch and index names unique between runs.
    suffix = random.randint(0, 10000)
    sketch = self.api.create_sketch(name=f"test__very_large_upload_json {suffix}")
    self.sketch = sketch

    file_path = "/tmp/verylarge.jsonl"
    event_count = 74251

    # Generate the events lazily and write them out in one call.
    with open(file_path, "w", encoding="utf-8") as file_object:
        file_object.writelines(
            f'{{"message":"Count {idx} {suffix}","timestamp":"123456789","datetime":"2015-07-24T19:01:01+00:00","timestamp_desc":"Write time","data_type":"foobarjsonverlarge"}}\n'  # pylint: disable=line-too-long
            for idx in range(event_count)
        )

    self.import_timeline(file_path, index_name=suffix, sketch=sketch)
    os.remove(file_path)

    # The importer uses the file path as the timeline name.
    timeline = sketch.list_timelines()[0]
    self.assertions.assertEqual(timeline.name, file_path)
    self.assertions.assertEqual(timeline.index.name, str(suffix))
    self.assertions.assertEqual(timeline.index.status, "ready")

    search_obj = search.Search(sketch)
    search_obj.query_string = "data_type:foobarjsonverlarge"
    search_obj.commit()

    # The default query caps the returned table at 10k events.
    self.assertions.assertEqual(len(search_obj.table), 10000)
    self.assertions.assertEqual(search_obj.expected_size, event_count)

    # Raising max_entries returns the full result set.
    search_obj.max_entries = 100000
    search_obj.commit()
    self.assertions.assertEqual(len(search_obj.table), event_count)

    # Cross-check the event count via the explore endpoint as well.
    events = sketch.explore(
        "data_type:foobarjsonverlarge", as_pandas=True, max_entries=100000
    )
    self.assertions.assertEqual(len(events), event_count)

def test_large_upload_csv(self):
    """Test uploading a timeline with a lot of events as CSV.

    The test creates a temporary file with a large number of events
    and then uploads the file to Timesketch. It then checks that the
    number of events in the timeline is correct.
    """
    # Create a new sketch. Use a descriptive name, consistent with the
    # other upload tests, instead of a bare random integer.
    rand = random.randint(0, 10000)
    sketch = self.api.create_sketch(name=f"test_large_upload_csv {rand}")
    self.sketch = sketch

    file_path = "/tmp/large.csv"
    event_count = 3251

    with open(file_path, "w", encoding="utf-8") as file_object:
        # CSV header with the columns the importer expects.
        file_object.write(
            '"message","timestamp","datetime","timestamp_desc","data_type"\n'
        )

        for i in range(event_count):
            # write a line with random values for message
            string = (
                f'"CSV Count: {i} {rand}","123456789",'
                '"2015-07-24T19:01:01+00:00","Write time","foobarcsv"\n'
            )
            file_object.write(string)

    self.import_timeline(file_path, index_name=rand, sketch=sketch)
    os.remove(file_path)

    timeline = sketch.list_timelines()[0]
    # check that timeline was uploaded correctly
    self.assertions.assertEqual(timeline.name, file_path)
    self.assertions.assertEqual(timeline.index.name, str(rand))
    self.assertions.assertEqual(timeline.index.status, "ready")

    search_obj = search.Search(sketch)
    search_obj.query_string = "data_type:foobarcsv"
    search_obj.commit()
    self.assertions.assertEqual(len(search_obj.table), event_count)

    # check that the number of events is correct with a different method
    events = sketch.explore("data_type:foobarcsv", as_pandas=True)
    self.assertions.assertEqual(len(events), event_count)

def test_large_upload_csv_over_flush_limit(self):
    """Test uploading a timeline with a lot of events (> 50k) as CSV.

    The test creates a temporary file with a large number of events
    and then uploads the file to Timesketch. It then checks that the
    number of events in the timeline is correct, both with the default
    query cap and with an increased max_entries limit.
    """
    # Create a new sketch. Use a descriptive name, consistent with the
    # other upload tests, instead of a bare random integer.
    rand = random.randint(0, 10000)
    sketch = self.api.create_sketch(
        name=f"test_large_upload_csv_over_flush_limit {rand}"
    )
    self.sketch = sketch

    file_path = "/tmp/verylarge.csv"
    event_count = 73251

    with open(file_path, "w", encoding="utf-8") as file_object:
        # CSV header with the columns the importer expects.
        file_object.write(
            '"message","timestamp","datetime","timestamp_desc","data_type"\n'
        )

        for i in range(event_count):
            # write a line with random values for message
            string = (
                f'"CSV Count: {i} {rand}","123456789",'
                '"2015-07-24T19:01:01+00:00","Write time","73kcsv"\n'
            )
            file_object.write(string)

    self.import_timeline(file_path, index_name=rand, sketch=sketch)
    os.remove(file_path)

    timeline = sketch.list_timelines()[0]
    # check that timeline was uploaded correctly
    self.assertions.assertEqual(timeline.name, file_path)
    self.assertions.assertEqual(timeline.index.name, str(rand))
    self.assertions.assertEqual(timeline.index.status, "ready")

    search_obj = search.Search(sketch)
    search_obj.query_string = "data_type:73kcsv"
    search_obj.commit()

    # normal max query limit caps the returned table at 10k events
    self.assertions.assertEqual(len(search_obj.table), 10000)
    self.assertions.assertEqual(search_obj.expected_size, event_count)

    # increase max entries returned to get the full result set
    search_obj.max_entries = 100000
    search_obj.commit()
    self.assertions.assertEqual(len(search_obj.table), event_count)

    # check that the number of events is correct with a different method
    events = sketch.explore("data_type:73kcsv", as_pandas=True, max_entries=100000)
    self.assertions.assertEqual(len(events), event_count)


manager.EndToEndTestManager.register_test(UploadTest)
Loading

0 comments on commit 9a78e6f

Please sign in to comment.