Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce (large) CSV import in e2e tests #2912

Merged
merged 20 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 14 additions & 16 deletions importer_client/python/timesketch_import_client/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,8 @@ def _fix_data_frame(self, data_frame):
data_frame["datetime"] = date.dt.strftime("%Y-%m-%dT%H:%M:%S%z")
except Exception: # pylint: disable=broad-except
logger.error(
"Unable to change datetime, is it badly formed?", exc_info=True
"Unable to change datetime, is it badly formed?",
berggren marked this conversation as resolved.
Show resolved Hide resolved
exc_info=True,
)

# TODO: Support labels in uploads/imports.
Expand Down Expand Up @@ -245,7 +246,6 @@ def _upload_data_buffer(self, end_stream, retry_count=0):
if not self._data_lines:
return None

start_time = time.time()
data = {
"name": self._timeline_name,
"sketch_id": self._sketch.id,
Expand All @@ -260,19 +260,13 @@ def _upload_data_buffer(self, end_stream, retry_count=0):
if self._upload_context:
data["context"] = self._upload_context

logger.debug(
"Data buffer ready for upload, took {0:.2f} seconds to "
"prepare.".format(time.time() - start_time)
)

response = self._sketch.api.session.post(self._resource_url, data=data)

# TODO: Investigate why the sleep is needed, fix the underlying issue
# and get rid of it here.
# A quick sleep is added here to prevent unexpected connection-refused
# errors.
time.sleep(2)

if response.status_code not in definitions.HTTP_STATUS_CODE_20X:
if retry_count >= self.DEFAULT_RETRY_LIMIT:
raise RuntimeError(
Expand All @@ -296,11 +290,6 @@ def _upload_data_buffer(self, end_stream, retry_count=0):
end_stream=end_stream, retry_count=retry_count + 1
)

logger.debug(
"Data buffer nr. {0:d} uploaded, total time: {1:.2f}s".format(
self._chunk, time.time() - start_time
)
)
self._chunk += 1
response_dict = response.json()
object_dict = response_dict.get("objects", [{}])[0]
Expand Down Expand Up @@ -449,7 +438,10 @@ def _upload_binary_file(self, file_path):
logger.warning(
"Error uploading data chunk {0:d}/{1:d}, retry "
"attempt {2:d}/{3:d}".format(
index, chunks, retry_count, self.DEFAULT_RETRY_LIMIT
index,
chunks,
retry_count,
self.DEFAULT_RETRY_LIMIT,
)
)

Expand Down Expand Up @@ -723,8 +715,14 @@ def close(self):
except ValueError:
return

if self._data_lines:
self.flush(end_stream=True)
# TODO(jaegeral): After the end of 11/2023, remove the commented-out code
# below if no new bugs have occurred.
# It is commented out because of
# https://github.com/google/timesketch/issues/2796.
# Testing did not show any issues: if flush is called with
# end_stream=True it will just return, so there is no reason to call it
# here.
# if self._data_lines:
# self.flush(end_stream=True)
jaegeral marked this conversation as resolved.
Show resolved Hide resolved

# Trigger auto analyzer pipeline to kick in.
pipe_resource = "{0:s}/sketches/{1:d}/analyzer/".format(
Expand Down
2 changes: 1 addition & 1 deletion importer_client/python/timesketch_import_client/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.
"""Version information for Timesketch Import Client."""

__version__ = "20230721"
__version__ = "20230926"


def get_version():
Expand Down