Skip to content

Commit

Permalink
fix format of running jobs added from resume hook
Browse files Browse the repository at this point in the history
  • Loading branch information
kalessin committed Aug 21, 2023
1 parent 3d3d20b commit 29d5099
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
7 changes: 5 additions & 2 deletions shub_workflow/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,11 @@ def workflow_loop(self) -> bool:

def resume_running_job_hook(self, job: JobDict):
key = job["key"]
job_args_override = cast(JobParams, job.get("spider_args", {}).copy())
job_args_override["tags"] = job["tags"]
spider_args = job.get("spider_args", {}).copy()
job_args_override = JobParams({
"tags": job["tags"],
"spider_args": spider_args,
})
self._running_job_keys[key] = job["spider"], job_args_override
_LOG.info(f"added running job {key}")

Expand Down
7 changes: 7 additions & 0 deletions tests/test_crawl_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,10 @@ def test_schedule_spider_with_resume(self, mocked_super_schedule_spider, mocked_
manager._on_start()
self.assertTrue(manager.is_resumed)
self.assertEqual(len(manager._running_job_keys), 1)

for v in manager._running_job_keys.values():
self.assertEqual(set(v[1].keys()), {"spider_args", "tags"})

self.assertEqual(mocked_get_jobs.call_count, len(mocked_get_jobs_side_effect))
mocked_add_job_tags.assert_any_call(tags=["FLOW_ID=3a20", "NAME=test", "OTHER=other"])

Expand Down Expand Up @@ -499,6 +503,9 @@ def set_parameters_gen(self):
self.assertTrue(manager.is_resumed)
self.assertEqual(len(manager._running_job_keys), 1)

for v in manager._running_job_keys.values():
self.assertEqual(set(v[1].keys()), {"spider_args", "tags"})

# first loop: acquire running job.
manager.is_finished = lambda x: None
result = next(manager._run_loops())
Expand Down

0 comments on commit 29d5099

Please sign in to comment.