Skip to content

Commit c4a93da

Browse files
Copilotnolan1999
andauthored
Fix job deletion error handling with specific JobDeletedException (#69)
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: nolan1999 <54246789+nolan1999@users.noreply.github.com> Co-authored-by: nolan1999 <arthur.jaques@hispeed.ch>
1 parent ea02ac2 commit c4a93da

File tree

7 files changed

+72
-19
lines changed

7 files changed

+72
-19
lines changed

tests/integration/endpoints/test_jobs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from workerfacing_api.core.filesystem import FileSystem, LocalFilesystem, S3Filesystem
1414
from workerfacing_api.core.queue import RDSJobQueue
1515
from workerfacing_api.crud import job_tracking
16+
from workerfacing_api.exceptions import JobDeletedException
1617
from workerfacing_api.schemas.queue_jobs import (
1718
AppSpecs,
1819
EnvironmentTypes,
@@ -277,7 +278,7 @@ def test_put_job_status_canceled(
277278
client.get(self.endpoint, params={"memory": 1})
278279

279280
def mock_update_job(*args: Any, **kwargs: Any) -> None:
280-
raise ValueError("Job not found")
281+
raise JobDeletedException("Job not found")
281282

282283
monkeypatch.setattr(job_tracking, "update_job", mock_update_job)
283284
res = client.put(f"{self.endpoint}/1/status", params={"status": "running"})

tests/unit/crud/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Empty init file
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
"""Tests for job_tracking module."""
2+
3+
from unittest.mock import MagicMock, patch
4+
5+
import pytest
6+
7+
from workerfacing_api.crud.job_tracking import update_job
8+
from workerfacing_api.exceptions import JobDeletedException
9+
from workerfacing_api.schemas.rds_models import JobStates
10+
11+
12+
@patch("workerfacing_api.crud.job_tracking.requests.put")
13+
def test_update_job_raises_job_deleted_exception(mock_put: MagicMock) -> None:
14+
"""Test that update_job raises JobDeletedException on 404 response."""
15+
# Mock a 404 response
16+
mock_response = MagicMock()
17+
mock_response.status_code = 404
18+
mock_put.return_value = mock_response
19+
20+
job_id = 789
21+
22+
with pytest.raises(JobDeletedException) as exc_info:
23+
update_job(job_id, JobStates.running)
24+
25+
assert f"Job {job_id} not found; it was probably deleted by the user." in str(
26+
exc_info.value
27+
)

workerfacing_api/core/queue.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from workerfacing_api import settings
2020
from workerfacing_api.crud import job_tracking
21+
from workerfacing_api.exceptions import JobDeletedException, JobNotAssignedException
2122
from workerfacing_api.schemas.queue_jobs import (
2223
EnvironmentTypes,
2324
JobFilter,
@@ -457,7 +458,7 @@ def pop(self, environment: EnvironmentTypes, receipt_handle: str) -> bool:
457458
job.workers = ";".join(job.workers.split(";") + [hostname])
458459
try:
459460
self._update_job_status(session, job, status=JobStates.pulled)
460-
except ValueError:
461+
except JobDeletedException:
461462
# job probably deleted by user
462463
return False
463464
return True
@@ -484,7 +485,7 @@ def get_job(
484485
if hostname:
485486
workers = job.workers.split(";")
486487
if not workers or hostname != workers[-1]:
487-
raise ValueError(
488+
raise JobNotAssignedException(
488489
f"Job with id {job_id} is not assigned to worker {hostname}"
489490
)
490491
return job
@@ -505,11 +506,11 @@ def _update_job_status(
505506
job_id = job.job["meta"]["job_id"]
506507
assert isinstance(job_id, int)
507508
job_tracking.update_job(job_id, status, runtime_details)
508-
except ValueError as e:
509+
except JobDeletedException as e:
509510
# job probably deleted by user
510511
session.delete(job)
511512
session.commit()
512-
raise ValueError(f"Could not update job, probably deleted by user: {e}")
513+
raise e from e
513514

514515
def update_job_status(
515516
self,
@@ -545,19 +546,27 @@ def handle_timeouts(
545546
# TODO: increase priority?
546547
job.num_retries += 1
547548
session.add(job)
548-
self.update_job_status(
549-
job.id,
550-
JobStates.queued,
551-
f"timeout {job.num_retries} (workers tried: {job.workers})",
552-
)
553-
n_retry += 1
549+
try:
550+
self.update_job_status(
551+
job.id,
552+
JobStates.queued,
553+
f"timeout {job.num_retries} (workers tried: {job.workers})",
554+
)
555+
n_retry += 1
556+
except JobDeletedException:
557+
# job probably deleted by user, skip updating status
558+
pass
554559
jobs_failed = jobs_timeout.filter(QueuedJob.num_retries >= max_retries)
555560
for job in jobs_failed:
556-
self.update_job_status(
557-
job.id,
558-
JobStates.error,
559-
"max retries reached",
560-
)
561-
n_failed += 1
561+
try:
562+
self.update_job_status(
563+
job.id,
564+
JobStates.error,
565+
"max retries reached",
566+
)
567+
n_failed += 1
568+
except JobDeletedException:
569+
# job probably deleted by user, skip updating status
570+
pass
562571
session.commit()
563572
return n_retry, n_failed

workerfacing_api/crud/job_tracking.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from fastapi.encoders import jsonable_encoder
33

44
import workerfacing_api.settings as settings
5+
from workerfacing_api.exceptions import JobDeletedException
56
from workerfacing_api.schemas.rds_models import JobStates
67

78

@@ -19,7 +20,7 @@ def update_job(
1920
headers={"x-api-key": settings.internal_api_key_secret},
2021
)
2122
if resp.status_code == 404:
22-
raise ValueError(
23+
raise JobDeletedException(
2324
f"Job {job_id} not found; it was probably deleted by the user."
2425
)
2526
resp.raise_for_status()

workerfacing_api/endpoints/jobs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from workerfacing_api.core.filesystem import FileSystem
1818
from workerfacing_api.core.queue import RDSJobQueue
1919
from workerfacing_api.dependencies import filesystem_dep, queue_dep
20+
from workerfacing_api.exceptions import JobDeletedException, JobNotAssignedException
2021
from workerfacing_api.schemas.files import FileHTTPRequest
2122
from workerfacing_api.schemas.queue_jobs import (
2223
EnvironmentTypes,
@@ -111,7 +112,7 @@ async def put_job_status(
111112
hostname = request.state.current_user.username
112113
try:
113114
queue.update_job_status(job_id, status, runtime_details, hostname=hostname)
114-
except ValueError:
115+
except (JobDeletedException, JobNotAssignedException):
115116
# acts as a "cancel job" signal to worker
116117
raise HTTPException(status_code=httpstatus.HTTP_404_NOT_FOUND)
117118

workerfacing_api/exceptions.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
"""Custom exceptions for the workerfacing API."""
2+
3+
4+
class JobDeletedException(Exception):
5+
"""Exception raised when a job has been deleted by the user."""
6+
7+
pass
8+
9+
10+
class JobNotAssignedException(Exception):
11+
"""Exception raised when a job is not assigned to the current user."""
12+
13+
pass

0 commit comments

Comments
 (0)