Skip to content

Commit 554d07e

Browse files
committed
Add health check endpoint for flight totals refresh task
- Celery task sets cache timestamp on completion
- Health endpoint checks cache instead of querying DB
- Returns 503 if task hasn't run in 20+ minutes
1 parent 8339637 commit 554d07e

File tree

4 files changed

+139
-0
lines changed

4 files changed

+139
-0
lines changed

adserver/tasks.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from django.conf import settings
88
from django.contrib.sites.shortcuts import get_current_site
99
from django.core import mail
10+
from django.core.cache import cache
1011
from django.db.models import Count
1112
from django.db.models import F
1213
from django.db.models import FloatField
@@ -963,6 +964,13 @@ def refresh_flight_denormalized_totals():
963964
duration,
964965
)
965966

967+
# Update cache with last successful run timestamp
968+
cache.set(
969+
"flight_totals_last_refresh",
970+
timezone.now().isoformat(),
971+
timeout=None, # Never expire
972+
)
973+
966974
# Alert if there are significant failures
967975
if error_count > 0 and error_count / max(total_flights, 1) > 0.1:
968976
slack_message(
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
"""Tests for health check endpoints."""
2+
3+
from datetime import timedelta
4+
5+
from django.core.cache import cache
6+
from django.test import TestCase
7+
from django.urls import reverse
8+
from django.utils import timezone
9+
10+
11+
class FlightTotalsHealthTest(TestCase):
    """Tests for the flight totals health check endpoint."""

    def setUp(self):
        """Clear cache before each test so no timestamp leaks between tests."""
        cache.clear()

    def test_health_check_no_cache(self):
        """Test health check returns 503 when task has never run."""
        response = self.client.get(reverse("health-flight-totals"))
        self.assertEqual(response.status_code, 503)
        data = response.json()
        self.assertEqual(data["status"], "error")
        self.assertIn("never run", data["message"])

    def test_health_check_recent_run(self):
        """Test health check returns 200 when task ran recently."""
        # Set cache to indicate task ran 5 minutes ago: old enough that the
        # reported age is non-zero, but still inside the 20 minute threshold.
        recent_time = timezone.now() - timedelta(minutes=5)
        cache.set("flight_totals_last_refresh", recent_time.isoformat())

        response = self.client.get(reverse("health-flight-totals"))
        self.assertEqual(response.status_code, 200)
        data = response.json()
        self.assertEqual(data["status"], "ok")
        # Bounds rather than an exact value so a slow test run can't flake.
        self.assertGreaterEqual(data["minutes_since_refresh"], 5)
        self.assertLess(data["minutes_since_refresh"], 20)

    def test_health_check_stale_run(self):
        """Test health check returns 503 when task hasn't run in a while."""
        # Set cache to indicate task ran 30 minutes ago (stale)
        stale_time = timezone.now() - timedelta(minutes=30)
        cache.set("flight_totals_last_refresh", stale_time.isoformat())

        response = self.client.get(reverse("health-flight-totals"))
        self.assertEqual(response.status_code, 503)
        data = response.json()
        self.assertEqual(data["status"], "error")
        self.assertGreater(data["minutes_since_refresh"], 20)

    def test_health_check_invalid_timestamp(self):
        """Test health check returns 503 when cache contains invalid data."""
        cache.set("flight_totals_last_refresh", "invalid-timestamp")

        response = self.client.get(reverse("health-flight-totals"))
        self.assertEqual(response.status_code, 503)
        data = response.json()
        self.assertEqual(data["status"], "error")
        self.assertIn("Invalid timestamp", data["message"])

adserver/urls.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
from .views import dashboard
6868
from .views import do_not_track
6969
from .views import do_not_track_policy
70+
from .views import flight_totals_health
7071

7172

7273
urlpatterns = [
@@ -82,6 +83,12 @@
8283
# Do not Track
8384
path(r".well-known/dnt/", do_not_track, name="dnt-status"),
8485
path(r".well-known/dnt-policy.txt", do_not_track_policy, name="dnt-policy"),
86+
# Health checks
87+
path(
88+
r"health/flight-totals/",
89+
flight_totals_health,
90+
name="health-flight-totals",
91+
),
8592
# Ad API
8693
path(r"api/v1/", include("adserver.api.urls")),
8794
# Staff interface

adserver/views.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,73 @@ def do_not_track(request):
159159
return JsonResponse(data, content_type="application/tracking-status+json")
160160

161161

162+
def flight_totals_health(request):
    """
    Health check endpoint for Flight denormalized totals refresh task.

    Reads the ISO timestamp stored under the ``flight_totals_last_refresh``
    cache key and compares it against the current time.

    Returns a ``JsonResponse`` with:

    * HTTP 200 and ``"status": "ok"`` when the timestamp is at most
      20 minutes old
    * HTTP 503 and ``"status": "error"`` when the cache key is missing or
      empty, the cached value can't be parsed as a datetime, or the
      timestamp is more than 20 minutes old

    This monitors the periodic Celery task that refreshes denormalized totals
    (total_views, total_clicks) on the Flight model.
    """
    from datetime import timedelta

    from django.core.cache import cache
    from django.utils.dateparse import parse_datetime

    # Get the last time the task successfully ran from cache
    last_refresh = cache.get("flight_totals_last_refresh")

    if not last_refresh:
        return JsonResponse(
            {
                "status": "error",
                "message": "Task has never run or cache was cleared",
            },
            status=503,
        )

    # Parse the ISO timestamp. ``parse_datetime`` returns None for badly
    # formatted input but raises ValueError for well formatted, impossible
    # dates and TypeError for non-string values; treat all three as invalid
    # so a corrupt cache entry yields a 503 instead of an unhandled exception.
    try:
        last_refresh_time = parse_datetime(last_refresh)
    except (TypeError, ValueError):
        last_refresh_time = None

    if not last_refresh_time:
        return JsonResponse(
            {
                "status": "error",
                "message": "Invalid timestamp in cache",
            },
            status=503,
        )

    # ``timezone.now()`` already returns an aware or naive datetime depending
    # on the USE_TZ setting, so no manual branching is needed here.
    now = timezone.now()

    # Subtracting a naive datetime from an aware one (or vice versa) raises
    # TypeError, so coerce the cached value to match ``now`` first.
    if timezone.is_aware(now) and timezone.is_naive(last_refresh_time):
        last_refresh_time = timezone.make_aware(last_refresh_time)
    elif timezone.is_naive(now) and timezone.is_aware(last_refresh_time):
        last_refresh_time = timezone.make_naive(last_refresh_time)

    # Calculate how long ago the task ran
    time_since_refresh = now - last_refresh_time
    minutes_since_refresh = int(time_since_refresh.total_seconds() / 60)

    # Task runs every 10 minutes, so alert if it hasn't run in 20 minutes
    max_staleness = timedelta(minutes=20)

    if time_since_refresh > max_staleness:
        return JsonResponse(
            {
                "status": "error",
                "last_refresh": last_refresh,
                "minutes_since_refresh": minutes_since_refresh,
                "max_minutes": int(max_staleness.total_seconds() / 60),
            },
            status=503,
        )

    return JsonResponse(
        {
            "status": "ok",
            "last_refresh": last_refresh,
            "minutes_since_refresh": minutes_since_refresh,
        },
        status=200,
    )
227+
228+
162229
def do_not_track_policy(request):
163230
"""
164231
Returns the Do Not Track policy.

0 commit comments

Comments
 (0)