Skip to content

Commit f9e2a05

Browse files
Fixed bug that caused reports to be skipped even when skip_existing is False (#3142)
* Fixed bug that caused reports to be skipped even when skip_existing is False
* Updated django and dependencies
1 parent a92a3a4 commit f9e2a05

File tree

3 files changed

+55
-45
lines changed

3 files changed

+55
-45
lines changed

Pipfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,6 @@ django-storages = { extras = ["s3"], version = "==1.14.6" }
4747
django-opensearch-dsl = "==0.7.0"
4848
django-structlog = {extras = ["celery"], version = "==9.1.1"}
4949
locust = "==2.42.1"
50-
django = "==4.2.26"
5150
defusedxml = "==0.7.1"
5251
django-ninja = "==1.4.3"
5352
urllib3 = "==2.5.0"
@@ -59,6 +58,7 @@ botocore = "==1.39.17"
5958
cron-descriptor = "~=1.4"
6059
certifi = "==2025.7.14"
6160
websocket-client = "<1.8.0"
61+
django = "==4.2.27"
6262

6363
[dev-packages]
6464
invoke = "==2.2.0"

Pipfile.lock

Lines changed: 13 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

concordia/tasks/reports/backfill.py

Lines changed: 41 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -47,9 +47,13 @@ def backfill_assets_started_for_site_reports(self, skip_existing: bool = True) -
4747
4848
* By default, rows that already have a non-null ``assets_started`` value
4949
are skipped (``skip_existing=True``), so the task can be re-run to
50-
resume where it left off.
50+
resume where it left off. In this mode, only series that still contain
51+
at least one snapshot with ``assets_started`` set to ``NULL`` are
52+
processed.
5153
* To recompute all rows, for example after changing the formula, call the
52-
task with ``skip_existing=False``.
54+
task with ``skip_existing=False``. In this mode, any series that has at
55+
least one snapshot is processed, even if all snapshots already have
56+
non-null ``assets_started`` values.
5357
5458
Args:
5559
skip_existing: If true, skip rows where ``assets_started`` is already
@@ -148,7 +152,8 @@ def process_series_queryset(
148152
current.save(update_fields=["assets_started"])
149153
changed += 1
150154

151-
# Per-row progress log for monitoring while the one-off task runs.
155+
# Per-row progress log for monitoring while the one-off task
156+
# runs.
152157
structured_logger.info(
153158
"Backfilled assets_started for SiteReport.",
154159
event_code="assets_started_backfill_row",
@@ -191,53 +196,59 @@ def process_series_queryset(
191196
return changed
192197

193198
# Site-wide TOTAL
194-
if SiteReport.objects.filter(
199+
total_base_qs = SiteReport.objects.filter(
195200
report_name=SiteReport.ReportName.TOTAL,
196201
campaign__isnull=True,
197202
topic__isnull=True,
198-
assets_started__isnull=True,
199-
).exists():
200-
total_qs = SiteReport.objects.filter(
201-
report_name=SiteReport.ReportName.TOTAL,
202-
campaign__isnull=True,
203-
topic__isnull=True,
204-
).order_by("created_on", "pk")
203+
)
204+
total_exists_qs = total_base_qs
205+
if skip_existing:
206+
total_exists_qs = total_exists_qs.filter(assets_started__isnull=True)
207+
208+
if total_exists_qs.exists():
209+
total_qs = total_base_qs.order_by("created_on", "pk")
205210
updated_count += process_series_queryset(total_qs, series_label="TOTAL")
206211

207212
# Site-wide RETIRED_TOTAL
208-
if SiteReport.objects.filter(
209-
report_name=SiteReport.ReportName.RETIRED_TOTAL,
210-
assets_started__isnull=True,
211-
).exists():
212-
retired_total_qs = SiteReport.objects.filter(
213-
report_name=SiteReport.ReportName.RETIRED_TOTAL
214-
).order_by("created_on", "pk")
213+
retired_base_qs = SiteReport.objects.filter(
214+
report_name=SiteReport.ReportName.RETIRED_TOTAL
215+
)
216+
retired_exists_qs = retired_base_qs
217+
if skip_existing:
218+
retired_exists_qs = retired_exists_qs.filter(assets_started__isnull=True)
219+
220+
if retired_exists_qs.exists():
221+
retired_total_qs = retired_base_qs.order_by("created_on", "pk")
215222
updated_count += process_series_queryset(
216223
retired_total_qs, series_label="RETIRED_TOTAL"
217224
)
218225

219226
# Per-campaign (includes retired campaigns; their historical reports remain)
220-
campaign_ids = (
221-
SiteReport.objects.filter(campaign__isnull=False, assets_started__isnull=True)
222-
.values_list("campaign_id", flat=True)
223-
.distinct()
224-
)
227+
campaign_base_qs = SiteReport.objects.filter(campaign__isnull=False)
228+
if skip_existing:
229+
campaign_ids_source = campaign_base_qs.filter(assets_started__isnull=True)
230+
else:
231+
campaign_ids_source = campaign_base_qs
232+
233+
campaign_ids = campaign_ids_source.values_list("campaign_id", flat=True).distinct()
225234
for campaign_id in campaign_ids.iterator():
226-
campaign_series = SiteReport.objects.filter(campaign_id=campaign_id).order_by(
235+
campaign_series = campaign_base_qs.filter(campaign_id=campaign_id).order_by(
227236
"created_on", "pk"
228237
)
229238
updated_count += process_series_queryset(
230239
campaign_series, series_label=f"CAMPAIGN:{campaign_id}"
231240
)
232241

233242
# Per-topic
234-
topic_ids = (
235-
SiteReport.objects.filter(topic__isnull=False, assets_started__isnull=True)
236-
.values_list("topic_id", flat=True)
237-
.distinct()
238-
)
243+
topic_base_qs = SiteReport.objects.filter(topic__isnull=False)
244+
if skip_existing:
245+
topic_ids_source = topic_base_qs.filter(assets_started__isnull=True)
246+
else:
247+
topic_ids_source = topic_base_qs
248+
249+
topic_ids = topic_ids_source.values_list("topic_id", flat=True).distinct()
239250
for topic_id in topic_ids.iterator():
240-
topic_series = SiteReport.objects.filter(topic_id=topic_id).order_by(
251+
topic_series = topic_base_qs.filter(topic_id=topic_id).order_by(
241252
"created_on", "pk"
242253
)
243254
updated_count += process_series_queryset(

0 commit comments

Comments
 (0)