Skip to content
2 changes: 2 additions & 0 deletions optimap/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@
# Each env(...) call below names an OPTIMAP_* environment variable and passes
# the default that is used for it.
EMAIL_USE_SSL = env('OPTIMAP_EMAIL_USE_SSL', default=False)
BASE_URL = env("OPTIMAP_BASE_URL", default="http://localhost:8000")
EMAIL_IMAP_SENT_FOLDER = env('OPTIMAP_EMAIL_IMAP_SENT_FOLDER', default='')
# Both values default to the empty string. Presumably the credentials used
# when talking to OAI harvesting endpoints -- the consumer is not visible
# here; TODO confirm.
OAI_USERNAME = env("OPTIMAP_OAI_USERNAME", default="")
OAI_PASSWORD = env("OPTIMAP_OAI_PASSWORD", default="")
# Hard-coded rather than read through env(); the unit is not stated here
# (presumably seconds between outgoing e-mails -- TODO confirm).
EMAIL_SEND_DELAY = 2

MIDDLEWARE = [
Expand Down
49 changes: 48 additions & 1 deletion publications/admin.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from django.contrib import admin, messages
from leaflet.admin import LeafletGeoAdmin
from publications.models import Publication, BlockedEmail, BlockedDomain
from publications.models import Publication, Source, HarvestingEvent, BlockedEmail, BlockedDomain
from import_export.admin import ImportExportModelAdmin
from publications.tasks import harvest_oai_endpoint
from django_q.models import Schedule
from django.utils.timezone import now
from django_q.tasks import schedule
from publications.models import EmailLog, UserProfile
from publications.tasks import send_monthly_email, schedule_monthly_email_task
from django.contrib.auth import get_user_model
Expand All @@ -15,6 +19,35 @@ def make_public(modeladmin, request, queryset):
def make_draft(modeladmin, request, queryset):
queryset.update(status="d")

@admin.action(description="Trigger harvesting for selected sources")
def trigger_harvesting_for_specific(modeladmin, request, queryset):
    """Harvest every selected source right away.

    Calls ``harvest_oai_endpoint`` directly (not via a schedule) once per
    row of ``queryset``, passing the row's ``id``.

    Args:
        modeladmin: The ModelAdmin the action was invoked from (unused).
        request: The current admin request (unused).
        queryset: The sources ticked in the change list.
    """
    for selected_source in queryset:
        source_id = selected_source.id
        harvest_oai_endpoint(source_id)

@admin.action(description="Trigger harvesting for all sources")
def trigger_harvesting_for_all(modeladmin, request, queryset):
    """Harvest every ``Source`` in the database right away.

    The rows ticked in the change list (``queryset``) are not consulted;
    the action always iterates ``Source.objects.all()`` and calls
    ``harvest_oai_endpoint`` with each source's ``id``.

    Args:
        modeladmin: The ModelAdmin the action was invoked from (unused).
        request: The current admin request (unused).
        queryset: The selection made in the admin (unused).
    """
    for any_source in Source.objects.all():
        harvest_oai_endpoint(any_source.id)

@admin.action(description="Schedule harvesting for selected sources")
def schedule_harvesting(modeladmin, request, queryset):
    """Queue a one-off Django-Q harvest for each selected source.

    For every source in ``queryset`` a ``Schedule`` named
    "Manual Harvest Source <id>" is created that runs
    ``publications.tasks.harvest_oai_endpoint`` once, as soon as possible.
    A source that already has a schedule with that name is skipped and the
    admin user is told so.

    Args:
        modeladmin: The ModelAdmin the action was invoked from; used to
            send feedback messages.
        request: The current admin request the messages are attached to.
        queryset: The sources ticked in the change list.
    """
    # Count what is really created: skipped sources must not be reported as
    # scheduled in the closing message.
    scheduled_count = 0

    for source in queryset:
        schedule_name = f"Manual Harvest Source {source.id}"

        if Schedule.objects.filter(name=schedule_name).exists():
            modeladmin.message_user(request, f"Harvesting is already scheduled for Source {source.id}. Skipping.")
            continue  # Skip if already scheduled

        Schedule.objects.create(
            func='publications.tasks.harvest_oai_endpoint',
            # Schedule.args is stored as text, hence the str() around the id.
            args=str(source.id),
            schedule_type=Schedule.ONCE,
            next_run=now(),
            name=schedule_name,
        )
        scheduled_count += 1

    modeladmin.message_user(request, f"Harvesting scheduled for {scheduled_count} sources!")

@admin.action(description="Send Monthly Manuscript Email")
def trigger_monthly_email(modeladmin, request, queryset):
"""
Expand Down Expand Up @@ -61,6 +94,20 @@ class PublicationAdmin(LeafletGeoAdmin, ImportExportModelAdmin):

actions = [make_public,make_draft]

@admin.register(Source)
class SourceAdmin(admin.ModelAdmin):
    """Admin configuration for ``Source``: list columns, filter, search and harvest actions."""

    # Columns of the change list.
    list_display = (
        "id",
        "url_field",
        "harvest_interval_minutes",
        "last_harvest",
    )
    # Sidebar filter on the configured interval.
    list_filter = ("harvest_interval_minutes",)
    # The search box matches against the source URL.
    search_fields = ("url_field",)
    # Bulk actions offered in the change list.
    actions = [
        trigger_harvesting_for_specific,
        trigger_harvesting_for_all,
        schedule_harvesting,
    ]

@admin.register(HarvestingEvent)
class HarvestingEventAdmin(admin.ModelAdmin):
    """Admin configuration for ``HarvestingEvent``: list columns, filters and search."""

    # Columns of the change list.
    list_display = ("id", "source", "status", "started_at", "completed_at")
    # Sidebar filters on state and on the two timestamps.
    list_filter = ("status", "started_at", "completed_at")
    # Search by the URL of the related source. Source keeps its URL in
    # `url_field`; it has no `url` field, so the lookup must name `url_field`.
    search_fields = ("source__url_field",)


class EmailLogAdmin(admin.ModelAdmin):
list_display = (
"recipient_email",
Expand Down
59 changes: 36 additions & 23 deletions publications/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 5.1.7 on 2025-03-19 14:42
# Generated by Django 5.1.7 on 2025-03-19 19:34

import django.contrib.auth.models
import django.contrib.auth.validators
Expand Down Expand Up @@ -97,6 +97,38 @@ class Migration(migrations.Migration):
('sent_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)),
],
),
migrations.CreateModel(
name='Source',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('creationDate', models.DateTimeField(auto_now_add=True)),
('lastUpdate', models.DateTimeField(auto_now=True)),
('url_field', models.URLField(max_length=999)),
('harvest_interval_minutes', models.IntegerField(default=4320)),
('last_harvest', models.DateTimeField(auto_now_add=True, null=True)),
('created_by', django_currentuser.db.models.fields.CurrentUserField(default=django_currentuser.middleware.get_current_authenticated_user, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='%(app_label)s_%(class)s_creator', to=settings.AUTH_USER_MODEL, verbose_name='Created by')),
('updated_by', django_currentuser.db.models.fields.CurrentUserField(default=django_currentuser.middleware.get_current_authenticated_user, null=True, on_delete=django.db.models.deletion.CASCADE, on_update=True, related_name='%(app_label)s_%(class)s_updater', to=settings.AUTH_USER_MODEL, verbose_name='Updated by')),
],
),
migrations.CreateModel(
name='HarvestingEvent',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('started_at', models.DateTimeField(auto_now_add=True)),
('completed_at', models.DateTimeField(blank=True, null=True)),
('status', models.CharField(choices=[('pending', 'Pending'), ('in_progress', 'In Progress'), ('completed', 'Completed'), ('failed', 'Failed')], default='pending', max_length=16)),
('user', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)),
('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='harvesting_events', to='publications.source')),
],
),
migrations.CreateModel(
name='UserProfile',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('notify_new_manuscripts', models.BooleanField(default=False)),
('user', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
],
),
migrations.CreateModel(
name='Publication',
fields=[
Expand All @@ -110,36 +142,17 @@ class Migration(migrations.Migration):
('provenance', models.TextField(blank=True, null=True)),
('publicationDate', models.DateField(blank=True, null=True)),
('abstract', models.TextField(blank=True, null=True)),
('url', models.URLField(blank=True, max_length=1024, null=True)),
('url', models.URLField(blank=True, max_length=1024, null=True, unique=True)),
('geometry', django.contrib.gis.db.models.fields.GeometryCollectionField(blank=True, null=True, srid=4326, verbose_name='Publication geometry/ies')),
('timeperiod_startdate', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=1024, null=True), blank=True, null=True, size=None)),
('timeperiod_enddate', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=1024, null=True), blank=True, null=True, size=None)),
('created_by', django_currentuser.db.models.fields.CurrentUserField(default=django_currentuser.middleware.get_current_authenticated_user, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='%(app_label)s_%(class)s_creator', to=settings.AUTH_USER_MODEL, verbose_name='Created by')),
('job', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='publications', to='publications.harvestingevent')),
('updated_by', django_currentuser.db.models.fields.CurrentUserField(default=django_currentuser.middleware.get_current_authenticated_user, null=True, on_delete=django.db.models.deletion.CASCADE, on_update=True, related_name='%(app_label)s_%(class)s_updater', to=settings.AUTH_USER_MODEL, verbose_name='Updated by')),
],
options={
'ordering': ['-id'],
'constraints': [models.UniqueConstraint(fields=('doi', 'url'), name='unique_publication_entry')],
},
),
migrations.CreateModel(
name='Source',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('creationDate', models.DateTimeField(auto_now_add=True)),
('lastUpdate', models.DateTimeField(auto_now=True)),
('url_field', models.URLField(max_length=999)),
('harvest_interval_minutes', models.IntegerField(default=4320)),
('last_harvest', models.DateTimeField(auto_now_add=True, null=True)),
('created_by', django_currentuser.db.models.fields.CurrentUserField(default=django_currentuser.middleware.get_current_authenticated_user, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='%(app_label)s_%(class)s_creator', to=settings.AUTH_USER_MODEL, verbose_name='Created by')),
('updated_by', django_currentuser.db.models.fields.CurrentUserField(default=django_currentuser.middleware.get_current_authenticated_user, null=True, on_delete=django.db.models.deletion.CASCADE, on_update=True, related_name='%(app_label)s_%(class)s_updater', to=settings.AUTH_USER_MODEL, verbose_name='Updated by')),
],
),
migrations.CreateModel(
name='UserProfile',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('notify_new_manuscripts', models.BooleanField(default=False)),
('user', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
],
),
]
56 changes: 52 additions & 4 deletions publications/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.contrib.gis.db import models
from django.contrib.postgres.fields import ArrayField
from django_currentuser.db.models import CurrentUserField
from django_q.models import Schedule
from django.utils.timezone import now
from django.contrib.auth.models import AbstractUser, Group, Permission
import uuid
Expand Down Expand Up @@ -38,7 +39,7 @@ def restore(self):
logger.info(f"User {self.username} (ID: {self.id}) was restored.")

class Publication(models.Model):
# required fields
# required fields
title = models.TextField()
status = models.CharField(max_length=1, choices=STATUS_CHOICES, default="d")
created_by = CurrentUserField( # see useful hint at https://github.com/zsoldosp/django-currentuser/issues/69
Expand All @@ -61,21 +62,35 @@ class Publication(models.Model):
provenance = models.TextField(null=True, blank=True)
publicationDate = models.DateField(null=True, blank=True)
abstract = models.TextField(null=True, blank=True)
url = models.URLField(max_length=1024, null=True, blank=True)
url = models.URLField(max_length=1024, null=True, blank=True, unique=True)
geometry = models.GeometryCollectionField(verbose_name='Publication geometry/ies', srid = 4326, null=True, blank=True)# https://docs.openalex.org/api-entities/sources
timeperiod_startdate = ArrayField(models.CharField(max_length=1024, null=True), null=True, blank=True)
timeperiod_enddate = ArrayField(models.CharField(max_length=1024, null=True), null=True, blank=True)

# Linking to HarvestingEvent as "job"
job = models.ForeignKey(
'HarvestingEvent',
on_delete=models.CASCADE,
related_name='publications',
null=True,
blank=True
)


def get_absolute_url(self):
    """Return the path of this publication's JSON document in the v1 API.

    Example of the resulting resource:
    http://localhost:8000/api/v1/publications/5.json
    """
    path_template = "/api/v1/publications/%i.json"
    return path_template % self.id

class Meta:
    # Default ordering: descending primary key.
    ordering = ['-id']
    # The (doi, url) pair must be unique per row.
    # NOTE(review): `url` is declared with unique=True on the field itself,
    # so this pair constraint looks redundant whenever url is set -- confirm
    # it is still wanted.
    constraints = [
        models.UniqueConstraint(
            fields=['doi', 'url'],
            name='unique_publication_entry',
        ),
    ]


def __str__(self):
    """Return the publication title as the string representation."""
    # The stale `return self.doi` that preceded this line made it
    # unreachable; the title is what should be shown.
    return self.title

class Source(models.Model):
# automatic fields
Expand All @@ -94,7 +109,20 @@ class Source(models.Model):
url_field = models.URLField(max_length = 999)
harvest_interval_minutes = models.IntegerField(default=60*24*3)
last_harvest = models.DateTimeField(auto_now_add=True,null=True)


def save(self, *args, **kwargs):
    """Persist the source, then rebuild its recurring Django-Q harvest schedule.

    After the regular model save, any schedule named
    "Harvest Source <id>" is deleted and a fresh one is created that runs
    ``publications.tasks.harvest_oai_endpoint`` for this source every
    ``harvest_interval_minutes`` minutes.

    Args:
        *args: Forwarded unchanged to the parent ``save``.
        **kwargs: Forwarded unchanged to the parent ``save``.
    """
    # Save first so that self.id is available for the schedule name and args.
    super().save(*args, **kwargs)

    schedule_name = f"Harvest Source {self.id}"

    # Avoid duplicates: remove whatever an earlier save created.
    # NOTE(review): this runs on every save, so any save of a Source replaces
    # its schedule -- confirm that callers which only update bookkeeping
    # fields are fine with that.
    previous_schedules = Schedule.objects.filter(name=schedule_name)
    previous_schedules.delete()

    Schedule.objects.create(
        name=schedule_name,
        func='publications.tasks.harvest_oai_endpoint',
        args=str(self.id),
        schedule_type=Schedule.MINUTES,
        minutes=self.harvest_interval_minutes,
    )



class Subscription(models.Model):
name = models.CharField(max_length=4096)
search_term = models.CharField(max_length=4096,null=True)
Expand Down Expand Up @@ -169,6 +197,26 @@ class Meta:
model = Publication
fields = ('created_by','updated_by',)

class HarvestingEvent(models.Model):
    """A single harvesting run against a ``Source``.

    Records which source was harvested, an optional user reference, when the
    run started and completed, and its current status.
    """

    # Deleting a source deletes its events; a source reaches them through
    # `harvesting_events`.
    source = models.ForeignKey(
        'Source',
        related_name='harvesting_events',
        on_delete=models.CASCADE,
    )
    # Optional; set to NULL when the referenced user is deleted.
    user = models.ForeignKey(
        User,
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
    )
    # Filled in automatically when the row is first created.
    started_at = models.DateTimeField(auto_now_add=True)
    # Empty until a value is stored explicitly.
    completed_at = models.DateTimeField(blank=True, null=True)
    # One of the four states listed in `choices`; new rows start as 'pending'.
    status = models.CharField(
        max_length=16,
        default='pending',
        choices=(
            ('pending', 'Pending'),
            ('in_progress', 'In Progress'),
            ('completed', 'Completed'),
            ('failed', 'Failed'),
        ),
    )

    def __str__(self):
        """Describe the event by status, source URL and start time."""
        status = self.status
        source_url = self.source.url_field
        started = self.started_at
        return f"Harvesting Event ({status}) for {source_url} at {started}"


class UserProfile(models.Model):
user = models.OneToOneField(User, on_delete=models.CASCADE)
notify_new_manuscripts = models.BooleanField(default=False)
Expand Down
Loading
Loading