|
1 | 1 | import logging |
2 | 2 | logger = logging.getLogger(__name__) |
3 | 3 |
|
4 | | -from django_q.models import Schedule |
5 | | -from publications.models import Publication, HarvestingEvent, Source |
6 | | -from bs4 import BeautifulSoup |
| 4 | +import os |
7 | 5 | import json |
| 6 | +import subprocess |
| 7 | +import gzip |
| 8 | +import re |
| 9 | +import tempfile |
| 10 | +import time |
| 11 | +import calendar |
| 12 | +from datetime import datetime, timedelta |
8 | 13 | import xml.dom.minidom |
9 | | -from django.contrib.gis.geos import GEOSGeometry |
10 | 14 | import requests |
11 | | -from django.core.mail import send_mail, EmailMessage |
12 | | -from django.utils import timezone |
| 15 | +from bs4 import BeautifulSoup |
13 | 16 | from requests.auth import HTTPBasicAuth |
14 | | -import os |
| 17 | +from urllib.parse import quote |
15 | 18 | from django.conf import settings |
16 | | -from django.utils.timezone import now |
| 19 | +from django.core.mail import send_mail, EmailMessage |
| 20 | +from django.core.serializers import serialize |
| 21 | +from django.contrib.gis.geos import GEOSGeometry, GeometryCollection |
| 22 | +from django.utils import timezone |
| 23 | +from publications.models import Publication, HarvestingEvent, Source |
| 24 | +from .models import EmailLog, Subscription |
17 | 25 | from django.contrib.auth import get_user_model |
18 | 26 | User = get_user_model() |
19 | | -from .models import EmailLog, Subscription |
20 | | -from datetime import datetime, timedelta |
21 | 27 | from django.urls import reverse |
22 | 28 | from urllib.parse import quote |
23 | | -from datetime import datetime |
24 | | -from django_q.tasks import schedule |
25 | | -from django.utils import timezone |
26 | 29 | from django_q.tasks import schedule |
27 | 30 | from django_q.models import Schedule |
28 | | -import time |
29 | | -import calendar |
30 | | -import re |
31 | | -from django.contrib.gis.geos import GeometryCollection |
32 | 31 |
|
33 | 32 | BASE_URL = settings.BASE_URL |
| 33 | +DOI_REGEX = re.compile(r'10\.\d{4,9}/[-._;()/:A-Z0-9]+', re.IGNORECASE) |
34 | 34 |
|
35 | 35 | def extract_geometry_from_html(content): |
36 | 36 | for tag in content.find_all("meta"): |
@@ -211,7 +211,7 @@ def harvest_oai_endpoint(source_id, user=None): |
211 | 211 |
|
212 | 212 | def send_monthly_email(trigger_source='manual', sent_by=None): |
213 | 213 | recipients = User.objects.filter(userprofile__notify_new_manuscripts=True).values_list('email', flat=True) |
214 | | - last_month = now().replace(day=1) - timedelta(days=1) |
| 214 | + last_month = timezone.now().replace(day=1) - timedelta(days=1) |
215 | 215 | new_manuscripts = Publication.objects.filter(creationDate__month=last_month.month) |
216 | 216 |
|
217 | 217 | if not recipients.exists() or not new_manuscripts.exists(): |
@@ -319,4 +319,46 @@ def schedule_subscription_email_task(sent_by=None): |
319 | 319 | kwargs={'trigger_source': 'scheduled', 'sent_by': sent_by.id if sent_by else None} |
320 | 320 | ) |
321 | 321 | logger.info(f"Scheduled 'send_subscription_based_email' for {next_run_date}") |
| 322 | + |
def regenerate_geojson_cache():
    """Write the GeoJSON cache of publications and a gzipped copy of it.

    Publications with ``status="p"`` are serialized with Django's
    ``geojson`` serializer into ``geojson_cache.json`` inside the
    ``optimap_cache`` directory under the system temporary directory.
    A compressed copy is written next to it as ``geojson_cache.json.gz``.

    Returns:
        str: Path of the uncompressed GeoJSON file.
    """
    cache_dir = os.path.join(tempfile.gettempdir(), "optimap_cache")
    os.makedirs(cache_dir, exist_ok=True)

    json_path = os.path.join(cache_dir, 'geojson_cache.json')
    # Pin the encoding: without it open() falls back to the locale default,
    # so any non-ASCII text emitted by the serializer could raise
    # UnicodeEncodeError or yield a file that is not valid UTF-8 JSON.
    with open(json_path, 'w', encoding='utf-8') as f:
        serialize(
            'geojson',
            Publication.objects.filter(status="p"),
            geometry_field='geometry',
            srid=4326,
            stream=f
        )

    gzip_path = json_path + '.gz'
    with open(json_path, 'rb') as fin, gzip.open(gzip_path, 'wb') as fout:
        # Copy in fixed-size chunks; iterating a binary file splits on
        # newlines, which would pull a single-line JSON document into
        # memory in one piece.
        for chunk in iter(lambda: fin.read(64 * 1024), b''):
            fout.write(chunk)

    size = os.path.getsize(json_path)
    logger.info("Cached GeoJSON at %s (%d bytes), gzipped at %s", json_path, size, gzip_path)
    return json_path
| 344 | + |
def convert_geojson_to_geopackage(geojson_path):
    """Convert a GeoJSON file to a GeoPackage with the ``ogr2ogr`` CLI.

    The GeoPackage is written as ``publications.gpkg`` in the same
    directory as ``geojson_path``.

    Args:
        geojson_path: Path of the GeoJSON file to convert.

    Returns:
        The path of the GeoPackage on success so callers can stream or
        inspect it, or ``None`` when ``ogr2ogr`` could not be started or
        exited with a non-zero status. Failures are logged.
    """
    cache_dir = os.path.dirname(geojson_path)
    gpkg_path = os.path.join(cache_dir, 'publications.gpkg')
    try:
        # stderr is merged into stdout so diagnostics end up in the log.
        output = subprocess.check_output(
            ["ogr2ogr", "-f", "GPKG", gpkg_path, geojson_path],
            stderr=subprocess.STDOUT,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        # Record why the conversion failed instead of silently returning None.
        logger.error("ogr2ogr failed with exit code %s:\n%s", e.returncode, e.output)
        return None
    except OSError as e:
        # Raised when the ogr2ogr executable is missing or not executable.
        logger.error("Could not run ogr2ogr: %s", e)
        return None

    logger.info("ogr2ogr output:\n%s", output)
    return gpkg_path
| 360 | + |
322 | 361 |
|
def regenerate_geopackage_cache():
    """Rebuild the GeoJSON cache and convert the result to a GeoPackage.

    Returns:
        Whatever ``convert_geojson_to_geopackage`` returns for the freshly
        written GeoJSON file.
    """
    return convert_geojson_to_geopackage(regenerate_geojson_cache())
0 commit comments