Skip to content

Commit f627940

Browse files
authored
Merge pull request #107 from nuest/fix-ci
Fix CI
2 parents 03844fc + 4c05b33 commit f627940

16 files changed

+193
-77
lines changed

.github/workflows/django.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ jobs:
1515
max-parallel: 4
1616
matrix:
1717
python-version: [
18-
3.8,
19-
#3.9
18+
3.11,
19+
#3.13
2020
]
2121

2222
services:
@@ -75,7 +75,7 @@ jobs:
7575

7676
- name: Upload screenshots
7777
if: always()
78-
uses: actions/upload-artifact@v2
78+
uses: actions/upload-artifact@v4
7979
with:
8080
name: screenshots for Python ${{ matrix.python-version }}
8181
path: |

README.md

Lines changed: 7 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,15 @@ source .venv/bin/activate
109109
# Confirm Python path
110110
which python
111111

112-
# Install required dependencies
112+
# Install GDAL and the Python GDAL bindings; see the Dockerfile for an example on Ubuntu
113+
gdalinfo --version
114+
115+
# Install non-GDAL Python dependencies
113116
pip install -r requirements.txt
114117

115-
# Create a local database container (once only)
116-
docker run --name optimapDB -p 5432:5432 -e POSTGRES_USER=optimap -e POSTGRES_PASSWORD=optimap -e POSTGRES_DB=optimap -d postgis/postgis:15-3.4
118+
# create local DB container (once)
119+
# docker run --name optimapDB -p 5432:5432 -e POSTGRES_USER=optimap -e POSTGRES_PASSWORD=optimap -e POSTGRES_DB=optimap -d postgis/postgis:14-3.3
120+
# get a clean one later: docker rm -f optimapDB
117121

118122
# Start the database container
119123
docker start optimapDB
@@ -190,22 +194,6 @@ Configuration for debugging with VS Code:
190194
"justMyCode": true
191195
}
192196
]
193-
"version": "0.2.0",
194-
"configurations": [
195-
{
196-
"name": "Python: Django Run",
197-
"type": "python",
198-
"request": "launch",
199-
"program": "${workspaceFolder}/manage.py",
200-
"args": ["runserver"],
201-
"env": {
202-
"OPTIMAP_DEBUG": "True",
203-
"OPTIMAP_CACHE": "dummy"
204-
},
205-
"django": true,
206-
"justMyCode": true
207-
}
208-
]
209197
}
210198
```
211199

@@ -306,17 +294,6 @@ A configuration to debug the test code and also print deprecation warnings:
306294
},
307295
"django": true,
308296
"justMyCode": true
309-
"name": "Python: Django Test",
310-
"type": "python",
311-
"request": "launch",
312-
"pythonArgs": ["-Wa"],
313-
"program": "${workspaceFolder}/manage.py",
314-
"args": ["test", "tests"],
315-
"env": {
316-
"OPTIMAP_DEBUG": "True"
317-
},
318-
"django": true,
319-
"justMyCode": true
320297
}
321298
```
322299

optimap/urls.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
from django.contrib import admin
1717
from django.urls import path, re_path, include
1818
from django.contrib.sitemaps import views as sitemaps_views
19-
from django.http import HttpResponse
2019
from publications.sitemaps import PublicationsSitemap, StaticViewSitemap
20+
from publications.views import RobotsView
2121

2222
sitemaps = {
2323
"static": StaticViewSitemap,
@@ -39,7 +39,7 @@
3939
{"sitemaps": sitemaps},
4040
name="django.contrib.sitemaps.views.sitemap",
4141
),
42-
re_path(r'^robots.txt', lambda request: HttpResponse("User-Agent: *\nDisallow:\nSitemap: %s://%s/sitemap.xml" % (request.scheme, request.site.domain), content_type="text/plain"), name="robots_file"),
42+
re_path(r'^robots.txt', RobotsView.as_view(), name="robots_file"),
4343
]
4444

4545
# https://stackoverflow.com/a/18272203/261210
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Generated by Django 5.1.7 on 2025-03-17 14:58
2+
3+
from django.db import migrations
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('publications', '0004_blockeddomain_blockedemail'),
10+
('publications', '0005_sentemaillog_sent_by'),
11+
]
12+
13+
operations = [
14+
]
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Generated by Django 5.1.7 on 2025-03-17 14:59
2+
3+
import django.db.models.deletion
4+
from django.conf import settings
5+
from django.db import migrations, models
6+
7+
8+
class Migration(migrations.Migration):
9+
10+
dependencies = [
11+
('publications', '0006_merge_20250317_1458'),
12+
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
13+
]
14+
15+
operations = [
16+
migrations.AddField(
17+
model_name='blockeddomain',
18+
name='blocked_by',
19+
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='blocked_domains', to=settings.AUTH_USER_MODEL),
20+
),
21+
migrations.AddField(
22+
model_name='blockedemail',
23+
name='blocked_by',
24+
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='blocked_emails', to=settings.AUTH_USER_MODEL),
25+
),
26+
migrations.CreateModel(
27+
name='EmailLog',
28+
fields=[
29+
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
30+
('recipient_email', models.EmailField(max_length=254)),
31+
('subject', models.CharField(max_length=255)),
32+
('sent_at', models.DateTimeField(auto_now_add=True)),
33+
('email_content', models.TextField(blank=True, null=True)),
34+
('trigger_source', models.CharField(choices=[('admin', 'Admin Panel'), ('scheduled', 'Scheduled Task'), ('manual', 'Manually Triggered')], default='manual', max_length=50)),
35+
('status', models.CharField(choices=[('d', 'Draft'), ('p', 'Published'), ('t', 'Testing'), ('w', 'Withdrawn'), ('h', 'Harvested')], default='success', max_length=10)),
36+
('error_message', models.TextField(blank=True, null=True)),
37+
('sent_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)),
38+
],
39+
),
40+
migrations.CreateModel(
41+
name='UserProfile',
42+
fields=[
43+
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
44+
('notify_new_manuscripts', models.BooleanField(default=False)),
45+
('user', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
46+
],
47+
),
48+
migrations.DeleteModel(
49+
name='SentEmailLog',
50+
),
51+
]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 5.1.7 on 2025-03-17 15:35
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('publications', '0007_blockeddomain_blocked_by_blockedemail_blocked_by_and_more'),
10+
]
11+
12+
operations = [
13+
migrations.AlterField(
14+
model_name='publication',
15+
name='doi',
16+
field=models.CharField(blank=True, max_length=1024, unique=True),
17+
),
18+
]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 5.1.7 on 2025-03-17 15:37
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('publications', '0008_alter_publication_doi'),
10+
]
11+
12+
operations = [
13+
migrations.AlterField(
14+
model_name='publication',
15+
name='doi',
16+
field=models.CharField(blank=True, max_length=1024, null=True, unique=True),
17+
),
18+
]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 5.1.7 on 2025-03-17 23:46
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('publications', '0009_alter_publication_doi'),
10+
]
11+
12+
operations = [
13+
migrations.AlterField(
14+
model_name='publication',
15+
name='title',
16+
field=models.TextField(),
17+
),
18+
]

publications/models.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
class Publication(models.Model):
1818
# required fields
19-
doi = models.CharField(max_length=1024, unique=True)
19+
title = models.TextField()
2020
status = models.CharField(max_length=1, choices=STATUS_CHOICES, default="d")
2121
created_by = CurrentUserField( # see useful hint at https://github.com/zsoldosp/django-currentuser/issues/69
2222
verbose_name=("Created by"),
@@ -33,10 +33,10 @@ class Publication(models.Model):
3333
)
3434

3535
# optional fields
36+
doi = models.CharField(max_length=1024, unique=True, blank=True, null=True)
3637
source = models.CharField(max_length=4096, null=True, blank=True) # journal, conference, preprint repo, ..
3738
provenance = models.TextField(null=True, blank=True)
38-
publicationDate = models.DateField(null=True,blank=True)
39-
title = models.TextField(null=True, blank=True)
39+
publicationDate = models.DateField(null=True, blank=True)
4040
abstract = models.TextField(null=True, blank=True)
4141
url = models.URLField(max_length=1024, null=True, blank=True)
4242
geometry = models.GeometryCollectionField(verbose_name='Publication geometry/ies', srid = 4326, null=True, blank=True)# https://docs.openalex.org/api-entities/sources

publications/tasks.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def extract_geometry_from_html(content):
2828
geom = json.loads(data)
2929

3030
geom_data = geom["features"][0]["geometry"]
31-
# preparing geometry data in accordance to geosAPI fields
31+
# preparing geometry data in accordance to geos API fields
3232
type_geom= {'type': 'GeometryCollection'}
3333
geom_content = {"geometries" : [geom_data]}
3434
type_geom.update(geom_content)
@@ -37,10 +37,10 @@ def extract_geometry_from_html(content):
3737
geom_object = GEOSGeometry(geom_data_string) # GeometryCollection object
3838
logging.debug('Found geometry: %s', geom_object)
3939
return geom_object
40-
except :
41-
print("Invalid Geometry")
40+
except Exception as e:
41+
logger.error("Cannot create geometry from string '%s': %s", geom_data_string, e)
4242
except ValueError as e:
43-
print("Not a valid GeoJSON")
43+
logger.error("Error loading JSON from %s: %s", tag.get("name"), e)
4444

4545
def extract_timeperiod_from_html(content):
4646
period = [None, None]
@@ -61,13 +61,20 @@ def parse_oai_xml_and_save_publications(content):
6161
for i in range(articles_count_in_journal):
6262
identifier = collection.getElementsByTagName("dc:identifier")
6363
identifier_value = identifier[i].firstChild.nodeValue
64+
logger.debug("Retrieving %s", identifier_value)
65+
6466
if identifier_value.startswith('http'):
6567

66-
with requests.get(identifier_value) as response:
67-
soup = BeautifulSoup(response.content, 'html.parser')
68+
try:
69+
with requests.get(identifier_value) as response:
70+
soup = BeautifulSoup(response.content, 'html.parser')
6871

69-
geom_object = extract_geometry_from_html(soup)
70-
period_start, period_end = extract_timeperiod_from_html(soup)
72+
geom_object = extract_geometry_from_html(soup)
73+
period_start, period_end = extract_timeperiod_from_html(soup)
74+
except Exception as e:
75+
logger.error("Error retrieving and extracting geometadata from URL %s: %s", identifier_value, e)
76+
logger.error("Continueing with the next article...")
77+
continue
7178

7279
else:
7380
geom_object = None
@@ -100,20 +107,20 @@ def parse_oai_xml_and_save_publications(content):
100107
abstract = abstract_text,
101108
publicationDate = date_value,
102109
url = identifier_value,
103-
journal = journal_value,
110+
source = journal_value,
104111
geometry = geom_object,
105112
timeperiod_startdate = period_start,
106113
timeperiod_enddate = period_end)
107114
publication.save()
108-
logger.info('Saved new publication for %s: %s', identifier_value, publication)
115+
logger.info('Saved new publication for %s: %s', identifier_value, publication.get_absolute_url())
109116

110117
def harvest_oai_endpoint(url):
111118
try:
112119
with requests.Session() as s:
113120
response = s.get(url)
114121
parse_oai_xml_and_save_publications(response.content)
115122
except requests.exceptions.RequestException as e:
116-
print ("The requested URL is invalid or has bad connection.Please change the URL")
123+
logger.error("The requested URL is invalid or has bad connection. Please check the URL: %s", url)
117124

118125
def send_monthly_email(trigger_source='manual', sent_by=None):
119126
recipients = User.objects.filter(userprofile__notify_new_manuscripts=True).values_list('email', flat=True)

0 commit comments

Comments
 (0)