Skip to content

Commit af54bde

Browse files
Merge pull request #104 from pitangainnovare/impl/optimize-index
Impl/optimize index
2 parents ae307b2 + 5fd7686 commit af54bde

36 files changed

+1365
-1197
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.12.1
1+
1.13.0

article/management/commands/__init__.py

Whitespace-only changes.
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
from django.core.management.base import BaseCommand
2+
3+
from article.tasks import task_load_article_from_opac, task_load_article_from_article_meta
4+
5+
6+
class Command(BaseCommand):
    """Queue one article-loading task per calendar year.

    For every year in the inclusive range ``--start-year`` .. ``--end-year``
    the command calls ``.delay(...)`` on the selected task with
    ``from_date='<year>-01-01'``, ``until_date='<year>-12-31'`` and the
    requested collection code, then reports the returned task id.

    The task is chosen with ``--task``: ``load_article_from_opac`` (the
    default) or ``load_article_from_article_meta``.
    """

    # The default task is OPAC and the default start year is 1990, so the
    # help text describes both sources and the actual default range.
    help = (
        'Generate task requests for loading article data from OPAC or '
        'Article Meta, one per year (default: from 1990 to 2025)'
    )

    def add_arguments(self, parser):
        """Register the year range, collection and task selection options."""
        parser.add_argument(
            '--start-year',
            type=int,
            default=1990,
            help='Start year (default: 1990)'
        )
        parser.add_argument(
            '--end-year',
            type=int,
            default=2025,
            help='End year (default: 2025)'
        )
        parser.add_argument(
            '--collection',
            type=str,
            default='scl',
            help='Collection code (default: scl)'
        )
        parser.add_argument(
            '--task',
            choices=['load_article_from_opac', 'load_article_from_article_meta'],
            default='load_article_from_opac',
            help='Task to execute (default: load_article_from_opac)',
        )

    def handle(self, *args, **options):
        """Queue the selected task once per year and print a summary.

        Raises:
            CommandError: if ``--start-year`` is greater than ``--end-year``.
                Without this check the loop would run zero times and the
                command would still report success.
        """
        # Imported locally so this block does not depend on edits to the
        # module-level import lines.
        from django.core.management.base import CommandError

        start_year = options['start_year']
        end_year = options['end_year']
        collection = options['collection']

        if start_year > end_year:
            raise CommandError(
                f'--start-year ({start_year}) must not be greater than '
                f'--end-year ({end_year})'
            )

        # The task choice does not change between years, so resolve it once.
        # Anything other than the Article Meta choice falls back to OPAC,
        # matching the parser default.
        if options['task'] == 'load_article_from_article_meta':
            task = task_load_article_from_article_meta
        else:
            task = task_load_article_from_opac

        self.stdout.write(
            self.style.SUCCESS(
                f'Generating task requests from {start_year} to {end_year} for collection: {collection}'
            )
        )

        total_tasks = 0

        for year in range(start_year, end_year + 1):
            from_date = f'{year}-01-01'
            until_date = f'{year}-12-31'

            self.stdout.write(f'Queuing task for year {year}...')

            # Queue the task for each year
            task_result = task.delay(
                from_date=from_date,
                until_date=until_date,
                collection=collection
            )

            total_tasks += 1

            self.stdout.write(
                self.style.SUCCESS(
                    f'✓ Task queued for year {year}: {from_date} to {until_date} (Task ID: {task_result.id})'
                )
            )

        self.stdout.write(
            self.style.SUCCESS(
                f'\nCompleted! {total_tasks} tasks have been queued successfully.'
            )
        )

article/models.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@ def metadata(cls, collection=None):
114114

115115
for a in qs.iterator():
116116
yield {
117-
'id': a.id,
118117
'collection': a.collection.acron3,
119118
'default_lang': a.default_lang,
120119
'files': a.files,

article/tasks.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def task_load_article_from_article_meta(self, from_date=None, until_date=None, d
6060
try:
6161
article, created = models.Article.objects.get_or_create(collection=col_obj, scielo_issn=jou.scielo_issn, pid_v2=obj.get('code'))
6262
if created or force_update:
63-
article.files = obj.get('files') or {}
63+
article.files = obj.get('pdfs') or {}
6464
article.processing_date = obj.get('processing_date') or ''
6565
article.publication_date = obj.get('publication_date') or ''
6666
article.publication_year = obj.get('publication_year') or ''
@@ -121,8 +121,8 @@ def task_load_article_from_opac(self, collection='scl', from_date=None, until_da
121121
article.pid_v3 = doc.get('pid_v3') or ''
122122
if not created:
123123
article.pid_v2 = doc.get('pid_v2') or ''
124-
article.publication_date = doc.get('publication_date') or ''
125-
article.default_lang = doc.get('default_language') or ''
124+
article.publication_date = doc.get('publication_date') or article.publication_date or ''
125+
article.default_lang = doc.get('default_language') or article.default_lang or ''
126126

127127
try:
128128
article.publication_year = article.publication_date[:4]

article/wagtail_hooks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class ArticleSnippetViewSet(SnippetViewSet):
2222
"pid_v3",
2323
"pid_generic",
2424
"files",
25-
"publication_date",
25+
"publication_year",
2626
)
2727
list_filter = (
2828
"collection",

compose/local/django/celery/worker/start

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ set -o errexit
44
set -o nounset
55

66

7-
watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concurrency=4
7+
watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concurrency=1

compose/production/django/celery/worker/start

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ set -o pipefail
55
set -o nounset
66

77

8-
exec celery -A config.celery_app worker -l INFO --concurrency=4
8+
exec celery -A config.celery_app worker -l INFO --concurrency=1

config/settings/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@
407407
# Elasticsearch
408408
# ------------------------------------------------------------------------------
409409
ES_URL = env("ES_URL", default="http://192.168.0.33:9200/")
410-
ES_INDEX_NAME = env("ES_INDEX_NAME", default="usage-daily")
410+
ES_INDEX_NAME = env("ES_INDEX_NAME", default="usage")
411411
ES_API_KEY = env("ES_API_KEY", default="")
412412
ES_BASIC_AUTH = env("ES_BASIC_AUTH", default=("elastic", "iHktg66E"))
413413
ES_VERIFY_CERTS = env.bool("ES_VERIFY_CERTS", default=False)

0 commit comments

Comments
 (0)