Skip to content

Commit 0294700

Browse files
Cria método para extrair dados, lidando com problemas de padronização
1 parent f38caa2 commit 0294700

File tree

1 file changed

+31
-11
lines changed

1 file changed

+31
-11
lines changed

metrics/tasks.py

Lines changed: 31 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -277,18 +277,13 @@ def _process_line(line, utm, log_file):
277277
except Exception as e:
278278
_log_discarded_line(log_file, line, tracker_choices.LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION, str(e))
279279
return False
280+
281+
try:
282+
item_access_data = _extract_item_access_data(log_file.collection.acron3, translated_url)
283+
except Exception as e:
284+
_log_discarded_line(log_file, line, tracker_choices.LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION, str(e))
285+
return False
280286

281-
item_access_data = {
282-
'collection': log_file.collection.acron3,
283-
'scielo_issn': translated_url.get('scielo_issn'),
284-
'pid_v2': standardizer.standardize_pid_v2(translated_url.get('pid_v2')),
285-
'pid_v3': standardizer.standardize_pid_v3(translated_url.get('pid_v3')),
286-
'pid_generic': standardizer.standardize_pid_generic(translated_url.get('pid_generic')),
287-
'media_language': standardizer.standardize_language_code(translated_url.get('media_language')),
288-
'media_format': translated_url.get('media_format'),
289-
'content_type': translated_url.get('content_type'),
290-
}
291-
292287
if not is_valid_item_access_data(item_access_data):
293288
_log_discarded_line(
294289
log_file, line,
@@ -324,6 +319,31 @@ def _process_line(line, utm, log_file):
324319
return True
325320

326321

322+
def _extract_item_access_data(collection_acron3, translated_url):
    """
    Build the item access record for a single translated URL.

    The PID and language fields are passed through the matching
    ``standardizer`` helpers; the remaining fields are copied as found.

    Args:
        collection_acron3 (str): The acronym of the collection; stored
            unchanged under the ``collection`` key.
        translated_url (dict): The translated URL metadata. Every field is
            read with ``.get``, so a missing key is handed on as ``None``.

    Returns:
        dict: A dictionary with the keys ``collection``, ``scielo_issn``,
        ``pid_v2``, ``pid_v3``, ``pid_generic``, ``media_language``,
        ``media_format`` and ``content_type``.

    Note:
        No validation happens here and no exception is handled: anything
        raised by a standardizer (or by ``translated_url`` not supporting
        ``.get``) propagates to the caller.
    """
    # Bind the lookup once; fields are read in the same order as the
    # resulting dictionary keys.
    lookup = translated_url.get

    record = {'collection': collection_acron3}
    record['scielo_issn'] = lookup('scielo_issn')
    record['pid_v2'] = standardizer.standardize_pid_v2(lookup('pid_v2'))
    record['pid_v3'] = standardizer.standardize_pid_v3(lookup('pid_v3'))
    record['pid_generic'] = standardizer.standardize_pid_generic(lookup('pid_generic'))
    record['media_language'] = standardizer.standardize_language_code(lookup('media_language'))
    record['media_format'] = lookup('media_format')
    record['content_type'] = lookup('content_type')

    return record
345+
346+
327347
def _register_item_access(item_access_data, line, jou_id, art_id):
328348
"""
329349
Registers an item access in the database, creating necessary objects if they do not exist.

0 commit comments

Comments
 (0)