@@ -277,18 +277,13 @@ def _process_line(line, utm, log_file):
277277 except Exception as e :
278278 _log_discarded_line (log_file , line , tracker_choices .LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION , str (e ))
279279 return False
280+
281+ try :
282+ item_access_data = _extract_item_access_data (log_file .collection .acron3 , translated_url )
283+ except Exception as e :
284+ _log_discarded_line (log_file , line , tracker_choices .LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION , str (e ))
285+ return False
280286
281- item_access_data = {
282- 'collection' : log_file .collection .acron3 ,
283- 'scielo_issn' : translated_url .get ('scielo_issn' ),
284- 'pid_v2' : standardizer .standardize_pid_v2 (translated_url .get ('pid_v2' )),
285- 'pid_v3' : standardizer .standardize_pid_v3 (translated_url .get ('pid_v3' )),
286- 'pid_generic' : standardizer .standardize_pid_generic (translated_url .get ('pid_generic' )),
287- 'media_language' : standardizer .standardize_language_code (translated_url .get ('media_language' )),
288- 'media_format' : translated_url .get ('media_format' ),
289- 'content_type' : translated_url .get ('content_type' ),
290- }
291-
292287 if not is_valid_item_access_data (item_access_data ):
293288 _log_discarded_line (
294289 log_file , line ,
@@ -324,6 +319,31 @@ def _process_line(line, utm, log_file):
324319 return True
325320
326321
def _extract_item_access_data(collection_acron3, translated_url):
    """
    Build the item access data dictionary from a translated URL.

    The PID fields and the media language are passed through the matching
    ``standardizer`` helpers; the remaining fields are copied unchanged.
    Keys missing from ``translated_url`` are read as ``None`` (via ``.get``)
    before being standardized or stored.

    Args:
        collection_acron3 (str): The acronym of the collection, stored
            under the 'collection' key as given.
        translated_url (dict): The translated URL metadata; only its
            ``.get`` method is used.

    Returns:
        dict: A dictionary with the keys 'collection', 'scielo_issn',
            'pid_v2', 'pid_v3', 'pid_generic', 'media_language',
            'media_format' and 'content_type'. This function performs no
            validation and always returns the dictionary.

    Raises:
        Any exception raised by the ``standardizer`` helpers or by
        ``translated_url.get`` propagates to the caller.
    """
    # Bound once for brevity; values are still read and standardized in
    # the same field order as the keys below.
    lookup = translated_url.get

    return {
        'collection': collection_acron3,
        'scielo_issn': lookup('scielo_issn'),
        'pid_v2': standardizer.standardize_pid_v2(lookup('pid_v2')),
        'pid_v3': standardizer.standardize_pid_v3(lookup('pid_v3')),
        'pid_generic': standardizer.standardize_pid_generic(lookup('pid_generic')),
        'media_language': standardizer.standardize_language_code(lookup('media_language')),
        'media_format': lookup('media_format'),
        'content_type': lookup('content_type'),
    }
345+
346+
327347def _register_item_access (item_access_data , line , jou_id , art_id ):
328348 """
329349 Registers an item access in the database, creating necessary objects if they do not exist.
0 commit comments