Coverage for oc_ds_converter / metadata_manager.py: 16%

56 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2023 Arcangelo Massari <arcangelo.massari@unibo.it> 

2# 

3# SPDX-License-Identifier: ISC 

4 

5 

6import importlib 

7from urllib.parse import quote 

8 

9from oc_ds_converter.oc_idmanager.isbn import ISBNManager 

10from oc_ds_converter.oc_idmanager.issn import ISSNManager 

11from oc_ds_converter.oc_idmanager.orcid import ORCIDManager 

12 

13 

class MetadataManager:
    """Build a metadata dictionary from a registration-agency API response.

    The manager is configured with the name of the metadata provider and the
    response already obtained from it. ``extract_metadata`` dispatches to the
    provider-specific ``*Processing`` class of ``oc_ds_converter`` and merges
    whatever its ``csv_creator`` returns into a dictionary that always carries
    the provider under the ``'ra'`` key.
    """

    def __init__(self, metadata_provider: str, api_response: dict, orcid_doi_filepath: str):
        """Store the inputs and instantiate the identifier managers.

        Args:
            metadata_provider: Provider name, e.g. one of the entries in
                ``_have_api`` or the literal ``'unknown'``.
            api_response: Response previously fetched for the identifier.
                For the ``'unknown'`` provider it is indexed with ``[0]``,
                so a sequence is expected in that case.
            orcid_doi_filepath: Passed as ``orcid_index`` to the
                provider-specific processing class.
        """
        self.metadata_provider = metadata_provider
        self.api_response = api_response
        self.orcid_doi_filepath = orcid_doi_filepath
        self._issnm = ISSNManager()
        self._isbnm = ISBNManager()
        self._om = ORCIDManager()
        # Kept as a function-scope import, as in the original code
        # (presumably to avoid a circular import -- confirm before moving).
        from oc_ds_converter.oc_idmanager.doi import DOIManager
        self.doi_manager = DOIManager()
        # Providers for which a '<provider>_processing' module is looked up.
        self._have_api = ['crossref', 'datacite', 'medra', 'jalc']

    def extract_metadata(self) -> dict:
        """Return the metadata extracted from the stored API response.

        Returns:
            A dictionary with at least the ``'ra'`` key. When the provider or
            the response is ``None``, or the provider is neither ``'unknown'``
            nor listed in ``_have_api``, only ``{'ra': <provider>}`` is
            returned. For ``'unknown'`` the result of
            ``extract_from_unknown`` is returned.
        """
        metadata = {'ra': self.metadata_provider}
        if self.metadata_provider is None or self.api_response is None:
            return metadata
        if self.metadata_provider == 'unknown':
            return self.extract_from_unknown()
        if self.metadata_provider in self._have_api:
            # Resolve e.g. oc_ds_converter.crossref.crossref_processing.CrossrefProcessing.
            module = importlib.import_module(
                f'oc_ds_converter.{self.metadata_provider}.{self.metadata_provider}_processing'
            )
            processor_class = getattr(module, f'{self.metadata_provider.title()}Processing')
            metadata_processor = processor_class(orcid_index=self.orcid_doi_filepath)
            # For datacite only the 'data' member of the response is processed.
            if self.metadata_provider == 'datacite':
                api_response = self.api_response['data']
            else:
                api_response = self.api_response
            metadata.update(metadata_processor.csv_creator(api_response))
        return metadata

    def extract_from_unknown(self) -> dict:
        """Resolve the registration agency from the response and extract metadata.

        The first element of ``self.api_response`` is inspected:

        * ``status == 'Error'`` yields ``{'ra': 'unknown'}``;
        * ``status == 'DOI does not exist'`` yields ``{'ra': 'invalid'}``;
        * otherwise the lower-cased ``'RA'`` value becomes ``'ra'``. If the
          DOI manager exposes a truthy ``_api_<ra>`` attribute, that endpoint
          is called for the ``'DOI'`` value, ``self.metadata_provider`` and
          ``self.api_response`` are overwritten with the agency and the new
          response, and ``extract_metadata`` is run on them.

        Returns:
            The metadata dictionary described above.

        Raises:
            Exception: Whatever ``extract_metadata`` raised while processing
                the agency response; the DOI and agency are printed first.
        """
        metadata = {}
        api_response: dict = self.api_response[0]
        status = api_response.get('status')
        if status == 'Error':
            metadata['ra'] = 'unknown'
            return metadata
        if status == 'DOI does not exist':
            metadata['ra'] = 'invalid'
            return metadata

        registration_agency = api_response['RA'].lower()
        metadata['ra'] = registration_agency
        doi = api_response['DOI']
        # Default to None: an agency without a configured endpoint must fall
        # through to the plain {'ra': ...} result instead of raising
        # AttributeError.
        api_registration_agency = getattr(self.doi_manager, f'_api_{registration_agency}', None)
        if api_registration_agency:
            from oc_ds_converter.lib.file_manager import call_api
            url = api_registration_agency + quote(doi)
            r_format = 'xml' if registration_agency == 'medra' else 'json'
            extra_api_result = call_api(url=url, headers=self.doi_manager._headers, r_format=r_format)
            # extract_metadata reads these attributes, so they are replaced
            # with the resolved agency and its response before the call.
            self.metadata_provider = registration_agency
            self.api_response = extra_api_result
            try:
                metadata.update(self.extract_metadata())
            except Exception as e:
                print(doi, registration_agency)
                print(e)
                # Re-raise the original error so its type, message and
                # traceback are not replaced by an empty Exception.
                raise
        return metadata