Coverage for oc_ds_converter / metadata_manager.py: 16%
56 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2023 Arcangelo Massari <arcangelo.massari@unibo.it>
2#
3# SPDX-License-Identifier: ISC
6import importlib
7from urllib.parse import quote
9from oc_ds_converter.oc_idmanager.isbn import ISBNManager
10from oc_ds_converter.oc_idmanager.issn import ISSNManager
11from oc_ds_converter.oc_idmanager.orcid import ORCIDManager
class MetadataManager():
    """Turn a DOI-related API response into a flat metadata dictionary.

    The manager is configured with the name of the metadata provider
    (registration agency) and the raw API response obtained from it. When the
    provider is ``'unknown'``, the response is read as a registration-agency
    lookup result and, when the DOI manager exposes an endpoint for the agency
    found, a second API call is made to fetch the actual metadata.
    """

    def __init__(self, metadata_provider:str, api_response:dict, orcid_doi_filepath:str):
        """Store the inputs and create the identifier managers.

        :param metadata_provider: name of the agency that produced
            ``api_response`` (e.g. ``'crossref'``), or ``'unknown'``.
        :param api_response: raw API response. For the ``'unknown'`` provider
            it is indexed with ``[0]``, so it must be a sequence whose first
            item is a dict, despite the ``dict`` annotation.
        :param orcid_doi_filepath: passed as ``orcid_index`` to the
            provider-specific processing class.
        """
        self.metadata_provider = metadata_provider
        self.api_response = api_response
        self.orcid_doi_filepath = orcid_doi_filepath
        # These managers are created here but not used by the methods of this class.
        self._issnm = ISSNManager()
        self._isbnm = ISBNManager()
        self._om = ORCIDManager()
        # Function-scope import kept as in the original
        # (presumably to avoid a circular import -- TODO confirm).
        from oc_ds_converter.oc_idmanager.doi import DOIManager
        self.doi_manager = DOIManager()
        # Providers for which a `<provider>_processing` module is loaded dynamically.
        self._have_api = ['crossref', 'datacite', 'medra', 'jalc']

    def extract_metadata(self) -> dict:
        """Return the metadata extracted from ``self.api_response``.

        The result always contains the key ``'ra'``. When the provider or the
        response is ``None``, or the provider is not one of ``self._have_api``
        (and is not ``'unknown'``), only ``{'ra': <provider>}`` is returned.

        :return: a dictionary with ``'ra'`` plus whatever the provider-specific
            ``csv_creator`` returns.
        """
        metadata = {'ra': self.metadata_provider}
        if self.metadata_provider is None or self.api_response is None:
            return metadata
        if self.metadata_provider == 'unknown':
            return self.extract_from_unknown()
        if self.metadata_provider in self._have_api:
            provider = self.metadata_provider
            # Resolve e.g. oc_ds_converter.crossref.crossref_processing.CrossrefProcessing
            module = importlib.import_module(f'oc_ds_converter.{provider}.{provider}_processing')
            class_ = getattr(module, f'{provider.title()}Processing')
            metadata_processor = class_(orcid_index=self.orcid_doi_filepath)
            # For datacite, the payload handed to the processor is under the 'data' key.
            api_response = self.api_response['data'] if provider == 'datacite' else self.api_response
            metadata.update(metadata_processor.csv_creator(api_response))
        return metadata

    def extract_from_unknown(self) -> dict:
        """Resolve the registration agency from a lookup response, then extract.

        ``self.api_response[0]`` is inspected:

        * ``status == 'Error'`` -> ``{'ra': 'unknown'}``
        * ``status == 'DOI does not exist'`` -> ``{'ra': 'invalid'}``
        * otherwise the lower-cased ``'RA'`` value becomes ``'ra'``. If the DOI
          manager has a non-empty ``_api_<ra>`` attribute, that endpoint is
          called for the DOI, ``self.metadata_provider`` and
          ``self.api_response`` are overwritten with the agency and the new
          response, and :meth:`extract_metadata` is run on them.

        :return: the metadata dictionary, containing at least ``'ra'``.
        :raises Exception: if the follow-up extraction fails; the original
            error is attached as ``__cause__``.
        """
        api_response: dict = self.api_response[0]
        status = api_response.get('status')
        if status == 'Error':
            return {'ra': 'unknown'}
        if status == 'DOI does not exist':
            return {'ra': 'invalid'}

        registration_agency = api_response['RA'].lower()
        metadata = {'ra': registration_agency}
        doi = api_response['DOI']
        # Default to None so that an agency without a matching attribute is
        # treated like an agency with an empty endpoint instead of raising
        # AttributeError.
        api_registration_agency = getattr(self.doi_manager, f'_api_{registration_agency}', None)
        if api_registration_agency:
            from oc_ds_converter.lib.file_manager import call_api
            url = api_registration_agency + quote(doi)
            r_format = 'xml' if registration_agency == 'medra' else 'json'
            extra_api_result = call_api(url=url, headers=self.doi_manager._headers, r_format=r_format)
            # The instance is re-targeted to the resolved agency so that
            # extract_metadata() dispatches to its processor.
            self.metadata_provider = registration_agency
            self.api_response = extra_api_result
            try:
                metadata.update(self.extract_metadata())
            except Exception as e:
                print(doi, registration_agency)
                print(e)
                # Keep raising a plain Exception, as before, but with a message
                # and the original error chained as the cause.
                raise Exception(
                    f'Metadata extraction failed for DOI {doi} '
                    f'(registration agency: {registration_agency})'
                ) from e
        return metadata