Coverage for oc_ds_converter / oc_idmanager / crossref.py: 71%
84 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2024 Elia Rizzetto <elia.rizzetto2@unibo.it>
2# SPDX-FileCopyrightText: 2024-2026 Arcangelo Massari <arcangelo.massari@unibo.it>
3# SPDX-FileCopyrightText: 2026 Marta Soricetti <marta.soricetti@unibo.it>
4#
5# SPDX-License-Identifier: ISC
7from re import match, sub
9from oc_ds_converter.oc_idmanager.base import IdentifierManager
10from oc_ds_converter.oc_idmanager.oc_data_storage.redis_manager import RedisStorageManager
11from oc_ds_converter.oc_idmanager.oc_data_storage.storage_manager import StorageManager
12from oc_ds_converter.oc_idmanager.support import call_api
class CrossrefManager(IdentifierManager):
    """Identifier manager for Crossref member identifiers (``crossref:<digits>``).

    Validation results are cached through a storage manager; existence is
    checked against the Crossref ``members`` REST route when the API
    service is enabled.
    """

    def __init__(self, use_api_service: bool = True, storage_manager: StorageManager | None = None, testing: bool = True) -> None:
        """Crossref member ID manager constructor.

        Args:
            use_api_service: When True, ``exists`` queries the Crossref API.
            storage_manager: Storage backend used to cache validation
                results. When None, a ``RedisStorageManager`` is created.
            testing: Forwarded to ``RedisStorageManager`` when the default
                storage backend is created.
        """
        super().__init__()
        if storage_manager is None:
            self.storage_manager = RedisStorageManager(testing=testing)
        else:
            self.storage_manager = storage_manager
        self._api = "https://api.crossref.org/members/"
        self._api_funders = "https://api.crossref.org/funders/"
        # NOTE(review): the OpenAlex routes and URL prefix below are not used
        # anywhere in this class; kept in case external code reads them.
        self._api_works_route = r"https://api.openalex.org/works/"
        self._api_sources_route = r"https://api.openalex.org/sources/"
        self._use_api_service = use_api_service
        self._p = "crossref:"
        self._url_id_pref = "https://openalex.org/"

    def validated_as_id(self, id_string):
        """Return the cached validity of ``id_string``, or None if not cached.

        The identifier is looked up in the storage manager as given (it is
        not normalised here). Only a boolean stored value is returned.
        """
        crossref_validation_value = self.storage_manager.get_value(id_string)
        if isinstance(crossref_validation_value, bool):
            return crossref_validation_value
        return None

    def is_valid(self, cr_member_id, get_extra_info=False):
        """Check whether a Crossref member identifier is valid.

        The identifier is normalised, looked up in the storage manager and,
        if no boolean result is cached, verified through ``exists`` and
        ``syntax_ok``. The outcome is then written back to the storage.

        Args:
            cr_member_id: The identifier, with or without the ``crossref:``
                prefix.
            get_extra_info: When True, return a ``(valid, info)`` tuple
                instead of a bare boolean.

        Returns:
            ``bool``, or ``(bool, dict)`` when ``get_extra_info`` is True.
        """
        # Keep the caller's input so it can be reported if normalisation fails.
        raw_id = cr_member_id
        cr_member_id = self.normalise(cr_member_id, include_prefix=True)

        if cr_member_id is None:
            if get_extra_info:
                return False, {"id": raw_id, "valid": False}
            return False

        id_validation_value = self.storage_manager.get_value(cr_member_id)
        if isinstance(id_validation_value, bool):
            if get_extra_info:
                return id_validation_value, {"id": cr_member_id, "valid": id_validation_value}
            return id_validation_value

        if get_extra_info:
            result = self.exists(cr_member_id, get_extra_info=True)
            if isinstance(result, tuple):
                valid, info = result
                info_dict: dict[str, str | bool | object] = dict(info)
                self.storage_manager.set_full_value(cr_member_id, info_dict)
                return valid and self.syntax_ok(cr_member_id), info
            return False, {"id": cr_member_id, "valid": False}

        exists_result = self.exists(cr_member_id)
        validity_check = self.syntax_ok(cr_member_id) and bool(exists_result)
        self.storage_manager.set_value(cr_member_id, validity_check)
        return validity_check

    def normalise(self, id_string: str, include_prefix: bool = False) -> str | None:
        """Reduce ``id_string`` to its digits, optionally prefixed.

        A leading ``crossref:`` prefix is removed, then every non-digit
        character is stripped.

        Args:
            id_string: The identifier to normalise.
            include_prefix: When True, prepend ``crossref:`` to the result.

        Returns:
            The normalised identifier, or None when ``id_string`` is not a
            string. Note that an input without digits yields an empty
            identifier (``""`` or ``"crossref:"``), not None.
        """
        try:
            if id_string.startswith(self._p):
                digits = id_string[len(self._p):]
            else:
                digits = id_string
            digits = sub(r"\D", "", digits)
        except (AttributeError, TypeError):
            # Non-string input (e.g. None) cannot be normalised.
            return None
        return "%s%s" % (self._p if include_prefix else "", digits)

    def syntax_ok(self, id_string: str) -> bool:
        """Return True if ``id_string`` is ``crossref:`` followed by digits only.

        The prefix is added before matching when it is missing.
        """
        if not id_string.startswith(self._p):
            id_string = self._p + id_string
        return bool(match(r"^crossref:\d+$", id_string))

    def exists(
        self,
        id_string: str,
        get_extra_info: bool = False,
        allow_extra_api: str | None = None,
    ) -> bool | tuple[bool, dict[str, str | bool]]:
        """Check whether a Crossref member exists.

        When the API service is enabled, the Crossref ``members`` route is
        queried and the identifier is considered existing only if the
        returned message carries the same ``id``. When the API service is
        disabled, the identifier is reported as existing without any check.

        Args:
            id_string: The identifier, with or without the ``crossref:``
                prefix.
            get_extra_info: When True, return a ``(exists, info)`` tuple.
            allow_extra_api: Unused by this manager.

        Returns:
            ``bool``, or ``(bool, dict)`` when ``get_extra_info`` is True.
        """
        valid_bool = True
        cr_member_id_full = self._p + id_string if not id_string.startswith(self._p) else id_string

        if self._use_api_service:
            cr_member_id = self.normalise(cr_member_id_full)
            # An empty identifier would hit the bare members listing route,
            # whose message has no "id" and would compare equal to "".
            if not cr_member_id:
                if get_extra_info:
                    return False, {"id": cr_member_id_full, "valid": False}
                return False

            pref_cr_member_id = self._p + cr_member_id
            # self._headers is not set in this class; presumably provided by
            # IdentifierManager.__init__ -- TODO confirm.
            json_res = call_api(url=self._api + cr_member_id, headers=self._headers)
            if json_res and isinstance(json_res, dict):
                message = json_res.get("message")
                if isinstance(message, dict):
                    valid_bool = str(message.get("id", "")) == cr_member_id
                    if get_extra_info:
                        return valid_bool, {"id": pref_cr_member_id, "valid": valid_bool}
                    return valid_bool
            # No usable API response: treat the identifier as not existing.
            valid_bool = False

        if get_extra_info:
            return valid_bool, {"id": cr_member_id_full, "valid": valid_bool}
        return valid_bool

    def extra_info(
        self,
        api_response: dict[str, object],
        choose_api: str | None = None,
        info_dict: dict[str, object] | None = None,
    ) -> dict[str, object]:
        """Return the extra-information dictionary for an API response.

        The arguments are currently ignored; the result is always
        ``{"valid": True}``.
        """
        result: dict[str, object] = {"valid": True}
        return result