Coverage for oc_ds_converter / oc_idmanager / crossref.py: 71%

84 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2024 Elia Rizzetto <elia.rizzetto2@unibo.it> 

2# SPDX-FileCopyrightText: 2024-2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

3# SPDX-FileCopyrightText: 2026 Marta Soricetti <marta.soricetti@unibo.it> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7from re import match, sub 

8 

9from oc_ds_converter.oc_idmanager.base import IdentifierManager 

10from oc_ds_converter.oc_idmanager.oc_data_storage.redis_manager import RedisStorageManager 

11from oc_ds_converter.oc_idmanager.oc_data_storage.storage_manager import StorageManager 

12from oc_ds_converter.oc_idmanager.support import call_api 

13 

14 

class CrossrefManager(IdentifierManager):
    """Identifier manager for Crossref member identifiers.

    Identifiers are handled as strings of digits, optionally carrying the
    ``crossref:`` prefix. Validation results are cached through the configured
    storage manager and, when the API service is enabled, checked against the
    Crossref ``members`` route.
    """

    def __init__(self, use_api_service: bool = True, storage_manager: StorageManager | None = None, testing: bool = True) -> None:
        """Set up storage, API endpoints and the identifier prefix.

        Args:
            use_api_service: when False, ``exists`` never calls the remote API.
            storage_manager: cache backend; when None a ``RedisStorageManager``
                is created with the given ``testing`` flag.
            testing: forwarded to ``RedisStorageManager`` only when no storage
                manager is supplied.
        """
        super().__init__()
        self.storage_manager = (
            RedisStorageManager(testing=testing) if storage_manager is None else storage_manager
        )
        self._api = "https://api.crossref.org/members/"
        self._api_funders = "https://api.crossref.org/funders/"
        # NOTE(review): the OpenAlex routes and URL prefix below are not used
        # anywhere in this class; they look like leftovers from another
        # manager. Kept in case external code reads them -- confirm before removing.
        self._api_works_route = r"https://api.openalex.org/works/"
        self._api_sources_route = r"https://api.openalex.org/sources/"
        self._use_api_service = use_api_service
        self._p = "crossref:"
        self._url_id_pref = "https://openalex.org/"

    def validated_as_id(self, id_string: str) -> bool | None:
        """Return the cached validity of ``id_string``, or None if not cached.

        Only a boolean stored under ``id_string`` counts as a cached verdict;
        any other stored value is treated as "unknown".
        """
        cached = self.storage_manager.get_value(id_string)
        return cached if isinstance(cached, bool) else None

    def is_valid(self, cr_member_id, get_extra_info=False):
        """Check whether a Crossref member ID is syntactically valid and exists.

        Args:
            cr_member_id: the identifier, with or without the ``crossref:`` prefix.
            get_extra_info: when True, return a ``(validity, info_dict)`` tuple
                instead of a bare boolean.

        Returns:
            ``bool`` or ``(bool, dict)`` depending on ``get_extra_info``. The
            dictionary always carries the ``"id"`` and ``"valid"`` keys.
        """
        # Keep the caller's input: it is reported back when normalisation fails
        # (the original code referenced an undefined name here).
        raw_id = cr_member_id
        normalised_id = self.normalise(raw_id, include_prefix=True)

        if normalised_id is None:
            if get_extra_info:
                return False, {"id": raw_id, "valid": False}
            return False

        # A boolean already in storage is a previous verdict: reuse it.
        cached = self.storage_manager.get_value(normalised_id)
        if isinstance(cached, bool):
            if get_extra_info:
                return cached, {"id": normalised_id, "valid": cached}
            return cached

        if get_extra_info:
            result = self.exists(normalised_id, get_extra_info=True)
            if isinstance(result, tuple):
                found, info = result
                validity = found and self.syntax_ok(normalised_id)
                info_dict: dict[str, str | bool | object] = dict(info)
                # Keep the stored/returned flag in sync with the returned
                # verdict, which also depends on the syntax check.
                info_dict["valid"] = validity
                self.storage_manager.set_full_value(normalised_id, info_dict)
                return validity, info_dict
            return False, {"id": normalised_id, "valid": False}

        exists_result = self.exists(normalised_id)
        validity_check = self.syntax_ok(normalised_id) and bool(exists_result)
        self.storage_manager.set_value(normalised_id, validity_check)
        return validity_check

    def normalise(self, id_string: str, include_prefix: bool = False) -> str | None:
        """Reduce ``id_string`` to its digits, optionally re-adding the prefix.

        A leading ``crossref:`` prefix is dropped and every non-digit character
        is removed. An input without digits therefore yields an empty ID (just
        the prefix when ``include_prefix`` is True), which ``syntax_ok`` rejects.

        Returns:
            The normalised identifier, or None when ``id_string`` is not a
            string (e.g. None).
        """
        try:
            digits = sub(r"\D", "", id_string.removeprefix(self._p))
        except (AttributeError, TypeError):
            # Non-string input: no usable identifier.
            return None
        return f"{self._p if include_prefix else ''}{digits}"

    def syntax_ok(self, id_string: str) -> bool:
        """Return True when ``id_string`` is ``crossref:`` followed by digits only.

        The prefix is added first when missing, so bare digit strings pass.
        """
        if not id_string.startswith(self._p):
            id_string = self._p + id_string
        return bool(match(r"^crossref:\d+$", id_string))

    def exists(
        self,
        id_string: str,
        get_extra_info: bool = False,
        allow_extra_api: str | None = None,
    ) -> bool | tuple[bool, dict[str, str | bool]]:
        """Check the Crossref ``members`` route for the given member ID.

        When the API service is disabled no request is made and the ID is
        reported as existing.

        Args:
            id_string: the identifier, with or without the ``crossref:`` prefix.
            get_extra_info: when True, return ``(exists, info_dict)``.
            allow_extra_api: accepted for interface compatibility; unused here.

        Returns:
            ``bool`` or ``(bool, dict)`` depending on ``get_extra_info``.
        """
        valid_bool = True
        cr_member_id_full = id_string if id_string.startswith(self._p) else self._p + id_string

        if self._use_api_service:
            cr_member_id = self.normalise(cr_member_id_full)
            # An ID without digits would turn the request URL into the bare
            # member-listing route, whose "message" has no "id"; the comparison
            # below would then match "" == "" and wrongly report existence.
            if not cr_member_id:
                if get_extra_info:
                    return False, {"id": cr_member_id_full, "valid": False}
                return False

            pref_cr_member_id = self._p + cr_member_id
            # call_api is expected to return the decoded JSON body, or a falsy
            # value on failure -- confirm against oc_idmanager.support.
            json_res = call_api(url=self._api + cr_member_id, headers=self._headers)
            if json_res and isinstance(json_res, dict):
                message = json_res.get("message")
                if isinstance(message, dict):
                    # The record must describe exactly the requested member.
                    valid_bool = str(message.get("id", "")) == cr_member_id
                    if get_extra_info:
                        return valid_bool, {"id": pref_cr_member_id, "valid": valid_bool}
                    return valid_bool
            # No usable response: treat the ID as not existing.
            valid_bool = False

        if get_extra_info:
            return valid_bool, {"id": cr_member_id_full, "valid": valid_bool}
        return valid_bool

    def extra_info(
        self,
        api_response: dict[str, object],
        choose_api: str | None = None,
        info_dict: dict[str, object] | None = None,
    ) -> dict[str, object]:
        """Return the minimal info dictionary ``{"valid": True}``.

        The arguments are accepted for interface compatibility and are not
        inspected.
        """
        result: dict[str, object] = {"valid": True}
        return result
126 ) -> dict[str, object]: 

127 result: dict[str, object] = {"valid": True} 

128 return result