Coverage for heritrace / apis / orcid.py: 95%

58 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-07-02 10:16 +0000

1# SPDX-FileCopyrightText: 2024-2025 Arcangelo Massari <arcangelo.massari@unibo.it> 

2# 

3# SPDX-License-Identifier: ISC 

4 

5from functools import lru_cache 

6from http import HTTPStatus 

7from urllib.parse import urlparse 

8 

9import requests 

10from flask import current_app 

11from rdflib import URIRef 

12 

# Shape of a bare ORCID iD such as "0000-0002-1825-0097": 16 characters in
# four hyphen-separated groups, i.e. 19 characters in total.
# Used by get_responsible_agent_uri to recognise a bare identifier.
13_ORCID_ID_LENGTH = 19 

# Number of "-" separators in a bare ORCID iD (four groups -> three hyphens).
14_ORCID_HYPHEN_COUNT = 3 

15 

16 

def is_orcid_url(url: str) -> bool:
    """Tell whether ``url`` points at the ``orcid.org`` host.

    Args:
        url (str): Candidate URL. Non-string values are accepted and
            simply yield ``False``.

    Returns:
        bool: ``True`` only when the parsed network location is exactly
        ``"orcid.org"`` (no ``www.`` prefix, port, or case folding).
    """
    # A string without a scheme has an empty netloc and is therefore rejected.
    return isinstance(url, str) and urlparse(url).netloc == "orcid.org"

22 

23 

24def extract_orcid_id(url: str) -> str | None: 

25 """Extract ORCID ID from URL.""" 

26 if not isinstance(url, str): 

27 return None 

28 path = urlparse(url).path.strip("/") 

29 return path.removeprefix("https://orcid.org/") 

30 

31 

def _orcid_name_part(name_data: dict, key: str) -> str:
    """Return the ``value`` of one name component, or ``""`` if absent or null."""
    return (name_data.get(key) or {}).get("value") or ""


@lru_cache(maxsize=1000)
def get_orcid_data(orcid_id: str) -> dict | None:
    """
    Fetch researcher data from ORCID API with caching.

    In demo mode, this function returns synthetic data without calling the external API.

    Fields in the API response may be JSON ``null`` rather than missing
    (for example a record with no family name), so every nested lookup
    tolerates ``None`` instead of assuming a dict.

    Note that ``lru_cache`` stores whatever this function returns, including
    ``None`` after a failed request, for the lifetime of the cache entry.

    Args:
        orcid_id (str): The ORCID identifier

    Returns:
        dict | None: A dict with the keys ``name``, ``other_names``,
        ``biography`` and ``orcid``, or ``None`` when the request fails,
        the response is not valid JSON, or the status is not 200 OK.
    """
    if current_app.config.get("ENV") == "demo":
        return {
            "name": f"Demo User ({orcid_id})",
            "other_names": [],
            "biography": "This is a synthetic user account for demo purposes.",
            "orcid": orcid_id,
        }

    headers = {"Accept": "application/json"}

    try:
        response = requests.get(
            f"https://pub.orcid.org/v3.0/{orcid_id}/person", headers=headers, timeout=5
        )
        if response.status_code != HTTPStatus.OK:
            return None
        data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers JSON decode failures on requests releases whose
        # decode error does not derive from RequestException.
        return None

    result = {
        "name": None,
        "other_names": [],
        "biography": None,
        "orcid": orcid_id,
    }

    # Main name: "name" itself, or either component, may be null.
    name_data = data.get("name") or {}
    given_name = _orcid_name_part(name_data, "given-names")
    family_name = _orcid_name_part(name_data, "family-name")
    if given_name or family_name:
        result["name"] = f"{given_name} {family_name}".strip()

    # Other names: keep only entries that actually carry a "content" key.
    other_names = (data.get("other-names") or {}).get("other-name") or []
    result["other_names"] = [
        entry["content"] for entry in other_names if "content" in entry
    ]

    # Biography is null when the researcher has not published one.
    biography = data.get("biography")
    if biography:
        result["biography"] = biography.get("content", "")

    return result

96 

97 

def get_responsible_agent_uri(user_identifier: str) -> URIRef:
    """Turn a user identifier into the URI of the responsible agent.

    Args:
        user_identifier (str): Either an absolute ``http(s)`` URL, a bare
            ORCID iD, or any other identifier string.

    Returns:
        URIRef: ``https://orcid.org/<id>`` when the identifier is not an
        ``http(s)`` URL and has the length and hyphen count of a bare
        ORCID iD; otherwise the identifier wrapped unchanged.
    """
    is_http_url = user_identifier.startswith(("http://", "https://"))
    has_orcid_shape = (
        user_identifier.count("-") == _ORCID_HYPHEN_COUNT
        and len(user_identifier) == _ORCID_ID_LENGTH
    )

    if has_orcid_shape and not is_http_url:
        return URIRef(f"https://orcid.org/{user_identifier}")

    # URLs and unrecognised identifiers are passed through as-is.
    return URIRef(user_identifier)

109 

110 

def format_orcid_attribution(url: str) -> str:
    """
    Format ORCID attribution for display.

    The URL, the researcher name and the extracted identifier are
    HTML-escaped before being interpolated, because the name comes from the
    external ORCID API and the URL from stored data; neither can be trusted
    to be free of markup.

    Args:
        url (str): The ORCID URL

    Returns:
        str: Formatted HTML for displaying ORCID attribution. Falls back to
        a plain link to ``url`` when no identifier can be extracted or no
        researcher data is available.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # str() keeps the fallback working for non-string input, as before.
    safe_url = escape(str(url))
    plain_link = f'<a href="{safe_url}" target="_blank">{safe_url}</a>'

    orcid_id = extract_orcid_id(url)
    if not orcid_id:
        return plain_link

    researcher_data = get_orcid_data(orcid_id)
    if not researcher_data:
        return plain_link

    # Show the URL when the record exposes no name.
    name = escape(str(researcher_data["name"] or url))

    return (
        f'<a href="{safe_url}" target="_blank" class="orcid-attribution">'
        '<img src="/static/images/orcid-logo.png"'
        ' alt="ORCID iD" class="orcid-icon mx-1 mb-1"'
        ' style="width: 16px; height: 16px;">'
        f"{name} [orcid:{escape(orcid_id)}]</a>"
    )