Coverage for test/ResourceFinder_test.py: 91%

129 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-07-14 14:06 +0000

1import os 

2import unittest 

3 

4from rdflib import Graph 

5from SPARQLWrapper import POST, SPARQLWrapper 

6 

7from oc_meta.lib.finder import ResourceFinder 

8from rdflib import URIRef, ConjunctiveGraph, Graph 

9 

10 

def get_path(path:str) -> str:
    """Return *path* with every backslash replaced by a forward slash.

    The path is not resolved or made absolute; only the separator
    characters are rewritten.
    """
    return '/'.join(path.split('\\'))

15 

def add_data_ts(server, data_path, batch_size:int=100, default_graph_uri=URIRef("http://default.graph/")):
    """Parse an RDF file and insert its statements into a SPARQL endpoint.

    The file format is chosen from the extension of ``data_path``:
    ``.nt`` (N-Triples), ``.nq`` (N-Quads) or ``.ttl`` (Turtle). Statements
    are sent with ``INSERT DATA`` updates over POST, ``batch_size``
    statements per request.

    Args:
        server: URL of the SPARQL endpoint that accepts updates.
        data_path: Path of the RDF file to upload; backslashes are
            normalised to forward slashes before parsing.
        batch_size: Maximum number of statements per ``INSERT DATA`` request.
        default_graph_uri: Graph in which statements from ``.nt`` and
            ``.ttl`` files are stored. A falsy value inserts them without
            a ``GRAPH`` clause.

    Raises:
        ValueError: If the file extension is not ``.nt``, ``.nq`` or ``.ttl``.
    """
    f_path = get_path(data_path)

    # Determine the file format from its extension
    file_extension = os.path.splitext(f_path)[1].lower()
    if file_extension == '.nt':
        g = Graph()
        g.parse(location=f_path, format='nt')
    elif file_extension == '.nq':
        g = ConjunctiveGraph()
        g.parse(location=f_path, format='nquads')
    elif file_extension == '.ttl':
        g = Graph()
        g.parse(location=f_path, format='turtle')
    else:
        raise ValueError(f"Unsupported file extension: {file_extension}")

    if file_extension == '.nq':
        # N-Quads statements keep the context they were parsed with.
        quads = list(g.quads((None, None, None, None)))
    else:
        # N-Triples and Turtle files are loaded into a plain Graph, so every
        # triple is assigned to the default graph. Turtle input used to be
        # parsed and then dropped here, which uploaded nothing.
        quads = [(subj, pred, obj, default_graph_uri) for subj, pred, obj in g]

    # One client is enough: only the query text changes between batches.
    ts = SPARQLWrapper(server)
    ts.setMethod(POST)

    for i in range(0, len(quads), batch_size):
        statements = []
        for subj, pred, obj, ctx in quads[i:i + batch_size]:
            triple = f"{subj.n3()} {pred.n3()} {obj.n3()}"
            if ctx:
                # Strip square brackets from the context's n3 form
                # (presumably emitted when the context is a graph object
                # rather than a bare IRI -- confirm against rdflib).
                graph_ref = ctx.n3().replace('[', '').replace(']', '')
                statements.append(f"GRAPH {graph_ref} {{ {triple} }}")
            else:
                statements.append(f"{triple} .")

        payload = " ".join(statements)
        ts.setQuery(f"INSERT DATA {{ {payload} }}")
        ts.query()

57 

def reset_server(server) -> None:
    """Issue a ``CLEAR GRAPH`` update for each entity graph on *server*.

    The graphs cleared are the ``br``, ``ra``, ``re``, ``id`` and ``ar``
    graphs under ``https://w3id.org/oc/meta/``. Each update is sent as a
    separate POST request.
    """
    entity_graphs = {
        'https://w3id.org/oc/meta/br/',
        'https://w3id.org/oc/meta/ra/',
        'https://w3id.org/oc/meta/re/',
        'https://w3id.org/oc/meta/id/',
        'https://w3id.org/oc/meta/ar/',
    }
    client = SPARQLWrapper(server)
    for graph_iri in entity_graphs:
        client.setQuery(f'CLEAR GRAPH <{graph_iri}>')
        client.setMethod(POST)
        client.query()

64 

class TestResourceFinder(unittest.TestCase):
    """Exercise ResourceFinder lookups against fixture data loaded into a SPARQL endpoint.

    The endpoint at http://127.0.0.1:8805/sparql is cleared and repopulated
    once per class, so it must be running before the tests start.
    """

    @classmethod
    def setUpClass(cls):
        """Create the shared finder, reload the endpoint and prefetch the entities under test."""
        endpoint = 'http://127.0.0.1:8805/sparql'
        base_iri = 'https://w3id.org/oc/meta/'
        data_file = os.path.join('test', 'testcases', 'ts', 'real_data.nt')
        cls.finder = ResourceFinder(endpoint, base_iri, Graph())
        # Empty the endpoint, then upload the fixture data
        reset_server(server=endpoint)
        add_data_ts(server=endpoint, data_path=data_file)
        cls.finder.get_everything_about_res(
            metavals={'omid:br/2373', 'omid:br/2380', 'omid:br/2730', 'omid:br/2374', 'omid:br/4435', 'omid:br/4436', 'omid:br/4437', 'omid:br/4438', 'omid:br/0604750', 'omid:br/0605379', 'omid:br/0606696'},
            identifiers={'doi:10.1001/.391', 'orcid:0000-0001-6994-8412'},
            vvis={},
        )

    # A DOI attached to a single bibliographic resource
    def test_retrieve_br_from_id(self):
        found = self.finder.retrieve_br_from_id('doi', '10.1001/.391')
        title = 'Treatment Of Excessive Anticoagulation With Phytonadione (Vitamin K): A Meta-analysis'
        wanted = [('2373', title, [('2239', 'doi:10.1001/.391')])]
        self.assertEqual(found, wanted)

    # A DOI whose bibliographic resource carries two identifiers
    def test_retrieve_br_from_id_multiple_ids(self):
        found = self.finder.retrieve_br_from_id('doi', '10.1001/.405')
        title = "Neutropenia In Human Immunodeficiency Virus Infection: Data From The Women's Interagency HIV Study"
        wanted = [('2374', title, [('2240', 'doi:10.1001/.405'), ('5000', 'doi:10.1001/.406')])]
        self.assertEqual(found, wanted)

    # Lookup of a bibliographic resource by its meta id
    def test_retrieve_br_from_meta(self):
        found = self.finder.retrieve_br_from_meta('2373')
        wanted = (
            'Treatment Of Excessive Anticoagulation With Phytonadione (Vitamin K): A Meta-analysis',
            [('2239', 'doi:10.1001/.391')],
            True,
        )
        self.assertEqual(found, wanted)

    # Identifiers are compared as a set, and only the first two fields are checked
    def test_retrieve_br_from_meta_multiple_ids(self):
        raw = self.finder.retrieve_br_from_meta('2374')
        found = (raw[0], set(raw[1]))
        wanted = (
            "Neutropenia In Human Immunodeficiency Virus Infection: Data From The Women's Interagency HIV Study",
            {('2240', 'doi:10.1001/.405'), ('5000', 'doi:10.1001/.406')},
        )
        self.assertEqual(found, wanted)

    # Meta id of an identifier given its schema and value
    def test_retrieve_metaid_from_id(self):
        found = self.finder.retrieve_metaid_from_id('doi', '10.1001/.391')
        self.assertEqual(found, '2239')

    # Responsible agent without identifiers
    def test_retrieve_ra_from_meta(self):
        found = self.finder.retrieve_ra_from_meta('3308')
        self.assertEqual(found, ('Dezee, K. J.', [], True))

    # Responsible agent with an ORCID
    def test_retrieve_ra_from_meta_with_orcid(self):
        found = self.finder.retrieve_ra_from_meta('4940')
        wanted = ('Alarcon, Louis H.', [('4475', 'orcid:0000-0001-6994-8412')], True)
        self.assertEqual(found, wanted)

    # Responsible agent identified through a crossref id
    def test_retrieve_ra_from_meta_if_publisher(self):
        found = self.finder.retrieve_ra_from_meta('3309')
        wanted = ('American Medical Association (ama)', [('4274', 'crossref:10')], True)
        self.assertEqual(found, wanted)

    # Two agents share the ORCID; the result is sorted before comparing
    def test_retrieve_ra_from_id(self):
        found = self.finder.retrieve_ra_from_id('orcid', '0000-0001-6994-8412', publisher=False)
        orcid_ids = ('4475', 'orcid:0000-0001-6994-8412')
        wanted = [
            ('1000000', 'Alarcon, Louis H.', [orcid_ids]),
            ('4940', 'Alarcon, Louis H.', [orcid_ids]),
        ]
        self.assertEqual(sorted(found), wanted)

    # Same lookup with the publisher flag set
    def test_retrieve_ra_from_id_if_publisher(self):
        found = self.finder.retrieve_ra_from_id('crossref', '10', publisher=True)
        wanted = [('3309', 'American Medical Association (ama)', [('4274', 'crossref:10')])]
        self.assertEqual(found, wanted)

    # Author sequence of a bibliographic resource, in order
    def test_retrieve_ra_sequence_from_br_meta(self):
        found = self.finder.retrieve_ra_sequence_from_br_meta('2380', 'author')
        wanted = [
            {'5343': ('Hodge, James G.', [], '3316')},
            {'5344': ('Anderson, Evan D.', [], '3317')},
            {'5345': ('Kirsch, Thomas D.', [], '3318')},
            {'5346': ('Kelen, Gabor D.', [('4278', 'orcid:0000-0002-3236-8286')], '3319')},
        ]
        self.assertEqual(found, wanted)

    # Resource embodiment (id and page range) of a bibliographic resource
    def test_retrieve_re_from_br_meta(self):
        found = self.finder.retrieve_re_from_br_meta('2373')
        self.assertEqual(found, ('2011', '391-397'))

    # Full metadata record of a bibliographic resource
    def test_retrieve_br_info_from_meta(self):
        found = self.finder.retrieve_br_info_from_meta('2373')
        wanted = dict([
            ('pub_date', '2006-02-27'),
            ('type', 'journal article'),
            ('page', ('2011', '391-397')),
            ('issue', '4'),
            ('volume', '166'),
            ('venue', 'Archives Of Internal Medicine [omid:br/4387 issn:0003-9926]'),
        ])
        self.assertEqual(found, wanted)

185 

186 

# Run the suite when this file is executed directly rather than imported.
if __name__ == '__main__':  # pragma: no cover
    unittest.main()