Coverage for test / test_archive_manager.py: 100%

130 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-21 14:31 +0000

1# SPDX-FileCopyrightText: 2025 Arcangelo Massari <arcangelo.massari@unibo.it> 

2# 

3# SPDX-License-Identifier: ISC 

4 

5import json 

6import os 

7import shutil 

8import tempfile 

9import unittest 

10from unittest.mock import MagicMock, patch 

11from datetime import datetime 

12 

13import yaml 

14from crowdsourcing.archive_manager import ArchiveManager 

15import requests 

16 

17 

class TestArchiveManager(unittest.TestCase):
    """Tests for ``ArchiveManager`` run against a throwaway directory tree.

    Every test gets a fresh temporary directory containing a YAML config
    file and a reports directory; the manager under test is constructed
    from that config in ``setUp``.
    """

    # ------------------------------------------------------------------
    # Fixtures and private helpers
    # ------------------------------------------------------------------

    def setUp(self):
        """Set up test environment before each test."""
        # Create a temporary directory for test files
        self.test_dir = tempfile.mkdtemp()
        self.reports_dir = os.path.join(self.test_dir, "docs/validation_reports")
        os.makedirs(self.reports_dir)

        # Create test config file
        self.config = {
            "validation_reports": {
                "max_reports_before_archive": 3,
                "reports_dir": self.reports_dir,
                "index_file": os.path.join(self.reports_dir, "index.json"),
            },
            "zenodo": {
                "metadata_template": {
                    "title": "Test Archive",
                    "description": "Test Description",
                    "creators": [{"name": "Test Creator"}],
                    "access_right": "open",
                    "upload_type": "dataset",
                    "publication_date": datetime.now().strftime("%Y-%m-%d"),
                    "prereserve_doi": True,
                    "license": "CC0-1.0",
                    "keywords": ["OpenCitations", "crowdsourcing", "test"],
                }
            },
        }
        self.config_path = os.path.join(self.test_dir, "test_config.yaml")
        self._write_config()

        # Initialize archive manager with test config
        self.manager = ArchiveManager(config_path=self.config_path)

    def tearDown(self):
        """Clean up after each test.

        Some tests delete ``test_dir`` themselves and rely on the code under
        test to recreate it. If that recreation fails, the directory is
        already gone here, so removal errors are ignored instead of raising
        a second exception on top of the real test failure.
        """
        shutil.rmtree(self.test_dir, ignore_errors=True)

    def _write_config(self):
        """Dump ``self.config`` as YAML to ``self.config_path``."""
        with open(self.config_path, "w") as f:
            yaml.dump(self.config, f)

    def _read_index(self):
        """Return the index file parsed from disk (not via the manager)."""
        with open(self.manager.index_path) as f:
            return json.load(f)

    def _write_report(self, report_name, content):
        """Create a report file with ``content`` inside the reports directory."""
        report_path = os.path.join(self.reports_dir, report_name)
        with open(report_path, "w") as f:
            f.write(content)
        return report_path

    def _assert_index_is_empty(self):
        """Assert that the on-disk index holds no reports and no archive date."""
        index_data = self._read_index()
        self.assertEqual(index_data["github_reports"], {})
        self.assertEqual(index_data["zenodo_reports"], {})
        self.assertIsNone(index_data["last_archive"])

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_init_creates_index(self):
        """Test that initialization creates the index file if it doesn't exist."""
        self.assertTrue(os.path.exists(self.manager.index_path))
        self._assert_index_is_empty()

    def test_add_report(self):
        """Test adding a new report to the index."""
        report_name = "test_report.html"
        github_url = "https://example.com/report"
        self.manager.add_report(report_name, github_url)

        index_data = self._read_index()
        self.assertEqual(index_data["github_reports"][report_name], github_url)

    def test_get_report_url_github(self):
        """Test getting URL for a report that's on GitHub."""
        report_name = "test_report.html"
        github_url = "https://example.com/report"
        self.manager.add_report(report_name, github_url)

        url = self.manager.get_report_url(report_name)
        self.assertEqual(url, github_url)

    @patch.dict(
        os.environ, {"ENVIRONMENT": "development", "ZENODO_SANDBOX": "fake-token"}
    )
    @patch("crowdsourcing.zenodo_utils.create_deposition_resource")
    @patch("crowdsourcing.zenodo_utils.get_zenodo_token")
    @patch("requests.put")
    @patch("requests.post")
    def test_archive_reports(
        self, mock_post, mock_put, mock_get_token, mock_create_deposition
    ):
        """Test archiving reports to Zenodo."""
        # Response for the POST that creates the deposition
        mock_deposition_response = MagicMock()
        mock_deposition_response.json.return_value = {
            "id": "123",
            "links": {"bucket": "http://bucket-url"},
        }
        mock_deposition_response.raise_for_status = MagicMock()

        # Response for the POST that publishes the deposition
        mock_publish_response = MagicMock()
        mock_publish_response.json.return_value = {"doi": "10.5281/zenodo.123"}
        mock_publish_response.raise_for_status = MagicMock()

        # side_effect takes precedence over return_value, so the two POST
        # calls are answered in this order and no return_value is needed.
        mock_post.side_effect = [mock_deposition_response, mock_publish_response]

        mock_get_token.return_value = "fake-token"
        mock_put.return_value.raise_for_status = MagicMock()

        # Create test reports (one more than max_reports_before_archive)
        for i in range(4):
            report_name = f"validation_issue_{i}.html"
            self._write_report(report_name, f"Test report {i}")
            self.manager.add_report(report_name, f"https://example.com/{report_name}")

        # Check that all reports are in github_reports
        index_data = self._read_index()
        self.assertEqual(len(index_data["github_reports"]), 4)
        self.assertEqual(len(index_data["zenodo_reports"]), 0)

        # Verify that archival is needed
        self.assertTrue(self.manager.needs_archival())

        # Explicitly trigger archival
        self.manager.archive_reports()

        # Verify that all reports were archived
        index_data = self._read_index()
        self.assertEqual(len(index_data["github_reports"]), 0)
        self.assertEqual(len(index_data["zenodo_reports"]), 4)

        # One sub-test per report so a failure names the offending entry
        for archived_name, report_data in index_data["zenodo_reports"].items():
            with self.subTest(report=archived_name):
                self.assertTrue(report_data["doi"].startswith("https://doi.org/"))
                self.assertTrue(
                    report_data["url"].startswith(
                        "https://sandbox.zenodo.org/api/records/"
                    )
                )

    def test_get_report_url_zenodo(self):
        """Test getting URL for a report that's been archived to Zenodo."""
        # Add a report to zenodo_reports directly
        report_name = "archived_report.html"
        zenodo_data = {
            "url": "https://sandbox.zenodo.org/record/123/files/archived_report.html",
            "doi": "https://doi.org/10.5281/zenodo.123",
        }

        index_data = self.manager._load_index()
        index_data["zenodo_reports"][report_name] = zenodo_data
        self.manager._save_index(index_data)

        url = self.manager.get_report_url(report_name)
        self.assertEqual(url, zenodo_data["url"])  # Should return the direct URL

    def test_get_report_url_not_found(self):
        """Test getting URL for a non-existent report."""
        url = self.manager.get_report_url("non_existent.html")
        self.assertIsNone(url)

    def test_init_creates_directories(self):
        """Test that initialization creates necessary directories."""
        # Remove test directories
        shutil.rmtree(self.test_dir)

        # Create config file with test paths
        os.makedirs(os.path.dirname(self.config_path), exist_ok=True)
        self._write_config()

        # Reinitialize manager (should create all necessary directories and files)
        self.manager = ArchiveManager(config_path=self.config_path)

        # Check that directories were created
        self.assertTrue(os.path.exists(self.reports_dir))
        self.assertTrue(os.path.exists(os.path.dirname(self.manager.index_path)))
        self.assertTrue(os.path.exists(self.config_path))

    def test_add_report_creates_directories(self):
        """Test that adding a report creates necessary directories if they don't exist."""
        # Remove test directories
        shutil.rmtree(self.test_dir)

        # Add a report (should create directories)
        self.manager.add_report("test.html", "http://example.com/test")

        # Check that directories were created
        self.assertTrue(os.path.exists(self.reports_dir))
        self.assertTrue(os.path.exists(self.manager.index_path))

    def test_archive_reports_no_reports(self):
        """Test that archive_reports returns None when there are no reports to archive."""
        # Initialize empty index
        self.manager._init_index()

        # Call archive_reports with no reports
        result = self.manager.archive_reports()

        # Verify that None is returned
        self.assertIsNone(result)

        # Verify index remains unchanged
        self._assert_index_is_empty()

    @patch.dict(
        os.environ, {"ENVIRONMENT": "development", "ZENODO_SANDBOX": "fake-token"}
    )
    @patch("crowdsourcing.zenodo_utils.create_deposition_resource")
    @patch("crowdsourcing.zenodo_utils.get_zenodo_token")
    @patch("crowdsourcing.archive_manager.logger")
    def test_archive_reports_error(
        self, mock_logger, mock_get_token, mock_create_deposition
    ):
        """Test that archive_reports properly handles and logs errors."""
        # Setup error to be raised
        error_message = (
            "403 Client Error: FORBIDDEN for url: "
            "https://sandbox.zenodo.org/api/deposit/depositions"
        )
        mock_create_deposition.side_effect = requests.exceptions.HTTPError(
            error_message
        )

        # Create a test report to archive
        report_name = "test_report.html"
        self._write_report(report_name, "Test report")
        self.manager.add_report(report_name, "http://example.com/report")

        # Verify that exception is raised and error is logged
        with self.assertRaises(requests.exceptions.HTTPError) as context:
            self.manager.archive_reports()

        self.assertEqual(str(context.exception), error_message)
        mock_logger.error.assert_called_once_with(
            f"Failed to archive reports: {error_message}"
        )

        # Verify that index remains unchanged
        index_data = self._read_index()
        self.assertIn(report_name, index_data["github_reports"])
        self.assertEqual(len(index_data["zenodo_reports"]), 0)

260 

# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__": # pragma: no cover
    unittest.main()