Coverage for test / test_archive_manager.py: 100%
130 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-21 14:31 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-21 14:31 +0000
1# SPDX-FileCopyrightText: 2025 Arcangelo Massari <arcangelo.massari@unibo.it>
2#
3# SPDX-License-Identifier: ISC
5import json
6import os
7import shutil
8import tempfile
9import unittest
10from unittest.mock import MagicMock, patch
11from datetime import datetime
13import yaml
14from crowdsourcing.archive_manager import ArchiveManager
15import requests
class TestArchiveManager(unittest.TestCase):
    """Tests for ``ArchiveManager`` run against a throwaway directory tree.

    Each test gets a fresh temporary directory containing a YAML config file
    and a reports directory; the manager under test is built from that
    config. The archival tests patch ``requests`` and the
    ``crowdsourcing.zenodo_utils`` helpers, so no network access is needed.
    """

    # Environment variables applied (via ``patch.dict``) by the archival tests.
    _SANDBOX_ENV = {"ENVIRONMENT": "development", "ZENODO_SANDBOX": "fake-token"}

    def setUp(self):
        """Create the temporary tree, the config file and the manager."""
        self.test_dir = tempfile.mkdtemp()
        # Register removal right away: cleanups run even when the rest of
        # setUp raises (tearDown would be skipped and the directory leaked).
        # ignore_errors covers the tests that delete the directory themselves.
        self.addCleanup(shutil.rmtree, self.test_dir, ignore_errors=True)

        self.reports_dir = os.path.join(self.test_dir, "docs/validation_reports")
        os.makedirs(self.reports_dir)

        # Configuration handed to the manager through a YAML file.
        self.config = {
            "validation_reports": {
                "max_reports_before_archive": 3,
                "reports_dir": self.reports_dir,
                "index_file": os.path.join(self.reports_dir, "index.json"),
            },
            "zenodo": {
                "metadata_template": {
                    "title": "Test Archive",
                    "description": "Test Description",
                    "creators": [{"name": "Test Creator"}],
                    "access_right": "open",
                    "upload_type": "dataset",
                    "publication_date": datetime.now().strftime("%Y-%m-%d"),
                    "prereserve_doi": True,
                    "license": "CC0-1.0",
                    "keywords": ["OpenCitations", "crowdsourcing", "test"],
                }
            },
        }
        self.config_path = os.path.join(self.test_dir, "test_config.yaml")
        self._write_config()

        self.manager = ArchiveManager(config_path=self.config_path)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _write_config(self):
        """Dump ``self.config`` as YAML to ``self.config_path``."""
        with open(self.config_path, "w") as config_file:
            yaml.dump(self.config, config_file)

    def _read_index(self):
        """Return the index file parsed straight from disk."""
        with open(self.manager.index_path) as index_file:
            return json.load(index_file)

    def _create_report(self, report_name, content, github_url):
        """Write a report file into the reports dir and add it to the index."""
        report_path = os.path.join(self.reports_dir, report_name)
        with open(report_path, "w") as report_file:
            report_file.write(content)
        self.manager.add_report(report_name, github_url)

    def _assert_index_empty(self):
        """Assert the on-disk index holds no reports and no archive marker."""
        index_data = self._read_index()
        self.assertEqual(index_data["github_reports"], {})
        self.assertEqual(index_data["zenodo_reports"], {})
        self.assertIsNone(index_data["last_archive"])

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_init_creates_index(self):
        """Test that initialization creates the index file if it doesn't exist."""
        self.assertTrue(os.path.exists(self.manager.index_path))
        self._assert_index_empty()

    def test_add_report(self):
        """Test adding a new report to the index."""
        report_name = "test_report.html"
        github_url = "https://example.com/report"
        self.manager.add_report(report_name, github_url)

        index_data = self._read_index()
        self.assertEqual(index_data["github_reports"][report_name], github_url)

    def test_get_report_url_github(self):
        """Test getting URL for a report that's on GitHub."""
        report_name = "test_report.html"
        github_url = "https://example.com/report"
        self.manager.add_report(report_name, github_url)

        self.assertEqual(self.manager.get_report_url(report_name), github_url)

    @patch.dict(os.environ, _SANDBOX_ENV)
    @patch("crowdsourcing.zenodo_utils.create_deposition_resource")
    @patch("crowdsourcing.zenodo_utils.get_zenodo_token")
    @patch("requests.put")
    @patch("requests.post")
    def test_archive_reports(
        self, mock_post, mock_put, mock_get_token, mock_create_deposition
    ):
        """Test archiving reports to Zenodo."""
        # Two POST responses are queued: the deposition creation, then the
        # publish call. ``side_effect`` alone drives what ``requests.post``
        # returns, so no ``return_value`` is set. ``raise_for_status`` on the
        # mocked responses is an auto-created MagicMock and therefore a no-op.
        deposition_response = MagicMock()
        deposition_response.json.return_value = {
            "id": "123",
            "links": {"bucket": "http://bucket-url"},
        }
        publish_response = MagicMock()
        publish_response.json.return_value = {"doi": "10.5281/zenodo.123"}
        mock_post.side_effect = [deposition_response, publish_response]

        mock_get_token.return_value = "fake-token"

        # Four reports: one more than max_reports_before_archive (3).
        for i in range(4):
            report_name = f"validation_issue_{i}.html"
            self._create_report(
                report_name, f"Test report {i}", f"https://example.com/{report_name}"
            )

        # Before archiving everything is still listed under github_reports.
        index_data = self._read_index()
        self.assertEqual(len(index_data["github_reports"]), 4)
        self.assertEqual(len(index_data["zenodo_reports"]), 0)

        self.assertTrue(self.manager.needs_archival())

        # Explicitly trigger archival.
        self.manager.archive_reports()

        # Afterwards every report must have moved to zenodo_reports.
        index_data = self._read_index()
        self.assertEqual(len(index_data["github_reports"]), 0)
        self.assertEqual(len(index_data["zenodo_reports"]), 4)
        # Check each entry separately so a failure names the offending report.
        for report_name, report_data in index_data["zenodo_reports"].items():
            with self.subTest(report=report_name):
                self.assertTrue(report_data["doi"].startswith("https://doi.org/"))
                self.assertTrue(
                    report_data["url"].startswith(
                        "https://sandbox.zenodo.org/api/records/"
                    )
                )

    def test_get_report_url_zenodo(self):
        """Test getting URL for a report that's been archived to Zenodo."""
        report_name = "archived_report.html"
        zenodo_data = {
            "url": "https://sandbox.zenodo.org/record/123/files/archived_report.html",
            "doi": "https://doi.org/10.5281/zenodo.123",
        }

        # Inject the entry directly into zenodo_reports, bypassing archival.
        index_data = self.manager._load_index()
        index_data["zenodo_reports"][report_name] = zenodo_data
        self.manager._save_index(index_data)

        # The stored direct URL is expected back, not the DOI.
        self.assertEqual(self.manager.get_report_url(report_name), zenodo_data["url"])

    def test_get_report_url_not_found(self):
        """Test getting URL for a non-existent report."""
        self.assertIsNone(self.manager.get_report_url("non_existent.html"))

    def test_init_creates_directories(self):
        """Test that initialization creates necessary directories."""
        # Wipe everything setUp created.
        shutil.rmtree(self.test_dir)

        # Only the config file (and its parent directory) is restored; the
        # reports directory is left for the manager to create.
        os.makedirs(os.path.dirname(self.config_path), exist_ok=True)
        self._write_config()

        self.manager = ArchiveManager(config_path=self.config_path)

        self.assertTrue(os.path.exists(self.reports_dir))
        self.assertTrue(os.path.exists(os.path.dirname(self.manager.index_path)))
        self.assertTrue(os.path.exists(self.config_path))

    def test_add_report_creates_directories(self):
        """Test that adding a report creates necessary directories if they don't exist."""
        # Wipe everything setUp created; the existing manager is reused.
        shutil.rmtree(self.test_dir)

        self.manager.add_report("test.html", "http://example.com/test")

        self.assertTrue(os.path.exists(self.reports_dir))
        self.assertTrue(os.path.exists(self.manager.index_path))

    def test_archive_reports_no_reports(self):
        """Test that archive_reports returns None when there are no reports to archive."""
        self.manager._init_index()

        self.assertIsNone(self.manager.archive_reports())

        # The index must be left exactly as initialised.
        self._assert_index_empty()

    @patch.dict(os.environ, _SANDBOX_ENV)
    @patch("crowdsourcing.zenodo_utils.create_deposition_resource")
    @patch("crowdsourcing.zenodo_utils.get_zenodo_token")
    @patch("crowdsourcing.archive_manager.logger")
    def test_archive_reports_error(
        self, mock_logger, mock_get_token, mock_create_deposition
    ):
        """Test that archive_reports properly handles and logs errors."""
        error_message = (
            "403 Client Error: FORBIDDEN for url: "
            "https://sandbox.zenodo.org/api/deposit/depositions"
        )
        mock_create_deposition.side_effect = requests.exceptions.HTTPError(
            error_message
        )

        # One report is enough to give archive_reports something to do.
        report_name = "test_report.html"
        self._create_report(report_name, "Test report", "http://example.com/report")

        # The HTTP error must propagate to the caller unchanged...
        with self.assertRaises(requests.exceptions.HTTPError) as context:
            self.manager.archive_reports()
        self.assertEqual(str(context.exception), error_message)

        # ...and be logged exactly once through the module logger.
        mock_logger.error.assert_called_once_with(
            f"Failed to archive reports: {error_message}"
        )

        # A failed archival must leave the report listed under github_reports.
        index_data = self._read_index()
        self.assertIn(report_name, index_data["github_reports"])
        self.assertEqual(len(index_data["zenodo_reports"]), 0)
# Run the tests through unittest's command-line runner when this file is
# executed directly; the guard is excluded from coverage measurement.
if __name__ == "__main__": # pragma: no cover
    unittest.main()