Coverage for test / test_process_issues.py: 100%

736 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-21 14:31 +0000

1#!/usr/bin/python 

2 

3# SPDX-FileCopyrightText: 2022-2025 Arcangelo Massari <arcangelo.massari@unibo.it> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7import json 

8import os 

9import shutil 

10import unittest 

11from datetime import datetime 

12from unittest.mock import MagicMock, patch 

13 

14import requests 

15import yaml 

16from dotenv import load_dotenv 

17from crowdsourcing.process_issues import ( 

18 _create_deposition_resource, 

19 _get_zenodo_token, 

20 _upload_data, 

21 _validate_title, 

22 answer, 

23 deposit_on_zenodo, 

24 get_data_to_store, 

25 get_open_issues, 

26 get_user_id, 

27 is_in_safe_list, 

28 process_open_issues, 

29 validate, 

30) 

31from requests.exceptions import RequestException 

32 

33load_dotenv() # Load the variables from the .env file

34 

35 

class TestTitleValidation(unittest.TestCase):
    """Exercise ``_validate_title`` with well-formed and malformed issue titles.

    Every test passes a title string to ``_validate_title`` and checks the
    returned ``(is_valid, message)`` pair: accepted titles are expected to
    come back with an empty message, rejected ones with an explanatory one.
    """

    def test_valid_doi_title(self):
        """Test that a valid DOI title is accepted"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        is_valid, message = _validate_title(title)
        self.assertTrue(is_valid)
        self.assertEqual(message, "")

    def test_valid_isbn_title(self):
        """Test that a valid ISBN title is accepted"""
        title = "deposit publisher.com isbn:9780134093413"
        is_valid, message = _validate_title(title)
        self.assertTrue(is_valid)
        self.assertEqual(message, "")

    def test_missing_deposit_keyword(self):
        """Test that title without 'deposit' keyword is rejected"""
        title = "submit journal.com doi:10.1007/s42835-022-01029-y"
        is_valid, message = _validate_title(title)
        self.assertFalse(is_valid)
        self.assertIn("title of the issue was not structured correctly", message)

    def test_unsupported_identifier(self):
        """Test that unsupported identifier types are rejected"""
        title = "deposit journal.com arxiv:2203.01234"
        is_valid, message = _validate_title(title)
        self.assertFalse(is_valid)
        self.assertEqual(message, "The identifier schema 'arxiv' is not supported")

    def test_invalid_doi(self):
        """Test that invalid DOI format is rejected"""
        title = "deposit journal.com doi:invalid-doi-format"
        is_valid, message = _validate_title(title)
        self.assertFalse(is_valid)
        self.assertIn("is not a valid DOI", message)

    def test_malformed_title(self):
        """Test that malformed title structure is rejected"""
        title = "deposit doi:10.1007/s42835-022-01029-y"  # missing domain
        is_valid, message = _validate_title(title)
        self.assertFalse(is_valid)
        self.assertIn("title of the issue was not structured correctly", message)

    def test_unsupported_schema(self):
        """Test that an unsupported identifier schema returns appropriate error"""
        title = "deposit journal.com issn:1234-5678"  # issn is not in supported schemas
        is_valid, message = _validate_title(title)
        self.assertFalse(is_valid)
        # No debug print here: a failing assertEqual already reports the
        # actual message, and printing on success only clutters test output.
        self.assertEqual(message, "The identifier schema 'issn' is not supported")

    def test_valid_temp_id_title(self):
        """Test that a valid temporary ID title is accepted"""
        title = "deposit journal.com temp:12345"
        is_valid, message = _validate_title(title)
        self.assertTrue(is_valid)
        self.assertEqual(message, "")

    def test_valid_local_id_title(self):
        """Test that a valid local ID title is accepted"""
        title = "deposit journal.com local:record123"
        is_valid, message = _validate_title(title)
        self.assertTrue(is_valid)
        self.assertEqual(message, "")

    def test_invalid_temp_id_format(self):
        """Test that invalid temporary ID format is rejected"""
        title = "deposit journal.com temp12345"  # Missing colon
        is_valid, message = _validate_title(title)
        self.assertFalse(is_valid)
        self.assertIn("title of the issue was not structured correctly", message)

    def test_invalid_local_id_format(self):
        """Test that invalid local ID format is rejected"""
        title = "deposit journal.com local.record123"  # Wrong separator
        is_valid, message = _validate_title(title)
        self.assertFalse(is_valid)
        self.assertIn("title of the issue was not structured correctly", message)

114 

115 

class TestValidation(unittest.TestCase):
    """Exercise ``validate`` and ``get_data_to_store`` on issue titles and bodies.

    Each test runs against a scratch directory created next to this file
    (``temp_test_dir``) holding a ``validation_output`` and a
    ``validation_reports`` sub-directory, with ``GH_TOKEN`` and
    ``GITHUB_REPOSITORY`` patched into ``os.environ``. Both are undone via
    ``addCleanup`` so they are released even when ``setUp`` itself fails.
    """

    def setUp(self):
        """Set up test environment before each test"""
        self.test_dir = os.path.join(os.path.dirname(__file__), "temp_test_dir")
        self.validation_output = os.path.join(self.test_dir, "validation_output")
        self.validation_reports = os.path.join(self.test_dir, "validation_reports")

        # Cleanups run in reverse registration order: the environment patcher
        # is stopped first, then the scratch directory is removed.
        self.addCleanup(self._remove_test_dir)
        os.makedirs(self.validation_output, exist_ok=True)
        os.makedirs(self.validation_reports, exist_ok=True)

        # Setup environment variables
        self.env_patcher = patch.dict(
            "os.environ",
            {"GH_TOKEN": "fake-token", "GITHUB_REPOSITORY": "test-org/test-repo"},
        )
        self.env_patcher.start()
        self.addCleanup(self.env_patcher.stop)

    def _remove_test_dir(self):
        """Delete the scratch directory if it is still present."""
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def _run_validate(self, title, body, issue_number):
        """Call ``validate`` with this test's output and report directories.

        Returns whatever ``validate`` returns; the tests unpack it as an
        ``(is_valid, message)`` pair.
        """
        return validate(
            title,
            body,
            issue_number,
            validation_output_dir=self.validation_output,
            validation_reports_dir=self.validation_reports,
        )

    def _html_reports(self):
        """Return the names of the ``.html`` files in the reports directory."""
        return [
            name
            for name in os.listdir(self.validation_reports)
            if name.endswith(".html")
        ]

    def _assert_validation_report_written(self):
        """Assert that at least one ``validation_*.html`` report exists."""
        self.assertTrue(
            any(name.startswith("validation_") for name in self._html_reports())
        )

    def test_valid_issue(self):
        """Test that a valid issue with correct title and CSV data is accepted"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","A Study on Electric Properties","Smith, John","2024","Journal of Physics","5","2","100-120","journal article","Test Publisher",""
"doi:10.1007/978-3-662-07918-8_3","Influence of Dielectric Properties, State, and Electrodes on Electric Strength","Ushakov, Vasily Y.","2004","Insulation of High-Voltage Equipment [isbn:9783642058530 isbn:9783662079188]","","","27-82","book chapter","Springer Science and Business Media LLC [crossref:297]",""
"doi:10.1016/0021-9991(73)90147-2","Flux-corrected transport. I. SHASTA, a fluid transport algorithm that works","Boris, Jay P; Book, David L","1973-01","Journal of Computational Physics [issn:0021-9991]","11","1","38-69","journal article","Elsevier BV [crossref:78]",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-662-07918-8_3"
"doi:10.1007/s42835-022-01029-y","doi:10.1016/0021-9991(73)90147-2\""""
        is_valid, message = self._run_validate(title, body, "123")
        self.assertTrue(is_valid)
        self.assertIn("Thank you for your contribution", message)

    def test_invalid_separator(self):
        """Test that issue with incorrect separator is rejected"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/978-3-662-07918-8_3","Test Title","Test Author","2004","Test Venue","1","1","1-10","journal article","Test Publisher",""
WRONG_SEPARATOR
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-662-07918-8_3\""""
        is_valid, message = self._run_validate(title, body, "124")
        self.assertFalse(is_valid)
        self.assertIn("Please use the separator", message)

    def test_invalid_title_valid_body(self):
        """Test that issue with invalid title but valid body is rejected"""
        title = "invalid title format"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/978-3-662-07918-8_3","Test Title","Test Author","2004","Test Venue","1","1","1-10","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-662-07918-8_3\""""
        is_valid, message = self._run_validate(title, body, "125")
        self.assertFalse(is_valid)
        self.assertIn("title of the issue was not structured correctly", message)

    def test_invalid_csv_structure(self):
        """Test that CSV with wrong column structure returns appropriate error"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"wrong","column","headers"
"data1","data2","data3"
===###===@@@===
"wrong","citation","headers"
"cite1","cite2","cite3"\""""
        is_valid, message = self._run_validate(title, body, "126")
        self.assertFalse(is_valid)
        self.assertIn(
            "Please ensure both metadata and citations are valid CSVs following the required format.",
            message,
        )

    def test_get_data_to_store_valid_input(self):
        """Test get_data_to_store with valid input data"""
        title = "deposit journal.com doi:10.1234/test"
        body = """"id","title"
"1","Test Title"
===###===@@@===
"citing","cited"
"id1","id2"\""""
        created_at = "2024-01-01T00:00:00Z"
        had_primary_source = "https://github.com/test/1"
        user_id = 12345

        result = get_data_to_store(title, body, created_at, had_primary_source, user_id)

        self.assertEqual(result["data"]["title"], title)
        self.assertEqual(result["data"]["domain"], "journal.com")
        self.assertEqual(len(result["data"]["metadata"]), 1)
        self.assertEqual(len(result["data"]["citations"]), 1)
        self.assertEqual(result["provenance"]["generatedAtTime"], created_at)
        self.assertEqual(
            result["provenance"]["wasAttributedTo"],
            f"https://api.github.com/user/{user_id}",
        )
        self.assertEqual(result["provenance"]["hadPrimarySource"], had_primary_source)

    def test_get_data_to_store_invalid_csv(self):
        """Test get_data_to_store with invalid CSV format"""
        title = "deposit journal.com doi:10.1234/test"
        # CSV with a single section (the separator is missing)
        body = """"id","title"
"1","Test Title"\""""

        with self.assertRaises(ValueError) as context:
            get_data_to_store(
                title, body, "2024-01-01T00:00:00Z", "https://github.com/test/1", 12345
            )

        # Verify that the error carries the expected message
        self.assertIn("Failed to process issue data", str(context.exception))

    def test_get_data_to_store_empty_sections(self):
        """Test get_data_to_store with empty metadata or citations sections"""
        title = "deposit journal.com doi:10.1234/test"
        body = """"id","title"
===###===@@@===
"citing","cited"\""""

        with self.assertRaises(ValueError) as context:
            get_data_to_store(
                title, body, "2024-01-01T00:00:00Z", "https://github.com/test/1", 12345
            )

        self.assertIn("Empty metadata or citations section", str(context.exception))

    def test_get_data_to_store_invalid_separator(self):
        """Test get_data_to_store with invalid separator in body"""
        title = "deposit journal.com doi:10.1234/test"
        body = """"id","title"
INVALID_SEPARATOR
"citing","cited"\""""

        with self.assertRaises(ValueError) as context:
            get_data_to_store(
                title, body, "2024-01-01T00:00:00Z", "https://github.com/test/1", 12345
            )

        self.assertIn("Failed to process issue data", str(context.exception))

    # NOTE(review): only the archive_manager mock is used below; the other
    # seven patches (including the one on ``validate``, which does not affect
    # the directly imported function called here) look unnecessary. Confirm
    # that ``validate`` does not reach any of them internally before removing.
    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.validate")
    @patch("crowdsourcing.process_issues.get_data_to_store")
    @patch("crowdsourcing.process_issues.answer")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_with_validator(self, mock_archive_manager, *args):
        """Test validation using the oc_validator library"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # CSV with intentional validation errors
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","","","","","","","","invalid_type","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","invalid_doi\""""

        is_valid, message = self._run_validate(title, body, "127")

        # Verify validation failed
        self.assertFalse(is_valid)
        self.assertIn("Validation errors found in", message)
        self.assertIn("metadata and citations", message)

        # Verify final report was generated in validation_reports
        self._assert_validation_report_written()

        # Verify archive_manager.add_report was called
        mock_archive_manager.add_report.assert_called_once()

    def test_validation_with_metadata_validation_file(self):
        """Test validation when metadata validation file contains errors"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # Invalid metadata CSV with missing required fields
        body = """"wrong_field","another_wrong"
"value1","value2"
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-030-00668-6_8\""""

        is_valid, message = self._run_validate(title, body, "128")

        self.assertFalse(is_valid)
        self.assertIn(
            "Please ensure both metadata and citations are valid CSVs", message
        )
        self.assertIn("check our guide", message)

    def test_validation_with_both_validation_files(self):
        """Test validation when both metadata and citations have validation errors"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # Invalid metadata missing fields and invalid citation identifiers
        body = """"id","title"
"doi:invalid","Test Title"
===###===@@@===
"citing_id","cited_id"
"invalid:123","another:456"\""""

        is_valid, message = self._run_validate(title, body, "129")

        self.assertFalse(is_valid)
        self.assertIn(
            "Please ensure both metadata and citations are valid CSVs", message
        )
        self.assertIn("check our guide", message)

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_reads_validation_files(self, mock_archive_manager):
        """Test that validation properly reads and processes validation files"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # CSV with intentional validation errors
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","","","","","","","","invalid_type","",""
"doi:10.1162/qss_a_00292","","","","","","","","journal article","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","invalid_doi"
"doi:10.1162/qss_a_00292","doi:10.1007/s42835-022-01029-y"\""""

        is_valid, message = self._run_validate(title, body, "130")

        # Verify validation failed
        self.assertFalse(is_valid)
        self.assertIn("Validation errors found in metadata and citations", message)
        self.assertIn("Please check the detailed validation report:", message)
        self.assertIn(
            "test-org.github.io/test-repo/validation_reports/index.html?report=validation_",
            message,
        )
        self.assertIn(".html", message)

        # Verify final report was generated in validation_reports
        self._assert_validation_report_written()

        # Verify archive_manager.add_report was called
        mock_archive_manager.add_report.assert_called_once()

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_html_report_generation(self, mock_archive_manager):
        """Test that HTML validation reports are properly generated when validation fails"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # Invalid data that will fail validation
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"INVALID_DOI","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"INVALID_DOI","doi:10.1007/978-3-030-00668-6_8\""""

        is_valid, message = self._run_validate(title, body, "131")

        # Verify validation failed
        self.assertFalse(is_valid)

        # Check that merged report exists in validation_reports
        self._assert_validation_report_written()

        # Verify report URL is in the error message
        self.assertIn("Please check the detailed validation report:", message)
        self.assertIn(
            "test-org.github.io/test-repo/validation_reports/index.html?report=validation_",
            message,
        )
        self.assertIn(".html", message)

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_html_report_generation_only_metadata_errors(
        self, mock_archive_manager
    ):
        """Test HTML report generation when only metadata has validation errors"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # CSV with invalid metadata but valid citations
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","","","","","","","invalid_type","",""
"doi:10.1162/qss_a_00292","Test Title","","","","","","","journal article","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1162/qss_a_00292\""""

        is_valid, _ = self._run_validate(title, body, "132")

        # Verify validation failed
        self.assertFalse(is_valid)

        # Check that final report exists
        report_files = self._html_reports()
        self.assertEqual(len(report_files), 1, "Should be exactly one final report")
        final_report = report_files[0]
        self.assertTrue(final_report.startswith("validation_"))

        # Verify archive_manager.add_report was called with correct parameters
        mock_archive_manager.add_report.assert_called_once_with(
            final_report,
            f"https://test-org.github.io/test-repo/validation_reports/{final_report}",
        )

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_html_report_generation_only_citations_errors(
        self, mock_archive_manager
    ):
        """Test HTML report generation when only citations have validation errors"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # CSV with valid metadata but invalid citations
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"INVALID_DOI","ANOTHER_INVALID_DOI"\""""

        is_valid, _ = self._run_validate(title, body, "133")

        # Verify validation failed
        self.assertFalse(is_valid)

        # Check that final report exists
        report_files = self._html_reports()
        self.assertEqual(len(report_files), 1, "Should be exactly one final report")
        final_report = report_files[0]
        self.assertTrue(final_report.startswith("validation_"))

        # Verify archive_manager.add_report was called with correct parameters
        mock_archive_manager.add_report.assert_called_once_with(
            final_report,
            f"https://test-org.github.io/test-repo/validation_reports/{final_report}",
        )

    def test_validate_empty_body(self):
        """Test validate() with empty body content"""
        title = "deposit journal.com doi:10.1162/qss_a_00292"
        body = None

        is_valid, message = self._run_validate(title, body, "134")

        self.assertFalse(is_valid)
        self.assertIn("The issue body cannot be empty", message)
        self.assertIn(
            "https://github.com/opencitations/crowdsourcing/blob/main/README.md",
            message,
        )

    def test_validate_empty_string_body(self):
        """Test validate() with empty string body content"""
        title = "deposit journal.com doi:10.1162/qss_a_00292"
        body = ""

        is_valid, message = self._run_validate(title, body, "135")

        self.assertFalse(is_valid)
        self.assertIn("The issue body cannot be empty", message)
        self.assertIn(
            "https://github.com/opencitations/crowdsourcing/blob/main/README.md",
            message,
        )

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_report_issue_number(self, mock_archive_manager):
        """Test that validation report filename contains correct issue number"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","","","","","","","","invalid_type","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","invalid_doi"\""""

        test_issue_number = "42"

        is_valid, _ = self._run_validate(title, body, test_issue_number)

        # Verify validation failed and generated a report
        self.assertFalse(is_valid)

        # Check that the report file exists with correct issue number
        matching_files = [
            name
            for name in os.listdir(self.validation_reports)
            if name.startswith(f"validation_issue_{test_issue_number}")
        ]
        self.assertEqual(
            len(matching_files), 1, "Should find exactly one matching report file"
        )
        self.assertTrue(
            matching_files[0].endswith(".html"), "Report file should be HTML"
        )
        self.assertEqual(
            matching_files[0], f"validation_issue_{test_issue_number}.html"
        )

    def test_valid_temp_ids_in_csv(self):
        """Test that CSV data with temporary IDs is accepted"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"temp:123","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"temp:456","Another Title","Another Author","2024","Test Journal","1","1","11-20","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"temp:123","temp:456\""""
        is_valid, message = self._run_validate(title, body, "136")
        self.assertTrue(is_valid)
        self.assertIn("Thank you for your contribution", message)

    def test_valid_local_ids_in_csv(self):
        """Test that CSV data with local IDs is accepted"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"local:rec1","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"local:rec2","Another Title","Another Author","2024","Test Journal","1","1","11-20","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"local:rec1","local:rec2\""""
        is_valid, message = self._run_validate(title, body, "137")
        self.assertTrue(is_valid)
        self.assertIn("Thank you for your contribution", message)

    def test_mixed_identifier_types_in_csv(self):
        """Test that CSV data with mixed identifier types (DOI, temp, local) is accepted"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"temp:123","Another Title","Another Author","2024","Test Journal","1","1","11-20","journal article","Test Publisher",""
"local:rec1","Third Title","Third Author","2024","Test Journal","1","1","21-30","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","temp:123"
"temp:123","local:rec1\""""
        is_valid, message = self._run_validate(title, body, "138")
        self.assertTrue(is_valid)
        self.assertIn("Thank you for your contribution", message)

684 

685 

686class TestUserValidation(unittest.TestCase): 

687 def setUp(self): 

688 """Set up test environment before each test""" 

689 # Create a temporary test safe list file 

690 self.test_safe_list_path = "test_safe_list.yaml" 

691 test_safe_list = { 

692 "users": [ 

693 {"id": 3869247, "name": "Silvio Peroni"}, 

694 {"id": 42008604, "name": "Arcangelo Massari"}, 

695 ] 

696 } 

697 with open(self.test_safe_list_path, "w") as f: 

698 yaml.dump(test_safe_list, f) 

699 

700 # Create patcher to use test file instead of real one 

701 self.safe_list_patcher = patch( 

702 "crowdsourcing.process_issues.SAFE_LIST_PATH", self.test_safe_list_path 

703 ) 

704 self.safe_list_patcher.start() 

705 

706 def tearDown(self): 

707 """Clean up after each test""" 

708 # Remove temporary file 

709 if os.path.exists(self.test_safe_list_path): 

710 os.remove(self.test_safe_list_path) 

711 

712 # Stop patcher 

713 self.safe_list_patcher.stop() 

714 

715 def test_get_user_id_real_user(self): 

716 """Test getting ID of a real GitHub user""" 

717 with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}): 

718 with patch("requests.get") as mock_get: 

719 mock_response = MagicMock() 

720 mock_response.status_code = 200 

721 mock_response.json.return_value = {"id": 42008604} 

722 mock_get.return_value = mock_response 

723 

724 user_id = get_user_id("arcangelo7") 

725 self.assertEqual(user_id, 42008604) 

726 

727 def test_get_user_id_nonexistent_user(self): 

728 """Test getting ID of a nonexistent GitHub user""" 

729 with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}): 

730 with patch("requests.get") as mock_get: 

731 mock_response = MagicMock() 

732 mock_response.status_code = 404 

733 mock_get.return_value = mock_response 

734 

735 user_id = get_user_id("this_user_definitely_does_not_exist_123456789") 

736 self.assertIsNone(user_id) 

737 

738 def test_is_in_safe_list_authorized(self): 

739 """Test that authorized user is in safe list""" 

740 self.assertTrue(is_in_safe_list(42008604)) 

741 

742 def test_is_in_safe_list_unauthorized(self): 

743 """Test that unauthorized user is not in safe list""" 

744 self.assertFalse(is_in_safe_list(99999999)) 

745 

746 def test_is_in_safe_list_file_not_found(self): 

747 """Test behavior when safe_list.yaml doesn't exist""" 

748 # Remove the test file to simulate missing file 

749 if os.path.exists(self.test_safe_list_path): 

750 os.remove(self.test_safe_list_path) 

751 

752 # Test with any user ID - should return False when file is missing 

753 result = is_in_safe_list(42008604) 

754 

755 # Verify result is False 

756 self.assertFalse(result) 

757 

758 # Verify empty file was created with proper structure 

759 self.assertTrue(os.path.exists(self.test_safe_list_path)) 

760 with open(self.test_safe_list_path, "r") as f: 

761 content = yaml.safe_load(f) 

762 self.assertEqual(content, {"users": []}) 

763 

764 def test_is_in_safe_list_invalid_yaml(self): 

765 """Test behavior with invalid YAML file""" 

766 with open(self.test_safe_list_path, "w") as f: 

767 f.write("invalid: yaml: content: [") 

768 self.assertFalse(is_in_safe_list(42008604)) 

769 

@patch("requests.get")
@patch("time.sleep")
@patch("time.time")
def test_get_user_id_rate_limit(self, mock_time, mock_sleep, mock_get):
    """get_user_id must sleep until the rate limit resets and then retry."""
    # Freeze the clock so the expected wait is deterministic.
    now = 1000000
    seconds_until_reset = 30
    mock_time.return_value = now

    # First reply: rate limit exhausted, reset announced 30 seconds ahead.
    throttled = MagicMock()
    throttled.status_code = 403
    throttled.headers = {
        "X-RateLimit-Remaining": "0",
        "X-RateLimit-Reset": str(now + seconds_until_reset),
    }

    # Second reply: the user record.
    found = MagicMock()
    found.status_code = 200
    found.json.return_value = {"id": 12345}

    mock_get.side_effect = [throttled, found]

    with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
        user_id = get_user_id("test-user")

    # The ID comes from the second response, after exactly one 30-second wait.
    self.assertEqual(user_id, 12345)
    mock_sleep.assert_called_once_with(seconds_until_reset)

    # Both requests must target the same endpoint with the same credentials.
    self.assertEqual(mock_get.call_count, 2)
    for positional, keyword in mock_get.call_args_list:
        self.assertEqual(positional[0], "https://api.github.com/users/test-user")
        self.assertEqual(keyword["headers"]["Authorization"], "Bearer fake-token")

811 

@patch("requests.get")
@patch("time.sleep")  # mocked so the retry pauses do not slow the test down
def test_get_user_id_connection_error_retry(self, mock_sleep, mock_get):
    """Two connection errors followed by a success must still yield the user ID."""
    recovered = MagicMock(status_code=200, json=lambda: {"id": 12345})
    mock_get.side_effect = [requests.ConnectionError] * 2 + [recovered]

    with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
        user_id = get_user_id("test-user")

    self.assertEqual(user_id, 12345)
    self.assertEqual(mock_get.call_count, 3)

    # One pause per failed attempt; the most recent one must be 5 seconds.
    self.assertEqual(mock_sleep.call_count, 2)
    mock_sleep.assert_called_with(5)

830 

@patch("requests.get")
@patch("time.sleep")
def test_get_user_id_all_retries_fail(self, mock_sleep, mock_get):
    """get_user_id must return None once every attempt raises ConnectionError."""
    attempts = 3
    mock_get.side_effect = [requests.ConnectionError] * attempts

    with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
        user_id = get_user_id("test-user")

    self.assertIsNone(user_id)
    self.assertEqual(mock_get.call_count, attempts)

    # A sleep is expected after each ConnectionError, the final one included.
    self.assertEqual(mock_sleep.call_count, attempts)

850 

@patch("requests.get")
@patch("time.sleep")
def test_get_user_id_timeout_retry(self, mock_sleep, mock_get):
    """Two read timeouts followed by a success must still yield the user ID."""
    recovered = MagicMock(status_code=200, json=lambda: {"id": 12345})
    mock_get.side_effect = [requests.ReadTimeout] * 2 + [recovered]

    with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
        user_id = get_user_id("test-user")

    # The third attempt succeeds and supplies the ID.
    self.assertEqual(user_id, 12345)
    self.assertEqual(mock_get.call_count, 3)

    # Timeouts are expected to be retried without any sleep in between.
    mock_sleep.assert_not_called()

    # Every attempt must target the same endpoint with the same credentials.
    for positional, keyword in mock_get.call_args_list:
        self.assertEqual(positional[0], "https://api.github.com/users/test-user")
        self.assertEqual(keyword["headers"]["Authorization"], "Bearer fake-token")

879 

880 

class TestGitHubAPI(unittest.TestCase):
    """Tests for get_open_issues and its handling of GitHub API responses."""

    def setUp(self):
        """Install fake credentials and prepare a default response and issue payload."""
        # Fake token and repository, active for the duration of each test.
        self.env_patcher = patch.dict(
            "os.environ",
            {"GH_TOKEN": "fake-token", "GITHUB_REPOSITORY": "test-org/test-repo"},
        )
        self.env_patcher.start()

        # Individual tests override the status code or attach a JSON payload.
        self.mock_response = MagicMock()
        self.mock_response.status_code = 200

        # Fixed payload with a single open issue.
        self.sample_issues = [
            {
                "title": "deposit journal.com doi:10.1234/test",
                "body": "test body",
                "number": 1,
                "user": {"login": "test-user"},
                "created_at": "2024-01-01T00:00:00Z",
                "html_url": "https://github.com/test-org/test-repo/issues/1",
                "labels": [],
            }
        ]

    def tearDown(self):
        """Undo the environment patch applied in setUp."""
        self.env_patcher.stop()

    @staticmethod
    def _rate_limited_then_success(reset_timestamp):
        """Build a 403 rate-limit response followed by a 200 response with one issue.

        reset_timestamp is the value advertised in the X-RateLimit-Reset header.
        """
        throttled = MagicMock()
        throttled.status_code = 403
        throttled.headers = {
            "X-RateLimit-Remaining": "0",
            "X-RateLimit-Reset": str(reset_timestamp),
        }

        succeeded = MagicMock()
        succeeded.status_code = 200
        succeeded.json.return_value = [
            {
                "title": "deposit Test Issue",
                "body": "Test Body",
                "number": 1,
                "user": {"login": "test-user"},
                "created_at": "2024-01-01T00:00:00Z",
                "html_url": "https://github.com/test/1",
                "labels": [],
            }
        ]
        return [throttled, succeeded]

    @patch("requests.get")
    def test_get_open_issues_success(self, mock_get):
        """A 200 response must be turned into the list of open issues."""
        self.mock_response.json.return_value = self.sample_issues
        mock_get.return_value = self.mock_response

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            issues = get_open_issues()

        self.assertEqual(len(issues), 1)
        first_issue = issues[0]
        self.assertEqual(first_issue["title"], "deposit journal.com doi:10.1234/test")
        # The payload carries the number as an int; a string is expected back.
        self.assertEqual(first_issue["number"], "1")

        # Exactly one authenticated request for open issues.
        mock_get.assert_called_once()
        _, request_kwargs = mock_get.call_args
        self.assertEqual(request_kwargs["headers"]["Authorization"], "Bearer fake-token")
        self.assertEqual(request_kwargs["params"]["state"], "open")

    @patch("requests.get")
    def test_get_open_issues_404(self, mock_get):
        """A 404 response must result in an empty list."""
        self.mock_response.status_code = 404
        mock_get.return_value = self.mock_response

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            issues = get_open_issues()

        self.assertEqual(issues, [])

    @patch("requests.get")
    @patch("time.sleep")
    @patch("time.time")
    def test_rate_limit_retry(self, mock_time, mock_sleep, mock_get):
        """A rate-limited request must be retried after sleeping until the reset."""
        # Freeze the clock so the expected wait is deterministic.
        now = 1000000
        mock_time.return_value = now

        # Reset announced 30 seconds ahead; the second request succeeds.
        mock_get.side_effect = self._rate_limited_then_success(now + 30)

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            issues = get_open_issues()

        self.assertEqual(len(issues), 1)
        self.assertEqual(issues[0]["title"], "deposit Test Issue")

        # The wait must cover exactly the 30 seconds left until the reset.
        mock_sleep.assert_called_once_with(30)

        # Both requests must ask for open issues with the same credentials.
        self.assertEqual(mock_get.call_count, 2)
        for _, request_kwargs in mock_get.call_args_list:
            self.assertEqual(request_kwargs["params"]["state"], "open")
            self.assertEqual(
                request_kwargs["headers"]["Authorization"], "Bearer fake-token"
            )

    @patch("requests.get")
    def test_network_error_retry(self, mock_get):
        """Persistent network errors must end in a RuntimeError after three attempts."""
        mock_get.side_effect = RequestException("Network error")

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            with self.assertRaises(RuntimeError) as raised:
                get_open_issues()

        self.assertIn("Failed to fetch issues after 3 attempts", str(raised.exception))
        self.assertEqual(mock_get.call_count, 3)

    @patch("requests.get")
    def test_get_open_issues_all_attempts_fail(self, mock_get):
        """Repeated 403 responses without rate-limit headers must give an empty list."""
        # A 403 with no rate-limit headers, so the rate-limit wait is not
        # expected to apply.
        rejected = MagicMock()
        rejected.status_code = 403
        rejected.headers = {}
        mock_get.return_value = rejected

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            issues = get_open_issues()

        self.assertEqual(issues, [])
        # Three requests are expected before giving up.
        self.assertEqual(mock_get.call_count, 3)

    @patch("requests.get")
    @patch("time.sleep")
    @patch("time.time")
    def test_rate_limit_already_expired(self, mock_time, mock_sleep, mock_get):
        """A rate-limit reset time in the past must trigger a retry without sleeping."""
        now = 1000000
        mock_time.return_value = now

        # Reset time lies 30 seconds in the past; the second request succeeds.
        mock_get.side_effect = self._rate_limited_then_success(now - 30)

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            issues = get_open_issues()

        self.assertEqual(len(issues), 1)
        self.assertEqual(issues[0]["title"], "deposit Test Issue")

        # Nothing to wait for: the limit had already been lifted.
        mock_sleep.assert_not_called()
        self.assertEqual(mock_get.call_count, 2)

1072 

1073 

class TestAnswerFunction(unittest.TestCase):
    """Tests for answer(): the label, comment and close requests it sends to GitHub."""

    def setUp(self):
        """Install fake credentials and record the request details the tests expect."""
        self.env_patcher = patch.dict(
            "os.environ",
            {"GH_TOKEN": "fake-token", "GITHUB_REPOSITORY": "test-org/test-repo"},
        )
        self.env_patcher.start()

        self.issue_number = "123"
        self.base_url = "https://api.github.com/repos/test-org/test-repo/issues"
        self.headers = {
            "Accept": "application/vnd.github+json",
            "Authorization": "Bearer fake-token",
            "X-GitHub-Api-Version": "2022-11-28",
        }

    def tearDown(self):
        """Undo the environment patch applied in setUp."""
        self.env_patcher.stop()

    def _issue_endpoint(self, suffix=""):
        """Return the API URL of the test issue, optionally extended by suffix."""
        return f"{self.base_url}/{self.issue_number}{suffix}"

    def _assert_label_applied(self, mock_post, label):
        """Assert that a POST adding exactly this label was sent for the test issue."""
        mock_post.assert_any_call(
            self._issue_endpoint("/labels"),
            headers=self.headers,
            json={"labels": [label]},
            timeout=30,
        )

    def _assert_answer_propagates_request_exception(self):
        """Assert that answer() lets a RequestException from requests escape."""
        with self.assertRaises(RequestException):
            answer(
                is_valid=True,
                message="Test message",
                issue_number=self.issue_number,
            )

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_valid_authorized(self, mock_patch, mock_post):
        """A valid issue from an authorized user is labelled, commented on and closed."""
        mock_post.return_value.status_code = 201
        mock_patch.return_value.status_code = 200

        answer(
            is_valid=True,
            message="Thank you for your contribution!",
            issue_number=self.issue_number,
            is_authorized=True,
        )

        # Label request.
        self._assert_label_applied(mock_post, "to be processed")

        # Comment request carrying the message.
        mock_post.assert_any_call(
            self._issue_endpoint("/comments"),
            headers=self.headers,
            json={"body": "Thank you for your contribution!"},
            timeout=30,
        )

        # Single request closing the issue.
        mock_patch.assert_called_once_with(
            self._issue_endpoint(),
            headers=self.headers,
            json={"state": "closed"},
            timeout=30,
        )

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_invalid_authorized(self, mock_patch, mock_post):
        """An invalid issue from an authorized user must be labelled 'invalid'."""
        answer(
            is_valid=False,
            message="Invalid format",
            issue_number=self.issue_number,
            is_authorized=True,
        )

        self._assert_label_applied(mock_post, "invalid")

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_unauthorized(self, mock_patch, mock_post):
        """An issue from an unauthorized user must be labelled 'rejected'."""
        answer(
            is_valid=False,
            message="Unauthorized user",
            issue_number=self.issue_number,
            is_authorized=False,
        )

        self._assert_label_applied(mock_post, "rejected")

    @patch("requests.post")
    def test_answer_label_error(self, mock_post):
        """A failure on the first POST (the label request) must propagate."""
        mock_post.side_effect = RequestException("Network error")

        self._assert_answer_propagates_request_exception()

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_comment_error(self, mock_patch, mock_post):
        """A failure on the second POST (the comment request) must propagate."""
        # The first POST succeeds, the second one raises.
        mock_post.side_effect = [
            MagicMock(status_code=201),
            RequestException("Network error"),
        ]

        self._assert_answer_propagates_request_exception()

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_close_error(self, mock_patch, mock_post):
        """A failure on the PATCH closing the issue must propagate."""
        mock_post.return_value = MagicMock(status_code=201)
        mock_patch.side_effect = RequestException("Network error")

        self._assert_answer_propagates_request_exception()

1218 

1219 

class TestZenodoDeposit(unittest.TestCase):
    """Tests for the Zenodo deposit helpers and deposit_on_zenodo.

    setUp installs fake sandbox and production tokens together with
    ENVIRONMENT=development; tests that need another configuration patch
    os.environ themselves.
    """

    def setUp(self):
        """Patch the Zenodo environment variables and build a one-record payload."""
        self.env_patcher = patch.dict(
            "os.environ",
            {
                "ZENODO_SANDBOX": "fake-sandbox-token",
                "ZENODO_PRODUCTION": "fake-prod-token",
                "ENVIRONMENT": "development",
            },
        )
        self.env_patcher.start()

        self.test_data = [
            {
                "data": {
                    "title": "test deposit",
                    "metadata": [{"id": "1", "title": "Test"}],
                    "citations": [{"citing": "1", "cited": "2"}],
                },
                "provenance": {
                    "generatedAtTime": "2024-01-01T00:00:00Z",
                    "wasAttributedTo": 12345,
                    "hadPrimarySource": "https://github.com/test/1",
                },
            }
        ]

    def tearDown(self):
        """Restore the environment and delete any data_to_store.json left behind."""
        self.env_patcher.stop()
        if os.path.exists("data_to_store.json"):
            os.remove("data_to_store.json")

    def _deposit_with_mocked_http(self, bucket_url):
        """Run deposit_on_zenodo with requests.post/put mocked; return the POSTed URLs.

        bucket_url is the bucket link reported by the mocked deposition
        creation response.
        """
        with patch("requests.post") as mock_post, patch("requests.put") as mock_put:
            # One shared response object answers every POST, so it carries both
            # the deposition payload and the 202 status checked by the publish
            # test in this class.
            post_response = mock_post.return_value
            post_response.json.return_value = {
                "id": "12345",
                "links": {"bucket": bucket_url},
            }
            post_response.status_code = 202

            # Upload succeeds.
            mock_put.return_value.status_code = 200
            mock_put.return_value.raise_for_status = lambda: None

            deposit_on_zenodo(self.test_data)

            return [posted[0][0] for posted in mock_post.call_args_list]

    @patch("requests.post")
    def test_create_deposition_resource(self, mock_post):
        """The deposition ID and bucket URL are read from the creation response."""
        creation_response = MagicMock()
        creation_response.json.return_value = {
            "id": "12345",
            "links": {"bucket": "https://sandbox.zenodo.org/api/bucket/12345"},
        }
        mock_post.return_value = creation_response

        deposition_id, bucket = _create_deposition_resource(
            "2024-01-01", base_url="https://sandbox.zenodo.org/api"
        )

        self.assertEqual(deposition_id, "12345")
        self.assertEqual(bucket, "https://sandbox.zenodo.org/api/bucket/12345")

        # Exactly one request, authenticated with the sandbox token.
        mock_post.assert_called_once()
        _, request_kwargs = mock_post.call_args
        self.assertEqual(request_kwargs["params"], {"access_token": "fake-sandbox-token"})
        self.assertEqual(request_kwargs["headers"], {"Content-Type": "application/json"})
        self.assertEqual(request_kwargs["timeout"], 30)

    @patch("requests.put")
    def test_upload_data(self, mock_put):
        """The data file is PUT into the bucket under a date-stamped file name."""
        mock_put.return_value.status_code = 200
        mock_put.return_value.raise_for_status = lambda: None

        # File the upload is expected to read; tearDown removes it again.
        with open("data_to_store.json", "w") as data_file:
            json.dump({"test": "data"}, data_file)

        _upload_data(
            "2024-01-01",
            "https://sandbox.zenodo.org/api/bucket/12345",
            base_url="https://sandbox.zenodo.org/api",
        )

        mock_put.assert_called_once()
        request_args, request_kwargs = mock_put.call_args
        self.assertEqual(
            request_args[0],
            "https://sandbox.zenodo.org/api/bucket/12345/2024-01-01_weekly_deposit.json",
        )
        self.assertEqual(request_kwargs["params"], {"access_token": "fake-sandbox-token"})
        self.assertEqual(request_kwargs["timeout"], 30)

    @patch("crowdsourcing.process_issues._create_deposition_resource")
    @patch("crowdsourcing.process_issues._upload_data")
    @patch("requests.post")
    def test_deposit_on_zenodo(self, mock_post, mock_upload, mock_create):
        """The full deposit creates, uploads, publishes and cleans up."""
        mock_create.return_value = (
            "12345",
            "https://sandbox.zenodo.org/api/bucket/12345",
        )
        mock_post.return_value.status_code = 202  # status used here for a successful publish
        mock_post.return_value.text = ""  # keeps a MagicMock out of any error message

        # The expected date argument is today's date. Sample the clock on both
        # sides of the call so a run that straddles midnight cannot fail
        # spuriously.
        date_before = datetime.now().strftime("%Y-%m-%d")
        deposit_on_zenodo(self.test_data)
        date_after = datetime.now().strftime("%Y-%m-%d")
        acceptable_dates = (date_before, date_after)

        # Creation: one call with the deposit date and the sandbox base URL.
        mock_create.assert_called_once()
        create_date = mock_create.call_args[0][0]
        self.assertIn(create_date, acceptable_dates)
        mock_create.assert_called_once_with(
            create_date,
            base_url="https://sandbox.zenodo.org/api",
        )

        # Upload: one call with the deposit date, the bucket and the base URL.
        mock_upload.assert_called_once()
        upload_date = mock_upload.call_args[0][0]
        self.assertIn(upload_date, acceptable_dates)
        mock_upload.assert_called_once_with(
            upload_date,
            "https://sandbox.zenodo.org/api/bucket/12345",
            base_url="https://sandbox.zenodo.org/api",
        )

        # Publish: the only direct POST, since creation is mocked out.
        mock_post.assert_called_once()
        request_args, request_kwargs = mock_post.call_args
        self.assertEqual(
            request_args[0],
            "https://sandbox.zenodo.org/api/deposit/depositions/12345/actions/publish",
        )
        self.assertEqual(request_kwargs["params"], {"access_token": "fake-sandbox-token"})
        self.assertEqual(request_kwargs["timeout"], 30)

        # The temporary data file must be gone afterwards.
        self.assertFalse(os.path.exists("data_to_store.json"))

    @patch("requests.post")
    def test_create_deposition_resource_error(self, mock_post):
        """A request failure during deposition creation must propagate."""
        mock_post.side_effect = requests.RequestException("API Error")

        with self.assertRaises(requests.RequestException):
            _create_deposition_resource("2024-01-01")

    @patch("requests.put")
    def test_upload_data_error(self, mock_put):
        """A request failure during the upload must propagate."""
        mock_put.side_effect = requests.RequestException("Upload Error")

        with open("data_to_store.json", "w") as data_file:
            json.dump({"test": "data"}, data_file)

        with self.assertRaises(requests.RequestException):
            _upload_data("2024-01-01", "https://zenodo.org/api/bucket/12345")

    @patch("crowdsourcing.process_issues._create_deposition_resource")
    def test_deposit_on_zenodo_create_error(self, mock_create):
        """A creation failure must propagate and still leave no data file behind."""
        mock_create.side_effect = requests.RequestException("Creation Error")

        with self.assertRaises(requests.RequestException):
            deposit_on_zenodo(self.test_data)

        self.assertFalse(os.path.exists("data_to_store.json"))

    def test_deposit_development_environment(self):
        """With ENVIRONMENT=development the deposit must talk to the sandbox."""
        posted_urls = self._deposit_with_mocked_http(
            "https://sandbox.zenodo.org/api/bucket/12345"
        )

        self.assertTrue(any("sandbox.zenodo.org" in url for url in posted_urls))

    def test_deposit_production_environment(self):
        """With ENVIRONMENT=production the deposit must not touch the sandbox."""
        with patch.dict("os.environ", {"ENVIRONMENT": "production"}):
            posted_urls = self._deposit_with_mocked_http(
                "https://zenodo.org/api/bucket/12345"
            )

        # Guard against a vacuous pass: all() is True for an empty list.
        self.assertTrue(posted_urls)
        self.assertTrue(all("sandbox.zenodo.org" not in url for url in posted_urls))

    def test_get_zenodo_token_development(self):
        """The sandbox token is returned when ENVIRONMENT=development."""
        token = _get_zenodo_token()
        self.assertEqual(token, "fake-sandbox-token")

    def test_get_zenodo_token_production(self):
        """The production token is returned when ENVIRONMENT=production."""
        with patch.dict("os.environ", {"ENVIRONMENT": "production"}):
            token = _get_zenodo_token()
            self.assertEqual(token, "fake-prod-token")

    def test_get_zenodo_token_missing(self):
        """An empty sandbox token must raise a ValueError naming the variable."""
        with patch.dict(
            "os.environ", {"ZENODO_SANDBOX": "", "ENVIRONMENT": "development"}
        ):
            with self.assertRaises(ValueError) as raised:
                _get_zenodo_token()
            self.assertIn("ZENODO_SANDBOX token not found", str(raised.exception))

    def test_get_zenodo_token_missing_production(self):
        """An empty production token must raise a ValueError naming the variable."""
        with patch.dict(
            "os.environ",
            {
                "ENVIRONMENT": "production",
                "ZENODO_PRODUCTION": "",  # missing token
            },
        ):
            with self.assertRaises(ValueError) as raised:
                _get_zenodo_token()
            self.assertIn("ZENODO_PRODUCTION token not found", str(raised.exception))

    @patch("crowdsourcing.process_issues._create_deposition_resource")
    @patch("crowdsourcing.process_issues._upload_data")
    @patch("requests.post")
    def test_deposit_on_zenodo_publish_error(self, mock_post, mock_upload, mock_create):
        """A failed publish request must raise with the response text and clean up."""
        mock_create.return_value = (
            "12345",
            "https://sandbox.zenodo.org/api/bucket/12345",
        )
        mock_post.return_value.status_code = 400  # simulate a publication error
        mock_post.return_value.text = "Publication failed"

        with self.assertRaises(Exception) as raised:
            deposit_on_zenodo(self.test_data)

        self.assertEqual(
            str(raised.exception), "Failed to publish deposition: Publication failed"
        )

        # The data file must be removed even though the deposit failed.
        self.assertFalse(os.path.exists("data_to_store.json"))

1481 

1482 

1483class TestProcessOpenIssues(unittest.TestCase): 

1484 """Test the main process_open_issues function""" 

1485 

def setUp(self):
    """Patch the tokens into the environment and build a sample deposit issue."""
    # Issue body: two CSV tables (bibliographic rows, then citing/cited
    # pairs) divided by the "===###===@@@===" separator line.
    issue_body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"doi:10.1007/978-3-030-00668-6_8","Cited Paper","Another Author","2024","Another Journal","2","2","20-30","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-030-00668-6_8\""""

    self.sample_issue = {
        "title": "deposit journal.com doi:10.1007/s42835-022-01029-y",
        "body": issue_body,
        "number": "1",
        "author": {"login": "test-user"},
        "createdAt": "2024-01-01T00:00:00Z",
        "url": "https://github.com/test/1",
    }

    # Fake credentials, active until tearDown stops the patcher.
    self.env_patcher = patch.dict(
        "os.environ", {"GH_TOKEN": "fake-gh-token", "ZENODO": "fake-zenodo-token"}
    )
    self.env_patcher.start()

1507 

def tearDown(self):
    """Undo the environment patch applied in setUp."""
    patcher = self.env_patcher
    patcher.stop()

1511 

@patch("crowdsourcing.process_issues.get_open_issues")
@patch("crowdsourcing.process_issues.get_user_id")
@patch("crowdsourcing.process_issues.is_in_safe_list")
@patch("crowdsourcing.process_issues.deposit_on_zenodo")
@patch("crowdsourcing.process_issues.answer")
def test_process_valid_authorized_issue(
    self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
):
    """A valid issue from a safe-listed user is answered positively and deposited."""
    github_user_id = 12345
    mock_get_issues.return_value = [self.sample_issue]
    mock_user_id.return_value = github_user_id
    mock_safe_list.return_value = True

    process_open_issues()

    # The author is resolved to an ID, which is then checked against the safe list.
    mock_user_id.assert_called_once_with("test-user")
    mock_safe_list.assert_called_once_with(github_user_id)

    # One answer: valid, thankful, for issue "1", flagged as authorized.
    mock_answer.assert_called_once()
    answer_args, answer_kwargs = mock_answer.call_args
    self.assertTrue(answer_args[0])
    self.assertIn("Thank you", answer_args[1])
    self.assertEqual(answer_args[2], "1")
    self.assertTrue(answer_kwargs["is_authorized"])

    # One deposit whose first record carries the issue title and the
    # submitter's GitHub API URL as provenance.
    mock_deposit.assert_called_once()
    deposit_args, _ = mock_deposit.call_args
    first_record = deposit_args[0][0]
    self.assertEqual(first_record["data"]["title"], self.sample_issue["title"])
    self.assertEqual(
        first_record["provenance"]["wasAttributedTo"],
        f"https://api.github.com/user/{github_user_id}",
    )

1550 

@patch("crowdsourcing.process_issues.get_open_issues")
@patch("crowdsourcing.process_issues.get_user_id")
@patch("crowdsourcing.process_issues.is_in_safe_list")
@patch("crowdsourcing.process_issues.deposit_on_zenodo")
@patch("crowdsourcing.process_issues.answer")
def test_process_unauthorized_user(
    self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
):
    """An issue from a user outside the safe list is rejected and never deposited."""
    github_user_id = 12345
    mock_get_issues.return_value = [self.sample_issue]
    mock_user_id.return_value = github_user_id
    mock_safe_list.return_value = False

    process_open_issues()

    # The author is looked up and checked, but the check fails.
    mock_user_id.assert_called_once_with("test-user")
    mock_safe_list.assert_called_once_with(github_user_id)

    # One answer: not valid, pointing to registration, flagged as unauthorized.
    mock_answer.assert_called_once()
    answer_args, answer_kwargs = mock_answer.call_args
    self.assertFalse(answer_args[0])
    self.assertIn("register as a trusted user", answer_args[1])
    self.assertEqual(answer_args[2], "1")
    self.assertFalse(answer_kwargs["is_authorized"])

    # Nothing may reach Zenodo.
    mock_deposit.assert_not_called()

1582 

@patch("crowdsourcing.process_issues.get_open_issues")
@patch("crowdsourcing.process_issues.get_user_id")
@patch("crowdsourcing.process_issues.is_in_safe_list")
@patch("crowdsourcing.process_issues.validate")
@patch("crowdsourcing.process_issues.get_data_to_store")
@patch("crowdsourcing.process_issues.answer")
@patch("crowdsourcing.process_issues.deposit_on_zenodo")
def test_process_open_issues_data_processing_error(
    self,
    mock_deposit,
    mock_answer,
    mock_get_data,
    mock_validate,
    mock_safe_list,
    mock_user_id,
    mock_get_issues,
):
    """A get_data_to_store failure must not escape and must prevent any deposit."""
    # Authorized user and valid issue, but extracting the data raises.
    mock_get_issues.return_value = [self.sample_issue]
    mock_user_id.return_value = 12345
    mock_safe_list.return_value = True
    mock_validate.return_value = (True, "Valid data")
    mock_get_data.side_effect = Exception("Data processing error")

    # Must complete without raising.
    process_open_issues()

    mock_get_data.assert_called_once()
    mock_answer.assert_called_once()
    # With no record collected there is nothing to deposit.
    mock_deposit.assert_not_called()

1616 

@patch("crowdsourcing.process_issues.get_open_issues")
@patch("crowdsourcing.process_issues.get_user_id")
@patch("crowdsourcing.process_issues.is_in_safe_list")
@patch("crowdsourcing.process_issues.validate")
@patch("crowdsourcing.process_issues.get_data_to_store")
@patch("crowdsourcing.process_issues.answer")
@patch("crowdsourcing.process_issues.deposit_on_zenodo")
def test_process_open_issues_zenodo_deposit_error(
    self,
    mock_deposit,
    mock_answer,
    mock_get_data,
    mock_validate,
    mock_safe_list,
    mock_user_id,
    mock_get_issues,
):
    """A deposit_on_zenodo failure must propagate out of process_open_issues."""
    # Everything succeeds up to the deposit, which raises.
    mock_get_issues.return_value = [self.sample_issue]
    mock_user_id.return_value = 12345
    mock_safe_list.return_value = True
    mock_validate.return_value = (True, "Valid data")
    mock_get_data.return_value = {"test": "data"}
    mock_deposit.side_effect = Exception("Zenodo deposit error")

    with self.assertRaises(Exception) as raised:
        process_open_issues()

    # The original error surfaces unchanged after a single deposit attempt.
    self.assertEqual(str(raised.exception), "Zenodo deposit error")
    mock_deposit.assert_called_once()

1649 

@patch("crowdsourcing.process_issues.get_open_issues")
@patch("crowdsourcing.process_issues.get_user_id")
@patch("crowdsourcing.process_issues.is_in_safe_list")
@patch("crowdsourcing.process_issues.deposit_on_zenodo")
@patch("crowdsourcing.process_issues.answer")
def test_process_localhost_issue_skips_zenodo(
    self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
):
    """An issue whose title names a localhost domain is answered as a test, not deposited."""
    # Same issue as the fixture, but with a localhost domain in the title.
    test_issue = self.sample_issue.copy()
    test_issue["title"] = "deposit localhost:330 doi:10.1007/s42835-022-01029-y"

    mock_get_issues.return_value = [test_issue]
    mock_user_id.return_value = 12345
    mock_safe_list.return_value = True

    process_open_issues()

    # One answer: valid, worded as a test validation, with the is_test flag set.
    mock_answer.assert_called_once()
    answer_args, answer_kwargs = mock_answer.call_args
    self.assertTrue(answer_args[0])
    self.assertIn("Test deposit validated successfully", answer_args[1])
    self.assertEqual(answer_args[2], "1")
    self.assertTrue(answer_kwargs["is_test"])

    # Test submissions must never reach Zenodo.
    mock_deposit.assert_not_called()

1677 

@patch("crowdsourcing.process_issues.get_open_issues")
@patch("crowdsourcing.process_issues.get_user_id")
@patch("crowdsourcing.process_issues.is_in_safe_list")
@patch("crowdsourcing.process_issues.deposit_on_zenodo")
@patch("crowdsourcing.process_issues.answer")
def test_process_mixed_localhost_and_production_issues(
    self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
):
    """With one localhost and one production issue, only the production one reaches Zenodo."""
    test_issue = {
        **self.sample_issue,
        "title": "deposit localhost:330 doi:10.1007/s42835-022-01029-y",
        "number": "1",
    }
    real_issue = {
        **self.sample_issue,
        "title": "deposit journal.com doi:10.1007/s42835-022-01029-y",
        "number": "2",
    }

    mock_safe_list.return_value = True
    mock_user_id.return_value = 12345
    mock_get_issues.return_value = [test_issue, real_issue]

    process_open_issues()

    # Each issue gets its own answer, regardless of domain.
    self.assertEqual(mock_answer.call_count, 2)

    # A single deposit is made, and it carries only the production entry.
    mock_deposit.assert_called_once()
    deposit_args, _ = mock_deposit.call_args
    payload = deposit_args[0]
    self.assertEqual(len(payload), 1)
    self.assertEqual(payload[0]["data"]["domain"], "journal.com")

1709 

1710 

# Run the whole test module when it is executed directly as a script.
if __name__ == "__main__":  # pragma: no cover
    unittest.main()