Coverage for test/test_process

1#!/usr/bin/python

3# SPDX-FileCopyrightText: 2022-2025 Arcangelo Massari <arcangelo.massari@unibo.it>

5# SPDX-License-Identifier: ISC

7import json

8import os

9import shutil

10import unittest

11from datetime import datetime

12from unittest.mock import MagicMock, patch

14import requests

15import yaml

16from dotenv import load_dotenv

17from crowdsourcing.process_issues import (

18 _create_deposition_resource,

19 _get_zenodo_token,

20 _upload_data,

21 _validate_title,

22 answer,

23 deposit_on_zenodo,

24 get_data_to_store,

25 get_open_issues,

26 get_user_id,

27 is_in_safe_list,

28 process_open_issues,

29 validate,

30)

31from requests.exceptions import RequestException

33load_dotenv() # Load the variables from the .env file

class TestTitleValidation(unittest.TestCase):
    """Tests for ``_validate_title``.

    Every test feeds one issue title to ``_validate_title`` and inspects the
    ``(is_valid, message)`` pair it returns.
    """

    # Fragment expected in the message when the overall title layout is wrong.
    _STRUCTURE_ERROR = "title of the issue was not structured correctly"

    def _assert_accepted(self, title):
        """Assert that *title* is valid and produces an empty message."""
        is_valid, message = _validate_title(title)
        self.assertTrue(is_valid)
        self.assertEqual(message, "")

    def _assert_rejected_containing(self, title, fragment):
        """Assert that *title* is invalid and *fragment* occurs in the message."""
        is_valid, message = _validate_title(title)
        self.assertFalse(is_valid)
        self.assertIn(fragment, message)

    def _assert_rejected_with(self, title, expected_message):
        """Assert that *title* is invalid with exactly *expected_message*."""
        is_valid, message = _validate_title(title)
        self.assertFalse(is_valid)
        self.assertEqual(message, expected_message)

    def test_valid_doi_title(self):
        """Test that a valid DOI title is accepted"""
        self._assert_accepted("deposit journal.com doi:10.1007/s42835-022-01029-y")

    def test_valid_isbn_title(self):
        """Test that a valid ISBN title is accepted"""
        self._assert_accepted("deposit publisher.com isbn:9780134093413")

    def test_missing_deposit_keyword(self):
        """Test that title without 'deposit' keyword is rejected"""
        self._assert_rejected_containing(
            "submit journal.com doi:10.1007/s42835-022-01029-y",
            self._STRUCTURE_ERROR,
        )

    def test_unsupported_identifier(self):
        """Test that unsupported identifier types are rejected"""
        self._assert_rejected_with(
            "deposit journal.com arxiv:2203.01234",
            "The identifier schema 'arxiv' is not supported",
        )

    def test_invalid_doi(self):
        """Test that invalid DOI format is rejected"""
        self._assert_rejected_containing(
            "deposit journal.com doi:invalid-doi-format",
            "is not a valid DOI",
        )

    def test_malformed_title(self):
        """Test that malformed title structure is rejected"""
        # The domain token between "deposit" and the identifier is missing.
        self._assert_rejected_containing(
            "deposit doi:10.1007/s42835-022-01029-y",
            self._STRUCTURE_ERROR,
        )

    def test_unsupported_schema(self):
        """Test that an unsupported identifier schema returns appropriate error"""
        # "issn" is used here as a schema the title validator does not accept.
        self._assert_rejected_with(
            "deposit journal.com issn:1234-5678",
            "The identifier schema 'issn' is not supported",
        )

    def test_valid_temp_id_title(self):
        """Test that a valid temporary ID title is accepted"""
        self._assert_accepted("deposit journal.com temp:12345")

    def test_valid_local_id_title(self):
        """Test that a valid local ID title is accepted"""
        self._assert_accepted("deposit journal.com local:record123")

    def test_invalid_temp_id_format(self):
        """Test that invalid temporary ID format is rejected"""
        # No colon between the schema and the value.
        self._assert_rejected_containing(
            "deposit journal.com temp12345",
            self._STRUCTURE_ERROR,
        )

    def test_invalid_local_id_format(self):
        """Test that invalid local ID format is rejected"""
        # A dot is used where the colon separator is expected.
        self._assert_rejected_containing(
            "deposit journal.com local.record123",
            self._STRUCTURE_ERROR,
        )

114

115

class TestValidation(unittest.TestCase):
    """Tests for ``validate`` and ``get_data_to_store``.

    Each test runs with ``GH_TOKEN`` and ``GITHUB_REPOSITORY`` overridden in
    ``os.environ`` and with two scratch directories (validation output and
    validation reports) created under ``temp_test_dir`` next to this file.
    """

    def setUp(self):
        """Set up test environment before each test"""
        self.test_dir = os.path.join(os.path.dirname(__file__), "temp_test_dir")
        self.validation_output = os.path.join(self.test_dir, "validation_output")
        self.validation_reports = os.path.join(self.test_dir, "validation_reports")

        # Cleanups run even when a later setUp step raises, whereas tearDown
        # would be skipped in that case; register removal before creating.
        self.addCleanup(self._remove_test_dir)
        os.makedirs(self.validation_output, exist_ok=True)
        os.makedirs(self.validation_reports, exist_ok=True)

        self.env_patcher = patch.dict(
            "os.environ",
            {"GH_TOKEN": "fake-token", "GITHUB_REPOSITORY": "test-org/test-repo"},
        )
        self.env_patcher.start()
        # Cleanups run last-in first-out: the environment is restored first,
        # then the scratch directory is deleted.
        self.addCleanup(self.env_patcher.stop)

    def _remove_test_dir(self):
        """Delete the scratch directory tree if it exists."""
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def _run_validate(self, title, body, issue_number):
        """Call ``validate`` against this test's scratch directories.

        Returns the ``(is_valid, message)`` pair produced by ``validate``.
        """
        return validate(
            title,
            body,
            issue_number,
            validation_output_dir=self.validation_output,
            validation_reports_dir=self.validation_reports,
        )

    def _assert_accepted(self, title, body, issue_number):
        """Assert that the issue validates and the thank-you message is returned."""
        is_valid, message = self._run_validate(title, body, issue_number)
        self.assertTrue(is_valid)
        self.assertIn("Thank you for your contribution", message)

    def _assert_empty_body_rejected(self, body, issue_number):
        """Assert that *body* is rejected as empty with a pointer to the README."""
        title = "deposit journal.com doi:10.1162/qss_a_00292"
        is_valid, message = self._run_validate(title, body, issue_number)
        self.assertFalse(is_valid)
        self.assertIn("The issue body cannot be empty", message)
        self.assertIn(
            "https://github.com/opencitations/crowdsourcing/blob/main/README.md",
            message,
        )

    def _assert_html_report_written(self):
        """Assert that a ``validation_*.html`` file exists in the reports dir."""
        report_files = os.listdir(self.validation_reports)
        self.assertTrue(
            any(
                name.startswith("validation_") and name.endswith(".html")
                for name in report_files
            )
        )

    def _assert_report_url_in(self, message):
        """Assert that *message* links to the published validation report."""
        self.assertIn("Please check the detailed validation report:", message)
        self.assertIn(
            "test-org.github.io/test-repo/validation_reports/index.html?report=validation_",
            message,
        )
        self.assertIn(".html", message)

    def _single_final_report(self):
        """Return the name of the only HTML report, asserting there is exactly one."""
        report_files = [
            name for name in os.listdir(self.validation_reports) if name.endswith(".html")
        ]
        self.assertEqual(len(report_files), 1, "Should be exactly one final report")
        final_report = report_files[0]
        self.assertTrue(final_report.startswith("validation_"))
        return final_report

    def _assert_store_fails(self, body, fragment):
        """Assert ``get_data_to_store`` raises ``ValueError`` mentioning *fragment*."""
        title = "deposit journal.com doi:10.1234/test"
        with self.assertRaises(ValueError) as context:
            get_data_to_store(
                title, body, "2024-01-01T00:00:00Z", "https://github.com/test/1", 12345
            )
        self.assertIn(fragment, str(context.exception))

    def test_valid_issue(self):
        """Test that a valid issue with correct title and CSV data is accepted"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","A Study on Electric Properties","Smith, John","2024","Journal of Physics","5","2","100-120","journal article","Test Publisher",""
"doi:10.1007/978-3-662-07918-8_3","Influence of Dielectric Properties, State, and Electrodes on Electric Strength","Ushakov, Vasily Y.","2004","Insulation of High-Voltage Equipment [isbn:9783642058530 isbn:9783662079188]","","","27-82","book chapter","Springer Science and Business Media LLC [crossref:297]",""
"doi:10.1016/0021-9991(73)90147-2","Flux-corrected transport. I. SHASTA, a fluid transport algorithm that works","Boris, Jay P; Book, David L","1973-01","Journal of Computational Physics [issn:0021-9991]","11","1","38-69","journal article","Elsevier BV [crossref:78]",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-662-07918-8_3"
"doi:10.1007/s42835-022-01029-y","doi:10.1016/0021-9991(73)90147-2\""""
        self._assert_accepted(title, body, "123")

    def test_invalid_separator(self):
        """Test that issue with incorrect separator is rejected"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/978-3-662-07918-8_3","Test Title","Test Author","2004","Test Venue","1","1","1-10","journal article","Test Publisher",""
WRONG_SEPARATOR
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-662-07918-8_3\""""
        is_valid, message = self._run_validate(title, body, "124")
        self.assertFalse(is_valid)
        self.assertIn("Please use the separator", message)

    def test_invalid_title_valid_body(self):
        """Test that issue with invalid title but valid body is rejected"""
        title = "invalid title format"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/978-3-662-07918-8_3","Test Title","Test Author","2004","Test Venue","1","1","1-10","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-662-07918-8_3\""""
        is_valid, message = self._run_validate(title, body, "125")
        self.assertFalse(is_valid)
        self.assertIn("title of the issue was not structured correctly", message)

    def test_invalid_csv_structure(self):
        """Test that CSV with wrong column structure returns appropriate error"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"wrong","column","headers"
"data1","data2","data3"
===###===@@@===
"wrong","citation","headers"
"cite1","cite2","cite3"\""""
        is_valid, message = self._run_validate(title, body, "126")
        self.assertFalse(is_valid)
        self.assertIn(
            "Please ensure both metadata and citations are valid CSVs following the required format.",
            message,
        )

    def test_get_data_to_store_valid_input(self):
        """Test get_data_to_store with valid input data"""
        title = "deposit journal.com doi:10.1234/test"
        body = """"id","title"
"1","Test Title"
===###===@@@===
"citing","cited"
"id1","id2"\""""
        created_at = "2024-01-01T00:00:00Z"
        had_primary_source = "https://github.com/test/1"
        user_id = 12345

        result = get_data_to_store(title, body, created_at, had_primary_source, user_id)

        data = result["data"]
        self.assertEqual(data["title"], title)
        self.assertEqual(data["domain"], "journal.com")
        self.assertEqual(len(data["metadata"]), 1)
        self.assertEqual(len(data["citations"]), 1)

        provenance = result["provenance"]
        self.assertEqual(provenance["generatedAtTime"], created_at)
        self.assertEqual(
            provenance["wasAttributedTo"],
            f"https://api.github.com/user/{user_id}",
        )
        self.assertEqual(provenance["hadPrimarySource"], had_primary_source)

    def test_get_data_to_store_invalid_csv(self):
        """Test get_data_to_store with invalid CSV format"""
        # CSV with a single section (the separator is missing).
        body = """"id","title"
"1","Test Title"\""""
        self._assert_store_fails(body, "Failed to process issue data")

    def test_get_data_to_store_empty_sections(self):
        """Test get_data_to_store with empty metadata or citations sections"""
        body = """"id","title"
===###===@@@===
"citing","cited"\""""
        self._assert_store_fails(body, "Empty metadata or citations section")

    def test_get_data_to_store_invalid_separator(self):
        """Test get_data_to_store with invalid separator in body"""
        body = """"id","title"
INVALID_SEPARATOR
"citing","cited"\""""
        self._assert_store_fails(body, "Failed to process issue data")

    # Only archive_manager is replaced: this test calls the ``validate`` name
    # imported at the top of the module, which a patch on
    # ``crowdsourcing.process_issues.validate`` would not rebind.
    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_with_validator(self, mock_archive_manager):
        """Test validation using the oc_validator library"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # CSV with intentional validation errors.
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","","","","","","","","invalid_type","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","invalid_doi\""""

        is_valid, message = self._run_validate(title, body, "127")

        self.assertFalse(is_valid)
        self.assertIn("Validation errors found in", message)
        self.assertIn("metadata and citations", message)

        self._assert_html_report_written()
        mock_archive_manager.add_report.assert_called_once()

    def test_validation_with_metadata_validation_file(self):
        """Test validation when metadata validation file contains errors"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # Invalid metadata CSV with missing required fields
        body = """"wrong_field","another_wrong"
"value1","value2"
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-030-00668-6_8\""""

        is_valid, message = self._run_validate(title, body, "128")

        self.assertFalse(is_valid)
        self.assertIn(
            "Please ensure both metadata and citations are valid CSVs", message
        )
        self.assertIn("check our guide", message)

    def test_validation_with_both_validation_files(self):
        """Test validation when both metadata and citations have validation errors"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # Invalid metadata missing fields and invalid citation identifiers
        body = """"id","title"
"doi:invalid","Test Title"
===###===@@@===
"citing_id","cited_id"
"invalid:123","another:456"\""""

        is_valid, message = self._run_validate(title, body, "129")

        self.assertFalse(is_valid)
        self.assertIn(
            "Please ensure both metadata and citations are valid CSVs", message
        )
        self.assertIn("check our guide", message)

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_reads_validation_files(self, mock_archive_manager):
        """Test that validation properly reads and processes validation files"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # CSV with intentional validation errors.
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","","","","","","","","invalid_type","",""
"doi:10.1162/qss_a_00292","","","","","","","","journal article","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","invalid_doi"
"doi:10.1162/qss_a_00292","doi:10.1007/s42835-022-01029-y"\""""

        is_valid, message = self._run_validate(title, body, "130")

        self.assertFalse(is_valid)
        self.assertIn("Validation errors found in metadata and citations", message)
        self._assert_report_url_in(message)

        self._assert_html_report_written()
        mock_archive_manager.add_report.assert_called_once()

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_html_report_generation(self, mock_archive_manager):
        """Test that HTML validation reports are properly generated when validation fails"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # Invalid data that will fail validation
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"INVALID_DOI","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"INVALID_DOI","doi:10.1007/978-3-030-00668-6_8\""""

        is_valid, message = self._run_validate(title, body, "131")

        self.assertFalse(is_valid)
        self._assert_html_report_written()
        self._assert_report_url_in(message)

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_html_report_generation_only_metadata_errors(
        self, mock_archive_manager
    ):
        """Test HTML report generation when only metadata has validation errors"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # CSV with invalid metadata but valid citations
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","","","","","","","invalid_type","",""
"doi:10.1162/qss_a_00292","Test Title","","","","","","","journal article","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1162/qss_a_00292\""""

        is_valid, message = self._run_validate(title, body, "132")

        self.assertFalse(is_valid)

        final_report = self._single_final_report()
        mock_archive_manager.add_report.assert_called_once_with(
            final_report,
            f"https://test-org.github.io/test-repo/validation_reports/{final_report}",
        )

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_html_report_generation_only_citations_errors(
        self, mock_archive_manager
    ):
        """Test HTML report generation when only citations have validation errors"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        # CSV with valid metadata but invalid citations
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"INVALID_DOI","ANOTHER_INVALID_DOI"\""""

        is_valid, message = self._run_validate(title, body, "133")

        self.assertFalse(is_valid)

        final_report = self._single_final_report()
        mock_archive_manager.add_report.assert_called_once_with(
            final_report,
            f"https://test-org.github.io/test-repo/validation_reports/{final_report}",
        )

    def test_validate_empty_body(self):
        """Test validate() with empty body content"""
        self._assert_empty_body_rejected(None, "134")

    def test_validate_empty_string_body(self):
        """Test validate() with empty string body content"""
        self._assert_empty_body_rejected("", "135")

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_report_issue_number(self, mock_archive_manager):
        """Test that validation report filename contains correct issue number"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","","","","","","","","invalid_type","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","invalid_doi"\""""

        test_issue_number = "42"

        is_valid, message = self._run_validate(title, body, test_issue_number)

        # Validation must fail for a report to be produced.
        self.assertFalse(is_valid)

        # The report file name must embed the issue number.
        expected_prefix = f"validation_issue_{test_issue_number}"
        matching_files = [
            name
            for name in os.listdir(self.validation_reports)
            if name.startswith(expected_prefix)
        ]
        self.assertEqual(
            len(matching_files), 1, "Should find exactly one matching report file"
        )
        self.assertTrue(
            matching_files[0].endswith(".html"), "Report file should be HTML"
        )
        self.assertEqual(
            matching_files[0], f"validation_issue_{test_issue_number}.html"
        )

    def test_valid_temp_ids_in_csv(self):
        """Test that CSV data with temporary IDs is accepted"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"temp:123","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"temp:456","Another Title","Another Author","2024","Test Journal","1","1","11-20","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"temp:123","temp:456\""""
        self._assert_accepted(title, body, "136")

    def test_valid_local_ids_in_csv(self):
        """Test that CSV data with local IDs is accepted"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"local:rec1","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"local:rec2","Another Title","Another Author","2024","Test Journal","1","1","11-20","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"local:rec1","local:rec2\""""
        self._assert_accepted(title, body, "137")

    def test_mixed_identifier_types_in_csv(self):
        """Test that CSV data with mixed identifier types (DOI, temp, local) is accepted"""
        title = "deposit journal.com doi:10.1007/s42835-022-01029-y"
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"temp:123","Another Title","Another Author","2024","Test Journal","1","1","11-20","journal article","Test Publisher",""
"local:rec1","Third Title","Third Author","2024","Test Journal","1","1","21-30","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","temp:123"
"temp:123","local:rec1\""""
        self._assert_accepted(title, body, "138")

684

685

686class TestUserValidation(unittest.TestCase):

687 def setUp(self):

688 """Set up test environment before each test"""

689 # Create a temporary test safe list file

690 self.test_safe_list_path = "test_safe_list.yaml"

691 test_safe_list = {

692 "users": [

693 {"id": 3869247, "name": "Silvio Peroni"},

694 {"id": 42008604, "name": "Arcangelo Massari"},

695 ]

696 }

697 with open(self.test_safe_list_path, "w") as f:

698 yaml.dump(test_safe_list, f)

699

700 # Create patcher to use test file instead of real one

701 self.safe_list_patcher = patch(

702 "crowdsourcing.process_issues.SAFE_LIST_PATH", self.test_safe_list_path

703 )

704 self.safe_list_patcher.start()

705

706 def tearDown(self):

707 """Clean up after each test"""

708 # Remove temporary file

709 if os.path.exists(self.test_safe_list_path):

710 os.remove(self.test_safe_list_path)

711

712 # Stop patcher

713 self.safe_list_patcher.stop()

714

715 def test_get_user_id_real_user(self):

716 """Test getting ID of a real GitHub user"""

717 with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):

718 with patch("requests.get") as mock_get:

719 mock_response = MagicMock()

720 mock_response.status_code = 200

721 mock_response.json.return_value = {"id": 42008604}

722 mock_get.return_value = mock_response

723

724 user_id = get_user_id("arcangelo7")

725 self.assertEqual(user_id, 42008604)

726

727 def test_get_user_id_nonexistent_user(self):

728 """Test getting ID of a nonexistent GitHub user"""

729 with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):

730 with patch("requests.get") as mock_get:

731 mock_response = MagicMock()

732 mock_response.status_code = 404

733 mock_get.return_value = mock_response

734

735 user_id = get_user_id("this_user_definitely_does_not_exist_123456789")

736 self.assertIsNone(user_id)

737

738 def test_is_in_safe_list_authorized(self):

739 """Test that authorized user is in safe list"""

740 self.assertTrue(is_in_safe_list(42008604))

741

742 def test_is_in_safe_list_unauthorized(self):

743 """Test that unauthorized user is not in safe list"""

744 self.assertFalse(is_in_safe_list(99999999))

745

746 def test_is_in_safe_list_file_not_found(self):

747 """Test behavior when safe_list.yaml doesn't exist"""

748 # Remove the test file to simulate missing file

749 if os.path.exists(self.test_safe_list_path):

750 os.remove(self.test_safe_list_path)

751

752 # Test with any user ID - should return False when file is missing

753 result = is_in_safe_list(42008604)

754

755 # Verify result is False

756 self.assertFalse(result)

757

758 # Verify empty file was created with proper structure

759 self.assertTrue(os.path.exists(self.test_safe_list_path))

760 with open(self.test_safe_list_path, "r") as f:

761 content = yaml.safe_load(f)

762 self.assertEqual(content, {"users": []})

763

764 def test_is_in_safe_list_invalid_yaml(self):

765 """Test behavior with invalid YAML file"""

766 with open(self.test_safe_list_path, "w") as f:

767 f.write("invalid: yaml: content: [")

768 self.assertFalse(is_in_safe_list(42008604))

769

770 @patch("requests.get")

771 @patch("time.sleep")

772 @patch("time.time")

773 def test_get_user_id_rate_limit(self, mock_time, mock_sleep, mock_get):

774 """Test rate limit handling in get_user_id"""

775 # Mock current time

776 current_time = 1000000

777 mock_time.return_value = current_time

778

779 # Setup responses

780 rate_limited_response = MagicMock()

781 rate_limited_response.status_code = 403

782 rate_limited_response.headers = {

783 "X-RateLimit-Remaining": "0",

784 "X-RateLimit-Reset": str(current_time + 30), # Reset in 30 seconds

785 }

786

787 success_response = MagicMock()

788 success_response.status_code = 200

789 success_response.json.return_value = {"id": 12345}

790

791 # First call hits rate limit, second call succeeds

792 mock_get.side_effect = [rate_limited_response, success_response]

793

794 with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):

795 user_id = get_user_id("test-user")

796

797 # Verify correct user ID was returned

798 self.assertEqual(user_id, 12345)

799

800 # Verify sleep was called with correct duration

801 mock_sleep.assert_called_once_with(30)

802

803 # Verify correct number of API calls

804 self.assertEqual(mock_get.call_count, 2)

805

806 # Verify API calls were correct

807 for call in mock_get.call_args_list:

808 args, kwargs = call

809 self.assertEqual(args[0], "https://api.github.com/users/test-user")

810 self.assertEqual(kwargs["headers"]["Authorization"], "Bearer fake-token")

811

@patch("requests.get")
@patch("time.sleep")  # patched so the waits between attempts cost no time
def test_get_user_id_connection_error_retry(self, mock_sleep, mock_get):
    """Check that get_user_id retries after connection errors.

    Two ConnectionErrors are followed by a successful response, so three
    requests and two sleeps are expected.
    """
    recovered = MagicMock(status_code=200, json=lambda: {"id": 12345})
    mock_get.side_effect = [
        requests.ConnectionError,
        requests.ConnectionError,
        recovered,
    ]

    with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
        user_id = get_user_id("test-user")

    self.assertEqual(user_id, 12345)
    self.assertEqual(mock_get.call_count, 3)
    self.assertEqual(mock_sleep.call_count, 2)
    # The most recent wait lasted five seconds.
    mock_sleep.assert_called_with(5)

830

@patch("requests.get")
@patch("time.sleep")
def test_get_user_id_all_retries_fail(self, mock_sleep, mock_get):
    """Check that get_user_id returns None once every attempt has failed."""
    # Each of the three attempts raises a connection error.
    mock_get.side_effect = [requests.ConnectionError for _ in range(3)]

    with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
        user_id = get_user_id("test-user")

    self.assertIsNone(user_id)
    self.assertEqual(mock_get.call_count, 3)
    # One sleep is expected per ConnectionError, including the last one.
    self.assertEqual(mock_sleep.call_count, 3)

850

@patch("requests.get")
@patch("time.sleep")
def test_get_user_id_timeout_retry(self, mock_sleep, mock_get):
    """Check that get_user_id retries after read timeouts without sleeping.

    Two ReadTimeouts are followed by a successful response.
    """
    recovered = MagicMock(status_code=200, json=lambda: {"id": 12345})
    mock_get.side_effect = [requests.ReadTimeout, requests.ReadTimeout, recovered]

    with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
        user_id = get_user_id("test-user")

    # The id from the third, successful attempt is returned.
    self.assertEqual(user_id, 12345)
    self.assertEqual(mock_get.call_count, 3)

    # Unlike connection errors, timeouts are expected to trigger no sleep.
    mock_sleep.assert_not_called()

    # Every attempt targets the same endpoint with the fake token.
    for positional, keyword in mock_get.call_args_list:
        self.assertEqual(positional[0], "https://api.github.com/users/test-user")
        self.assertEqual(keyword["headers"]["Authorization"], "Bearer fake-token")

879

880

class TestGitHubAPI(unittest.TestCase):
    """Exercise get_open_issues against a mocked GitHub REST endpoint."""

    def setUp(self):
        """Create a default 200 response, a sample issue payload and fake credentials."""
        self.mock_response = MagicMock(status_code=200)

        # One raw issue, shared by the tests that need a successful payload.
        self.sample_issues = [
            {
                "title": "deposit journal.com doi:10.1234/test",
                "body": "test body",
                "number": 1,
                "user": {"login": "test-user"},
                "created_at": "2024-01-01T00:00:00Z",
                "html_url": "https://github.com/test-org/test-repo/issues/1",
                "labels": [],
            }
        ]

        # Fake credentials and repository for the duration of each test.
        fake_env = {"GH_TOKEN": "fake-token", "GITHUB_REPOSITORY": "test-org/test-repo"}
        self.env_patcher = patch.dict("os.environ", fake_env)
        self.env_patcher.start()

    def tearDown(self):
        """Undo the environment patch installed by setUp."""
        self.env_patcher.stop()

    @staticmethod
    def _quota_exhausted(reset_at):
        """Return a 403 response reporting no remaining quota until *reset_at*."""
        return MagicMock(
            status_code=403,
            headers={
                "X-RateLimit-Remaining": "0",
                "X-RateLimit-Reset": str(reset_at),
            },
        )

    @staticmethod
    def _single_issue_page():
        """Return a 200 response whose JSON body lists one open issue."""
        page = MagicMock(status_code=200)
        page.json.return_value = [
            {
                "title": "deposit Test Issue",
                "body": "Test Body",
                "number": 1,
                "user": {"login": "test-user"},
                "created_at": "2024-01-01T00:00:00Z",
                "html_url": "https://github.com/test/1",
                "labels": [],
            }
        ]
        return page

    @patch("requests.get")
    def test_get_open_issues_success(self, mock_get):
        """A 200 response yields the issue list and a single authenticated request."""
        self.mock_response.json.return_value = self.sample_issues
        mock_get.return_value = self.mock_response

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            issues = get_open_issues()

        self.assertEqual(len(issues), 1)
        self.assertEqual(issues[0]["title"], "deposit journal.com doi:10.1234/test")
        # The numeric issue number from the payload is expected back as a string.
        self.assertEqual(issues[0]["number"], "1")

        # Exactly one request, authenticated and restricted to open issues.
        mock_get.assert_called_once()
        _, request_kwargs = mock_get.call_args
        self.assertEqual(request_kwargs["headers"]["Authorization"], "Bearer fake-token")
        self.assertEqual(request_kwargs["params"]["state"], "open")

    @patch("requests.get")
    def test_get_open_issues_404(self, mock_get):
        """A 404 response yields an empty issue list."""
        self.mock_response.status_code = 404
        mock_get.return_value = self.mock_response

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            issues = get_open_issues()

        self.assertEqual(issues, [])

    @patch("requests.get")
    @patch("time.sleep")
    @patch("time.time")
    def test_rate_limit_retry(self, mock_time, mock_sleep, mock_get):
        """A rate-limited first response is waited out and then retried."""
        # Freeze the clock so the expected wait is exactly 30 seconds.
        now = 1000000
        mock_time.return_value = now

        # Throttled first, successful second.
        mock_get.side_effect = [
            self._quota_exhausted(now + 30),
            self._single_issue_page(),
        ]

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            issues = get_open_issues()

        self.assertEqual(len(issues), 1)
        self.assertEqual(issues[0]["title"], "deposit Test Issue")

        # A single wait lasting until the advertised reset.
        mock_sleep.assert_called_once_with(30)

        # Both requests ask for open issues with the fake token.
        self.assertEqual(mock_get.call_count, 2)
        for _, request_kwargs in mock_get.call_args_list:
            self.assertEqual(request_kwargs["params"]["state"], "open")
            self.assertEqual(
                request_kwargs["headers"]["Authorization"], "Bearer fake-token"
            )

    @patch("requests.get")
    def test_network_error_retry(self, mock_get):
        """Persistent network errors end in a RuntimeError after three attempts."""
        mock_get.side_effect = RequestException("Network error")

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            with self.assertRaises(RuntimeError) as raised:
                get_open_issues()

        self.assertIn("Failed to fetch issues after 3 attempts", str(raised.exception))
        self.assertEqual(mock_get.call_count, 3)

    @patch("requests.get")
    def test_get_open_issues_all_attempts_fail(self, mock_get):
        """Repeated 403s without rate-limit headers yield an empty list, not an error."""
        # Empty headers keep this 403 out of the rate-limit handling path.
        mock_get.return_value = MagicMock(status_code=403, headers={})

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            issues = get_open_issues()

        self.assertEqual(issues, [])
        # Three requests are expected before giving up.
        self.assertEqual(mock_get.call_count, 3)

    @patch("requests.get")
    @patch("time.sleep")
    @patch("time.time")
    def test_rate_limit_already_expired(self, mock_time, mock_sleep, mock_get):
        """A rate limit whose reset time is already past is retried without sleeping."""
        now = 1000000
        mock_time.return_value = now

        # The advertised reset lies 30 seconds in the past.
        mock_get.side_effect = [
            self._quota_exhausted(now - 30),
            self._single_issue_page(),
        ]

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            issues = get_open_issues()

        self.assertEqual(len(issues), 1)
        self.assertEqual(issues[0]["title"], "deposit Test Issue")

        # Nothing to wait for, so no sleep is expected.
        mock_sleep.assert_not_called()

        self.assertEqual(mock_get.call_count, 2)

1072

1073

class TestAnswerFunction(unittest.TestCase):
    """Tests for answer(), checked through the GitHub requests it issues."""

    def setUp(self):
        """Record the expected endpoint and headers, and install fake credentials."""
        self.base_url = "https://api.github.com/repos/test-org/test-repo/issues"
        self.headers = {
            "Accept": "application/vnd.github+json",
            "Authorization": "Bearer fake-token",
            "X-GitHub-Api-Version": "2022-11-28",
        }
        self.issue_number = "123"

        fake_env = {"GH_TOKEN": "fake-token", "GITHUB_REPOSITORY": "test-org/test-repo"}
        self.env_patcher = patch.dict("os.environ", fake_env)
        self.env_patcher.start()

    def tearDown(self):
        """Undo the environment patch installed by setUp."""
        self.env_patcher.stop()

    def _assert_posted(self, mock_post, resource, payload):
        """Assert that *payload* was POSTed to the issue's *resource* sub-endpoint."""
        mock_post.assert_any_call(
            f"{self.base_url}/{self.issue_number}/{resource}",
            headers=self.headers,
            json=payload,
            timeout=30,
        )

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_valid_authorized(self, mock_patch, mock_post):
        """A valid, authorized issue is labelled, commented on and closed."""
        mock_post.return_value.status_code = 201
        mock_patch.return_value.status_code = 200

        answer(
            is_valid=True,
            message="Thank you for your contribution!",
            issue_number=self.issue_number,
            is_authorized=True,
        )

        # Label request.
        self._assert_posted(mock_post, "labels", {"labels": ["to be processed"]})

        # Comment request.
        self._assert_posted(
            mock_post, "comments", {"body": "Thank you for your contribution!"}
        )

        # The issue is closed with a single PATCH.
        mock_patch.assert_called_once_with(
            f"{self.base_url}/{self.issue_number}",
            headers=self.headers,
            json={"state": "closed"},
            timeout=30,
        )

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_invalid_authorized(self, mock_patch, mock_post):
        """An invalid issue from an authorized user gets the 'invalid' label."""
        answer(
            is_valid=False,
            message="Invalid format",
            issue_number=self.issue_number,
            is_authorized=True,
        )

        self._assert_posted(mock_post, "labels", {"labels": ["invalid"]})

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_unauthorized(self, mock_patch, mock_post):
        """An issue from an unauthorized user gets the 'rejected' label."""
        answer(
            is_valid=False,
            message="Unauthorized user",
            issue_number=self.issue_number,
            is_authorized=False,
        )

        self._assert_posted(mock_post, "labels", {"labels": ["rejected"]})

    @patch("requests.post")
    def test_answer_label_error(self, mock_post):
        """A request error while adding the label propagates to the caller."""
        mock_post.side_effect = RequestException("Network error")

        with self.assertRaises(RequestException):
            answer(
                is_valid=True,
                message="Test message",
                issue_number=self.issue_number,
            )

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_comment_error(self, mock_patch, mock_post):
        """A request error while adding the comment propagates to the caller."""
        # The label POST succeeds; the comment POST that follows raises.
        label_created = MagicMock(status_code=201)
        mock_post.side_effect = [label_created, RequestException("Network error")]

        with self.assertRaises(RequestException):
            answer(
                is_valid=True,
                message="Test message",
                issue_number=self.issue_number,
            )

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_close_error(self, mock_patch, mock_post):
        """A request error while closing the issue propagates to the caller."""
        mock_post.return_value = MagicMock(status_code=201)
        mock_patch.side_effect = RequestException("Network error")

        with self.assertRaises(RequestException):
            answer(
                is_valid=True,
                message="Test message",
                issue_number=self.issue_number,
            )

1218

1219

class TestZenodoDeposit(unittest.TestCase):
    """Tests for the Zenodo deposit helpers and for deposit_on_zenodo."""

    def setUp(self):
        """Install fake Zenodo tokens (development environment) and sample data."""
        self.env_patcher = patch.dict(
            "os.environ",
            {
                "ZENODO_SANDBOX": "fake-sandbox-token",
                "ZENODO_PRODUCTION": "fake-prod-token",
                "ENVIRONMENT": "development",
            },
        )
        self.env_patcher.start()

        # Minimal deposit payload: one record with its provenance.
        self.test_data = [
            {
                "data": {
                    "title": "test deposit",
                    "metadata": [{"id": "1", "title": "Test"}],
                    "citations": [{"citing": "1", "cited": "2"}],
                },
                "provenance": {
                    "generatedAtTime": "2024-01-01T00:00:00Z",
                    "wasAttributedTo": 12345,
                    "hadPrimarySource": "https://github.com/test/1",
                },
            }
        ]

    def tearDown(self):
        """Restore the environment and delete any leftover data file."""
        self.env_patcher.stop()
        if os.path.exists("data_to_store.json"):
            os.remove("data_to_store.json")

    @patch("requests.post")
    def test_create_deposition_resource(self, mock_post):
        """Creating a deposition returns its id and bucket URL."""
        mock_response = MagicMock()
        mock_response.json.return_value = {
            "id": "12345",
            "links": {"bucket": "https://sandbox.zenodo.org/api/bucket/12345"},
        }
        mock_post.return_value = mock_response

        deposition_id, bucket = _create_deposition_resource(
            "2024-01-01", base_url="https://sandbox.zenodo.org/api"
        )

        self.assertEqual(deposition_id, "12345")
        self.assertEqual(bucket, "https://sandbox.zenodo.org/api/bucket/12345")

        # A single POST carrying the sandbox token, JSON headers and a timeout.
        mock_post.assert_called_once()
        args, kwargs = mock_post.call_args

        self.assertEqual(kwargs["params"], {"access_token": "fake-sandbox-token"})
        self.assertEqual(kwargs["headers"], {"Content-Type": "application/json"})
        self.assertEqual(kwargs["timeout"], 30)

    @patch("requests.put")
    def test_upload_data(self, mock_put):
        """Uploading PUTs the data file to the bucket under a dated file name."""
        mock_put.return_value.status_code = 200
        mock_put.return_value.raise_for_status = lambda: None

        # The upload reads this file from the working directory.
        with open("data_to_store.json", "w", encoding="utf-8") as f:
            json.dump({"test": "data"}, f)

        _upload_data(
            "2024-01-01",
            "https://sandbox.zenodo.org/api/bucket/12345",
            base_url="https://sandbox.zenodo.org/api",
        )

        mock_put.assert_called_once()
        args, kwargs = mock_put.call_args

        self.assertEqual(
            args[0],
            "https://sandbox.zenodo.org/api/bucket/12345/2024-01-01_weekly_deposit.json",
        )
        self.assertEqual(kwargs["params"], {"access_token": "fake-sandbox-token"})
        self.assertEqual(kwargs["timeout"], 30)

    @patch("crowdsourcing.process_issues._create_deposition_resource")
    @patch("crowdsourcing.process_issues._upload_data")
    @patch("requests.post")
    def test_deposit_on_zenodo(self, mock_post, mock_upload, mock_create):
        """The full deposit creates a resource, uploads the data and publishes it."""
        mock_create.return_value = (
            "12345",
            "https://sandbox.zenodo.org/api/bucket/12345",
        )
        mock_post.return_value.status_code = 202  # status expected for publish
        mock_post.return_value.text = ""  # keeps a MagicMock out of any error text

        # Bracket the call with two date readings so the test cannot fail
        # when the run happens to straddle midnight.
        date_before = datetime.now().strftime("%Y-%m-%d")
        deposit_on_zenodo(self.test_data)
        date_after = datetime.now().strftime("%Y-%m-%d")
        acceptable_dates = (date_before, date_after)

        # The deposition is created once, dated today, against the sandbox.
        mock_create.assert_called_once()
        create_date = mock_create.call_args[0][0]
        self.assertIn(create_date, acceptable_dates)
        mock_create.assert_called_once_with(
            create_date,
            base_url="https://sandbox.zenodo.org/api",
        )

        # The data is uploaded once, dated today, to the returned bucket.
        mock_upload.assert_called_once()
        upload_date = mock_upload.call_args[0][0]
        self.assertIn(upload_date, acceptable_dates)
        mock_upload.assert_called_once_with(
            upload_date,
            "https://sandbox.zenodo.org/api/bucket/12345",
            base_url="https://sandbox.zenodo.org/api",
        )

        # Creation is mocked, so the only POST is the publish request.
        mock_post.assert_called_once()
        args, kwargs = mock_post.call_args
        self.assertEqual(
            args[0],
            "https://sandbox.zenodo.org/api/deposit/depositions/12345/actions/publish",
        )
        self.assertEqual(kwargs["params"], {"access_token": "fake-sandbox-token"})
        self.assertEqual(kwargs["timeout"], 30)

        # The temporary data file must be gone afterwards.
        self.assertFalse(os.path.exists("data_to_store.json"))

    @patch("requests.post")
    def test_create_deposition_resource_error(self, mock_post):
        """A request error during deposition creation propagates."""
        mock_post.side_effect = requests.RequestException("API Error")

        with self.assertRaises(requests.RequestException):
            _create_deposition_resource("2024-01-01")

    @patch("requests.put")
    def test_upload_data_error(self, mock_put):
        """A request error during upload propagates."""
        mock_put.side_effect = requests.RequestException("Upload Error")

        with open("data_to_store.json", "w", encoding="utf-8") as f:
            json.dump({"test": "data"}, f)

        with self.assertRaises(requests.RequestException):
            _upload_data("2024-01-01", "https://zenodo.org/api/bucket/12345")

    @patch("crowdsourcing.process_issues._create_deposition_resource")
    def test_deposit_on_zenodo_create_error(self, mock_create):
        """A creation error propagates and the data file is still cleaned up."""
        mock_create.side_effect = requests.RequestException("Creation Error")

        with self.assertRaises(requests.RequestException):
            deposit_on_zenodo(self.test_data)

        self.assertFalse(os.path.exists("data_to_store.json"))

    def test_deposit_development_environment(self):
        """In the development environment the sandbox host is used."""
        with patch("requests.post") as mock_post, patch("requests.put") as mock_put:
            # One response object answers every POST (create and publish):
            # it carries the creation payload and the 202 status used for publish.
            mock_post.return_value.json.return_value = {
                "id": "12345",
                "links": {"bucket": "https://sandbox.zenodo.org/api/bucket/12345"},
            }
            mock_post.return_value.status_code = 202

            # Upload succeeds.
            mock_put.return_value.status_code = 200
            mock_put.return_value.raise_for_status = lambda: None

            deposit_on_zenodo(self.test_data)

            # At least one POST must have gone to the sandbox host.
            calls = mock_post.call_args_list
            self.assertTrue(any("sandbox.zenodo.org" in call[0][0] for call in calls))

    def test_deposit_production_environment(self):
        """In the production environment no request goes to the sandbox host."""
        with patch.dict("os.environ", {"ENVIRONMENT": "production"}):
            with patch("requests.post") as mock_post, patch("requests.put") as mock_put:
                # One response object answers every POST (create and publish):
                # it carries the creation payload and the 202 status used for publish.
                mock_post.return_value.json.return_value = {
                    "id": "12345",
                    "links": {"bucket": "https://zenodo.org/api/bucket/12345"},
                }
                mock_post.return_value.status_code = 202

                # Upload succeeds.
                mock_put.return_value.status_code = 200
                mock_put.return_value.raise_for_status = lambda: None

                deposit_on_zenodo(self.test_data)

                # No POST may target the sandbox host.
                calls = mock_post.call_args_list
                self.assertTrue(
                    all("sandbox.zenodo.org" not in call[0][0] for call in calls)
                )

    def test_get_zenodo_token_development(self):
        """The sandbox token is returned in the development environment."""
        token = _get_zenodo_token()
        self.assertEqual(token, "fake-sandbox-token")

    def test_get_zenodo_token_production(self):
        """The production token is returned in the production environment."""
        with patch.dict("os.environ", {"ENVIRONMENT": "production"}):
            token = _get_zenodo_token()
            self.assertEqual(token, "fake-prod-token")

    def test_get_zenodo_token_missing(self):
        """An empty sandbox token raises ValueError naming ZENODO_SANDBOX."""
        with patch.dict(
            "os.environ", {"ZENODO_SANDBOX": "", "ENVIRONMENT": "development"}
        ):
            with self.assertRaises(ValueError) as context:
                _get_zenodo_token()
            self.assertIn("ZENODO_SANDBOX token not found", str(context.exception))

    def test_get_zenodo_token_missing_production(self):
        """An empty production token raises ValueError naming ZENODO_PRODUCTION."""
        with patch.dict(
            "os.environ",
            {
                "ENVIRONMENT": "production",
                "ZENODO_PRODUCTION": "",  # missing token
            },
        ):
            with self.assertRaises(ValueError) as context:
                _get_zenodo_token()
            self.assertIn("ZENODO_PRODUCTION token not found", str(context.exception))

    @patch("crowdsourcing.process_issues._create_deposition_resource")
    @patch("crowdsourcing.process_issues._upload_data")
    @patch("requests.post")
    def test_deposit_on_zenodo_publish_error(self, mock_post, mock_upload, mock_create):
        """A failed publish raises with the response text and still cleans up."""
        mock_create.return_value = (
            "12345",
            "https://sandbox.zenodo.org/api/bucket/12345",
        )
        mock_post.return_value.status_code = 400  # simulate a publication error
        mock_post.return_value.text = "Publication failed"

        with self.assertRaises(Exception) as context:
            deposit_on_zenodo(self.test_data)

        self.assertEqual(
            str(context.exception), "Failed to publish deposition: Publication failed"
        )

        # Cleanup must happen even after the error.
        self.assertFalse(os.path.exists("data_to_store.json"))

1481

1482

class TestProcessOpenIssues(unittest.TestCase):
    """Tests for process_open_issues with its collaborators mocked out."""

    def setUp(self):
        """Install fake tokens and build a well-formed sample issue."""
        fake_env = {"GH_TOKEN": "fake-gh-token", "ZENODO": "fake-zenodo-token"}
        self.env_patcher = patch.dict("os.environ", fake_env)
        self.env_patcher.start()

        # Issue whose body holds a metadata CSV and a citations CSV, split by
        # the separator line; the title carries a DOI.
        self.sample_issue = {
            "title": "deposit journal.com doi:10.1007/s42835-022-01029-y",
            "body": """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"doi:10.1007/978-3-030-00668-6_8","Cited Paper","Another Author","2024","Another Journal","2","2","20-30","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-030-00668-6_8\"""",
            "number": "1",
            "author": {"login": "test-user"},
            "createdAt": "2024-01-01T00:00:00Z",
            "url": "https://github.com/test/1",
        }

    def tearDown(self):
        """Undo the environment patch installed by setUp."""
        self.env_patcher.stop()

    @staticmethod
    def _prime(mock_get_issues, mock_user_id, mock_safe_list, issues, trusted):
        """Feed *issues* to the run and make user 12345 trusted or not."""
        mock_get_issues.return_value = issues
        mock_user_id.return_value = 12345
        mock_safe_list.return_value = trusted

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    @patch("crowdsourcing.process_issues.answer")
    def test_process_valid_authorized_issue(
        self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
    ):
        """A valid issue from a trusted user is answered positively and deposited."""
        self._prime(
            mock_get_issues, mock_user_id, mock_safe_list, [self.sample_issue], True
        )

        process_open_issues()

        # The author is resolved to an id and checked against the safe list.
        mock_user_id.assert_called_once_with("test-user")
        mock_safe_list.assert_called_once_with(12345)

        # One positive, authorized answer for issue "1".
        mock_answer.assert_called_once()
        answer_args, answer_kwargs = mock_answer.call_args
        self.assertTrue(answer_args[0])  # is_valid
        self.assertIn("Thank you", answer_args[1])  # message
        self.assertEqual(answer_args[2], "1")  # issue_number
        self.assertTrue(answer_kwargs["is_authorized"])

        # One deposit whose first record keeps the title and attributes the user.
        mock_deposit.assert_called_once()
        deposit_args, _ = mock_deposit.call_args
        first_record = deposit_args[0][0]
        self.assertEqual(first_record["data"]["title"], self.sample_issue["title"])
        self.assertEqual(
            first_record["provenance"]["wasAttributedTo"],
            f"https://api.github.com/user/{12345}",
        )

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    @patch("crowdsourcing.process_issues.answer")
    def test_process_unauthorized_user(
        self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
    ):
        """An issue from a user outside the safe list is rejected and not deposited."""
        self._prime(
            mock_get_issues, mock_user_id, mock_safe_list, [self.sample_issue], False
        )

        process_open_issues()

        # The user is looked up and checked, but is not authorized.
        mock_user_id.assert_called_once_with("test-user")
        mock_safe_list.assert_called_once_with(12345)

        # One negative, unauthorized answer pointing to registration.
        mock_answer.assert_called_once()
        answer_args, answer_kwargs = mock_answer.call_args
        self.assertFalse(answer_args[0])  # is_valid
        self.assertIn("register as a trusted user", answer_args[1])  # message
        self.assertEqual(answer_args[2], "1")  # issue_number
        self.assertFalse(answer_kwargs["is_authorized"])

        mock_deposit.assert_not_called()

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.validate")
    @patch("crowdsourcing.process_issues.get_data_to_store")
    @patch("crowdsourcing.process_issues.answer")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    def test_process_open_issues_data_processing_error(
        self,
        mock_deposit,
        mock_answer,
        mock_get_data,
        mock_validate,
        mock_safe_list,
        mock_user_id,
        mock_get_issues,
    ):
        """A get_data_to_store failure is absorbed and nothing is deposited."""
        self._prime(
            mock_get_issues, mock_user_id, mock_safe_list, [self.sample_issue], True
        )
        mock_validate.return_value = (True, "Valid data")
        mock_get_data.side_effect = Exception("Data processing error")

        # Must not raise despite the failing data extraction.
        process_open_issues()

        mock_get_data.assert_called_once()
        mock_answer.assert_called_once()
        # No record was produced, so no deposit is attempted.
        mock_deposit.assert_not_called()

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.validate")
    @patch("crowdsourcing.process_issues.get_data_to_store")
    @patch("crowdsourcing.process_issues.answer")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    def test_process_open_issues_zenodo_deposit_error(
        self,
        mock_deposit,
        mock_answer,
        mock_get_data,
        mock_validate,
        mock_safe_list,
        mock_user_id,
        mock_get_issues,
    ):
        """A Zenodo deposit failure is re-raised to the caller."""
        self._prime(
            mock_get_issues, mock_user_id, mock_safe_list, [self.sample_issue], True
        )
        mock_validate.return_value = (True, "Valid data")
        mock_get_data.return_value = {"test": "data"}
        mock_deposit.side_effect = Exception("Zenodo deposit error")

        with self.assertRaises(Exception) as raised:
            process_open_issues()

        self.assertEqual(str(raised.exception), "Zenodo deposit error")
        mock_deposit.assert_called_once()

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    @patch("crowdsourcing.process_issues.answer")
    def test_process_localhost_issue_skips_zenodo(
        self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
    ):
        """An issue whose domain is localhost is answered as a test and not deposited."""
        localhost_issue = {
            **self.sample_issue,
            "title": "deposit localhost:330 doi:10.1007/s42835-022-01029-y",
        }
        self._prime(
            mock_get_issues, mock_user_id, mock_safe_list, [localhost_issue], True
        )

        process_open_issues()

        mock_answer.assert_called_once()
        answer_args, answer_kwargs = mock_answer.call_args
        self.assertTrue(answer_args[0])  # is_valid
        # The message marks the deposit as a test.
        self.assertIn("Test deposit validated successfully", answer_args[1])
        self.assertEqual(answer_args[2], "1")  # issue_number
        self.assertTrue(answer_kwargs["is_test"])

        mock_deposit.assert_not_called()

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    @patch("crowdsourcing.process_issues.answer")
    def test_process_mixed_localhost_and_production_issues(
        self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
    ):
        """With one localhost and one production issue, only the latter is deposited."""
        localhost_issue = {
            **self.sample_issue,
            "title": "deposit localhost:330 doi:10.1007/s42835-022-01029-y",
            "number": "1",
        }
        production_issue = {
            **self.sample_issue,
            "title": "deposit journal.com doi:10.1007/s42835-022-01029-y",
            "number": "2",
        }
        self._prime(
            mock_get_issues,
            mock_user_id,
            mock_safe_list,
            [localhost_issue, production_issue],
            True,
        )

        process_open_issues()

        # Both issues receive an answer.
        self.assertEqual(mock_answer.call_count, 2)

        # A single deposit containing only the production record.
        mock_deposit.assert_called_once()
        deposited_records = mock_deposit.call_args[0][0]
        self.assertEqual(len(deposited_records), 1)
        self.assertEqual(deposited_records[0]["data"]["domain"], "journal.com")

1709

1710

# Run the whole suite when this module is executed directly as a script.
if __name__ == "__main__":  # pragma: no cover
    unittest.main()

Coverage for test / test_process_issues.py: 100%

736 statements