Coverage for test / test_process_issues.py: 100%
736 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-21 14:31 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-21 14:31 +0000
1#!/usr/bin/python
3# SPDX-FileCopyrightText: 2022-2025 Arcangelo Massari <arcangelo.massari@unibo.it>
4#
5# SPDX-License-Identifier: ISC
7import json
8import os
9import shutil
10import unittest
11from datetime import datetime
12from unittest.mock import MagicMock, patch
14import requests
15import yaml
16from dotenv import load_dotenv
17from crowdsourcing.process_issues import (
18 _create_deposition_resource,
19 _get_zenodo_token,
20 _upload_data,
21 _validate_title,
22 answer,
23 deposit_on_zenodo,
24 get_data_to_store,
25 get_open_issues,
26 get_user_id,
27 is_in_safe_list,
28 process_open_issues,
29 validate,
30)
31from requests.exceptions import RequestException
33load_dotenv() # Carica le variabili dal file .env
class TestTitleValidation(unittest.TestCase):
    """Exercise ``_validate_title`` with well-formed and malformed issue titles.

    ``_validate_title`` is called with the issue title and its result is
    unpacked as an ``(is_valid, message)`` pair; accepted titles are expected
    to come back with an empty message.
    """

    # Fragment of the error message expected for structurally wrong titles.
    MALFORMED_FRAGMENT = "title of the issue was not structured correctly"

    def _assert_accepted(self, title):
        """Assert that *title* validates and yields an empty message."""
        is_valid, message = _validate_title(title)
        self.assertTrue(is_valid)
        self.assertEqual(message, "")

    def _assert_rejected_with(self, title, expected_message):
        """Assert that *title* is rejected with exactly *expected_message*."""
        is_valid, message = _validate_title(title)
        self.assertFalse(is_valid)
        self.assertEqual(message, expected_message)

    def _assert_rejected_containing(self, title, fragment):
        """Assert that *title* is rejected and the message contains *fragment*."""
        is_valid, message = _validate_title(title)
        self.assertFalse(is_valid)
        self.assertIn(fragment, message)

    def test_valid_doi_title(self):
        """Test that a valid DOI title is accepted"""
        self._assert_accepted("deposit journal.com doi:10.1007/s42835-022-01029-y")

    def test_valid_isbn_title(self):
        """Test that a valid ISBN title is accepted"""
        self._assert_accepted("deposit publisher.com isbn:9780134093413")

    def test_missing_deposit_keyword(self):
        """Test that title without 'deposit' keyword is rejected"""
        self._assert_rejected_containing(
            "submit journal.com doi:10.1007/s42835-022-01029-y",
            self.MALFORMED_FRAGMENT,
        )

    def test_unsupported_identifier(self):
        """Test that unsupported identifier types are rejected"""
        self._assert_rejected_with(
            "deposit journal.com arxiv:2203.01234",
            "The identifier schema 'arxiv' is not supported",
        )

    def test_invalid_doi(self):
        """Test that invalid DOI format is rejected"""
        self._assert_rejected_containing(
            "deposit journal.com doi:invalid-doi-format",
            "is not a valid DOI",
        )

    def test_malformed_title(self):
        """Test that malformed title structure is rejected"""
        # The domain component is missing from this title.
        self._assert_rejected_containing(
            "deposit doi:10.1007/s42835-022-01029-y",
            self.MALFORMED_FRAGMENT,
        )

    def test_unsupported_schema(self):
        """Test that an unsupported identifier schema returns appropriate error"""
        # The test expects "issn" to be reported as an unsupported schema.
        self._assert_rejected_with(
            "deposit journal.com issn:1234-5678",
            "The identifier schema 'issn' is not supported",
        )

    def test_valid_temp_id_title(self):
        """Test that a valid temporary ID title is accepted"""
        self._assert_accepted("deposit journal.com temp:12345")

    def test_valid_local_id_title(self):
        """Test that a valid local ID title is accepted"""
        self._assert_accepted("deposit journal.com local:record123")

    def test_invalid_temp_id_format(self):
        """Test that invalid temporary ID format is rejected"""
        # Missing colon between schema and value.
        self._assert_rejected_containing(
            "deposit journal.com temp12345",
            self.MALFORMED_FRAGMENT,
        )

    def test_invalid_local_id_format(self):
        """Test that invalid local ID format is rejected"""
        # Wrong separator: "." instead of ":".
        self._assert_rejected_containing(
            "deposit journal.com local.record123",
            self.MALFORMED_FRAGMENT,
        )
class TestValidation(unittest.TestCase):
    """Tests for ``validate`` and ``get_data_to_store``.

    Each test works inside a throw-away directory created next to this file;
    ``validate`` is pointed at its ``validation_output`` and
    ``validation_reports`` sub-directories. ``GH_TOKEN`` and
    ``GITHUB_REPOSITORY`` are patched into ``os.environ`` for the duration of
    every test.
    """

    # Title used by most tests: structurally valid, DOI identifier.
    DOI_TITLE = "deposit journal.com doi:10.1007/s42835-022-01029-y"

    def setUp(self):
        """Set up test environment before each test"""
        self.test_dir = os.path.join(os.path.dirname(__file__), "temp_test_dir")
        self.validation_output = os.path.join(self.test_dir, "validation_output")
        self.validation_reports = os.path.join(self.test_dir, "validation_reports")

        # Register the directory cleanup first so it runs even when a later
        # setUp step fails (tearDown is not invoked in that case).
        self.addCleanup(self._remove_test_dir)
        os.makedirs(self.validation_output, exist_ok=True)
        os.makedirs(self.validation_reports, exist_ok=True)

        # Setup environment variables
        self.env_patcher = patch.dict(
            "os.environ",
            {"GH_TOKEN": "fake-token", "GITHUB_REPOSITORY": "test-org/test-repo"},
        )
        self.env_patcher.start()
        # Cleanups run in LIFO order: the environment is restored before the
        # directory is removed, as in the previous tearDown.
        self.addCleanup(self.env_patcher.stop)

    def _remove_test_dir(self):
        """Delete the temporary test directory if it exists."""
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def _validate(self, title, body, issue_number):
        """Call ``validate`` with this test's output and report directories."""
        return validate(
            title,
            body,
            issue_number,
            validation_output_dir=self.validation_output,
            validation_reports_dir=self.validation_reports,
        )

    def _assert_report_generated(self):
        """Assert that at least one ``validation_*.html`` report was written."""
        report_files = os.listdir(self.validation_reports)
        self.assertTrue(
            any(
                name.startswith("validation_") and name.endswith(".html")
                for name in report_files
            )
        )

    def _single_html_report(self):
        """Return the name of the one HTML report, asserting there is exactly one."""
        report_files = [
            name for name in os.listdir(self.validation_reports) if name.endswith(".html")
        ]
        self.assertEqual(len(report_files), 1, "Should be exactly one final report")
        final_report = report_files[0]
        self.assertTrue(final_report.startswith("validation_"))
        return final_report

    def _assert_report_link_in(self, message):
        """Assert that *message* points the user to the generated HTML report."""
        self.assertIn("Please check the detailed validation report:", message)
        self.assertIn(
            "test-org.github.io/test-repo/validation_reports/index.html?report=validation_",
            message,
        )
        self.assertIn(".html", message)

    def _assert_empty_body_rejected(self, body, issue_number):
        """Assert that *body* is rejected as empty with a link to the README."""
        is_valid, message = self._validate(
            "deposit journal.com doi:10.1162/qss_a_00292", body, issue_number
        )
        self.assertFalse(is_valid)
        self.assertIn("The issue body cannot be empty", message)
        self.assertIn(
            "https://github.com/opencitations/crowdsourcing/blob/main/README.md",
            message,
        )

    def test_valid_issue(self):
        """Test that a valid issue with correct title and CSV data is accepted"""
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","A Study on Electric Properties","Smith, John","2024","Journal of Physics","5","2","100-120","journal article","Test Publisher",""
"doi:10.1007/978-3-662-07918-8_3","Influence of Dielectric Properties, State, and Electrodes on Electric Strength","Ushakov, Vasily Y.","2004","Insulation of High-Voltage Equipment [isbn:9783642058530 isbn:9783662079188]","","","27-82","book chapter","Springer Science and Business Media LLC [crossref:297]",""
"doi:10.1016/0021-9991(73)90147-2","Flux-corrected transport. I. SHASTA, a fluid transport algorithm that works","Boris, Jay P; Book, David L","1973-01","Journal of Computational Physics [issn:0021-9991]","11","1","38-69","journal article","Elsevier BV [crossref:78]",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-662-07918-8_3"
"doi:10.1007/s42835-022-01029-y","doi:10.1016/0021-9991(73)90147-2\""""
        is_valid, message = self._validate(self.DOI_TITLE, body, "123")
        self.assertTrue(is_valid)
        self.assertIn("Thank you for your contribution", message)

    def test_invalid_separator(self):
        """Test that issue with incorrect separator is rejected"""
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/978-3-662-07918-8_3","Test Title","Test Author","2004","Test Venue","1","1","1-10","journal article","Test Publisher",""
WRONG_SEPARATOR
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-662-07918-8_3\""""
        is_valid, message = self._validate(self.DOI_TITLE, body, "124")
        self.assertFalse(is_valid)
        self.assertIn("Please use the separator", message)

    def test_invalid_title_valid_body(self):
        """Test that issue with invalid title but valid body is rejected"""
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/978-3-662-07918-8_3","Test Title","Test Author","2004","Test Venue","1","1","1-10","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-662-07918-8_3\""""
        is_valid, message = self._validate("invalid title format", body, "125")
        self.assertFalse(is_valid)
        self.assertIn("title of the issue was not structured correctly", message)

    def test_invalid_csv_structure(self):
        """Test that CSV with wrong column structure returns appropriate error"""
        body = """"wrong","column","headers"
"data1","data2","data3"
===###===@@@===
"wrong","citation","headers"
"cite1","cite2","cite3"\""""
        is_valid, message = self._validate(self.DOI_TITLE, body, "126")
        self.assertFalse(is_valid)
        self.assertIn(
            "Please ensure both metadata and citations are valid CSVs following the required format.",
            message,
        )

    def test_get_data_to_store_valid_input(self):
        """Test get_data_to_store with valid input data"""
        title = "deposit journal.com doi:10.1234/test"
        body = """"id","title"
"1","Test Title"
===###===@@@===
"citing","cited"
"id1","id2"\""""
        created_at = "2024-01-01T00:00:00Z"
        had_primary_source = "https://github.com/test/1"
        user_id = 12345

        result = get_data_to_store(title, body, created_at, had_primary_source, user_id)

        data = result["data"]
        provenance = result["provenance"]
        self.assertEqual(data["title"], title)
        self.assertEqual(data["domain"], "journal.com")
        self.assertEqual(len(data["metadata"]), 1)
        self.assertEqual(len(data["citations"]), 1)
        self.assertEqual(provenance["generatedAtTime"], created_at)
        self.assertEqual(
            provenance["wasAttributedTo"],
            f"https://api.github.com/user/{user_id}",
        )
        self.assertEqual(provenance["hadPrimarySource"], had_primary_source)

    def test_get_data_to_store_invalid_csv(self):
        """Test get_data_to_store with invalid CSV format"""
        title = "deposit journal.com doi:10.1234/test"
        # CSV with a single section (the separator is missing)
        body = """"id","title"
"1","Test Title"\""""

        with self.assertRaises(ValueError) as context:
            get_data_to_store(
                title, body, "2024-01-01T00:00:00Z", "https://github.com/test/1", 12345
            )

        # Check that the error carries the expected message
        self.assertIn("Failed to process issue data", str(context.exception))

    def test_get_data_to_store_empty_sections(self):
        """Test get_data_to_store with empty metadata or citations sections"""
        title = "deposit journal.com doi:10.1234/test"
        body = """"id","title"
===###===@@@===
"citing","cited"\""""

        with self.assertRaises(ValueError) as context:
            get_data_to_store(
                title, body, "2024-01-01T00:00:00Z", "https://github.com/test/1", 12345
            )

        self.assertIn("Empty metadata or citations section", str(context.exception))

    def test_get_data_to_store_invalid_separator(self):
        """Test get_data_to_store with invalid separator in body"""
        title = "deposit journal.com doi:10.1234/test"
        body = """"id","title"
INVALID_SEPARATOR
"citing","cited"\""""

        with self.assertRaises(ValueError) as context:
            get_data_to_store(
                title, body, "2024-01-01T00:00:00Z", "https://github.com/test/1", 12345
            )

        self.assertIn("Failed to process issue data", str(context.exception))

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.validate")
    @patch("crowdsourcing.process_issues.get_data_to_store")
    @patch("crowdsourcing.process_issues.answer")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_with_validator(self, mock_archive_manager, *args):
        """Test validation using the oc_validator library"""
        # Only the archive_manager mock is inspected; the mocks created by
        # the other decorators arrive through *args and are not used here.
        # CSV with intentional validation errors
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","","","","","","","","invalid_type","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","invalid_doi\""""

        is_valid, message = self._validate(self.DOI_TITLE, body, "127")

        # Verify validation failed
        self.assertFalse(is_valid)
        self.assertIn("Validation errors found in", message)
        self.assertIn("metadata and citations", message)

        # Verify final report was generated in validation_reports
        self._assert_report_generated()

        # Verify archive_manager.add_report was called
        mock_archive_manager.add_report.assert_called_once()

    def test_validation_with_metadata_validation_file(self):
        """Test validation when metadata validation file contains errors"""
        # Invalid metadata CSV with missing required fields
        body = """"wrong_field","another_wrong"
"value1","value2"
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-030-00668-6_8\""""

        is_valid, message = self._validate(self.DOI_TITLE, body, "128")

        self.assertFalse(is_valid)
        self.assertIn(
            "Please ensure both metadata and citations are valid CSVs", message
        )
        self.assertIn("check our guide", message)

    def test_validation_with_both_validation_files(self):
        """Test validation when both metadata and citations have validation errors"""
        # Invalid metadata missing fields and invalid citation identifiers
        body = """"id","title"
"doi:invalid","Test Title"
===###===@@@===
"citing_id","cited_id"
"invalid:123","another:456"\""""

        is_valid, message = self._validate(self.DOI_TITLE, body, "129")

        self.assertFalse(is_valid)
        self.assertIn(
            "Please ensure both metadata and citations are valid CSVs", message
        )
        self.assertIn("check our guide", message)

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_reads_validation_files(self, mock_archive_manager):
        """Test that validation properly reads and processes validation files"""
        # CSV with intentional validation errors
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","","","","","","","","invalid_type","",""
"doi:10.1162/qss_a_00292","","","","","","","","journal article","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","invalid_doi"
"doi:10.1162/qss_a_00292","doi:10.1007/s42835-022-01029-y"\""""

        is_valid, message = self._validate(self.DOI_TITLE, body, "130")

        # Verify validation failed
        self.assertFalse(is_valid)
        self.assertIn("Validation errors found in metadata and citations", message)
        self._assert_report_link_in(message)

        # Verify final report was generated in validation_reports
        self._assert_report_generated()

        # Verify archive_manager.add_report was called
        mock_archive_manager.add_report.assert_called_once()

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_html_report_generation(self, mock_archive_manager):
        """Test that HTML validation reports are properly generated when validation fails"""
        # Invalid data that will fail validation
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"INVALID_DOI","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"INVALID_DOI","doi:10.1007/978-3-030-00668-6_8\""""

        is_valid, message = self._validate(self.DOI_TITLE, body, "131")

        # Verify validation failed
        self.assertFalse(is_valid)

        # Check that merged report exists in validation_reports
        self._assert_report_generated()

        # Verify report URL is in the error message
        self._assert_report_link_in(message)

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_html_report_generation_only_metadata_errors(
        self, mock_archive_manager
    ):
        """Test HTML report generation when only metadata has validation errors"""
        # CSV with invalid metadata but valid citations
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","","","","","","","invalid_type","",""
"doi:10.1162/qss_a_00292","Test Title","","","","","","","journal article","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1162/qss_a_00292\""""

        is_valid, _ = self._validate(self.DOI_TITLE, body, "132")

        # Verify validation failed
        self.assertFalse(is_valid)

        # Check that final report exists
        final_report = self._single_html_report()

        # Verify archive_manager.add_report was called with correct parameters
        mock_archive_manager.add_report.assert_called_once_with(
            final_report,
            f"https://test-org.github.io/test-repo/validation_reports/{final_report}",
        )

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_html_report_generation_only_citations_errors(
        self, mock_archive_manager
    ):
        """Test HTML report generation when only citations have validation errors"""
        # CSV with valid metadata but invalid citations
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"INVALID_DOI","ANOTHER_INVALID_DOI"\""""

        is_valid, _ = self._validate(self.DOI_TITLE, body, "133")

        # Verify validation failed
        self.assertFalse(is_valid)

        # Check that final report exists
        final_report = self._single_html_report()

        # Verify archive_manager.add_report was called with correct parameters
        mock_archive_manager.add_report.assert_called_once_with(
            final_report,
            f"https://test-org.github.io/test-repo/validation_reports/{final_report}",
        )

    def test_validate_empty_body(self):
        """Test validate() with empty body content"""
        self._assert_empty_body_rejected(None, "134")

    def test_validate_empty_string_body(self):
        """Test validate() with empty string body content"""
        self._assert_empty_body_rejected("", "135")

    @patch("crowdsourcing.process_issues.archive_manager")
    def test_validation_report_issue_number(self, mock_archive_manager):
        """Test that validation report filename contains correct issue number"""
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","","","","","","","","invalid_type","",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","invalid_doi"\""""

        test_issue_number = "42"

        is_valid, _ = self._validate(self.DOI_TITLE, body, test_issue_number)

        # Verify validation failed and generated a report
        self.assertFalse(is_valid)

        # Check that the report file exists with correct issue number
        matching_files = [
            name
            for name in os.listdir(self.validation_reports)
            if name.startswith(f"validation_issue_{test_issue_number}")
        ]
        self.assertEqual(
            len(matching_files), 1, "Should find exactly one matching report file"
        )
        self.assertTrue(
            matching_files[0].endswith(".html"), "Report file should be HTML"
        )
        self.assertEqual(
            matching_files[0], f"validation_issue_{test_issue_number}.html"
        )

    def test_valid_temp_ids_in_csv(self):
        """Test that CSV data with temporary IDs is accepted"""
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"temp:123","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"temp:456","Another Title","Another Author","2024","Test Journal","1","1","11-20","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"temp:123","temp:456\""""
        is_valid, message = self._validate(self.DOI_TITLE, body, "136")
        self.assertTrue(is_valid)
        self.assertIn("Thank you for your contribution", message)

    def test_valid_local_ids_in_csv(self):
        """Test that CSV data with local IDs is accepted"""
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"local:rec1","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"local:rec2","Another Title","Another Author","2024","Test Journal","1","1","11-20","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"local:rec1","local:rec2\""""
        is_valid, message = self._validate(self.DOI_TITLE, body, "137")
        self.assertTrue(is_valid)
        self.assertIn("Thank you for your contribution", message)

    def test_mixed_identifier_types_in_csv(self):
        """Test that CSV data with mixed identifier types (DOI, temp, local) is accepted"""
        body = """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"temp:123","Another Title","Another Author","2024","Test Journal","1","1","11-20","journal article","Test Publisher",""
"local:rec1","Third Title","Third Author","2024","Test Journal","1","1","21-30","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","temp:123"
"temp:123","local:rec1\""""
        is_valid, message = self._validate(self.DOI_TITLE, body, "138")
        self.assertTrue(is_valid)
        self.assertIn("Thank you for your contribution", message)
class TestUserValidation(unittest.TestCase):
    """Tests for ``get_user_id`` and ``is_in_safe_list``.

    A small safe-list YAML file is written to the working directory before
    each test and ``crowdsourcing.process_issues.SAFE_LIST_PATH`` is patched
    to point at it. All HTTP traffic is mocked through ``requests.get``.
    """

    # URL and credentials the mocked GitHub calls are expected to use.
    USER_URL = "https://api.github.com/users/test-user"
    FAKE_ENV = {"GH_TOKEN": "fake-token"}

    def setUp(self):
        """Set up test environment before each test"""
        self.test_safe_list_path = "test_safe_list.yaml"
        # Registered before the file is written so that a failure while
        # writing cannot leave the file behind (tearDown is skipped when
        # setUp raises, cleanups are not).
        self.addCleanup(self._remove_safe_list)

        safe_list = {
            "users": [
                {"id": 3869247, "name": "Silvio Peroni"},
                {"id": 42008604, "name": "Arcangelo Massari"},
            ]
        }
        with open(self.test_safe_list_path, "w") as f:
            yaml.dump(safe_list, f)

        # Create patcher to use test file instead of real one
        self.safe_list_patcher = patch(
            "crowdsourcing.process_issues.SAFE_LIST_PATH", self.test_safe_list_path
        )
        self.safe_list_patcher.start()
        self.addCleanup(self.safe_list_patcher.stop)

    def _remove_safe_list(self):
        """Delete the temporary safe-list file if it exists."""
        if os.path.exists(self.test_safe_list_path):
            os.remove(self.test_safe_list_path)

    def _assert_user_lookup_calls(self, mock_get):
        """Assert every recorded ``requests.get`` call hit the user URL with the token."""
        for args, kwargs in mock_get.call_args_list:
            self.assertEqual(args[0], self.USER_URL)
            self.assertEqual(kwargs["headers"]["Authorization"], "Bearer fake-token")

    def test_get_user_id_real_user(self):
        """Test getting ID of a real GitHub user"""
        with patch.dict("os.environ", self.FAKE_ENV), patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.status_code = 200
            mock_response.json.return_value = {"id": 42008604}
            mock_get.return_value = mock_response

            user_id = get_user_id("arcangelo7")
            self.assertEqual(user_id, 42008604)

    def test_get_user_id_nonexistent_user(self):
        """Test getting ID of a nonexistent GitHub user"""
        with patch.dict("os.environ", self.FAKE_ENV), patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.status_code = 404
            mock_get.return_value = mock_response

            user_id = get_user_id("this_user_definitely_does_not_exist_123456789")
            self.assertIsNone(user_id)

    def test_is_in_safe_list_authorized(self):
        """Test that authorized user is in safe list"""
        self.assertTrue(is_in_safe_list(42008604))

    def test_is_in_safe_list_unauthorized(self):
        """Test that unauthorized user is not in safe list"""
        self.assertFalse(is_in_safe_list(99999999))

    def test_is_in_safe_list_file_not_found(self):
        """Test behavior when safe_list.yaml doesn't exist"""
        # Remove the test file to simulate missing file
        self._remove_safe_list()

        # Any user ID is expected to be rejected when the file is missing
        self.assertFalse(is_in_safe_list(42008604))

        # Verify empty file was created with proper structure
        self.assertTrue(os.path.exists(self.test_safe_list_path))
        with open(self.test_safe_list_path, "r") as f:
            content = yaml.safe_load(f)
        self.assertEqual(content, {"users": []})

    def test_is_in_safe_list_invalid_yaml(self):
        """Test behavior with invalid YAML file"""
        with open(self.test_safe_list_path, "w") as f:
            f.write("invalid: yaml: content: [")
        self.assertFalse(is_in_safe_list(42008604))

    @patch("requests.get")
    @patch("time.sleep")
    @patch("time.time")
    def test_get_user_id_rate_limit(self, mock_time, mock_sleep, mock_get):
        """Test rate limit handling in get_user_id"""
        # Mock current time
        current_time = 1000000
        mock_time.return_value = current_time

        # First response: rate limit exhausted, reset in 30 seconds
        rate_limited_response = MagicMock()
        rate_limited_response.status_code = 403
        rate_limited_response.headers = {
            "X-RateLimit-Remaining": "0",
            "X-RateLimit-Reset": str(current_time + 30),
        }

        # Second response: success
        success_response = MagicMock()
        success_response.status_code = 200
        success_response.json.return_value = {"id": 12345}

        mock_get.side_effect = [rate_limited_response, success_response]

        with patch.dict("os.environ", self.FAKE_ENV):
            user_id = get_user_id("test-user")

        # Verify correct user ID was returned
        self.assertEqual(user_id, 12345)

        # Verify sleep was called with correct duration
        mock_sleep.assert_called_once_with(30)

        # Verify correct number of API calls
        self.assertEqual(mock_get.call_count, 2)

        # Verify API calls were correct
        self._assert_user_lookup_calls(mock_get)

    @patch("requests.get")
    @patch("time.sleep")  # Mock sleep to speed up test
    def test_get_user_id_connection_error_retry(self, mock_sleep, mock_get):
        """Test retry behavior when connection errors occur"""
        # Configure mock to fail with connection error twice then succeed
        mock_get.side_effect = [
            requests.ConnectionError,
            requests.ConnectionError,
            MagicMock(status_code=200, json=lambda: {"id": 12345}),
        ]

        with patch.dict("os.environ", self.FAKE_ENV):
            user_id = get_user_id("test-user")

        self.assertEqual(user_id, 12345)
        self.assertEqual(mock_get.call_count, 3)
        self.assertEqual(mock_sleep.call_count, 2)
        mock_sleep.assert_called_with(5)  # Verify sleep duration

    @patch("requests.get")
    @patch("time.sleep")
    def test_get_user_id_all_retries_fail(self, mock_sleep, mock_get):
        """Test behavior when all retry attempts fail"""
        # Configure mock to fail all three attempts
        mock_get.side_effect = [requests.ConnectionError] * 3

        with patch.dict("os.environ", self.FAKE_ENV):
            user_id = get_user_id("test-user")

        self.assertIsNone(user_id)
        self.assertEqual(mock_get.call_count, 3)
        # One sleep is expected for each ConnectionError
        self.assertEqual(mock_sleep.call_count, 3)

    @patch("requests.get")
    @patch("time.sleep")
    def test_get_user_id_timeout_retry(self, mock_sleep, mock_get):
        """Test retry behavior when requests timeout"""
        # Configure mock to timeout twice then succeed
        mock_get.side_effect = [
            requests.ReadTimeout,
            requests.ReadTimeout,
            MagicMock(status_code=200, json=lambda: {"id": 12345}),
        ]

        with patch.dict("os.environ", self.FAKE_ENV):
            user_id = get_user_id("test-user")

        # Verify correct user ID was returned after retries
        self.assertEqual(user_id, 12345)

        # Verify correct number of attempts
        self.assertEqual(mock_get.call_count, 3)

        # Verify no sleep was called (ReadTimeout doesn't trigger sleep)
        mock_sleep.assert_not_called()

        # Verify API calls were correct
        self._assert_user_lookup_calls(mock_get)
class TestGitHubAPI(unittest.TestCase):
    """Exercise get_open_issues against mocked GitHub API responses."""

    @staticmethod
    def _rate_limited_response(reset_epoch):
        """Return a mocked 403 whose headers report an exhausted rate limit."""
        limited = MagicMock(status_code=403)
        limited.headers = {
            "X-RateLimit-Remaining": "0",
            "X-RateLimit-Reset": str(reset_epoch),
        }
        return limited

    @staticmethod
    def _single_issue_response():
        """Return a mocked 200 response whose JSON payload holds one issue."""
        ok = MagicMock(status_code=200)
        ok.json.return_value = [
            {
                "title": "deposit Test Issue",
                "body": "Test Body",
                "number": 1,
                "user": {"login": "test-user"},
                "created_at": "2024-01-01T00:00:00Z",
                "html_url": "https://github.com/test/1",
                "labels": [],
            }
        ]
        return ok

    def setUp(self):
        """Prepare a default 200 response, a fixed issue payload and the env."""
        self.mock_response = MagicMock(status_code=200)

        # Static issue payload shared by the tests below
        self.sample_issues = [
            {
                "title": "deposit journal.com doi:10.1234/test",
                "body": "test body",
                "number": 1,
                "user": {"login": "test-user"},
                "created_at": "2024-01-01T00:00:00Z",
                "html_url": "https://github.com/test-org/test-repo/issues/1",
                "labels": [],
            }
        ]

        # Token and repository are injected through the environment
        self.env_patcher = patch.dict(
            "os.environ",
            {"GH_TOKEN": "fake-token", "GITHUB_REPOSITORY": "test-org/test-repo"},
        )
        self.env_patcher.start()

    def tearDown(self):
        """Restore the environment patched in setUp."""
        self.env_patcher.stop()

    @patch("requests.get")
    def test_get_open_issues_success(self, mock_get):
        """A 200 response is turned into the list of open issues."""
        self.mock_response.json.return_value = self.sample_issues
        mock_get.return_value = self.mock_response

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            fetched = get_open_issues()

        self.assertEqual(len(fetched), 1)
        first = fetched[0]
        self.assertEqual(first["title"], "deposit journal.com doi:10.1234/test")
        # The payload carries the integer 1; the result is compared to "1"
        self.assertEqual(first["number"], "1")

        # Exactly one request, authenticated and restricted to open issues
        mock_get.assert_called_once()
        _, request_kwargs = mock_get.call_args
        self.assertEqual(
            request_kwargs["headers"]["Authorization"], "Bearer fake-token"
        )
        self.assertEqual(request_kwargs["params"]["state"], "open")

    @patch("requests.get")
    def test_get_open_issues_404(self, mock_get):
        """A 404 response yields an empty list."""
        self.mock_response.status_code = 404
        mock_get.return_value = self.mock_response

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            fetched = get_open_issues()

        self.assertEqual(fetched, [])

    @patch("requests.get")
    @patch("time.sleep")
    @patch("time.time")
    def test_rate_limit_retry(self, mock_time, mock_sleep, mock_get):
        """A rate-limited response is retried after sleeping until the reset."""
        # Freeze the clock so the expected wait is deterministic
        now = 1000000
        mock_time.return_value = now

        # The rate-limited answer (reset 30 seconds ahead) comes first
        mock_get.side_effect = [
            self._rate_limited_response(now + 30),
            self._single_issue_response(),
        ]

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            fetched = get_open_issues()

        # The second attempt delivered the issue
        self.assertEqual(len(fetched), 1)
        self.assertEqual(fetched[0]["title"], "deposit Test Issue")

        # A single sleep of exactly 30 seconds
        mock_sleep.assert_called_once_with(30)

        # Both requests carried the same filter and credentials
        self.assertEqual(mock_get.call_count, 2)
        for _, request_kwargs in mock_get.call_args_list:
            self.assertEqual(request_kwargs["params"]["state"], "open")
            self.assertEqual(
                request_kwargs["headers"]["Authorization"], "Bearer fake-token"
            )

    @patch("requests.get")
    def test_network_error_retry(self, mock_get):
        """Persistent network errors end in a RuntimeError after 3 attempts."""
        mock_get.side_effect = RequestException("Network error")

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            with self.assertRaises(RuntimeError) as raised:
                get_open_issues()

        self.assertIn(
            "Failed to fetch issues after 3 attempts", str(raised.exception)
        )
        # One request per attempt
        self.assertEqual(mock_get.call_count, 3)

    @patch("requests.get")
    def test_get_open_issues_all_attempts_fail(self, mock_get):
        """Repeated non-exception failures produce an empty list."""
        # A 403 with no rate-limit headers: a failure without the
        # rate-limit information used by the retry tests above
        plain_403 = MagicMock(status_code=403)
        plain_403.headers = {}

        # Every attempt receives that same response
        mock_get.return_value = plain_403

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            fetched = get_open_issues()

        self.assertEqual(fetched, [])

        # Three requests were made before giving up
        self.assertEqual(mock_get.call_count, 3)

    @patch("requests.get")
    @patch("time.sleep")
    @patch("time.time")
    def test_rate_limit_already_expired(self, mock_time, mock_sleep, mock_get):
        """A rate limit whose reset time has passed is retried without sleeping."""
        # Freeze the clock
        now = 1000000
        mock_time.return_value = now

        # The reset timestamp lies 30 seconds in the past
        mock_get.side_effect = [
            self._rate_limited_response(now - 30),
            self._single_issue_response(),
        ]

        with patch.dict("os.environ", {"GH_TOKEN": "fake-token"}):
            fetched = get_open_issues()

        # The second attempt delivered the issue
        self.assertEqual(len(fetched), 1)
        self.assertEqual(fetched[0]["title"], "deposit Test Issue")

        # No waiting, since the limit had already expired
        mock_sleep.assert_not_called()

        # One rate-limited request plus one successful request
        self.assertEqual(mock_get.call_count, 2)
class TestAnswerFunction(unittest.TestCase):
    """Check the GitHub requests issued by answer() for each outcome."""

    def setUp(self):
        """Define the expected endpoint, headers and issue number; patch env."""
        self.base_url = "https://api.github.com/repos/test-org/test-repo/issues"
        self.headers = {
            "Accept": "application/vnd.github+json",
            "Authorization": "Bearer fake-token",
            "X-GitHub-Api-Version": "2022-11-28",
        }
        self.issue_number = "123"

        # Token and repository are injected through the environment
        self.env_patcher = patch.dict(
            "os.environ",
            {"GH_TOKEN": "fake-token", "GITHUB_REPOSITORY": "test-org/test-repo"},
        )
        self.env_patcher.start()

    def tearDown(self):
        """Restore the environment patched in setUp."""
        self.env_patcher.stop()

    def _issue_url(self, suffix=""):
        """Return the API URL of the test issue, optionally with a sub-path."""
        return f"{self.base_url}/{self.issue_number}{suffix}"

    def _assert_label_posted(self, mock_post, label):
        """Assert that a POST adding exactly *label* was sent for the issue."""
        mock_post.assert_any_call(
            self._issue_url("/labels"),
            headers=self.headers,
            json={"labels": [label]},
            timeout=30,
        )

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_valid_authorized(self, mock_patch, mock_post):
        """A valid, authorized issue is labelled, commented on and closed."""
        mock_post.return_value.status_code = 201
        mock_patch.return_value.status_code = 200

        answer(
            is_valid=True,
            message="Thank you for your contribution!",
            issue_number=self.issue_number,
            is_authorized=True,
        )

        # Label request
        self._assert_label_posted(mock_post, "to be processed")

        # Comment request
        mock_post.assert_any_call(
            self._issue_url("/comments"),
            headers=self.headers,
            json={"body": "Thank you for your contribution!"},
            timeout=30,
        )

        # Closing request
        mock_patch.assert_called_once_with(
            self._issue_url(),
            headers=self.headers,
            json={"state": "closed"},
            timeout=30,
        )

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_invalid_authorized(self, mock_patch, mock_post):
        """An invalid issue from an authorized user gets the 'invalid' label."""
        answer(
            is_valid=False,
            message="Invalid format",
            issue_number=self.issue_number,
            is_authorized=True,
        )

        self._assert_label_posted(mock_post, "invalid")

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_unauthorized(self, mock_patch, mock_post):
        """An issue from an unauthorized user gets the 'rejected' label."""
        answer(
            is_valid=False,
            message="Unauthorized user",
            issue_number=self.issue_number,
            is_authorized=False,
        )

        self._assert_label_posted(mock_post, "rejected")

    @patch("requests.post")
    def test_answer_label_error(self, mock_post):
        """A request failure while adding the label propagates to the caller."""
        mock_post.side_effect = RequestException("Network error")

        with self.assertRaises(RequestException):
            answer(
                is_valid=True,
                message="Test message",
                issue_number=self.issue_number,
            )

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_comment_error(self, mock_patch, mock_post):
        """A request failure while adding the comment propagates to the caller."""
        # The label POST succeeds; the comment POST raises
        label_ok = MagicMock(status_code=201)
        mock_post.side_effect = [label_ok, RequestException("Network error")]

        with self.assertRaises(RequestException):
            answer(
                is_valid=True,
                message="Test message",
                issue_number=self.issue_number,
            )

    @patch("requests.post")
    @patch("requests.patch")
    def test_answer_close_error(self, mock_patch, mock_post):
        """A request failure while closing the issue propagates to the caller."""
        mock_post.return_value = MagicMock(status_code=201)
        mock_patch.side_effect = RequestException("Network error")

        with self.assertRaises(RequestException):
            answer(
                is_valid=True,
                message="Test message",
                issue_number=self.issue_number,
            )
class TestZenodoDeposit(unittest.TestCase):
    """Test Zenodo deposit functionality"""

    def setUp(self):
        """Patch the Zenodo-related environment and build a sample payload"""
        self.env_patcher = patch.dict(
            "os.environ",
            {
                "ZENODO_SANDBOX": "fake-sandbox-token",
                "ZENODO_PRODUCTION": "fake-prod-token",
                "ENVIRONMENT": "development",
            },
        )
        self.env_patcher.start()

        self.test_data = [
            {
                "data": {
                    "title": "test deposit",
                    "metadata": [{"id": "1", "title": "Test"}],
                    "citations": [{"citing": "1", "cited": "2"}],
                },
                "provenance": {
                    "generatedAtTime": "2024-01-01T00:00:00Z",
                    "wasAttributedTo": 12345,
                    "hadPrimarySource": "https://github.com/test/1",
                },
            }
        ]

    def tearDown(self):
        """Restore the environment and remove any leftover data file"""
        self.env_patcher.stop()
        if os.path.exists("data_to_store.json"):
            os.remove("data_to_store.json")

    @patch("requests.post")
    def test_create_deposition_resource(self, mock_post):
        """Test creation of Zenodo deposition resource"""
        mock_response = MagicMock()
        mock_response.json.return_value = {
            "id": "12345",
            "links": {"bucket": "https://sandbox.zenodo.org/api/bucket/12345"},
        }
        mock_post.return_value = mock_response

        deposition_id, bucket = _create_deposition_resource(
            "2024-01-01", base_url="https://sandbox.zenodo.org/api"
        )

        self.assertEqual(deposition_id, "12345")
        self.assertEqual(bucket, "https://sandbox.zenodo.org/api/bucket/12345")

        # Verify API call
        mock_post.assert_called_once()
        _, kwargs = mock_post.call_args

        self.assertEqual(kwargs["params"], {"access_token": "fake-sandbox-token"})
        self.assertEqual(kwargs["headers"], {"Content-Type": "application/json"})
        self.assertEqual(kwargs["timeout"], 30)

    @patch("requests.put")
    def test_upload_data(self, mock_put):
        """Test uploading data file to Zenodo"""
        mock_put.return_value.status_code = 200
        mock_put.return_value.raise_for_status = lambda: None

        # Create the file that _upload_data is expected to send
        with open("data_to_store.json", "w") as f:
            json.dump({"test": "data"}, f)

        _upload_data(
            "2024-01-01",
            "https://sandbox.zenodo.org/api/bucket/12345",
            base_url="https://sandbox.zenodo.org/api",
        )

        # Verify API call
        mock_put.assert_called_once()
        args, kwargs = mock_put.call_args

        self.assertEqual(
            args[0],
            "https://sandbox.zenodo.org/api/bucket/12345/2024-01-01_weekly_deposit.json",
        )
        self.assertEqual(kwargs["params"], {"access_token": "fake-sandbox-token"})
        self.assertEqual(kwargs["timeout"], 30)

    @patch("crowdsourcing.process_issues._create_deposition_resource")
    @patch("crowdsourcing.process_issues._upload_data")
    @patch("requests.post")
    def test_deposit_on_zenodo(self, mock_post, mock_upload, mock_create):
        """Test full Zenodo deposit process"""
        sandbox_api = "https://sandbox.zenodo.org/api"
        bucket_url = "https://sandbox.zenodo.org/api/bucket/12345"

        # Setup mocks
        mock_create.return_value = ("12345", bucket_url)
        mock_post.return_value.status_code = 202  # status used for a successful publish
        mock_post.return_value.text = ""  # keep a MagicMock out of any error message

        # Sample the date on both sides of the call: if the run straddles
        # midnight, the date used inside deposit_on_zenodo equals one of them.
        date_before = datetime.now().strftime("%Y-%m-%d")
        deposit_on_zenodo(self.test_data)
        date_after = datetime.now().strftime("%Y-%m-%d")

        # Verify the deposition was created once with (date, base_url=...)
        mock_create.assert_called_once()
        create_args, create_kwargs = mock_create.call_args
        self.assertIn(create_args, [(date_before,), (date_after,)])
        self.assertEqual(create_kwargs, {"base_url": sandbox_api})

        # Verify the upload was requested once with (date, bucket, base_url=...)
        mock_upload.assert_called_once()
        upload_args, upload_kwargs = mock_upload.call_args
        self.assertIn(
            upload_args,
            [(date_before, bucket_url), (date_after, bucket_url)],
        )
        self.assertEqual(upload_kwargs, {"base_url": sandbox_api})

        # Verify publish request
        mock_post.assert_called_once()
        args, kwargs = mock_post.call_args
        self.assertEqual(
            args[0],
            "https://sandbox.zenodo.org/api/deposit/depositions/12345/actions/publish",
        )
        self.assertEqual(kwargs["params"], {"access_token": "fake-sandbox-token"})
        self.assertEqual(kwargs["timeout"], 30)

        # Verify cleanup happened
        self.assertFalse(os.path.exists("data_to_store.json"))

    @patch("requests.post")
    def test_create_deposition_resource_error(self, mock_post):
        """Test error handling in deposition creation"""
        mock_post.side_effect = requests.RequestException("API Error")

        with self.assertRaises(requests.RequestException):
            _create_deposition_resource("2024-01-01")

    @patch("requests.put")
    def test_upload_data_error(self, mock_put):
        """Test error handling in data upload"""
        mock_put.side_effect = requests.RequestException("Upload Error")

        with open("data_to_store.json", "w") as f:
            json.dump({"test": "data"}, f)

        with self.assertRaises(requests.RequestException):
            _upload_data("2024-01-01", "https://zenodo.org/api/bucket/12345")

    @patch("crowdsourcing.process_issues._create_deposition_resource")
    def test_deposit_on_zenodo_create_error(self, mock_create):
        """Test error handling in full deposit process - creation error"""
        mock_create.side_effect = requests.RequestException("Creation Error")

        with self.assertRaises(requests.RequestException):
            deposit_on_zenodo(self.test_data)

        # Verify cleanup happened
        self.assertFalse(os.path.exists("data_to_store.json"))

    def test_deposit_development_environment(self):
        """Test deposit in development environment uses sandbox"""
        with patch("requests.post") as mock_post, patch("requests.put") as mock_put:
            # A single mocked response answers every POST in this test, so it
            # carries both the deposition payload and the 202 status that the
            # publish mocks in this class use for success.
            mock_post.return_value.json.return_value = {
                "id": "12345",
                "links": {"bucket": "https://sandbox.zenodo.org/api/bucket/12345"},
            }
            mock_post.return_value.status_code = 202

            # Mock upload
            mock_put.return_value.status_code = 200
            mock_put.return_value.raise_for_status = lambda: None

            deposit_on_zenodo(self.test_data)

            # Verify sandbox URL was used
            calls = mock_post.call_args_list
            self.assertTrue(any("sandbox.zenodo.org" in call[0][0] for call in calls))

    def test_deposit_production_environment(self):
        """Test deposit in production environment uses main Zenodo"""
        with patch.dict("os.environ", {"ENVIRONMENT": "production"}):
            with patch("requests.post") as mock_post, patch("requests.put") as mock_put:
                # A single mocked response answers every POST in this test, so
                # it carries both the deposition payload and the 202 status
                # that the publish mocks in this class use for success.
                mock_post.return_value.json.return_value = {
                    "id": "12345",
                    "links": {"bucket": "https://zenodo.org/api/bucket/12345"},
                }
                mock_post.return_value.status_code = 202

                # Mock upload
                mock_put.return_value.status_code = 200
                mock_put.return_value.raise_for_status = lambda: None

                deposit_on_zenodo(self.test_data)

                # Verify production URL was used
                calls = mock_post.call_args_list
                self.assertTrue(
                    all("sandbox.zenodo.org" not in call[0][0] for call in calls)
                )

    def test_get_zenodo_token_development(self):
        """Test getting Zenodo token in development environment"""
        token = _get_zenodo_token()
        self.assertEqual(token, "fake-sandbox-token")

    def test_get_zenodo_token_production(self):
        """Test getting Zenodo token in production environment"""
        with patch.dict("os.environ", {"ENVIRONMENT": "production"}):
            token = _get_zenodo_token()
            self.assertEqual(token, "fake-prod-token")

    def test_get_zenodo_token_missing(self):
        """Test error when token is missing"""
        with patch.dict(
            "os.environ", {"ZENODO_SANDBOX": "", "ENVIRONMENT": "development"}
        ):
            with self.assertRaises(ValueError) as context:
                _get_zenodo_token()
            self.assertIn("ZENODO_SANDBOX token not found", str(context.exception))

    def test_get_zenodo_token_missing_production(self):
        """Test error when production token is missing"""
        with patch.dict(
            "os.environ",
            {
                "ENVIRONMENT": "production",
                "ZENODO_PRODUCTION": "",  # Missing token
            },
        ):
            with self.assertRaises(ValueError) as context:
                _get_zenodo_token()
            self.assertIn("ZENODO_PRODUCTION token not found", str(context.exception))

    @patch("crowdsourcing.process_issues._create_deposition_resource")
    @patch("crowdsourcing.process_issues._upload_data")
    @patch("requests.post")
    def test_deposit_on_zenodo_publish_error(self, mock_post, mock_upload, mock_create):
        """Test error handling when publish fails"""
        # Setup mocks
        mock_create.return_value = (
            "12345",
            "https://sandbox.zenodo.org/api/bucket/12345",
        )
        mock_post.return_value.status_code = 400  # Simulate a publish failure
        mock_post.return_value.text = "Publication failed"

        with self.assertRaises(Exception) as context:
            deposit_on_zenodo(self.test_data)

        self.assertEqual(
            str(context.exception), "Failed to publish deposition: Publication failed"
        )

        # Verify cleanup happened even after error
        self.assertFalse(os.path.exists("data_to_store.json"))
class TestProcessOpenIssues(unittest.TestCase):
    """Drive process_open_issues end to end with its collaborators mocked."""

    def setUp(self):
        """Patch the tokens in the environment and build one well-formed issue."""
        self.env_patcher = patch.dict(
            "os.environ", {"GH_TOKEN": "fake-gh-token", "ZENODO": "fake-zenodo-token"}
        )
        self.env_patcher.start()

        # Issue whose body holds a metadata CSV, the separator line and a citations CSV
        self.sample_issue = {
            "title": "deposit journal.com doi:10.1007/s42835-022-01029-y",
            "body": """"id","title","author","pub_date","venue","volume","issue","page","type","publisher","editor"
"doi:10.1007/s42835-022-01029-y","Test Title","Test Author","2024","Test Journal","1","1","1-10","journal article","Test Publisher",""
"doi:10.1007/978-3-030-00668-6_8","Cited Paper","Another Author","2024","Another Journal","2","2","20-30","journal article","Test Publisher",""
===###===@@@===
"citing_id","cited_id"
"doi:10.1007/s42835-022-01029-y","doi:10.1007/978-3-030-00668-6_8\"""",
            "number": "1",
            "author": {"login": "test-user"},
            "createdAt": "2024-01-01T00:00:00Z",
            "url": "https://github.com/test/1",
        }

    def tearDown(self):
        """Restore the environment patched in setUp."""
        self.env_patcher.stop()

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    @patch("crowdsourcing.process_issues.answer")
    def test_process_valid_authorized_issue(
        self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
    ):
        """A valid issue from a trusted user is answered and deposited."""
        mock_get_issues.return_value = [self.sample_issue]
        mock_user_id.return_value = 12345
        mock_safe_list.return_value = True

        process_open_issues()

        # The author was resolved and checked against the safe list
        mock_user_id.assert_called_once_with("test-user")
        mock_safe_list.assert_called_once_with(12345)

        # One positive answer for issue "1"
        mock_answer.assert_called_once()
        answer_args, answer_kwargs = mock_answer.call_args
        self.assertTrue(answer_args[0])  # is_valid
        self.assertIn("Thank you", answer_args[1])  # message
        self.assertEqual(answer_args[2], "1")  # issue_number
        self.assertTrue(answer_kwargs["is_authorized"])

        # One deposit whose first record mirrors the issue
        mock_deposit.assert_called_once()
        deposit_args, _ = mock_deposit.call_args
        first_record = deposit_args[0][0]
        self.assertEqual(first_record["data"]["title"], self.sample_issue["title"])
        self.assertEqual(
            first_record["provenance"]["wasAttributedTo"],
            "https://api.github.com/user/12345",
        )

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    @patch("crowdsourcing.process_issues.answer")
    def test_process_unauthorized_user(
        self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
    ):
        """An issue from a user outside the safe list is refused, not deposited."""
        mock_get_issues.return_value = [self.sample_issue]
        mock_user_id.return_value = 12345
        mock_safe_list.return_value = False

        process_open_issues()

        # The author was looked up and found not to be trusted
        mock_user_id.assert_called_once_with("test-user")
        mock_safe_list.assert_called_once_with(12345)

        # One negative answer pointing to the registration procedure
        mock_answer.assert_called_once()
        answer_args, answer_kwargs = mock_answer.call_args
        self.assertFalse(answer_args[0])  # is_valid
        self.assertIn("register as a trusted user", answer_args[1])  # message
        self.assertEqual(answer_args[2], "1")  # issue_number
        self.assertFalse(answer_kwargs["is_authorized"])

        # Nothing reaches Zenodo
        mock_deposit.assert_not_called()

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.validate")
    @patch("crowdsourcing.process_issues.get_data_to_store")
    @patch("crowdsourcing.process_issues.answer")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    def test_process_open_issues_data_processing_error(
        self,
        mock_deposit,
        mock_answer,
        mock_get_data,
        mock_validate,
        mock_safe_list,
        mock_user_id,
        mock_get_issues,
    ):
        """A failure in get_data_to_store is absorbed and no deposit happens."""
        mock_get_issues.return_value = [self.sample_issue]
        mock_user_id.return_value = 12345
        mock_safe_list.return_value = True
        mock_validate.return_value = (True, "Valid data")
        mock_get_data.side_effect = Exception("Data processing error")

        # Must return normally despite the exception raised by the mock
        process_open_issues()

        mock_get_data.assert_called_once()
        mock_answer.assert_called_once()
        # With no record produced there is nothing to deposit
        mock_deposit.assert_not_called()

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.validate")
    @patch("crowdsourcing.process_issues.get_data_to_store")
    @patch("crowdsourcing.process_issues.answer")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    def test_process_open_issues_zenodo_deposit_error(
        self,
        mock_deposit,
        mock_answer,
        mock_get_data,
        mock_validate,
        mock_safe_list,
        mock_user_id,
        mock_get_issues,
    ):
        """A failure in deposit_on_zenodo propagates out of process_open_issues."""
        mock_get_issues.return_value = [self.sample_issue]
        mock_user_id.return_value = 12345
        mock_safe_list.return_value = True
        mock_validate.return_value = (True, "Valid data")
        mock_get_data.return_value = {"test": "data"}
        mock_deposit.side_effect = Exception("Zenodo deposit error")

        with self.assertRaises(Exception) as raised:
            process_open_issues()

        self.assertEqual(str(raised.exception), "Zenodo deposit error")
        mock_deposit.assert_called_once()

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    @patch("crowdsourcing.process_issues.answer")
    def test_process_localhost_issue_skips_zenodo(
        self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
    ):
        """An issue whose title names a localhost domain is answered, not deposited."""
        localhost_issue = {
            **self.sample_issue,
            "title": "deposit localhost:330 doi:10.1007/s42835-022-01029-y",
        }

        mock_get_issues.return_value = [localhost_issue]
        mock_user_id.return_value = 12345
        mock_safe_list.return_value = True

        process_open_issues()

        mock_answer.assert_called_once()
        answer_args, answer_kwargs = mock_answer.call_args
        self.assertTrue(answer_args[0])  # is_valid
        # The message marks the run as a test deposit
        self.assertIn("Test deposit validated successfully", answer_args[1])
        self.assertEqual(answer_args[2], "1")  # issue_number
        self.assertTrue(answer_kwargs["is_test"])

        # Zenodo must not be contacted
        mock_deposit.assert_not_called()

    @patch("crowdsourcing.process_issues.get_open_issues")
    @patch("crowdsourcing.process_issues.get_user_id")
    @patch("crowdsourcing.process_issues.is_in_safe_list")
    @patch("crowdsourcing.process_issues.deposit_on_zenodo")
    @patch("crowdsourcing.process_issues.answer")
    def test_process_mixed_localhost_and_production_issues(
        self, mock_answer, mock_deposit, mock_safe_list, mock_user_id, mock_get_issues
    ):
        """With one localhost and one regular issue, only the latter is deposited."""
        localhost_issue = {
            **self.sample_issue,
            "title": "deposit localhost:330 doi:10.1007/s42835-022-01029-y",
            "number": "1",
        }
        production_issue = {
            **self.sample_issue,
            "title": "deposit journal.com doi:10.1007/s42835-022-01029-y",
            "number": "2",
        }

        mock_get_issues.return_value = [localhost_issue, production_issue]
        mock_user_id.return_value = 12345
        mock_safe_list.return_value = True

        process_open_issues()

        # Each issue received its own answer
        self.assertEqual(mock_answer.call_count, 2)

        # A single deposit containing only the journal.com record
        mock_deposit.assert_called_once()
        deposited_records = mock_deposit.call_args[0][0]
        self.assertEqual(len(deposited_records), 1)
        self.assertEqual(deposited_records[0]["data"]["domain"], "journal.com")
# Run the whole suite when this file is executed directly.
if __name__ == "__main__":  # pragma: no cover
    unittest.main()