Coverage for test / oc_idmanager_test.py: 98%

62 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2023 Arcangelo Massari <arcangelo.massari@unibo.it> 

2# SPDX-FileCopyrightText: 2023 Arianna Moretti <arianna.moretti4@unibo.it> 

3# SPDX-FileCopyrightText: 2023-2026 Marta Soricetti <marta.soricetti@unibo.it> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7 

8import json 

9import os 

10import unittest 

11from os import makedirs 

12from os.path import exists, join 

13 

14from oc_ds_converter.oc_idmanager import * 

15from oc_ds_converter.oc_idmanager.jid import JIDManager 

16from oc_ds_converter.oc_idmanager.url import URLManager 

17 

18 

class IdentifierManagerTest(unittest.TestCase):
    """Tests for the URL and Wikipedia identifier managers."""

    def setUp(self):
        """Create the scratch directory, load the fixture and define sample ids."""
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        makedirs("tmp", exist_ok=True)

        test_dir = join("test", "data")
        with open(join(test_dir, "glob.json"), encoding="utf-8") as fp:
            # Pre-computed identifier data handed to the managers as storage.
            self.data = json.load(fp)

        self.valid_wikipedia_1 = "30456"
        self.valid_wikipedia_2 = "43744177"  # category page
        self.invalid_wikipedia_1 = "01267548"
        self.invalid_wikipedia_2 = "Berlin_Wall"

        self.valid_url_1 = "https://datacite.org/"
        self.valid_url_2 = "opencitations.net"
        self.valid_url_3 = "https://www.nih.gov/"
        self.valid_url_4 = "https://it.wikipedia.org/wiki/Muro di Berlino"
        self.invalid_url_1 = "https://www.nih.gov/invalid_url"
        self.invalid_url_2 = "opencitations.net/not a real page .org"  # not existing yet

    def test_url_valid(self):
        """Check URL validation with default settings, with fixture data and with no API."""
        # Manager built with default arguments (no storage supplied).
        um_nofile = URLManager()
        for url in (
            self.valid_url_1,
            self.valid_url_2,
            self.valid_url_3,
            self.valid_url_4,
        ):
            with self.subTest(url=url):
                self.assertTrue(um_nofile.is_valid(url))
        for url in (self.invalid_url_1, self.invalid_url_2):
            with self.subTest(url=url):
                self.assertFalse(um_nofile.is_valid(url))

        # Manager backed by the fixture: normalised ids must be keys of the data.
        um_file = URLManager(self.data, use_api_service=False)
        self.assertIn(
            um_file.normalise(self.valid_url_1, include_prefix=True), self.data
        )
        self.assertIn(
            um_file.normalise(self.valid_url_2, include_prefix=True), self.data
        )

        clean_data = {}

        # Empty storage and API disabled: the test expects both URLs,
        # including the one rejected above, to be reported as valid.
        um_nofile_noapi = URLManager(clean_data, use_api_service=False)
        self.assertTrue(um_nofile_noapi.is_valid(self.valid_url_1))
        self.assertTrue(um_nofile_noapi.is_valid(self.invalid_url_1))

    def test_wikipedia_normalise(self):
        """Check that normalise() yields the expected Wikipedia id.

        assertEqual is required here: assertTrue(expected, actual) would treat
        ``actual`` as the failure message and pass for any non-empty
        ``expected``, so nothing would be compared.
        """
        wpm = WikipediaManager()
        self.assertEqual(self.valid_wikipedia_1, wpm.normalise("30456"))
        self.assertEqual(
            self.valid_wikipedia_2, wpm.normalise(self.valid_wikipedia_2)
        )
        # NOTE(review): assumes normalise() drops the leading "wikipedia" text
        # and returns only the numeric id - confirm against WikipediaManager.
        self.assertEqual(
            self.valid_wikipedia_2,
            wpm.normalise("wikipedia" + self.valid_wikipedia_2),
        )

    def test_wikipedia_is_valid(self):
        """Check Wikipedia id validation with defaults, with fixture data and with no API."""
        wpm = WikipediaManager()
        self.assertTrue(wpm.is_valid(self.valid_wikipedia_1))
        self.assertTrue(wpm.is_valid(self.valid_wikipedia_2))
        self.assertFalse(wpm.is_valid(self.invalid_wikipedia_1))
        self.assertFalse(wpm.is_valid(self.invalid_wikipedia_2))

        # Manager backed by the fixture; the fixture is expected to contain an
        # entry for the invalid id as well as for the valid ones.
        wpm_file = WikipediaManager(self.data)
        prefixed_valid_1 = wpm_file.normalise(
            self.valid_wikipedia_1, include_prefix=True
        )
        self.assertIn(prefixed_valid_1, self.data)
        prefixed_valid_2 = wpm_file.normalise(
            self.valid_wikipedia_2, include_prefix=True
        )
        self.assertIn(prefixed_valid_2, self.data)
        prefixed_invalid_1 = wpm_file.normalise(
            self.invalid_wikipedia_1, include_prefix=True
        )
        self.assertIn(prefixed_invalid_1, self.data)

        # Normalisation is repeated for each check to keep the original
        # sequence of manager calls.
        self.assertTrue(
            wpm_file.is_valid(
                wpm_file.normalise(self.valid_wikipedia_1, include_prefix=True)
            )
        )
        self.assertTrue(
            wpm_file.is_valid(
                wpm_file.normalise(self.valid_wikipedia_2, include_prefix=True)
            )
        )
        self.assertFalse(
            wpm_file.is_valid(
                wpm_file.normalise(self.invalid_wikipedia_1, include_prefix=True)
            )
        )

        # Empty storage and API disabled: both valid ids are expected to pass.
        clean_data = {}
        wpm_nofile_noapi = WikipediaManager(clean_data, use_api_service=False)
        self.assertTrue(wpm_nofile_noapi.is_valid(self.valid_wikipedia_1))
        self.assertTrue(wpm_nofile_noapi.is_valid(self.valid_wikipedia_2))