Coverage for test / oc_idmanager_test.py: 98%
62 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2023 Arcangelo Massari <arcangelo.massari@unibo.it>
2# SPDX-FileCopyrightText: 2023 Arianna Moretti <arianna.moretti4@unibo.it>
3# SPDX-FileCopyrightText: 2023-2026 Marta Soricetti <marta.soricetti@unibo.it>
4#
5# SPDX-License-Identifier: ISC
8import json
9import os
10import unittest
11from os import makedirs
12from os.path import exists, join
14from oc_ds_converter.oc_idmanager import *
15from oc_ds_converter.oc_idmanager.jid import JIDManager
16from oc_ds_converter.oc_idmanager.url import URLManager
class IdentifierManagerTest(unittest.TestCase):
    """Tests for the URL and Wikipedia identifier managers."""

    def setUp(self):
        """Create the scratch directory, load the fixture store and define sample ids."""
        # exist_ok avoids the check-then-create race of a separate exists() test.
        makedirs("tmp", exist_ok=True)

        with open(join("test", "data", "glob.json"), encoding="utf-8") as fp:
            self.data = json.load(fp)

        self.valid_wikipedia_1 = "30456"
        self.valid_wikipedia_2 = "43744177"  # category page
        self.invalid_wikipedia_1 = "01267548"
        self.invalid_wikipedia_2 = "Berlin_Wall"

        self.valid_url_1 = "https://datacite.org/"
        self.valid_url_2 = "opencitations.net"
        self.valid_url_3 = "https://www.nih.gov/"
        self.valid_url_4 = "https://it.wikipedia.org/wiki/Muro di Berlino"
        self.invalid_url_1 = "https://www.nih.gov/invalid_url"
        self.invalid_url_2 = "opencitations.net/not a real page .org"  # not existing yet

    def test_url_valid(self):
        """Check URL validation with default settings, with a data store, and with an empty store."""
        # Manager built with default arguments (no data store supplied).
        um_nofile = URLManager()
        self.assertTrue(um_nofile.is_valid(self.valid_url_1))
        self.assertTrue(um_nofile.is_valid(self.valid_url_2))
        self.assertTrue(um_nofile.is_valid(self.valid_url_3))
        self.assertTrue(um_nofile.is_valid(self.valid_url_4))
        self.assertFalse(um_nofile.is_valid(self.invalid_url_1))
        self.assertFalse(um_nofile.is_valid(self.invalid_url_2))

        # Manager backed by the fixture store: the prefixed, normalised ids
        # are expected to be keys of the loaded data.
        um_file = URLManager(self.data, use_api_service=False)
        self.assertIn(um_file.normalise(self.valid_url_1, include_prefix=True), self.data)
        self.assertIn(um_file.normalise(self.valid_url_2, include_prefix=True), self.data)

        # Empty store and API service disabled: both URLs are expected to be
        # reported as valid, including the one rejected above.
        # NOTE(review): presumably only the syntax is checked in this mode -
        # confirm against URLManager.
        clean_data = {}
        um_nofile_noapi = URLManager(clean_data, use_api_service=False)
        self.assertTrue(um_nofile_noapi.is_valid(self.valid_url_1))
        self.assertTrue(um_nofile_noapi.is_valid(self.invalid_url_1))

    def test_wikipedia_normalise(self):
        """Check that normalising Wikipedia ids yields the bare expected id."""
        wpm = WikipediaManager()
        # assertEqual, not assertTrue: assertTrue(expected, actual) treats the
        # second argument as the failure message and passes for any non-empty
        # expected string, so it never compared the two values.
        self.assertEqual(self.valid_wikipedia_1, wpm.normalise("30456"))
        self.assertEqual(self.valid_wikipedia_2, wpm.normalise(self.valid_wikipedia_2))
        self.assertEqual(
            self.valid_wikipedia_2,
            wpm.normalise("wikipedia" + self.valid_wikipedia_2),
        )

    def test_wikipedia_is_valid(self):
        """Check Wikipedia id validation with default settings, with a data store, and with an empty store."""
        # Manager built with default arguments (no data store supplied).
        wpm = WikipediaManager()
        self.assertTrue(wpm.is_valid(self.valid_wikipedia_1))
        self.assertTrue(wpm.is_valid(self.valid_wikipedia_2))
        self.assertFalse(wpm.is_valid(self.invalid_wikipedia_1))
        self.assertFalse(wpm.is_valid(self.invalid_wikipedia_2))

        # Manager backed by the fixture store: every prefixed, normalised id
        # used here (the invalid one included) is expected to be a key of the
        # loaded data.
        wpm_file = WikipediaManager(self.data)
        self.assertIn(wpm_file.normalise(self.valid_wikipedia_1, include_prefix=True), self.data)
        self.assertIn(wpm_file.normalise(self.valid_wikipedia_2, include_prefix=True), self.data)
        self.assertIn(wpm_file.normalise(self.invalid_wikipedia_1, include_prefix=True), self.data)
        self.assertTrue(wpm_file.is_valid(wpm_file.normalise(self.valid_wikipedia_1, include_prefix=True)))
        self.assertTrue(wpm_file.is_valid(wpm_file.normalise(self.valid_wikipedia_2, include_prefix=True)))
        self.assertFalse(wpm_file.is_valid(wpm_file.normalise(self.invalid_wikipedia_1, include_prefix=True)))

        # Empty store and API service disabled: both valid ids are still
        # expected to be reported as valid.
        clean_data = {}
        wpm_nofile_noapi = WikipediaManager(clean_data, use_api_service=False)
        self.assertTrue(wpm_nofile_noapi.is_valid(self.valid_wikipedia_1))
        self.assertTrue(wpm_nofile_noapi.is_valid(self.valid_wikipedia_2))