Coverage for test/collect_identifiers_test.py: 98%
53 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
4"""
5Test script for collect_identifiers method with real CSV data.
6This test verifies that all types of identifiers are correctly extracted.
7"""
9import csv
10import os
11import unittest
13from oc_meta.core.curator import Curator
14from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler
class TestCollectIdentifiers(unittest.TestCase):
    """Checks on ``Curator.collect_identifiers`` driven by a CSV fixture.

    The fixture ``test_data_collect_identifiers.csv`` is read from the
    directory containing this module. A single ``Curator`` is built once
    for the whole class and shared by every test method.
    """

    @classmethod
    def setUpClass(cls):
        """Read the CSV fixture and build the shared ``Curator`` instance.

        NOTE(review): the counter handler targets Redis at localhost:6379
        and the curator is given a SPARQL URL at localhost:9999 --
        presumably both services must be reachable; confirm before
        running in CI.
        """
        fixture_dir = os.path.dirname(__file__)
        csv_path = os.path.join(fixture_dir, 'test_data_collect_identifiers.csv')

        # One dict per CSV row, keyed by the header line.
        cls.test_data = []
        with open(csv_path, 'r', encoding='utf-8') as csvfile:
            cls.test_data.extend(csv.DictReader(csvfile))

        cls.counter_handler = RedisCounterHandler(host="localhost", port=6379, db=0)
        cls.curator = Curator(
            data=cls.test_data,
            ts="http://localhost:9999/sparql",
            prov_config="test_prov_config.yaml",
            counter_handler=cls.counter_handler,
            base_iri="https://w3id.org/oc/meta",
            prefix="060",
        )

    def test_collect_identifiers_structure(self):
        """The call must yield exactly three values, each of them a set."""
        result = self.curator.collect_identifiers(valid_dois_cache={})

        self.assertEqual(len(result), 3, "collect_identifiers should return 3 values")

        # Positions are, in order: metavals, identifiers, vvis.
        for collection in result:
            self.assertIsInstance(collection, set)

    def test_identifiers_extraction(self):
        """Check the identifiers and VVI tuples collected from the fixture.

        DOIs, the ORCID and the Crossref ids must appear in the identifier
        set; venue ISSNs must appear only inside the VVI tuples, and the
        VVI set must match the expected one exactly.
        """
        metavals, identifiers, vvis = self.curator.collect_identifiers(valid_dois_cache={})

        # --- identifiers expected in the main identifier set ---
        for doi in ('doi:10.17759/chp.2024200411', 'doi:10.1016/j.molliq.2024.126787'):
            self.assertIn(doi, identifiers, f"DOI {doi} should be in identifiers")

        expected_orcid = 'orcid:0000-0002-7915-1367'
        self.assertIn(expected_orcid, identifiers, f"ORCID {expected_orcid} should be in identifiers")

        for crossref in ('crossref:7555', 'crossref:78', 'crossref:3434'):
            self.assertIn(crossref, identifiers, f"Crossref {crossref} should be in identifiers")

        # --- VVI tuples: (volume, issue, venue metaid, venue ids) ---
        expected_vvis = {
            ('20', '4', '', ('issn:1816-5435', 'issn:2224-8935')),  # Cultural-Historical Psychology, vol. 20, issue 4
            ('30', '2', '', ('issn:2146-0590',)),   # Marmara University, vol. 30, issue 2
            ('13', '1', '', ('issn:2213-3437',)),   # Journal of Environmental Chemical Engineering, vol. 13, issue 1
            ('20', '3', '', ('issn:1930-0433',)),   # Radiology Case Reports, vol. 20, issue 3
            ('267', '', '', ('issn:1364-6826',)),   # Journal of Atmospheric, vol. 267, no issue
            ('169', '', '', ('issn:1350-6307',)),   # Engineering Failure Analysis, vol. 169, no issue
            ('458', '', '', ('issn:0950-0618',)),   # Construction and Building Materials, vol. 458, no issue
            ('188', '', '', ('issn:1369-8001',)),   # Materials Science, vol. 188, no issue
            ('419', '', '', ('issn:0167-7322',)),   # Journal of Molecular Liquids, vol. 419, no issue
        }

        self.assertIsInstance(vvis, set, "VVIs should be a set")

        # Shape check on every collected entry before comparing contents.
        for vvi in vvis:
            self.assertEqual(len(vvi), 4, f"Each VVI should have 4 elements: {vvi}")
            volume, issue, venue_metaid, venue_ids_tuple = vvi

            self.assertIsInstance(volume, str, f"Volume should be string: {volume}")
            self.assertIsInstance(issue, str, f"Issue should be string: {issue}")
            self.assertIsInstance(venue_metaid, (str, type(None)), f"Venue metaid should be string or None: {venue_metaid}")
            self.assertIsInstance(venue_ids_tuple, tuple, f"Venue IDs should be tuple: {venue_ids_tuple}")

        # Same size plus every expected member present means the sets are equal.
        self.assertEqual(len(vvis), len(expected_vvis), f"Expected {len(expected_vvis)} VVIs, got {len(vvis)}")
        for expected_vvi in expected_vvis:
            self.assertIn(expected_vvi, vvis, f"Expected VVI {expected_vvi} should be present in collected VVIs")

        # --- venue ISSNs must not leak into the main identifier set ---
        venue_identifiers = (
            'issn:1816-5435', 'issn:2224-8935', 'issn:2146-0590',
            'issn:2213-3437', 'issn:1930-0433', 'issn:1364-6826',
            'issn:1350-6307', 'issn:0950-0618', 'issn:1369-8001', 'issn:0167-7322',
        )
        for venue_id in venue_identifiers:
            self.assertNotIn(venue_id, identifiers, f"Venue ID {venue_id} should not be in main identifiers")
# Run the suite with unittest's command-line runner when executed as a script.
if __name__ == "__main__":
    unittest.main()