Coverage for test/collect_identifiers_test.py: 98%

53 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-07-14 14:06 +0000

1#!/usr/bin/env python 

2# -*- coding: utf-8 -*- 

3 

4""" 

5Test script for collect_identifiers method with real CSV data. 

6This test verifies that all types of identifiers are correctly extracted. 

7""" 

8 

9import csv 

10import os 

11import unittest 

12 

13from oc_meta.core.curator import Curator 

14from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler 

15 

16 

class TestCollectIdentifiers(unittest.TestCase):
    """Test suite for collect_identifiers method using real data.

    The rows are read from ``test_data_collect_identifiers.csv`` located next
    to this module. One ``Curator`` built from those rows is created in
    ``setUpClass`` and shared by every test in the class.
    """

    @classmethod
    def setUpClass(cls):
        """Load the CSV fixture and build the shared Curator instance."""
        csv_path = os.path.join(os.path.dirname(__file__), 'test_data_collect_identifiers.csv')

        # newline='' is what the csv module asks for: it lets the reader do
        # its own line-ending handling, so quoted fields that contain
        # newlines are parsed correctly on every platform.
        with open(csv_path, 'r', encoding='utf-8', newline='') as csvfile:
            cls.test_data = list(csv.DictReader(csvfile))

        ts_url = "http://localhost:9999/sparql"
        # NOTE(review): unlike csv_path this is a bare relative path; if
        # Curator opens it, it is resolved against the current working
        # directory rather than this file's directory -- confirm intended.
        prov_config = "test_prov_config.yaml"

        cls.counter_handler = RedisCounterHandler(
            host="localhost",
            port=6379,
            db=0,
        )

        cls.curator = Curator(
            data=cls.test_data,
            ts=ts_url,
            prov_config=prov_config,
            counter_handler=cls.counter_handler,
            base_iri="https://w3id.org/oc/meta",
            prefix="060",
        )

    def test_collect_identifiers_structure(self):
        """Test that collect_identifiers returns the expected 3-tuple structure."""
        result = self.curator.collect_identifiers(valid_dois_cache={})

        self.assertEqual(len(result), 3, "collect_identifiers should return 3 values")

        metavals, identifiers, vvis = result

        self.assertIsInstance(metavals, set)
        self.assertIsInstance(identifiers, set)
        self.assertIsInstance(vvis, set)

    def test_identifiers_extraction(self):
        """Test the identifiers and VVIs collected from the CSV fixture.

        DOI, ORCID and Crossref identifiers must be present in the collected
        identifiers. Venue ISSNs must NOT be in that set: they are expected
        only inside the venue-ID tuple of the VVI entries.
        """
        # The first element (metavals) is not inspected by this test.
        _, identifiers, vvis = self.curator.collect_identifiers(valid_dois_cache={})

        expected_dois = [
            'doi:10.17759/chp.2024200411',
            'doi:10.1016/j.molliq.2024.126787',
        ]

        # subTest keeps iterating after a failure, so every missing item is
        # reported instead of only the first one.
        for doi in expected_dois:
            with self.subTest(doi=doi):
                self.assertIn(doi, identifiers, f"DOI {doi} should be in identifiers")

        expected_orcid = 'orcid:0000-0002-7915-1367'
        self.assertIn(expected_orcid, identifiers, f"ORCID {expected_orcid} should be in identifiers")

        expected_crossrefs = ['crossref:7555', 'crossref:78', 'crossref:3434']
        for crossref in expected_crossrefs:
            with self.subTest(crossref=crossref):
                self.assertIn(crossref, identifiers, f"Crossref {crossref} should be in identifiers")

        # Verify exact VVI values based on test data.
        # Each entry is (volume, issue, venue_metaid, venue_ids_tuple).
        expected_vvis = {
            # Cultural-Historical Psychology volume 20, issue 4
            ('20', '4', '', ('issn:1816-5435', 'issn:2224-8935')),
            # Marmara University volume 30, issue 2
            ('30', '2', '', ('issn:2146-0590',)),
            # Journal of Environmental Chemical Engineering volume 13, issue 1
            ('13', '1', '', ('issn:2213-3437',)),
            # Radiology Case Reports volume 20, issue 3
            ('20', '3', '', ('issn:1930-0433',)),
            # Journal of Atmospheric volume 267, no issue
            ('267', '', '', ('issn:1364-6826',)),
            # Engineering Failure Analysis volume 169, no issue
            ('169', '', '', ('issn:1350-6307',)),
            # Construction and Building Materials volume 458, no issue
            ('458', '', '', ('issn:0950-0618',)),
            # Materials Science volume 188, no issue
            ('188', '', '', ('issn:1369-8001',)),
            # Journal of Molecular Liquids volume 419, no issue
            ('419', '', '', ('issn:0167-7322',)),
        }

        self.assertIsInstance(vvis, set, "VVIs should be a set")

        # Shape check of every collected VVI before comparing values.
        for vvi in vvis:
            with self.subTest(vvi=vvi):
                self.assertEqual(len(vvi), 4, f"Each VVI should have 4 elements: {vvi}")
                volume, issue, venue_metaid, venue_ids_tuple = vvi

                self.assertIsInstance(volume, str, f"Volume should be string: {volume}")
                self.assertIsInstance(issue, str, f"Issue should be string: {issue}")
                self.assertIsInstance(venue_metaid, (str, type(None)), f"Venue metaid should be string or None: {venue_metaid}")
                self.assertIsInstance(venue_ids_tuple, tuple, f"Venue IDs should be tuple: {venue_ids_tuple}")

        # Same size plus every expected entry present means the sets match.
        self.assertEqual(len(vvis), len(expected_vvis), f"Expected {len(expected_vvis)} VVIs, got {len(vvis)}")

        for expected_vvi in expected_vvis:
            with self.subTest(expected_vvi=expected_vvi):
                self.assertIn(expected_vvi, vvis, f"Expected VVI {expected_vvi} should be present in collected VVIs")

        venue_identifiers = [
            'issn:1816-5435', 'issn:2224-8935', 'issn:2146-0590',
            'issn:2213-3437', 'issn:1930-0433', 'issn:1364-6826',
            'issn:1350-6307', 'issn:0950-0618', 'issn:1369-8001', 'issn:0167-7322',
        ]

        # Venue ISSNs belong to the VVI tuples only, not to the main set.
        for venue_id in venue_identifiers:
            with self.subTest(venue_id=venue_id):
                self.assertNotIn(venue_id, identifiers, f"Venue ID {venue_id} should not be in main identifiers")

126 

127 

if __name__ == "__main__":
    # Executed directly as a script: hand control to unittest's runner.
    unittest.main()