Coverage for test/provenance_conversion_test.py: 97%

106 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-12-20 08:55 +0000

1import unittest 

2from unittest.mock import patch 

3import tempfile 

4import zipfile 

5import shutil 

6from pathlib import Path 

7from rdflib import Dataset, URIRef, Literal, Graph 

8 

9 

10from oc_meta.run import provenance_conversion 

11 

# Minimal JSON-LD document describing one schema.org CreativeWork; used as the
# payload of the fixture zip and as direct input to the conversion function.
SAMPLE_JSONLD = '''
{
  "@context": "https://schema.org",
  "@id": "http://example.org/entity1",
  "@type": "CreativeWork",
  "name": "Test Entity"
}
'''

# N-Quads serialization of the sample entity, one statement per line.
# NOTE(review): these IRIs use the https://schema.org/ namespace, whereas
# test_convert_jsonld_to_nquads_success builds its expected quads with
# http://schema.org/ -- confirm which one is intended before relying on this.
EXPECTED_NQUADS_CONTENT = (
    '<http://example.org/entity1> '
    '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> '
    '<https://schema.org/CreativeWork> .\n'
    '<http://example.org/entity1> '
    '<https://schema.org/name> '
    '"Test Entity" .\n'
)

# Syntactically valid JSON whose "@context" is not a usable context; the
# failure tests expect the conversion to reject it.
INVALID_JSONLD = '{"@context": "bad context", "@id": "bad_id"}'

22 

class TestProvenanceConversionIntegration(unittest.TestCase):
    """Integration test suite for provenance_conversion.py script using real files.

    Every test runs against a fresh temporary tree built by ``setUp``:

        <tmp>/input/ra/0610/10000/1000/prov/se.zip   (contains data.json)
        <tmp>/output/

    The tree is removed through ``addCleanup`` rather than ``tearDown`` so it
    is deleted even when ``setUp`` itself fails part-way through.
    """

    def setUp(self):
        """Create temporary directories and a sample zip file for testing."""
        self.test_dir = Path(tempfile.mkdtemp())
        # Register removal right away: unittest does not call tearDown() when
        # setUp() raises, but it still runs registered cleanups, so a failure
        # in any step below no longer leaks the temporary directory.
        self.addCleanup(shutil.rmtree, self.test_dir)

        self.input_dir = self.test_dir / "input"
        self.output_dir = self.test_dir / "output"
        self.input_dir.mkdir()
        self.output_dir.mkdir()

        # Create a nested structure and the zip file
        self.prov_dir = self.input_dir / "ra" / "0610" / "10000" / "1000" / "prov"
        self.prov_dir.mkdir(parents=True)
        self.zip_path = self.prov_dir / "se.zip"
        self.json_filename = "data.json"
        with zipfile.ZipFile(self.zip_path, 'w') as zf:
            zf.writestr(self.json_filename, SAMPLE_JSONLD)

    def test_count_quads(self):
        """Test the count_quads function."""
        graph = Dataset()
        # One statement without a graph name and one in the named graph ex:g1.
        graph.add((URIRef("ex:s1"), URIRef("ex:p1"), Literal("o1")))
        graph.add((URIRef("ex:s2"), URIRef("ex:p2"), Literal("o2"), URIRef("ex:g1")))
        self.assertEqual(provenance_conversion.count_quads(graph), 2)
        self.assertEqual(provenance_conversion.count_quads(Dataset()), 0)

    def test_convert_jsonld_to_nquads_success(self):
        """Test successful conversion from JSON-LD to N-Quads."""
        # NOTE(review): SAMPLE_JSONLD points at the remote https://schema.org
        # context, so this presumably needs network access -- confirm.
        graph, nquads = provenance_conversion.convert_jsonld_to_nquads(SAMPLE_JSONLD)
        self.assertIsNotNone(graph)
        self.assertIsNotNone(nquads)
        self.assertIsInstance(graph, Dataset)

        expected_dataset = Dataset()
        subj = URIRef("http://example.org/entity1")
        type_pred = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
        schema_type = URIRef("http://schema.org/CreativeWork")
        name_pred = URIRef("http://schema.org/name")
        name_obj = Literal("Test Entity")
        expected_dataset.add((subj, type_pred, schema_type))
        expected_dataset.add((subj, name_pred, name_obj))

        self.assertEqual(len(graph), len(expected_dataset))
        # Compare quads since Dataset iterates as quads
        actual_quads = set(graph.quads())
        expected_quads = set(expected_dataset.quads())
        self.assertEqual(actual_quads, expected_quads)

    def test_convert_jsonld_to_nquads_failure(self):
        """Test conversion failure with invalid JSON-LD."""
        graph, nquads = provenance_conversion.convert_jsonld_to_nquads(INVALID_JSONLD)
        self.assertIsNone(graph)
        self.assertIsNone(nquads)

    def test_process_zip_file_success_integration(self):
        """Test successful processing using real files and directories."""
        result = provenance_conversion.process_zip_file(self.zip_path, self.output_dir, self.input_dir)

        self.assertTrue(result, "process_zip_file should return True on success")

        expected_output_filename = "ra-0610-10000-1000-prov-se.nq"
        expected_output_path = self.output_dir / expected_output_filename
        self.assertTrue(expected_output_path.exists(), f"Output file {expected_output_path} was not created")
        self.assertTrue(expected_output_path.is_file())

        # Re-read the written file so the check covers what is actually on disk.
        output_graph = Dataset()
        try:
            output_graph.parse(expected_output_path, format='nquads')
        except Exception as e:
            self.fail(f"Failed to parse the generated N-Quads file {expected_output_path}: {e}")

        # Reference dataset parsed directly from the same JSON-LD source.
        input_graph_for_check = Dataset()
        input_graph_for_check.parse(data=SAMPLE_JSONLD, format='json-ld')

        self.assertEqual(len(output_graph), len(input_graph_for_check),
                         f"Quad count mismatch: Output={len(output_graph)}, Expected={len(input_graph_for_check)}")
        # Compare quads since Dataset iterates as quads
        actual_quads = set(output_graph.quads())
        expected_quads = set(input_graph_for_check.quads())
        self.assertEqual(actual_quads, expected_quads,
                         "Output graph content does not match expected content")

    def test_process_zip_file_no_json_integration(self):
        """Test processing a zip file with no JSON content."""
        no_json_zip_path = self.prov_dir / "no_json_se.zip"
        with zipfile.ZipFile(no_json_zip_path, 'w') as zf:
            zf.writestr("readme.txt", "This is not json")

        result = provenance_conversion.process_zip_file(no_json_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        expected_output_filename = "ra-0610-10000-1000-prov-no_json_se.nq"
        self.assertFalse((self.output_dir / expected_output_filename).exists())

    def test_process_zip_file_bad_zip_integration(self):
        """Test processing a corrupt zip file."""
        bad_zip_path = self.prov_dir / "bad_se.zip"
        # Plain bytes with a .zip name: not a readable archive.
        with open(bad_zip_path, 'wb') as f:
            f.write(b"This is not a zip file content")

        result = provenance_conversion.process_zip_file(bad_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        expected_output_filename = "ra-0610-10000-1000-prov-bad_se.nq"
        self.assertFalse((self.output_dir / expected_output_filename).exists())

    def test_process_zip_file_conversion_fail_integration(self):
        """Test processing a zip file with invalid JSON-LD content."""
        invalid_json_zip_path = self.prov_dir / "invalid_json_se.zip"
        with zipfile.ZipFile(invalid_json_zip_path, 'w') as zf:
            zf.writestr("data.json", INVALID_JSONLD)

        result = provenance_conversion.process_zip_file(invalid_json_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        expected_output_filename = "ra-0610-10000-1000-prov-invalid_json_se.nq"
        self.assertFalse((self.output_dir / expected_output_filename).exists())

    @patch('oc_meta.run.provenance_conversion.count_quads')
    def test_process_zip_file_checksum_fail_mocked_count(self, mock_count_quads):
        """Test checksum failure by mocking the second count_quads call."""
        # Let the real conversion and file writing happen
        # Mock only the quad counting to force a mismatch
        mock_count_quads.side_effect = [2, 1]  # Input=2 (from real JSON-LD), Output=1 (mocked)

        # Use the standard zip created in setUp
        result = provenance_conversion.process_zip_file(self.zip_path, self.output_dir, self.input_dir)

        self.assertFalse(result, "process_zip_file should return False when checksum fails")
        self.assertEqual(mock_count_quads.call_count, 2)

        # Verify the output file WAS created (as checksum fails after writing)
        expected_output_filename = "ra-0610-10000-1000-prov-se.nq"
        expected_output_path = self.output_dir / expected_output_filename
        self.assertTrue(expected_output_path.exists(), f"Output file {expected_output_path} should still exist after checksum failure")

160 

161 

# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()