Coverage for test/provenance_conversion_test.py: 97%

102 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-07-14 14:06 +0000

1import unittest 

2from unittest.mock import patch 

3import tempfile 

4import zipfile 

5import shutil 

6from pathlib import Path 

7from rdflib import ConjunctiveGraph, URIRef, Literal, Graph 

8 

9 

10from oc_meta.run import provenance_conversion 

11 

# Minimal JSON-LD document used as the conversion fixture.
# The context is declared inline via ``@vocab`` instead of the remote
# "https://schema.org" context so that parsing the fixture never needs network
# access. The schema.org context maps terms onto the ``http://schema.org/``
# vocabulary, which is also what the test assertions expect, so the resulting
# triples are unchanged.
SAMPLE_JSONLD = '''
{
  "@context": {"@vocab": "http://schema.org/"},
  "@id": "http://example.org/entity1",
  "@type": "CreativeWork",
  "name": "Test Entity"
}
'''

# N-Quads expected from SAMPLE_JSONLD. The IRIs use the ``http://schema.org/``
# namespace to agree with the expected graph built in
# test_convert_jsonld_to_nquads_success.
EXPECTED_NQUADS_CONTENT = (
    '<http://example.org/entity1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/CreativeWork> .\n'
    '<http://example.org/entity1> <http://schema.org/name> "Test Entity" .\n'
)

# Syntactically valid JSON whose "@context" is not a usable context reference;
# the tests expect the conversion to reject it.
INVALID_JSONLD = "{\"@context\": \"bad context\", \"@id\": \"bad_id\"}"

22 

class TestProvenanceConversionIntegration(unittest.TestCase):
    """Integration test suite for provenance_conversion.py script using real files.

    Every test runs against a fresh temporary directory tree containing an
    ``input`` directory (with a nested ``ra/0610/10000/1000/prov`` folder
    holding ``se.zip``) and an empty ``output`` directory.
    """

    # Prefix the tests expect on output files generated for archives stored in
    # ``ra/0610/10000/1000/prov`` relative to the input directory.
    OUTPUT_PREFIX = "ra-0610-10000-1000-prov-"

    def setUp(self):
        """Create temporary directories and a sample zip file for testing."""
        self.test_dir = Path(tempfile.mkdtemp())
        # Register the removal immediately: cleanups run even when a later
        # step of setUp raises, whereas tearDown would be skipped and the
        # temporary directory would leak.
        self.addCleanup(shutil.rmtree, self.test_dir)

        self.input_dir = self.test_dir / "input"
        self.output_dir = self.test_dir / "output"
        self.input_dir.mkdir()
        self.output_dir.mkdir()

        # Create a nested structure and the zip file
        self.prov_dir = self.input_dir / "ra" / "0610" / "10000" / "1000" / "prov"
        self.prov_dir.mkdir(parents=True)
        self.zip_path = self.prov_dir / "se.zip"
        self.json_filename = "data.json"
        with zipfile.ZipFile(self.zip_path, 'w') as zf:
            zf.writestr(self.json_filename, SAMPLE_JSONLD)

    def _expected_output_path(self, zip_stem):
        """Return the output path expected for the archive named ``<zip_stem>.zip``."""
        return self.output_dir / f"{self.OUTPUT_PREFIX}{zip_stem}.nq"

    def test_count_quads(self):
        """Test the count_quads function."""
        graph = ConjunctiveGraph()
        # One triple in the default graph and one quad in a named graph.
        graph.add((URIRef("ex:s1"), URIRef("ex:p1"), Literal("o1")))
        graph.add((URIRef("ex:s2"), URIRef("ex:p2"), Literal("o2"), URIRef("ex:g1")))
        self.assertEqual(provenance_conversion.count_quads(graph), 2)
        self.assertEqual(provenance_conversion.count_quads(ConjunctiveGraph()), 0)

    def test_convert_jsonld_to_nquads_success(self):
        """Test successful conversion from JSON-LD to N-Quads."""
        graph, nquads = provenance_conversion.convert_jsonld_to_nquads(SAMPLE_JSONLD)
        self.assertIsNotNone(graph)
        self.assertIsNotNone(nquads)
        self.assertIsInstance(graph, ConjunctiveGraph)

        # Build by hand the two triples the sample document describes.
        expected_graph = Graph()
        subj = URIRef("http://example.org/entity1")
        type_pred = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
        schema_type = URIRef("http://schema.org/CreativeWork")
        name_pred = URIRef("http://schema.org/name")
        name_obj = Literal("Test Entity")
        expected_graph.add((subj, type_pred, schema_type))
        expected_graph.add((subj, name_pred, name_obj))

        self.assertEqual(len(graph), len(expected_graph))
        self.assertTrue(graph.isomorphic(expected_graph))

    def test_convert_jsonld_to_nquads_failure(self):
        """Test conversion failure with invalid JSON-LD."""
        graph, nquads = provenance_conversion.convert_jsonld_to_nquads(INVALID_JSONLD)
        self.assertIsNone(graph)
        self.assertIsNone(nquads)

    def test_process_zip_file_success_integration(self):
        """Test successful processing using real files and directories."""
        result = provenance_conversion.process_zip_file(self.zip_path, self.output_dir, self.input_dir)

        self.assertTrue(result, "process_zip_file should return True on success")

        expected_output_path = self._expected_output_path("se")
        self.assertTrue(expected_output_path.exists(), f"Output file {expected_output_path} was not created")
        self.assertTrue(expected_output_path.is_file())

        # A parse error is left to propagate: unittest reports it with the
        # full traceback, which is more useful than a flattened failure text.
        output_graph = ConjunctiveGraph()
        output_graph.parse(expected_output_path, format='nquads')

        input_graph_for_check = ConjunctiveGraph()
        input_graph_for_check.parse(data=SAMPLE_JSONLD, format='json-ld')

        self.assertEqual(len(output_graph), len(input_graph_for_check),
                         f"Quad count mismatch: Output={len(output_graph)}, Expected={len(input_graph_for_check)}")
        self.assertTrue(output_graph.isomorphic(input_graph_for_check),
                        "Output graph content does not match expected content")

    def test_process_zip_file_no_json_integration(self):
        """Test processing a zip file with no JSON content."""
        no_json_zip_path = self.prov_dir / "no_json_se.zip"
        with zipfile.ZipFile(no_json_zip_path, 'w') as zf:
            zf.writestr("readme.txt", "This is not json")

        result = provenance_conversion.process_zip_file(no_json_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        self.assertFalse(self._expected_output_path("no_json_se").exists())

    def test_process_zip_file_bad_zip_integration(self):
        """Test processing a corrupt zip file."""
        bad_zip_path = self.prov_dir / "bad_se.zip"
        with open(bad_zip_path, 'wb') as f:
            f.write(b"This is not a zip file content")

        result = provenance_conversion.process_zip_file(bad_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        self.assertFalse(self._expected_output_path("bad_se").exists())

    def test_process_zip_file_conversion_fail_integration(self):
        """Test processing a zip file with invalid JSON-LD content."""
        invalid_json_zip_path = self.prov_dir / "invalid_json_se.zip"
        with zipfile.ZipFile(invalid_json_zip_path, 'w') as zf:
            zf.writestr("data.json", INVALID_JSONLD)

        result = provenance_conversion.process_zip_file(invalid_json_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        self.assertFalse(self._expected_output_path("invalid_json_se").exists())

    @patch('oc_meta.run.provenance_conversion.count_quads')
    def test_process_zip_file_checksum_fail_mocked_count(self, mock_count_quads):
        """Test checksum failure by mocking both count_quads calls."""
        # Let the real conversion and file writing happen.
        # Both counts are supplied by the mock: the first call reports 2 and
        # the second reports 1, forcing a mismatch between them.
        mock_count_quads.side_effect = [2, 1]

        # Use the standard zip created in setUp
        result = provenance_conversion.process_zip_file(self.zip_path, self.output_dir, self.input_dir)

        self.assertFalse(result, "process_zip_file should return False when checksum fails")
        self.assertEqual(mock_count_quads.call_count, 2)

        # Verify the output file WAS created (as checksum fails after writing)
        expected_output_path = self._expected_output_path("se")
        self.assertTrue(expected_output_path.exists(), f"Output file {expected_output_path} should still exist after checksum failure")

154 

155 

# Run the suite through unittest's command-line runner when this module is
# executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()