Coverage for test/provenance_conversion_test.py: 97%
106 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-20 08:55 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-20 08:55 +0000
1import unittest
2from unittest.mock import patch
3import tempfile
4import zipfile
5import shutil
6from pathlib import Path
7from rdflib import Dataset, URIRef, Literal, Graph
10from oc_meta.run import provenance_conversion
# Minimal JSON-LD document the tests use as input: one entity typed as a
# schema.org CreativeWork, carrying a single name.
SAMPLE_JSONLD = '''
{
  "@context": "https://schema.org",
  "@id": "http://example.org/entity1",
  "@type": "CreativeWork",
  "name": "Test Entity"
}
'''

# N-Quads text for the sample entity: one rdf:type statement, one name statement.
# NOTE(review): no test visible in this file reads this constant, and it uses
# https://schema.org IRIs while test_convert_jsonld_to_nquads_success expects
# http://schema.org ones -- confirm which is intended before relying on it.
EXPECTED_NQUADS_CONTENT = (
    '<http://example.org/entity1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/CreativeWork> .\n'
    '<http://example.org/entity1> <https://schema.org/name> "Test Entity" .\n'
)

# Payload the tests pass to the converter (directly and inside a zip) when they
# expect the conversion to fail. It is well-formed JSON.
INVALID_JSONLD = '{"@context": "bad context", "@id": "bad_id"}'
class TestProvenanceConversionIntegration(unittest.TestCase):
    """Integration test suite for provenance_conversion.py script using real files.

    ``setUp`` builds a throw-away tree for every test::

        <tmp>/input/ra/0610/10000/1000/prov/se.zip   (one member: data.json)
        <tmp>/output/

    The whole tree is deleted again once the test has finished.
    """

    # Prefix these tests expect on every generated output file name: the
    # components of the prov directory, relative to the input directory,
    # joined with "-".
    OUTPUT_PREFIX = "ra-0610-10000-1000-prov-"

    def setUp(self) -> None:
        """Create temporary directories and a sample zip file for testing."""
        self.test_dir = Path(tempfile.mkdtemp())
        # Register the removal immediately: cleanups run even when a later
        # setUp step raises, whereas tearDown would be skipped in that case.
        self.addCleanup(shutil.rmtree, self.test_dir)

        self.input_dir = self.test_dir / "input"
        self.output_dir = self.test_dir / "output"
        self.input_dir.mkdir()
        self.output_dir.mkdir()

        # Create a nested structure and the zip file
        self.prov_dir = self.input_dir / "ra" / "0610" / "10000" / "1000" / "prov"
        self.prov_dir.mkdir(parents=True)
        self.json_filename = "data.json"
        self.zip_path = self._write_zip("se.zip", self.json_filename, SAMPLE_JSONLD)

    def _write_zip(self, zip_name: str, member_name: str, content: str) -> Path:
        """Create ``zip_name`` in the prov directory with one text member.

        Returns the path of the archive that was written.
        """
        zip_path = self.prov_dir / zip_name
        with zipfile.ZipFile(zip_path, 'w') as archive:
            archive.writestr(member_name, content)
        return zip_path

    def _output_path(self, zip_stem: str) -> Path:
        """Return the output file these tests expect for the zip named ``zip_stem``."""
        return self.output_dir / f"{self.OUTPUT_PREFIX}{zip_stem}.nq"

    def _assert_same_quads(self, actual, expected) -> None:
        """Assert that two datasets have the same size and the same set of quads."""
        self.assertEqual(
            len(actual), len(expected),
            f"Quad count mismatch: Output={len(actual)}, Expected={len(expected)}")
        # Compare quads since Dataset iterates as quads
        self.assertEqual(
            set(actual.quads()), set(expected.quads()),
            "Output graph content does not match expected content")

    def test_count_quads(self) -> None:
        """Test the count_quads function."""
        graph = Dataset()
        # One statement added as a triple, one added as a quad naming graph ex:g1.
        graph.add((URIRef("ex:s1"), URIRef("ex:p1"), Literal("o1")))
        graph.add((URIRef("ex:s2"), URIRef("ex:p2"), Literal("o2"), URIRef("ex:g1")))
        self.assertEqual(provenance_conversion.count_quads(graph), 2)
        self.assertEqual(provenance_conversion.count_quads(Dataset()), 0)

    def test_convert_jsonld_to_nquads_success(self) -> None:
        """Test successful conversion from JSON-LD to N-Quads."""
        graph, nquads = provenance_conversion.convert_jsonld_to_nquads(SAMPLE_JSONLD)
        self.assertIsNotNone(graph)
        self.assertIsNotNone(nquads)
        self.assertIsInstance(graph, Dataset)

        # The expected IRIs use http://schema.org although the sample's
        # @context is https://schema.org.
        # NOTE(review): presumably this follows from how the context resolves
        # its terms -- confirm against the JSON-LD parser in use.
        subject = URIRef("http://example.org/entity1")
        expected_dataset = Dataset()
        expected_dataset.add((
            subject,
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            URIRef("http://schema.org/CreativeWork"),
        ))
        expected_dataset.add((
            subject,
            URIRef("http://schema.org/name"),
            Literal("Test Entity"),
        ))

        self._assert_same_quads(graph, expected_dataset)

    def test_convert_jsonld_to_nquads_failure(self) -> None:
        """Test conversion failure with invalid JSON-LD."""
        graph, nquads = provenance_conversion.convert_jsonld_to_nquads(INVALID_JSONLD)
        self.assertIsNone(graph)
        self.assertIsNone(nquads)

    def test_process_zip_file_success_integration(self) -> None:
        """Test successful processing using real files and directories."""
        result = provenance_conversion.process_zip_file(self.zip_path, self.output_dir, self.input_dir)

        self.assertTrue(result, "process_zip_file should return True on success")

        expected_output_path = self._output_path("se")
        self.assertTrue(expected_output_path.exists(), f"Output file {expected_output_path} was not created")
        self.assertTrue(expected_output_path.is_file())

        output_graph = Dataset()
        try:
            output_graph.parse(expected_output_path, format='nquads')
        except Exception as e:
            # Report an unreadable output file as a test failure, not an error.
            self.fail(f"Failed to parse the generated N-Quads file {expected_output_path}: {e}")

        # Parse the same input independently to obtain the reference quads.
        input_graph_for_check = Dataset()
        input_graph_for_check.parse(data=SAMPLE_JSONLD, format='json-ld')

        self._assert_same_quads(output_graph, input_graph_for_check)

    def test_process_zip_file_no_json_integration(self) -> None:
        """Test processing a zip file with no JSON content."""
        no_json_zip_path = self._write_zip("no_json_se.zip", "readme.txt", "This is not json")

        result = provenance_conversion.process_zip_file(no_json_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        self.assertFalse(self._output_path("no_json_se").exists())

    def test_process_zip_file_bad_zip_integration(self) -> None:
        """Test processing a corrupt zip file."""
        bad_zip_path = self.prov_dir / "bad_se.zip"
        # Raw bytes that do not form a zip archive.
        with open(bad_zip_path, 'wb') as f:
            f.write(b"This is not a zip file content")

        result = provenance_conversion.process_zip_file(bad_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        self.assertFalse(self._output_path("bad_se").exists())

    def test_process_zip_file_conversion_fail_integration(self) -> None:
        """Test processing a zip file with invalid JSON-LD content."""
        invalid_json_zip_path = self._write_zip("invalid_json_se.zip", "data.json", INVALID_JSONLD)

        result = provenance_conversion.process_zip_file(invalid_json_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        self.assertFalse(self._output_path("invalid_json_se").exists())

    @patch('oc_meta.run.provenance_conversion.count_quads')
    def test_process_zip_file_checksum_fail_mocked_count(self, mock_count_quads) -> None:
        """Test checksum failure by mocking the second count_quads call."""
        # Let the real conversion and file writing happen; only the quad
        # counting is mocked. Consecutive calls return 2 and then 1, so the
        # two counts disagree.
        mock_count_quads.side_effect = [2, 1]

        # Use the standard zip created in setUp
        result = provenance_conversion.process_zip_file(self.zip_path, self.output_dir, self.input_dir)

        self.assertFalse(result, "process_zip_file should return False when checksum fails")
        self.assertEqual(mock_count_quads.call_count, 2)

        # Verify the output file WAS created (as checksum fails after writing)
        expected_output_path = self._output_path("se")
        self.assertTrue(expected_output_path.exists(), f"Output file {expected_output_path} should still exist after checksum failure")
if __name__ == '__main__':
    # Run the tests in this module when the file is executed directly.
    unittest.main()