Coverage for test/provenance_conversion_test.py: 97%
102 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
1import unittest
2from unittest.mock import patch
3import tempfile
4import zipfile
5import shutil
6from pathlib import Path
7from rdflib import ConjunctiveGraph, URIRef, Literal, Graph
10from oc_meta.run import provenance_conversion
# Minimal JSON-LD document used as the valid fixture: a single entity typed
# as CreativeWork with a name, under the "https://schema.org" context.
SAMPLE_JSONLD = '''
{
 "@context": "https://schema.org",
 "@id": "http://example.org/entity1",
 "@type": "CreativeWork",
 "name": "Test Entity"
}
'''

# N-Quads text for the sample entity (one rdf:type statement, one name statement).
# NOTE(review): these IRIs use the https://schema.org/ namespace, whereas
# test_convert_jsonld_to_nquads_success builds its expected graph with
# http://schema.org/ terms. No test visible in this file references this
# constant -- confirm which namespace is intended before relying on it.
EXPECTED_NQUADS_CONTENT = (
    '<http://example.org/entity1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/CreativeWork> .\n'
    '<http://example.org/entity1> <https://schema.org/name> "Test Entity" .\n'
)

# JSON text passed to the failure-path tests, which expect the conversion
# of this input to yield (None, None).
INVALID_JSONLD = '{"@context": "bad context", "@id": "bad_id"}'
class TestProvenanceConversionIntegration(unittest.TestCase):
    """Integration test suite for provenance_conversion.py script using real files.

    Every test runs against a fresh temporary directory tree containing an
    ``input`` directory (with a nested ``ra/0610/10000/1000/prov`` folder
    holding ``se.zip``) and an empty ``output`` directory.
    """

    def setUp(self):
        """Create temporary directories and a sample zip file for testing."""
        self.test_dir = Path(tempfile.mkdtemp())
        # Register the removal immediately: unittest does not call tearDown()
        # when setUp() raises, but registered cleanups still run, so a failure
        # in any step below cannot leave the temporary directory behind.
        self.addCleanup(shutil.rmtree, self.test_dir)

        self.input_dir = self.test_dir / "input"
        self.output_dir = self.test_dir / "output"
        self.input_dir.mkdir()
        self.output_dir.mkdir()

        # Create a nested structure and the zip file
        self.prov_dir = self.input_dir / "ra" / "0610" / "10000" / "1000" / "prov"
        self.prov_dir.mkdir(parents=True)
        self.zip_path = self.prov_dir / "se.zip"
        self.json_filename = "data.json"
        with zipfile.ZipFile(self.zip_path, 'w') as zf:
            zf.writestr(self.json_filename, SAMPLE_JSONLD)

    def _expected_output_path(self, zip_stem):
        """Return the output path the tests expect for ``<zip_stem>.zip`` in ``prov_dir``.

        The prefix is spelled out literally (instead of being derived from
        ``prov_dir``) so the expectation stays independent of the code under test.
        """
        return self.output_dir / f"ra-0610-10000-1000-prov-{zip_stem}.nq"

    def test_count_quads(self):
        """Test the count_quads function."""
        graph = ConjunctiveGraph()
        # One statement added as a 3-tuple and one as a 4-tuple with a graph IRI.
        graph.add((URIRef("ex:s1"), URIRef("ex:p1"), Literal("o1")))
        graph.add((URIRef("ex:s2"), URIRef("ex:p2"), Literal("o2"), URIRef("ex:g1")))
        self.assertEqual(provenance_conversion.count_quads(graph), 2)
        self.assertEqual(provenance_conversion.count_quads(ConjunctiveGraph()), 0)

    def test_convert_jsonld_to_nquads_success(self):
        """Test successful conversion from JSON-LD to N-Quads."""
        graph, nquads = provenance_conversion.convert_jsonld_to_nquads(SAMPLE_JSONLD)
        self.assertIsNotNone(graph)
        self.assertIsNotNone(nquads)
        self.assertIsInstance(graph, ConjunctiveGraph)

        # Build the two statements the sample document is expected to produce.
        expected_graph = Graph()
        subj = URIRef("http://example.org/entity1")
        type_pred = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
        schema_type = URIRef("http://schema.org/CreativeWork")
        name_pred = URIRef("http://schema.org/name")
        name_obj = Literal("Test Entity")
        expected_graph.add((subj, type_pred, schema_type))
        expected_graph.add((subj, name_pred, name_obj))

        self.assertEqual(len(graph), len(expected_graph))
        self.assertTrue(graph.isomorphic(expected_graph))

    def test_convert_jsonld_to_nquads_failure(self):
        """Test conversion failure with invalid JSON-LD."""
        graph, nquads = provenance_conversion.convert_jsonld_to_nquads(INVALID_JSONLD)
        self.assertIsNone(graph)
        self.assertIsNone(nquads)

    def test_process_zip_file_success_integration(self):
        """Test successful processing using real files and directories."""
        result = provenance_conversion.process_zip_file(self.zip_path, self.output_dir, self.input_dir)

        self.assertTrue(result, "process_zip_file should return True on success")

        expected_output_path = self._expected_output_path("se")
        self.assertTrue(expected_output_path.exists(), f"Output file {expected_output_path} was not created")
        self.assertTrue(expected_output_path.is_file())

        # Re-read the generated file; a parse error is reported as a test
        # failure that names the offending path.
        output_graph = ConjunctiveGraph()
        try:
            output_graph.parse(expected_output_path, format='nquads')
        except Exception as e:
            self.fail(f"Failed to parse the generated N-Quads file {expected_output_path}: {e}")

        # Parse the original JSON-LD independently to obtain the reference graph.
        input_graph_for_check = ConjunctiveGraph()
        input_graph_for_check.parse(data=SAMPLE_JSONLD, format='json-ld')

        self.assertEqual(len(output_graph), len(input_graph_for_check),
                         f"Quad count mismatch: Output={len(output_graph)}, Expected={len(input_graph_for_check)}")
        self.assertTrue(output_graph.isomorphic(input_graph_for_check),
                        "Output graph content does not match expected content")

    def test_process_zip_file_no_json_integration(self):
        """Test processing a zip file with no JSON content."""
        no_json_zip_path = self.prov_dir / "no_json_se.zip"
        with zipfile.ZipFile(no_json_zip_path, 'w') as zf:
            zf.writestr("readme.txt", "This is not json")

        result = provenance_conversion.process_zip_file(no_json_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        self.assertFalse(self._expected_output_path("no_json_se").exists())

    def test_process_zip_file_bad_zip_integration(self):
        """Test processing a corrupt zip file."""
        bad_zip_path = self.prov_dir / "bad_se.zip"
        with open(bad_zip_path, 'wb') as f:
            f.write(b"This is not a zip file content")

        result = provenance_conversion.process_zip_file(bad_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        self.assertFalse(self._expected_output_path("bad_se").exists())

    def test_process_zip_file_conversion_fail_integration(self):
        """Test processing a zip file with invalid JSON-LD content."""
        invalid_json_zip_path = self.prov_dir / "invalid_json_se.zip"
        with zipfile.ZipFile(invalid_json_zip_path, 'w') as zf:
            zf.writestr("data.json", INVALID_JSONLD)

        result = provenance_conversion.process_zip_file(invalid_json_zip_path, self.output_dir, self.input_dir)
        self.assertFalse(result)
        self.assertFalse(self._expected_output_path("invalid_json_se").exists())

    @patch('oc_meta.run.provenance_conversion.count_quads')
    def test_process_zip_file_checksum_fail_mocked_count(self, mock_count_quads):
        """Test checksum failure by making mocked count_quads calls disagree."""
        # Let the real conversion and file writing happen; only the quad
        # counting is mocked: the first call returns 2 and the second returns 1,
        # which forces a mismatch.
        mock_count_quads.side_effect = [2, 1]

        # Use the standard zip created in setUp
        result = provenance_conversion.process_zip_file(self.zip_path, self.output_dir, self.input_dir)

        self.assertFalse(result, "process_zip_file should return False when checksum fails")
        self.assertEqual(mock_count_quads.call_count, 2)

        # Verify the output file WAS created (as checksum fails after writing)
        expected_output_path = self._expected_output_path("se")
        self.assertTrue(expected_output_path.exists(), f"Output file {expected_output_path} should still exist after checksum failure")
# Run the suite through unittest's command-line runner when this module is
# executed directly as a script.
if __name__ == "__main__":
    unittest.main()