Coverage for virtuoso_utilities / rebuild_fulltext_index.py: 56%
108 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-04-14 09:16 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-04-14 09:16 +0000
1#!/usr/bin/env python3
3# SPDX-FileCopyrightText: 2025 Arcangelo Massari <arcangelo.massari@unibo.it>
4#
5# SPDX-License-Identifier: ISC
7"""
Utility for rebuilding the Virtuoso full-text index.

This module provides functionality to rebuild the Virtuoso RDF Quad store's
full-text index, which is used for optimal querying of RDF object values
using the bif:contains function in SPARQL queries.
13"""
14import argparse
15import shutil
16import subprocess
17import sys
18import time
19from typing import Tuple
21from virtuoso_utilities.isql_helpers import run_isql_command
def drop_fulltext_tables(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Drop the two tables backing the full-text index.

    Each ``drop table`` statement is sent through ``run_isql_command`` with
    ``ignore_errors=True``. A failure whose stderr mentions "does not exist"
    is tolerated; any other failure stops the loop immediately.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr). On success the two strings are
        empty; on failure they are the output of the statement that failed.
    """
    drop_statements = (
        "drop table DB.DBA.VTLOG_DB_DBA_RDF_OBJ;",
        "drop table DB.DBA.RDF_OBJ_RO_FLAGS_WORDS;",
    )

    print("Dropping existing full-text index tables...", file=sys.stderr)

    for statement in drop_statements:
        ok, out, err = run_isql_command(
            args, sql_command=statement, ignore_errors=True
        )
        if ok:
            continue
        # A missing table is expected on a first run, so it is not an error.
        if "does not exist" in err.lower():
            continue
        return False, out, err

    return True, "", ""
def recreate_fulltext_index(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Create the full-text index again.

    Sends a single ``DB.DBA.vt_create_text_index`` call on ``DB.DBA.RDF_OBJ``
    (columns ``RO_FLAGS`` / ``RO_ID``) through ``run_isql_command``.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr) as returned by ``run_isql_command``
    """
    create_index_sql = """
    DB.DBA.vt_create_text_index (
      fix_identifier_case ('DB.DBA.RDF_OBJ'),
      fix_identifier_case ('RO_FLAGS'),
      fix_identifier_case ('RO_ID'),
      0, 0, vector (), 1, '*ini*', 'UTF-8-QR');
    """

    print("Recreating full-text index...", file=sys.stderr)

    outcome = run_isql_command(args, sql_command=create_index_sql)
    return outcome
def enable_batch_update(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Switch the full-text index to batch-update mode.

    Sends ``DB.DBA.vt_batch_update (..., 'ON', 1)`` for ``DB.DBA.RDF_OBJ``
    through ``run_isql_command``.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr) as returned by ``run_isql_command``
    """
    print("Enabling batch update for full-text index...", file=sys.stderr)

    batch_update_sql = (
        "DB.DBA.vt_batch_update (fix_identifier_case ('DB.DBA.RDF_OBJ'), 'ON', 1);"
    )
    outcome = run_isql_command(args, sql_command=batch_update_sql)
    return outcome
def refill_fulltext_index(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Repopulate the full-text index.

    Sends ``DB.DBA.RDF_OBJ_FT_RECOVER();`` through ``run_isql_command``.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr) as returned by ``run_isql_command``
    """
    print("Refilling full-text index (this may take a while)...", file=sys.stderr)

    recover_sql = "DB.DBA.RDF_OBJ_FT_RECOVER();"
    outcome = run_isql_command(args, sql_command=recover_sql)
    return outcome
def _check_prerequisites(args: argparse.Namespace) -> bool:
    """Return True when the executable (and container) needed to run isql is available."""
    if args.docker_container:
        # Honour --docker-path instead of assuming a bare "docker" on PATH;
        # shutil.which() also accepts an explicit path to an executable.
        docker_cmd = getattr(args, "docker_path", None) or "docker"
        if not shutil.which(docker_cmd):
            print("Error: Docker command not found in PATH", file=sys.stderr)
            return False

        # Check if container is running
        try:
            result = subprocess.run(
                [docker_cmd, 'ps', '--format', '{{.Names}}'],
                capture_output=True,
                text=True,
                timeout=30
            )

            # Compare against whole names: a substring test would accept
            # "virt" when only "virtuoso-other" is running. Commas are split
            # too because a container may be listed with several names.
            running_names = set(result.stdout.replace(",", " ").split())
            if args.docker_container not in running_names:
                print(f"Error: Container '{args.docker_container}' not found or not running", file=sys.stderr)
                return False

        except Exception as e:
            print(f"Error checking container status: {e}", file=sys.stderr)
            return False

        return True

    # Honour --isql-path instead of assuming a bare "isql" on PATH.
    isql_cmd = getattr(args, "isql_path", None) or "isql"
    if not shutil.which(isql_cmd):
        print("Error: isql command not found in PATH", file=sys.stderr)
        return False

    return True


def _restart_container_and_wait(args: argparse.Namespace) -> None:
    """Restart the Docker container and poll isql until it answers or the wait budget runs out."""
    docker_cmd = getattr(args, "docker_path", None) or "docker"

    print("Restarting Docker container to activate text index...", file=sys.stderr)

    try:
        result = subprocess.run(
            [docker_cmd, 'restart', args.docker_container],
            capture_output=True,
            text=True,
            timeout=60
        )

        if result.returncode != 0:
            # The index itself was rebuilt, so a failed restart is only reported.
            print(f"Error restarting container: {result.stderr}", file=sys.stderr)
            print("Note: Manual restart recommended for full text index activation.", file=sys.stderr)
            return

        print("Waiting for container to be ready after restart...", file=sys.stderr)

        # Poll every 2 seconds; only the sleep time counts towards max_wait.
        max_wait = 60
        wait_time = 0
        while wait_time < max_wait:
            try:
                success, _, _ = run_isql_command(args, sql_command="status();", ignore_errors=True)

                if success:
                    print("Container restarted and ready!", file=sys.stderr)
                    return

            except subprocess.TimeoutExpired:
                # A timed-out probe just means "not ready yet"; keep polling.
                pass

            time.sleep(2)
            wait_time += 2

        print("Container restarted but may still be initializing.", file=sys.stderr)

    except Exception as e:
        print(f"Error during container restart: {e}", file=sys.stderr)
        print("Note: Manual restart recommended for full text index activation.", file=sys.stderr)


def rebuild_fulltext_index(args: argparse.Namespace) -> bool:
    """
    Complete process to rebuild the Virtuoso full-text index.

    This function will:
    1. Drop existing full-text index tables
    2. Recreate the index
    3. Enable batch update
    4. Refill the index

    Before step 1 it verifies that the docker executable is available and the
    container is listed by ``docker ps`` (when ``args.docker_container`` is
    set), or that the isql executable is available otherwise. The executables
    are taken from ``args.docker_path`` / ``args.isql_path`` when present,
    falling back to ``docker`` / ``isql``.

    After this process completes, the Virtuoso database should be restarted
    for optimal text index performance. With ``args.restart_container`` and
    ``args.docker_container`` both set, the container is restarted here; a
    failed restart is reported on stderr but does not change the result.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        True if the rebuild process completed successfully, False otherwise
    """
    if not _check_prerequisites(args):
        return False

    # Each step returns (success, stdout, stderr); stop at the first failure.
    steps = (
        (drop_fulltext_tables, "Error dropping full-text index tables"),
        (recreate_fulltext_index, "Error recreating full-text index"),
        (enable_batch_update, "Error enabling batch update"),
        (refill_fulltext_index, "Error refilling full-text index"),
    )
    for step, failure_prefix in steps:
        success, _, stderr = step(args)
        if not success:
            print(f"{failure_prefix}: {stderr}", file=sys.stderr)
            return False

    print("Full-text index rebuild completed successfully.", file=sys.stderr)

    if args.restart_container and args.docker_container:
        _restart_container_and_wait(args)
    elif args.restart_container:
        print("Note: --restart-container requires --docker-container to be specified.", file=sys.stderr)
        print("Note: Restart the Virtuoso database manually for optimal text index performance.", file=sys.stderr)
    else:
        print("Note: Restart the Virtuoso database for optimal text index performance.", file=sys.stderr)

    return True
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    Returns:
        Namespace with ``host``, ``port``, ``user``, ``password``,
        ``docker_container``, ``restart_container``, ``docker_path``,
        ``docker_isql_path`` and ``isql_path``.
    """
    cli = argparse.ArgumentParser(
        description="Rebuild the Virtuoso full-text index."
    )

    # Connection settings: (flag, default, help).
    connection_options = (
        ("--host", "localhost", "Virtuoso host"),
        ("--port", "1111", "Virtuoso port"),
        ("--user", "dba", "Virtuoso username"),
        ("--password", "dba", "Virtuoso password"),
    )
    for flag, default, help_text in connection_options:
        cli.add_argument(flag, default=default, help=help_text)

    cli.add_argument(
        "--docker-container",
        help="Docker container name/ID to execute isql inside",
    )
    cli.add_argument(
        "--restart-container",
        action="store_true",
        help="Restart the Docker container after rebuilding the index (recommended for full activation)",
    )

    # Executable locations required by isql_helpers: (flag, default, help).
    executable_options = (
        ("--docker-path", "docker", "Path to docker executable"),
        ("--docker-isql-path", "isql", "Path to isql inside container"),
        ("--isql-path", "isql", "Path to isql executable"),
    )
    for flag, default, help_text in executable_options:
        cli.add_argument(flag, default=default, help=help_text)

    return cli.parse_args()
def main() -> int:
    """Run the rebuild from the command line.

    Returns:
        Process exit code: 0 when the rebuild succeeded, 1 otherwise.
    """
    if rebuild_fulltext_index(parse_args()):
        return 0
    return 1
# Script entry point: propagate main()'s result as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)