Coverage for virtuoso_utilities / rebuild_fulltext_index.py: 56%
108 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-15 14:45 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-15 14:45 +0000
1#!/usr/bin/env python3
2"""
3Utility for rebuilding the Virtuoso full-text index.
5This module provides functionality to rebuild the Virtuoso RDF Quad store's
6full-text index, which is used for optimal querying of RDF object values
7using the bif:contains function in SPARQL queries.
8"""
import argparse
import shutil
import subprocess
import sys
import time
from typing import Optional, Sequence, Tuple

from virtuoso_utilities.isql_helpers import run_isql_command
def drop_fulltext_tables(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Remove the two tables that back the full-text index.

    Every ``drop table`` statement is sent through ``run_isql_command`` with
    ``ignore_errors=True``. A statement that fails is tolerated when its
    stderr contains "does not exist" (compared case-insensitively); any other
    failure stops the loop at once.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr). ``(True, "", "")`` when no
        statement failed for an unexpected reason; otherwise ``False``
        together with the stdout/stderr of the first offending statement.
    """
    print("Dropping existing full-text index tables...", file=sys.stderr)

    drop_statements = (
        "drop table DB.DBA.VTLOG_DB_DBA_RDF_OBJ;",
        "drop table DB.DBA.RDF_OBJ_RO_FLAGS_WORDS;",
    )

    for statement in drop_statements:
        ok, out, err = run_isql_command(args, sql_command=statement, ignore_errors=True)
        if ok:
            continue

        # A missing table is expected on the very first run, so that
        # particular failure must not abort the rebuild.
        if "does not exist" in err.lower():
            continue

        return False, out, err

    return True, "", ""
def recreate_fulltext_index(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Create the full-text index again.

    Sends a single ``DB.DBA.vt_create_text_index`` call that targets the
    ``RO_FLAGS`` column of ``DB.DBA.RDF_OBJ`` (with ``RO_ID`` as the third
    argument) through ``run_isql_command`` and hands its result back
    unchanged.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr)
    """
    print("Recreating full-text index...", file=sys.stderr)

    create_index_sql = """
    DB.DBA.vt_create_text_index (
    fix_identifier_case ('DB.DBA.RDF_OBJ'),
    fix_identifier_case ('RO_FLAGS'),
    fix_identifier_case ('RO_ID'),
    0, 0, vector (), 1, '*ini*', 'UTF-8-QR');
    """

    return run_isql_command(args, sql_command=create_index_sql)
def enable_batch_update(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Switch the full-text index to batch update mode.

    Issues ``DB.DBA.vt_batch_update`` for ``DB.DBA.RDF_OBJ`` with the
    arguments ``'ON'`` and ``1`` through ``run_isql_command``.
    NOTE(review): the trailing ``1`` is presumably the batch interval in
    minutes - confirm against the Virtuoso documentation.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr)
    """
    print("Enabling batch update for full-text index...", file=sys.stderr)

    return run_isql_command(
        args,
        sql_command="DB.DBA.vt_batch_update (fix_identifier_case ('DB.DBA.RDF_OBJ'), 'ON', 1);",
    )
def refill_fulltext_index(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Repopulate the full-text index.

    Runs ``DB.DBA.RDF_OBJ_FT_RECOVER()`` through ``run_isql_command`` and
    returns that call's result as-is. The progress message warns that the
    operation may take a while.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr)
    """
    print("Refilling full-text index (this may take a while)...", file=sys.stderr)

    recover_sql = "DB.DBA.RDF_OBJ_FT_RECOVER();"
    outcome = run_isql_command(args, sql_command=recover_sql)
    return outcome
def _check_prerequisites(args: argparse.Namespace) -> bool:
    """Check that the required executable (and container, if any) is available."""
    if not args.docker_container:
        # Honour a custom --isql-path instead of assuming plain "isql" is on
        # PATH; shutil.which() accepts both bare names and explicit paths.
        isql_path = getattr(args, "isql_path", None) or "isql"
        if not shutil.which(isql_path):
            print("Error: isql command not found in PATH", file=sys.stderr)
            return False
        return True

    # Same reasoning for --docker-path.
    docker_path = getattr(args, "docker_path", None) or "docker"
    if not shutil.which(docker_path):
        print("Error: Docker command not found in PATH", file=sys.stderr)
        return False

    # Check if container is running
    try:
        result = subprocess.run(
            [docker_path, 'ps', '--format', '{{.Names}}'],
            capture_output=True,
            text=True,
            timeout=30
        )

        # Compare against exact names. A substring test on the raw output
        # would accept "virt" when only "virtuoso-prod" is running. A single
        # output line may carry several comma-separated names.
        running_names = {
            name.strip()
            for line in result.stdout.splitlines()
            for name in line.split(",")
        }
        if args.docker_container not in running_names:
            print(f"Error: Container '{args.docker_container}' not found or not running", file=sys.stderr)
            return False
    except Exception as e:
        print(f"Error checking container status: {e}", file=sys.stderr)
        return False

    return True


def _restart_container(args: argparse.Namespace) -> None:
    """Restart the Docker container and poll until Virtuoso answers (best effort)."""
    print("Restarting Docker container to activate text index...", file=sys.stderr)

    docker_path = getattr(args, "docker_path", None) or "docker"
    max_wait = 60       # seconds of sleeping before giving up on readiness
    poll_interval = 2   # seconds between two readiness probes

    try:
        # Restart the container
        result = subprocess.run(
            [docker_path, 'restart', args.docker_container],
            capture_output=True,
            text=True,
            timeout=60
        )

        if result.returncode != 0:
            print(f"Error restarting container: {result.stderr}", file=sys.stderr)
            print("Note: Manual restart recommended for full text index activation.", file=sys.stderr)
            return

        print("Waiting for container to be ready after restart...", file=sys.stderr)

        # Wait for container to be ready
        for _attempt in range(0, max_wait, poll_interval):
            try:
                ready, _out, _err = run_isql_command(args, sql_command="status();", ignore_errors=True)
                if ready:
                    print("Container restarted and ready!", file=sys.stderr)
                    return
            except subprocess.TimeoutExpired:
                # The server is still starting up; keep polling.
                pass

            time.sleep(poll_interval)

        print("Container restarted but may still be initializing.", file=sys.stderr)

    except Exception as e:
        # The index itself was rebuilt, so a failed restart is only reported.
        print(f"Error during container restart: {e}", file=sys.stderr)
        print("Note: Manual restart recommended for full text index activation.", file=sys.stderr)


def rebuild_fulltext_index(args: argparse.Namespace) -> bool:
    """
    Complete process to rebuild the Virtuoso full-text index.

    This function will:
    1. Drop existing full-text index tables
    2. Recreate the index
    3. Enable batch update
    4. Refill the index

    Before step 1 it verifies the prerequisites: with ``args.docker_container``
    set, the docker executable must be resolvable and the container must be
    listed by ``docker ps`` under exactly that name; otherwise the isql
    executable must be resolvable. ``args.docker_path`` / ``args.isql_path``
    are used when present, falling back to ``docker`` / ``isql``.

    After this process completes, the Virtuoso database should be restarted
    for optimal text index performance. When ``args.restart_container`` and
    ``args.docker_container`` are both set the container is restarted
    automatically; a failure of that restart is reported on stderr but does
    not change the return value.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        True if the rebuild process completed successfully, False otherwise
    """
    # Check prerequisites
    if not _check_prerequisites(args):
        return False

    # The four rebuild steps, in order, each paired with the prefix of the
    # message printed when that step fails.
    steps = (
        (drop_fulltext_tables, "Error dropping full-text index tables"),
        (recreate_fulltext_index, "Error recreating full-text index"),
        (enable_batch_update, "Error enabling batch update"),
        (refill_fulltext_index, "Error refilling full-text index"),
    )
    for step, error_prefix in steps:
        success, _, stderr = step(args)
        if not success:
            print(f"{error_prefix}: {stderr}", file=sys.stderr)
            return False

    print("Full-text index rebuild completed successfully.", file=sys.stderr)

    # Restart container if requested and using Docker
    if args.restart_container and args.docker_container:
        _restart_container(args)
    elif args.restart_container:
        print("Note: --restart-container requires --docker-container to be specified.", file=sys.stderr)
        print("Note: Restart the Virtuoso database manually for optimal text index performance.", file=sys.stderr)
    else:
        print("Note: Restart the Virtuoso database for optimal text index performance.", file=sys.stderr)

    return True
def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """
    Parse command-line arguments.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse reads ``sys.argv[1:]``, which is
            the behaviour of a plain ``parse_args()`` call. Passing an
            explicit list lets other code and tests drive the parser without
            touching ``sys.argv``.

    Returns:
        Namespace with ``host``, ``port``, ``user``, ``password``,
        ``docker_container``, ``restart_container``, ``docker_path``,
        ``docker_isql_path`` and ``isql_path``.
    """
    parser = argparse.ArgumentParser(
        description="Rebuild the Virtuoso full-text index."
    )

    parser.add_argument("--host", default="localhost", help="Virtuoso host")
    parser.add_argument("--port", default="1111", help="Virtuoso port")
    parser.add_argument("--user", default="dba", help="Virtuoso username")
    parser.add_argument("--password", default="dba", help="Virtuoso password")
    parser.add_argument(
        "--docker-container",
        help="Docker container name/ID to execute isql inside"
    )
    parser.add_argument(
        "--restart-container",
        action="store_true",
        help="Restart the Docker container after rebuilding the index (recommended for full activation)"
    )

    # Add arguments required by isql_helpers
    parser.add_argument("--docker-path", default="docker", help="Path to docker executable")
    parser.add_argument("--docker-isql-path", default="isql", help="Path to isql inside container")
    parser.add_argument("--isql-path", default="isql", help="Path to isql executable")

    return parser.parse_args(argv)
def main() -> int:
    """
    Main entry point.

    Parses the command line, runs the full rebuild and converts its boolean
    outcome into a process exit status.

    Returns:
        0 when the rebuild reported success, 1 otherwise.
    """
    if rebuild_fulltext_index(parse_args()):
        return 0
    return 1
# Run the rebuild only when this file is executed as a script; the value
# returned by main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())