Coverage for virtuoso_utilities / launch_virtuoso.py: 74%
407 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-15 14:45 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-15 14:45 +0000
1#!/usr/bin/env python3
2"""
3Virtuoso Docker Launcher
5This script launches an OpenLink Virtuoso database instance using Docker.
6Configuration parameters can be customized through command-line arguments.
7"""
9import argparse
10import configparser
11import os
12import re
13import subprocess
14import sys
15import time
16from typing import List, Tuple
18import psutil
# Default number of seconds wait_for_virtuoso_ready() polls before giving up.
DEFAULT_WAIT_TIMEOUT = 120
# Executable used as the first element of every docker command line.
DOCKER_EXEC_PATH = "docker"
# isql executable name handed to the isql helper for in-container execution.
DOCKER_ISQL_PATH_INSIDE_CONTAINER = "isql"

# Default values for container configuration
DEFAULT_IMAGE = (
    "openlink/virtuoso-opensource-7"
    "@sha256:e07868a3db9090400332eaa8ee694b8cf9bf7eebc26db6bbdc3bb92fd30ed010"
)
DEFAULT_CONTAINER_DATA_DIR = "/opt/virtuoso-opensource/database"
# Exported to the container as VIRT_Parameters_ResultSetMaxRows.
DEFAULT_MAX_ROWS = 100_000

# Share of the container memory limit that the sizing formulas work with.
VIRTUOSO_MEMORY_PERCENTAGE = 0.85
# Each buffer occupies ~8700 bytes (8K page + overhead) according to
# https://docs.openlinksw.com/virtuoso/ch-server/
BYTES_PER_BUFFER = 8700
32from virtuoso_utilities.isql_helpers import run_isql_command
# Minimum database size that triggers the MaxCheckpointRemap calculation,
# expressed in GiB and (derived) in bytes.
MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB = 1
MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP = (
    MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB * 1024**3
)

# Default directories allowed in Virtuoso
DEFAULT_DIRS_ALLOWED = {
    ".",
    "../vad",
    "/usr/share/proj",
    "../virtuoso_input",
}

# Connection error patterns for retry logic (matched against lower-cased
# stderr text by is_connection_error()).
CONNECTION_ERROR_PATTERNS = [
    "connection refused",
    "connect failed",
    "connection failed",
    "cannot connect",
    "no route to host",
]
def bytes_to_docker_mem_str(num_bytes: int) -> str:
    """
    Convert a number of bytes to a Docker memory string (e.g., "85g", "512m").

    Exact multiples are rendered in the largest unit (g, m, k) that loses no
    precision. Any other value is rounded down: to whole GiB when that is at
    least 1, otherwise to whole MiB, then whole KiB, and finally plain bytes
    ("b" suffix). This keeps a positive sub-GiB value from being rendered as
    "0g".

    Args:
        num_bytes: Size in bytes.

    Returns:
        str: The size with a single-letter unit suffix.
    """
    units = ((1024**3, "g"), (1024**2, "m"), (1024, "k"))

    # Exact multiples: largest unit wins, no precision lost.
    for unit_size, suffix in units:
        if num_bytes % unit_size == 0:
            return f"{num_bytes // unit_size}{suffix}"

    # Non-exact values: prefer GiB for consistency, but never emit a zero
    # quantity -- drop to the next smaller unit instead.
    for unit_size, suffix in units:
        whole_units = num_bytes // unit_size
        if whole_units >= 1:
            return f"{whole_units}{suffix}"

    return f"{num_bytes}b"
def parse_memory_value(memory_str: str) -> int:
    """
    Parse memory value from Docker memory format (e.g., "2g", "4096m") to bytes.

    Accepts a non-negative integer followed by an optional unit suffix
    (b, k, m or g, case-insensitive); surrounding whitespace is ignored and a
    bare number is taken as bytes. Unparseable input does not raise: a warning
    is written to stderr and 2 GiB is returned.

    Args:
        memory_str: Memory string in Docker format

    Returns:
        int: Memory size in bytes
    """
    unit_multipliers = {
        "": 1,  # No unit, assume bytes
        "b": 1,
        "k": 1024,
        "m": 1024**2,
        "g": 1024**3,
    }

    normalized = memory_str.strip().lower()
    match = re.fullmatch(r'(\d+)([bkmg]?)', normalized)
    if match is None:
        # Default to 2GB if parsing fails
        print(f"Warning: Could not parse memory string '{memory_str}'. Defaulting to 2g.", file=sys.stderr)
        return 2 * 1024**3

    digits, unit = match.groups()
    return int(digits) * unit_multipliers[unit]
def get_directory_size(directory_path: str) -> int:
    """
    Calculate the total size of all files within a directory.

    The tree is walked recursively. Symbolic links are skipped. Files whose
    size cannot be read and directories that cannot be listed are reported
    on stderr and left out of the total rather than aborting the scan.

    Args:
        directory_path: The path to the directory.

    Returns:
        Total size in bytes (0 when the path is not a directory).
    """
    if not os.path.isdir(directory_path):
        return 0

    def _report_walk_error(error: OSError) -> None:
        # os.walk() ignores listing errors unless an onerror callback is
        # supplied, so this is the only way the warning can surface.
        print(f"Warning: Could not walk directory '{directory_path}': {error}", file=sys.stderr)

    total_size = 0
    for dirpath, _dirnames, filenames in os.walk(directory_path, onerror=_report_walk_error):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            # skip if it is symbolic link
            if os.path.islink(file_path):
                continue
            try:
                total_size += os.path.getsize(file_path)
            except OSError as e:
                print(f"Warning: Could not get size of file '{file_path}': {e}", file=sys.stderr)

    return total_size
def get_optimal_buffer_values(memory_limit: str) -> Tuple[int, int]:
    """
    Determine optimal values for NumberOfBuffers and MaxDirtyBuffers
    based on the specified container memory limit.

    The values are computed as:
        NumberOfBuffers = (MemoryInBytes * VIRTUOSO_MEMORY_PERCENTAGE * 0.66) / BYTES_PER_BUFFER
        MaxDirtyBuffers = NumberOfBuffers * 0.75

    The memory_limit is reduced by VIRTUOSO_MEMORY_PERCENTAGE to leave
    headroom for Virtuoso process overhead and prevent container OOM crashes.
    Every intermediate result is truncated to an integer.

    Args:
        memory_limit: Memory limit string in Docker format (e.g., "2g", "4096m")

    Returns:
        Tuple[int, int]: Calculated values for NumberOfBuffers and MaxDirtyBuffers.
        If the calculation raises, a warning is printed to stderr and the
        fixed fallback (170000, 130000) is returned.
    """
    try:
        usable_bytes = int(parse_memory_value(memory_limit) * VIRTUOSO_MEMORY_PERCENTAGE)
        number_of_buffers = int((usable_bytes * 0.66) / BYTES_PER_BUFFER)
        max_dirty_buffers = int(number_of_buffers * 0.75)
    except Exception as e:
        # Deliberate best-effort: sizing must never prevent a launch.
        print(f"Warning: Error calculating buffer values: {e}. Using default values.", file=sys.stderr)
        # Default values approximately suitable for 1-2GB RAM if calculation fails
        return 170000, 130000

    return number_of_buffers, max_dirty_buffers
def calculate_max_checkpoint_remap(size_bytes: int) -> int:
    """
    Derive a MaxCheckpointRemap value from a database size.

    The size is divided by 8192 and then by 4, and the result is truncated
    to an integer.

    Args:
        size_bytes: Database size in bytes.

    Returns:
        int: size_bytes / 8192 / 4, truncated.
    """
    # NOTE(review): 8192 presumably is the Virtuoso page size -- confirm.
    page_count = size_bytes / 8192
    quarter_of_pages = page_count / 4
    return int(quarter_of_pages)
def get_default_memory() -> str:
    """
    Pick a default container memory limit from the host's RAM.

    Returns:
        str: Two thirds of the total RAM reported by psutil (but at least
        1 GiB), formatted by bytes_to_docker_mem_str(). Falls back to "2g"
        if anything in that computation raises.
    """
    one_gib = 1 * 1024**3
    try:
        host_ram_bytes = psutil.virtual_memory().total
        two_thirds = int(host_ram_bytes * (2 / 3))
        chosen_bytes = max(two_thirds, one_gib)
        return bytes_to_docker_mem_str(chosen_bytes)
    except Exception:
        return "2g"
def calculate_threading_config(parallel_threads=None):
    """
    Compute thread-related settings from a core count.

    Args:
        parallel_threads: Core count to size for. When falsy (None or 0),
            os.cpu_count() is used, falling back to 1 if that is unavailable.

    Returns:
        dict: "async_queue_max_threads" (cores * 1.5, truncated),
        "threads_per_query" (cores) and "max_client_connections" (cores * 2).
    """
    if parallel_threads:
        cores = parallel_threads
    else:
        cores = os.cpu_count() or 1

    config = {}
    config["async_queue_max_threads"] = int(cores * 1.5)
    config["threads_per_query"] = cores
    config["max_client_connections"] = cores * 2
    return config
def calculate_max_query_mem(memory, number_of_buffers):
    """
    Compute a MaxQueryMem value from what the buffer pool leaves over.

    Takes the memory limit scaled by VIRTUOSO_MEMORY_PERCENTAGE, subtracts
    the bytes taken by ``number_of_buffers`` buffers, and keeps 80% of the
    remainder.

    Args:
        memory: Memory limit string in Docker format (e.g., "4g").
        number_of_buffers: Configured NumberOfBuffers value.

    Returns:
        The budget formatted by bytes_to_docker_mem_str(), or None when the
        computed budget is not positive.
    """
    bytes_held_by_buffers = number_of_buffers * BYTES_PER_BUFFER
    usable_bytes = int(parse_memory_value(memory) * VIRTUOSO_MEMORY_PERCENTAGE)
    query_budget_bytes = int((usable_bytes - bytes_held_by_buffers) * 0.8)
    if query_budget_bytes <= 0:
        return None
    return bytes_to_docker_mem_str(query_budget_bytes)
def get_virt_env_vars(memory, number_of_buffers, max_dirty_buffers, parallel_threads, estimated_db_size_gb=0.0, dirs_allowed=None):
    """
    Build the VIRT_* environment variables for the Virtuoso container.

    Args:
        memory: Memory limit string in Docker format; used for MaxQueryMem.
        number_of_buffers: Value for VIRT_Parameters_NumberOfBuffers.
        max_dirty_buffers: Value for VIRT_Parameters_MaxDirtyBuffers.
        parallel_threads: Core count forwarded to calculate_threading_config().
        estimated_db_size_gb: Estimated database size in GB. MaxCheckpointRemap
            is only set when this reaches the module's minimum size threshold.
        dirs_allowed: Comma-separated DirsAllowed value; omitted when falsy.

    Returns:
        dict: Environment variable names mapped to string values. A summary of
        the chosen settings is also printed to stdout.
    """
    threads = calculate_threading_config(parallel_threads)
    async_threads = threads["async_queue_max_threads"]
    query_threads = threads["threads_per_query"]
    client_connections = threads["max_client_connections"]

    # Insertion order is kept stable: it determines the order of the
    # resulting "-e" options.
    env_vars = {
        "VIRT_Parameters_NumberOfBuffers": str(number_of_buffers),
        "VIRT_Parameters_MaxDirtyBuffers": str(max_dirty_buffers),
        "VIRT_Parameters_AsyncQueueMaxThreads": str(async_threads),
        "VIRT_Parameters_ThreadsPerQuery": str(query_threads),
        "VIRT_Parameters_MaxClientConnections": str(client_connections),
        "VIRT_HTTPServer_ServerThreads": str(client_connections),
        "VIRT_Parameters_AdjustVectorSize": "0",
        "VIRT_Parameters_VectorSize": "1000",
        "VIRT_Parameters_CheckpointInterval": "1",
        "VIRT_Parameters_ThreadCleanupInterval": "1",
        "VIRT_Parameters_ResourcesCleanupInterval": "1",
    }

    max_query_mem = calculate_max_query_mem(memory, number_of_buffers)
    if max_query_mem:
        env_vars["VIRT_Parameters_MaxQueryMem"] = max_query_mem

    env_vars.update(
        VIRT_Client_SQL_QUERY_TIMEOUT="0",
        VIRT_Client_SQL_TXN_TIMEOUT="0",
    )

    if estimated_db_size_gb > 0:
        estimated_size_bytes = int(estimated_db_size_gb * 1024**3)
        if estimated_size_bytes >= MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP:
            remap = calculate_max_checkpoint_remap(estimated_size_bytes)
            remap_text = str(remap)
            env_vars["VIRT_Database_MaxCheckpointRemap"] = remap_text
            env_vars["VIRT_TempDatabase_MaxCheckpointRemap"] = remap_text
            print(f"Info: Using estimated database size of {estimated_db_size_gb} GB to set MaxCheckpointRemap to {remap}")

    if dirs_allowed:
        env_vars["VIRT_Parameters_DirsAllowed"] = dirs_allowed

    print(f"Info: Threading: AsyncQueueMaxThreads={async_threads}, "
          f"ThreadsPerQuery={query_threads}, "
          f"MaxClientConnections={client_connections}")
    max_query_mem_label = max_query_mem or "N/A"
    print(f"Info: MaxQueryMem={max_query_mem_label}, AdjustVectorSize=0, VectorSize=1000, CheckpointInterval=1, ThreadCleanupInterval=1, ResourcesCleanupInterval=1")

    return env_vars
def is_connection_error(stderr):
    """
    Tell whether an isql error message looks like a connection failure.

    Args:
        stderr: Error text to inspect (compared case-insensitively).

    Returns:
        bool: True if any entry of CONNECTION_ERROR_PATTERNS occurs in it.
    """
    haystack = stderr.lower()
    for pattern in CONNECTION_ERROR_PATTERNS:
        if pattern in haystack:
            return True
    return False
def create_isql_args(dba_password, docker_container=None):
    """
    Build the argument namespace passed to run_isql_command().

    Host, port and user are fixed to "localhost", 1111 and "dba".

    Args:
        dba_password: Password for the dba user.
        docker_container: Container name. When truthy, the docker-related
            fields are filled in and isql_path is None; otherwise the docker
            fields are None and isql_path is "isql".

    Returns:
        argparse.Namespace: host, port, user, password, docker_container,
        docker_path, docker_isql_path and isql_path.
    """
    fields = {
        "host": "localhost",
        "port": 1111,
        "user": "dba",
        "password": dba_password,
    }
    if docker_container:
        fields.update(
            docker_container=docker_container,
            docker_path=DOCKER_EXEC_PATH,
            docker_isql_path=DOCKER_ISQL_PATH_INSIDE_CONTAINER,
            isql_path=None,
        )
    else:
        fields.update(
            docker_container=None,
            docker_path=None,
            docker_isql_path=None,
            isql_path="isql",
        )
    return argparse.Namespace(**fields)
def _ensure_ini_section(config, section, ini_path):
    """Add ``section`` to ``config`` if it is missing and announce the addition."""
    if not config.has_section(section):
        config.add_section(section)
        print(f"Info: Added [{section}] section to '{ini_path}'.")


def _dirs_allowed_as_set(value):
    """Split a comma-separated DirsAllowed value into a set of stripped, non-empty entries."""
    if value is None:
        return set()
    return {entry.strip() for entry in value.split(',') if entry.strip()}


def _update_ini_option(config, ini_path, section, option, new_value, is_same=None, show_previous=True):
    """Set ``option`` to ``new_value`` when it differs; return True if the config was changed."""
    current_value = config.get(section, option, fallback=None)
    if is_same is not None:
        unchanged = is_same(current_value, new_value)
    else:
        unchanged = current_value == new_value
    if unchanged:
        return False

    config.set(section, option, new_value)
    if show_previous:
        print(f"Info: Updating [{section}] {option} from '{current_value}' to '{new_value}' in '{ini_path}'.")
    else:
        print(f"Info: Setting [{section}] {option} to '{new_value}' in '{ini_path}'.")
    return True


def update_ini_memory_settings(
    ini_path: str,
    data_dir_path: str,
    number_of_buffers: int = None,
    max_dirty_buffers: int = None,
    dirs_allowed: str = None,
    async_queue_max_threads: int = None,
    threads_per_query: int = None,
    max_client_connections: int = None,
    adjust_vector_size: int = None,
    vector_size: int = None,
    checkpoint_interval: int = None,
    max_query_mem: str = None,
    http_server_threads: int = None,
    thread_cleanup_interval: int = None,
    resources_cleanup_interval: int = None,
):
    """
    Bring an existing virtuoso.ini in line with the requested settings.

    Does nothing (beyond an info message) when ``ini_path`` does not exist.
    Otherwise every argument that is not None is written to its option if the
    stored value differs, the [Client] SQL timeouts are forced to '0', and
    MaxCheckpointRemap is set in [Database] and [TempDatabase] when the data
    directory is at least MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP bytes.
    The file is rewritten only if at least one value changed. Parse, I/O and
    unexpected errors are reported on stderr and not raised.

    Args:
        ini_path: Path to virtuoso.ini on the host.
        data_dir_path: Host data directory whose size drives MaxCheckpointRemap.
        number_of_buffers: [Parameters] NumberOfBuffers.
        max_dirty_buffers: [Parameters] MaxDirtyBuffers.
        dirs_allowed: [Parameters] DirsAllowed, comma-separated; compared to
            the stored value as a set, ignoring order and whitespace.
        async_queue_max_threads: [Parameters] AsyncQueueMaxThreads.
        threads_per_query: [Parameters] ThreadsPerQuery.
        max_client_connections: [Parameters] MaxClientConnections.
        adjust_vector_size: [Parameters] AdjustVectorSize.
        vector_size: [Parameters] VectorSize.
        checkpoint_interval: [Parameters] CheckpointInterval.
        max_query_mem: [Parameters] MaxQueryMem (written as given).
        http_server_threads: [HTTPServer] ServerThreads.
        thread_cleanup_interval: [Parameters] ThreadCleanupInterval.
        resources_cleanup_interval: [Parameters] ResourcesCleanupInterval.
    """
    if not os.path.exists(ini_path):
        print(f"Info: virtuoso.ini not found at '{ini_path}'. Likely first run. Skipping settings update.")
        return

    print(f"Info: Checking existing virtuoso.ini at '{ini_path}' for settings update...")
    actual_db_size_bytes = get_directory_size(data_dir_path)

    # Calculate MaxCheckpointRemap if database is large enough
    calculate_remap = actual_db_size_bytes >= MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP
    calculated_remap_value = calculate_max_checkpoint_remap(actual_db_size_bytes) if calculate_remap else None

    config = configparser.ConfigParser(interpolation=None, strict=False)
    config.optionxform = str  # Keep case sensitivity
    made_changes = False

    def apply(section, option, value, **compare_options):
        # Record whether any option was actually rewritten.
        nonlocal made_changes
        if _update_ini_option(config, ini_path, section, option, value, **compare_options):
            made_changes = True

    def apply_if_given(section, option, value):
        # None means "caller did not ask for this setting".
        if value is not None:
            apply(section, option, str(value))

    try:
        config.read(ini_path, encoding='utf-8')

        # The statement order below fixes the order of the info messages and
        # of newly appended options, so it is kept stable on purpose.
        _ensure_ini_section(config, 'Parameters', ini_path)
        apply_if_given('Parameters', 'NumberOfBuffers', number_of_buffers)

        # Ensure [Client] section has SQL timeouts set to 0
        _ensure_ini_section(config, 'Client', ini_path)
        apply('Client', 'SQL_QUERY_TIMEOUT', '0', show_previous=False)
        apply('Client', 'SQL_TXN_TIMEOUT', '0', show_previous=False)

        apply_if_given('Parameters', 'MaxDirtyBuffers', max_dirty_buffers)

        if dirs_allowed is not None:
            apply(
                'Parameters',
                'DirsAllowed',
                dirs_allowed,
                is_same=lambda current, new: _dirs_allowed_as_set(current) == _dirs_allowed_as_set(new),
            )

        apply_if_given('Parameters', 'AsyncQueueMaxThreads', async_queue_max_threads)
        apply_if_given('Parameters', 'ThreadsPerQuery', threads_per_query)
        apply_if_given('Parameters', 'MaxClientConnections', max_client_connections)
        apply_if_given('Parameters', 'AdjustVectorSize', adjust_vector_size)
        apply_if_given('Parameters', 'VectorSize', vector_size)
        apply_if_given('Parameters', 'CheckpointInterval', checkpoint_interval)

        if max_query_mem is not None:
            # Written as given (no str() conversion), matching its str type.
            apply('Parameters', 'MaxQueryMem', max_query_mem)

        if http_server_threads is not None:
            _ensure_ini_section(config, 'HTTPServer', ini_path)
            apply('HTTPServer', 'ServerThreads', str(http_server_threads))

        apply_if_given('Parameters', 'ThreadCleanupInterval', thread_cleanup_interval)
        apply_if_given('Parameters', 'ResourcesCleanupInterval', resources_cleanup_interval)

        # Update MaxCheckpointRemap if database is large enough
        if calculate_remap:
            calculated_remap_str = str(calculated_remap_value)
            for section in ('Database', 'TempDatabase'):
                _ensure_ini_section(config, section, ini_path)
                apply(section, 'MaxCheckpointRemap', calculated_remap_str)
        else:
            print(f"Info: Host data directory '{data_dir_path}' size ({actual_db_size_bytes / (1024**3):.2f} GiB) is below threshold ({MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB} GiB). No changes made to MaxCheckpointRemap in virtuoso.ini.")

        if made_changes:
            # Write changes back with UTF-8 encoding
            with open(ini_path, 'w', encoding='utf-8') as configfile:
                config.write(configfile)
            print(f"Info: Successfully saved changes to '{ini_path}'.")
        else:
            print(f"Info: No changes needed in '{ini_path}'.")

    except configparser.Error as e:
        print(f"Error: Failed to parse or update virtuoso.ini at '{ini_path}': {e}", file=sys.stderr)
    except IOError as e:
        print(f"Error: Failed to read or write virtuoso.ini at '{ini_path}': {e}", file=sys.stderr)
    except Exception as e:
        print(f"Error: An unexpected error occurred while updating virtuoso.ini: {e}", file=sys.stderr)
def parse_arguments() -> argparse.Namespace:  # pragma: no cover
    """
    Parse command-line arguments for Virtuoso Docker launcher.

    The --memory value is resolved first with a minimal preliminary parser,
    because the defaults of --number-of-buffers and --max-dirty-buffers are
    derived from it. Every option is then registered on the full parser
    before it parses anything, so that --help lists all of them.

    Returns:
        argparse.Namespace: Parsed command-line arguments
    """
    # Preliminary parse: only find out whether --memory was provided.
    # add_help=False lets -h/--help fall through to the full parser.
    preliminary_parser = argparse.ArgumentParser(add_help=False)
    preliminary_parser.add_argument("--memory", default=None)
    preliminary_args, _ = preliminary_parser.parse_known_args()
    memory_specified = preliminary_args.memory is not None

    # --- Calculate default memory based on host RAM (2/3) ---
    default_memory_str = "2g"  # Fallback default
    if not memory_specified:
        try:
            total_host_ram = psutil.virtual_memory().total
            # 2/3 of total RAM in bytes, but never below 1 GiB
            default_mem_bytes = max(int(total_host_ram * (2/3)), 1 * 1024 * 1024 * 1024)

            default_memory_str = bytes_to_docker_mem_str(default_mem_bytes)
            print(f"Info: Detected {total_host_ram / (1024**3):.1f} GiB total host RAM. "
                  f"Setting default container memory limit to {default_memory_str} (approx. 2/3). "
                  f"Use --memory to override.")
        except Exception as e:
            print(f"Warning: Could not auto-detect host RAM using psutil: {e}. "
                  f"Falling back to default memory limit '{default_memory_str}'.", file=sys.stderr)

    # Buffer defaults follow the memory limit that will actually be in effect.
    effective_memory = preliminary_args.memory if memory_specified else default_memory_str
    optimal_number_of_buffers, optimal_max_dirty_buffers = get_optimal_buffer_values(effective_memory)

    # Full parser with all arguments
    parser = argparse.ArgumentParser(
        description="Launch a Virtuoso database using Docker",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--name",
        default="virtuoso",
        help="Name for the Docker container"
    )
    parser.add_argument(
        "--http-port",
        type=int,
        default=8890,
        help="HTTP port to expose Virtuoso on"
    )
    parser.add_argument(
        "--isql-port",
        type=int,
        default=1111,
        help="ISQL port to expose Virtuoso on"
    )

    parser.add_argument(
        "--data-dir",
        default="./virtuoso-data",
        help="Host directory to mount as Virtuoso data directory"
    )

    parser.add_argument(
        "--mount-volume",
        action="append",
        dest="extra_volumes",
        metavar="HOST_PATH:CONTAINER_PATH",
        help="Mount an additional host directory into the container. "
             "Format: /path/on/host:/path/in/container. "
             "Can be specified multiple times."
    )

    parser.add_argument(
        "--memory",
        default=default_memory_str,
        help="Memory limit for the container (e.g., 2g, 4g). "
             f"Defaults to approx. 2/3 of host RAM if psutil is installed, otherwise '{default_memory_str}'."
    )
    parser.add_argument(
        "--cpu-limit",
        type=float,
        default=0,
        help="CPU limit for the container (0 means no limit)"
    )

    parser.add_argument(
        "--dba-password",
        default="dba",
        help="Password for the Virtuoso dba user"
    )

    parser.add_argument(
        "--force-remove",
        action="store_true",
        help="Force removal of existing container with the same name"
    )

    parser.add_argument(
        "--network",
        help="Docker network to connect the container to (must be a pre-existing network)"
    )

    parser.add_argument(
        "--wait-ready",
        action="store_true",
        help="Wait until Virtuoso is ready to accept connections"
    )
    parser.add_argument(
        "--detach",
        action="store_true",
        help="Run container in detached mode"
    )

    parser.add_argument(
        "--enable-write-permissions",
        action="store_true",
        help="Enable write permissions for 'nobody' and 'SPARQL' users. "
             "This makes the database publicly writable. "
             "Forces waiting for the container to be ready."
    )

    parser.add_argument(
        "--estimated-db-size-gb",
        type=float,
        default=0,
        help="Estimated database size in GB. If provided, MaxCheckpointRemap will be preconfigured "
             "based on this estimate rather than measuring existing data."
    )

    parser.add_argument(
        "--virtuoso-version",
        default=None,
        help="Virtuoso Docker image version/tag to use (e.g., 'latest', '7.2.11', '7.2.12'). If not specified, uses the default pinned version."
    )

    parser.add_argument(
        "--virtuoso-sha",
        default=None,
        help="Virtuoso Docker image SHA256 digest to use (e.g., 'sha256:e07868a3db9090400332eaa8ee694b8cf9bf7eebc26db6bbdc3bb92fd30ed010'). Takes precedence over --virtuoso-version."
    )

    parser.add_argument(
        "--parallel-threads",
        type=int,
        default=None,
        help="Maximum parallel threads for query execution. "
             "If not specified, uses all available CPU cores. "
             "Sets AsyncQueueMaxThreads to cores * 1.5 and ThreadsPerQuery to cores."
    )

    parser.add_argument(
        "--max-dirty-buffers",
        type=int,
        default=optimal_max_dirty_buffers,
        help="Maximum dirty buffers before checkpoint (auto-calculated based on --memory value, requires integer)"
    )
    parser.add_argument(
        "--number-of-buffers",
        type=int,
        default=optimal_number_of_buffers,
        help="Number of buffers (auto-calculated based on --memory value, requires integer)"
    )

    return parser.parse_args()
def check_docker_installed() -> bool:
    """
    Check if Docker is installed and accessible.

    Runs ``docker --version`` with its output captured and discarded.

    Returns:
        bool: True if the command ran and exited successfully, False if it
        failed or the executable could not be found.
    """
    version_command = ["docker", "--version"]
    try:
        subprocess.run(
            version_command,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except (subprocess.SubprocessError, FileNotFoundError):
        return False
    else:
        return True
def check_container_exists(container_name: str) -> bool:
    """
    Check if a Docker container with the specified name exists.

    Lists all containers (running or not) through ``docker ps -a`` with a
    name filter anchored at both ends, then looks for an output line that
    equals ``container_name`` exactly.

    Args:
        container_name: Name of the container to check

    Returns:
        bool: True if container exists, False otherwise
    """
    result = subprocess.run(
        ["docker", "ps", "-a", "--filter", f"name=^{container_name}$", "--format", "{{.Names}}"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True
    )

    # Compare whole lines rather than testing substring membership on the raw
    # output: a substring test is True for an empty name even when nothing
    # was listed.
    listed_names = [line.strip() for line in result.stdout.splitlines()]
    return container_name in listed_names
def remove_container(container_name: str) -> bool:
    """
    Remove a Docker container.

    Runs ``docker rm -f`` with its output captured and discarded.

    Args:
        container_name: Name of the container to remove

    Returns:
        bool: True if container was removed successfully, False otherwise
        (including when the docker executable cannot be found).
    """
    try:
        subprocess.run(
            ["docker", "rm", "-f", container_name],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True
        )
    except (subprocess.SubprocessError, FileNotFoundError):
        # FileNotFoundError is not a SubprocessError; treat a missing docker
        # binary as "not removed", the same way check_docker_installed()
        # reports it, rather than letting it escape.
        return False
    return True
def get_docker_image(version: str, sha: str) -> str:
    """
    Get the appropriate Docker image based on version or SHA parameter.

    Precedence: an explicit digest wins over a tag; with neither given the
    pinned DEFAULT_IMAGE is used.

    Args:
        version: Version string (e.g., 'latest', '7.2.11', '7.2.12') or None for default
        sha: SHA256 digest string or None

    Returns:
        str: Full Docker image reference
    """
    repository = "openlink/virtuoso-opensource-7"

    if sha is not None:
        return f"{repository}@{sha}"
    if version is None:
        return DEFAULT_IMAGE
    # 'latest' is an ordinary tag and needs no special handling.
    return f"{repository}:{version}"
def build_docker_run_command(args: argparse.Namespace) -> Tuple[List[str], List[str]]:
    """
    Build the Docker run command based on provided arguments.

    Creates the host data directory if it does not exist yet. Reads name,
    data_dir, http_port, isql_port, network, extra_volumes, memory,
    cpu_limit, dba_password, number_of_buffers, max_dirty_buffers,
    parallel_threads, estimated_db_size_gb, detach, virtuoso_version and
    virtuoso_sha from ``args``.

    Args:
        args: Command-line arguments

    Returns:
        Tuple[List[str], List[str]]:
            - Command parts for subprocess.run
            - Unique container paths intended for DirsAllowed. NOTE(review):
              this is returned as a set, not a list as the annotation says;
              kept as-is so existing callers are unaffected.
    """
    host_data_dir_abs = os.path.abspath(args.data_dir)
    os.makedirs(host_data_dir_abs, exist_ok=True)

    cmd = [DOCKER_EXEC_PATH, "run"]
    # Containers that are not detached are removed automatically on exit.
    if not args.detach:
        cmd.append("--rm")

    cmd.extend(["--name", args.name])

    # Add user mapping to run as the host user
    try:
        cmd.extend(["--user", f"{os.getuid()}:{os.getgid()}"])
    except AttributeError:
        print("Warning: os.getuid/os.getgid not available on this system (likely Windows). Skipping user mapping.", file=sys.stderr)

    cmd.extend(["-p", f"{args.http_port}:8890"])
    cmd.extend(["-p", f"{args.isql_port}:1111"])

    if args.network:
        cmd.extend(["--network", args.network])

    container_data_dir_path = DEFAULT_CONTAINER_DATA_DIR
    cmd.extend(["-v", f"{host_data_dir_abs}:{container_data_dir_path}"])

    # Start with default Virtuoso paths
    paths_to_allow_in_container = DEFAULT_DIRS_ALLOWED.copy()
    paths_to_allow_in_container.add(container_data_dir_path)

    # Mount additional volumes and allow their container paths in one pass.
    for volume_spec in args.extra_volumes or []:
        if ':' not in volume_spec:
            print(f"Warning: Ignoring --mount-volume value '{volume_spec}': expected HOST_PATH:CONTAINER_PATH.", file=sys.stderr)
            continue
        host_path, container_path = volume_spec.split(':', 1)
        cmd.extend(["-v", f"{os.path.abspath(host_path)}:{container_path}"])

        container_path_abs = container_path if container_path.startswith('/') else '/' + container_path
        paths_to_allow_in_container.add(container_path_abs)
        print(f"Info: Adding mounted volume path '{container_path_abs}' to DirsAllowed.")

    memory_bytes = parse_memory_value(args.memory)
    reservation_bytes = int(memory_bytes * VIRTUOSO_MEMORY_PERCENTAGE)
    cmd.extend(["--memory-reservation", bytes_to_docker_mem_str(reservation_bytes)])
    cmd.extend(["--memory", args.memory])
    if args.cpu_limit > 0:
        cmd.extend(["--cpus", str(args.cpu_limit)])

    env_vars = {
        "DBA_PASSWORD": args.dba_password,
        "VIRT_Parameters_ResultSetMaxRows": str(DEFAULT_MAX_ROWS),
        "VIRT_SPARQL_DefaultQuery": "SELECT (COUNT(*) AS ?quadCount) WHERE { GRAPH ?g { ?s ?p ?o } }",
    }

    virt_env_vars = get_virt_env_vars(
        memory=args.memory,
        number_of_buffers=args.number_of_buffers,
        max_dirty_buffers=args.max_dirty_buffers,
        parallel_threads=args.parallel_threads,
        estimated_db_size_gb=args.estimated_db_size_gb,
        # Sorted so the generated command is identical from run to run;
        # joining a set directly yields an arbitrary order.
        dirs_allowed=",".join(sorted(paths_to_allow_in_container)),
    )
    env_vars.update(virt_env_vars)

    for key, value in env_vars.items():
        cmd.extend(["-e", f"{key}={value}"])

    if args.detach:
        cmd.append("-d")

    # Append image name
    cmd.append(get_docker_image(args.virtuoso_version, args.virtuoso_sha))

    return cmd, paths_to_allow_in_container
def wait_for_virtuoso_ready(
    dba_password: str,
    docker_container: str = None,
    timeout: int = DEFAULT_WAIT_TIMEOUT,
    poll_interval: int = 3,
) -> bool:
    """
    Poll Virtuoso with an isql ``status();`` call until it answers.

    Connection errors (see is_connection_error) are retried until the
    timeout expires; any other isql failure aborts the wait immediately.
    Exceptions raised by the check itself are reported and retried after a
    slightly longer pause.

    Args:
        dba_password: Password for the dba user.
        docker_container: Container name passed to create_isql_args().
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between attempts.

    Returns:
        bool: True once the status call succeeds, False on a non-connection
        failure or on timeout.
    """
    print(f"Waiting for Virtuoso to be ready (timeout: {timeout}s)...")
    start_time = time.time()
    isql_args = create_isql_args(dba_password, docker_container)
    # Elapsed-seconds mark at which the next progress line is due. Tracking
    # a threshold (instead of testing elapsed % 10 == 0) makes the message
    # appear roughly every 10 seconds whatever the polling cadence is.
    next_progress_report = 0

    while time.time() - start_time < timeout:
        try:
            success, _, stderr = run_isql_command(isql_args, sql_command="status();")
            if success:
                print("Virtuoso is ready.")
                return True
            if is_connection_error(stderr):
                elapsed = int(time.time() - start_time)
                if elapsed >= next_progress_report:
                    print(f" Waiting for Virtuoso... ({elapsed}s elapsed)")
                    next_progress_report = (elapsed // 10 + 1) * 10
            else:
                print(f"ISQL check failed: {stderr}", file=sys.stderr)
                return False
            time.sleep(poll_interval)
        except Exception as e:
            print(f"Warning: Error in readiness check: {e}", file=sys.stderr)
            time.sleep(poll_interval + 2)

    print(f"Timeout ({timeout}s) waiting for Virtuoso.", file=sys.stderr)
    return False
def _inheritable_stream(stream):
    """Return ``stream`` if it exposes a usable file descriptor, else None (inherit the parent's)."""
    try:
        stream.fileno()
    except (AttributeError, OSError, ValueError):
        return None
    return stream


def run_docker_command(cmd: List[str], capture_output=False, check=True, suppress_error=False):
    """
    Helper to run Docker commands and handle errors.

    Args:
        cmd: Full command line, executable first.
        capture_output: When True, stdout and stderr are captured as text on
            the returned result; otherwise the child writes to this process's
            stdout/stderr.
        check: Raise CalledProcessError on a non-zero exit status.
        suppress_error: Skip the diagnostic messages (exceptions are still
            re-raised).

    Returns:
        subprocess.CompletedProcess: Result of the finished command.

    Raises:
        subprocess.CalledProcessError: Non-zero exit while ``check`` is True.
        FileNotFoundError: The executable could not be found.
    """
    # NOTE(review): the echoed command line can contain secrets passed via
    # "-e KEY=value" (e.g. DBA_PASSWORD) -- consider redacting before logging.
    print(f"Executing: {' '.join(cmd)}")

    if capture_output:
        stdout_target = stderr_target = subprocess.PIPE
    else:
        # sys.stdout/sys.stderr may have been replaced by objects without a
        # real file descriptor (e.g. io.StringIO), which subprocess cannot
        # use; in that case let the child inherit the parent's descriptors.
        stdout_target = _inheritable_stream(sys.stdout)
        stderr_target = _inheritable_stream(sys.stderr)

    try:
        return subprocess.run(
            cmd,
            stdout=stdout_target,
            stderr=stderr_target,
            text=True,
            check=check
        )
    except subprocess.CalledProcessError as e:
        if not suppress_error:
            print(f"Error executing Docker command: {e}", file=sys.stderr)
            if capture_output:
                print(f"Stderr: {e.stderr}", file=sys.stderr)
                print(f"Stdout: {e.stdout}", file=sys.stderr)
        raise
    except FileNotFoundError:
        if not suppress_error:
            print("Error: 'docker' command not found. Make sure Docker is installed and in your PATH.", file=sys.stderr)
        raise
def grant_write_permissions(dba_password: str, docker_container: str = None) -> bool:
    """
    Run the two ISQL statements that enable SPARQL writes.

    Both statements are always attempted, even if the first one fails; each
    failure is reported on stderr as a warning.

    Args:
        dba_password: DBA password forwarded to create_isql_args().
        docker_container: Container name forwarded to create_isql_args().

    Returns:
        The conjunction of the two success flags reported by run_isql_command().
    """
    print("Granting write permissions...")
    isql_args = create_isql_args(dba_password, docker_container)

    # (SQL statement, message on success, warning prefix on failure)
    steps = (
        (
            "DB.DBA.RDF_DEFAULT_USER_PERMS_SET('nobody', 7);",
            " Set permissions for 'nobody' user.",
            " Warning: Failed to set 'nobody' permissions: ",
        ),
        (
            "DB.DBA.USER_GRANT_ROLE('SPARQL', 'SPARQL_UPDATE');",
            " Granted SPARQL_UPDATE role to 'SPARQL' user.",
            " Warning: Failed to grant SPARQL_UPDATE: ",
        ),
    )

    outcomes = []
    for sql, ok_message, failure_prefix in steps:
        succeeded, _, error_text = run_isql_command(isql_args, sql_command=sql)
        if succeeded:
            print(ok_message)
        else:
            print(f"{failure_prefix}{error_text}", file=sys.stderr)
        outcomes.append(succeeded)

    return outcomes[0] and outcomes[1]
def launch_virtuoso(  # pragma: no cover
    name: str = "virtuoso",
    data_dir: str = "./virtuoso-data",
    http_port: int = 8890,
    isql_port: int = 1111,
    memory: str = None,
    dba_password: str = "dba",
    detach: bool = True,
    wait_ready: bool = True,
    enable_write_permissions: bool = False,
    force_remove: bool = False,
    extra_volumes: list = None,
    network: str = None,
    cpu_limit: float = 0,
    virtuoso_version: str = None,
    virtuoso_sha: str = None,
    estimated_db_size_gb: float = 0,
    parallel_threads: int = None,
) -> None:
    """
    Launch Virtuoso Docker container.

    The function updates ``virtuoso.ini`` inside ``data_dir``, removes a
    stopped (or, with ``force_remove``, any) container of the same name,
    starts the container and optionally waits for it and grants write
    permissions.

    Args:
        name: Container name
        data_dir: Host directory for Virtuoso data
        http_port: HTTP port to expose
        isql_port: ISQL port to expose
        memory: Memory limit (e.g., "4g"). If None, two thirds of the host RAM
            (at least 1 GiB) is used, or "2g" when the host RAM cannot be read.
        dba_password: DBA password
        detach: Run in detached mode
        wait_ready: Wait for Virtuoso to be ready (detached mode only)
        enable_write_permissions: Enable SPARQL write permissions for 'nobody' and 'SPARQL' users.
            Implies waiting for readiness in detached mode.
        force_remove: Force remove existing container with same name
        extra_volumes: Additional volumes to mount (list of "host:container" strings)
        network: Docker network to connect
        cpu_limit: CPU limit (0 = no limit)
        virtuoso_version: Docker image version tag
        virtuoso_sha: Docker image SHA digest (takes precedence over version)
        estimated_db_size_gb: Estimated DB size for MaxCheckpointRemap config
        parallel_threads: Max parallel threads for query execution. If None, uses all CPU cores.

    Raises:
        RuntimeError: If Docker is not installed, an existing container blocks
            the launch, the launch fails, or the readiness check fails.
    """
    if not check_docker_installed():
        raise RuntimeError("Docker command not found. Please install Docker.")

    if memory is None:
        if psutil:
            try:
                total_host_ram = psutil.virtual_memory().total
                default_mem_bytes = max(int(total_host_ram * (2/3)), 1 * 1024**3)
                memory = bytes_to_docker_mem_str(default_mem_bytes)
            except Exception:
                # Host RAM could not be determined; fall back to a fixed limit.
                memory = "2g"
        else:
            memory = "2g"

    number_of_buffers, max_dirty_buffers = get_optimal_buffer_values(memory)

    # build_docker_run_command() consumes an argparse-style namespace, so the
    # keyword arguments are repackaged in that shape.
    args = argparse.Namespace(
        name=name,
        data_dir=data_dir,
        http_port=http_port,
        isql_port=isql_port,
        memory=memory,
        dba_password=dba_password,
        detach=detach,
        wait_ready=wait_ready,
        enable_write_permissions=enable_write_permissions,
        force_remove=force_remove,
        extra_volumes=extra_volumes,
        network=network,
        cpu_limit=cpu_limit,
        virtuoso_version=virtuoso_version,
        virtuoso_sha=virtuoso_sha,
        estimated_db_size_gb=estimated_db_size_gb,
        number_of_buffers=number_of_buffers,
        max_dirty_buffers=max_dirty_buffers,
        parallel_threads=parallel_threads,
    )

    host_data_dir_abs = os.path.abspath(data_dir)
    ini_file_path = os.path.join(host_data_dir_abs, "virtuoso.ini")

    docker_cmd, unique_paths_to_allow = build_docker_run_command(args)
    dirs_allowed_str = ",".join(unique_paths_to_allow) if unique_paths_to_allow else None

    threading = calculate_threading_config(parallel_threads)
    max_query_mem_value = calculate_max_query_mem(memory, number_of_buffers)

    update_ini_memory_settings(
        ini_path=ini_file_path,
        data_dir_path=host_data_dir_abs,
        number_of_buffers=number_of_buffers,
        max_dirty_buffers=max_dirty_buffers,
        dirs_allowed=dirs_allowed_str,
        async_queue_max_threads=threading["async_queue_max_threads"],
        threads_per_query=threading["threads_per_query"],
        max_client_connections=threading["max_client_connections"],
        adjust_vector_size=0,
        vector_size=1000,
        checkpoint_interval=1,
        max_query_mem=max_query_mem_value,
        http_server_threads=threading["max_client_connections"],
        thread_cleanup_interval=1,
        resources_cleanup_interval=1,
    )

    if check_container_exists(name):
        result = subprocess.run(
            [DOCKER_EXEC_PATH, "ps", "--filter", f"name=^{name}$", "--format", "{{.Status}}"],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )
        is_running = "Up" in result.stdout

        if force_remove:
            print(f"Container '{name}' already exists. Forcing removal...")
            if not remove_container(name):
                raise RuntimeError(f"Failed to remove existing container '{name}'")
        elif is_running:
            raise RuntimeError(f"Container '{name}' is already running. Stop it first or use force_remove=True.")
        else:
            print(f"Container '{name}' exists but is stopped. Removing...")
            if not remove_container(name):
                raise RuntimeError(f"Failed to remove existing stopped container '{name}'")

    try:
        # Always check the exit status. Previously the check was skipped in
        # detached mode, so a failed "docker run -d" went unnoticed: the
        # cleanup below could never run and, with wait_ready=False, success
        # was reported for a container that never started.
        run_docker_command(docker_cmd, check=True)

        should_wait = wait_ready or enable_write_permissions

        if detach and should_wait:
            print("Waiting for Virtuoso readiness...")
            ready = wait_for_virtuoso_ready(dba_password, docker_container=name)
            if not ready:
                raise RuntimeError("Virtuoso readiness check timed out or failed.")

            if enable_write_permissions:
                if not grant_write_permissions(dba_password, docker_container=name):
                    print("Warning: One or more commands to enable write permissions failed.", file=sys.stderr)

        print(f"Virtuoso launched successfully on http://localhost:{http_port}/sparql")

    except subprocess.CalledProcessError as e:
        # A failed detached start can leave a created container behind; remove
        # it so the next launch is not blocked by the name.
        if detach and check_container_exists(name):
            run_docker_command([DOCKER_EXEC_PATH, "stop", name], suppress_error=True, check=False)
            run_docker_command([DOCKER_EXEC_PATH, "rm", name], suppress_error=True, check=False)
        raise RuntimeError(f"Virtuoso launch failed: {e}") from e
    except FileNotFoundError as e:
        raise RuntimeError("Docker command not found.") from e
def main() -> int:  # pragma: no cover
    """
    Command-line entry point: parse the arguments and forward them to launch_virtuoso().

    Returns:
        0 when launch_virtuoso() completes, 1 when it raises RuntimeError
        (the error is printed to stderr).
    """
    args = parse_arguments()

    # Names shared by the parsed namespace and launch_virtuoso()'s keywords.
    forwarded_options = (
        "name",
        "data_dir",
        "http_port",
        "isql_port",
        "memory",
        "dba_password",
        "detach",
        "wait_ready",
        "enable_write_permissions",
        "force_remove",
        "extra_volumes",
        "network",
        "cpu_limit",
        "virtuoso_version",
        "virtuoso_sha",
        "estimated_db_size_gb",
        "parallel_threads",
    )

    try:
        launch_virtuoso(**{option: getattr(args, option) for option in forwarded_options})
    except RuntimeError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1
    return 0
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())