Coverage for virtuoso_utilities / launch_virtuoso.py: 74%
407 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-04-14 09:16 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-04-14 09:16 +0000
1#!/usr/bin/env python3
3# SPDX-FileCopyrightText: 2025 Arcangelo Massari <arcangelo.massari@unibo.it>
4#
5# SPDX-License-Identifier: ISC
7"""
8Virtuoso Docker Launcher
10This script launches an OpenLink Virtuoso database instance using Docker.
11Configuration parameters can be customized through command-line arguments.
12"""
14import argparse
15import configparser
16import os
17import re
18import subprocess
19import sys
20import time
21from typing import List, Tuple
23import psutil
# Default number of seconds wait_for_virtuoso_ready() polls before giving up.
DEFAULT_WAIT_TIMEOUT = 120
# Executable used to invoke the Docker CLI on the host.
DOCKER_EXEC_PATH = "docker"
# Name of the isql client as invoked inside the Virtuoso container.
DOCKER_ISQL_PATH_INSIDE_CONTAINER = "isql"

# Default values for container configuration
# The default image is pinned by digest; --virtuoso-version / --virtuoso-sha override it.
DEFAULT_IMAGE = "openlink/virtuoso-opensource-7@sha256:e07868a3db9090400332eaa8ee694b8cf9bf7eebc26db6bbdc3bb92fd30ed010"
# Path inside the container where the host data directory is mounted.
DEFAULT_CONTAINER_DATA_DIR = "/opt/virtuoso-opensource/database"
# Passed to the container as VIRT_Parameters_ResultSetMaxRows.
DEFAULT_MAX_ROWS = 100000

# Fraction of the container memory limit used for buffer sizing and as the
# Docker memory reservation; the remainder is left as headroom.
VIRTUOSO_MEMORY_PERCENTAGE = 0.85
BYTES_PER_BUFFER = 8700  # Each buffer occupies ~8700 bytes (8K page + overhead) according to https://docs.openlinksw.com/virtuoso/ch-server/
37from virtuoso_utilities.isql_helpers import run_isql_command
# Minimum database size to trigger MaxCheckpointRemap calculation.
# The threshold is declared in GiB and converted to bytes for comparisons.
MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB = 1
MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP = MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB * 1024**3

# Default directories allowed in Virtuoso
# (a set: build_docker_run_command() copies it and adds the mounted paths)
DEFAULT_DIRS_ALLOWED = {".", "../vad", "/usr/share/proj", "../virtuoso_input"}

# Connection error patterns for retry logic
# Lower-case substrings; is_connection_error() matches them against lower-cased stderr.
CONNECTION_ERROR_PATTERNS = [
    "connection refused",
    "connect failed",
    "connection failed",
    "cannot connect",
    "no route to host",
]
def bytes_to_docker_mem_str(num_bytes: int) -> str:
    """
    Convert a number of bytes to a Docker memory string (e.g., "85g", "512m").

    Exact multiples of a binary unit are expressed in the largest such unit
    (g, m, k), so no precision is lost. Values that are not a multiple of
    1024 are rounded down to whole MiB (or whole KiB below 1 MiB). Rounding
    down to GiB here would turn every value below 1 GiB into "0g" and could
    discard almost a full GiB of larger values.

    Args:
        num_bytes: Size in bytes.

    Returns:
        str: The size with a lower-case unit suffix, or plain digits (bytes)
        for non-aligned values below 1 KiB.
    """
    gib, mib, kib = 1024**3, 1024**2, 1024

    for unit_size, suffix in ((gib, "g"), (mib, "m"), (kib, "k")):
        if num_bytes % unit_size == 0:
            return f"{num_bytes // unit_size}{suffix}"

    # Non-exact multiple: keep as much precision as a whole-unit string allows.
    if num_bytes >= mib:
        return f"{num_bytes // mib}m"
    if num_bytes >= kib:
        return f"{num_bytes // kib}k"
    # A bare number is read as bytes by parse_memory_value() and by Docker.
    return str(num_bytes)
def parse_memory_value(memory_str: str) -> int:
    """
    Parse memory value from Docker memory format (e.g., "2g", "4096m") to bytes.

    Accepts the forms Docker itself accepts for --memory: an integer or
    decimal number, an optional k/m/g/t unit (case-insensitive) with an
    optional "b" or "ib" suffix ("2g", "2GB", "8GiB", "1.5g"), or a plain
    number of bytes ("1024", "1024b"). Surrounding whitespace is ignored.
    Units are binary (1k = 1024 bytes).

    Args:
        memory_str: Memory string in Docker format

    Returns:
        int: Memory size in bytes. If the string cannot be parsed, a warning
        is printed to stderr and 2 GiB is returned.
    """
    multipliers = {"": 1, "k": 1024, "m": 1024**2, "g": 1024**3, "t": 1024**4}

    match = re.fullmatch(
        r'(\d+(?:\.\d+)?)\s?(?:([kmgt])i?b?|b)?',
        memory_str.strip().lower(),
    )
    if not match:
        # Default to 2GB if parsing fails
        print(f"Warning: Could not parse memory string '{memory_str}'. Defaulting to 2g.", file=sys.stderr)
        return 2 * 1024 * 1024 * 1024

    number, unit = match.groups()
    multiplier = multipliers[unit or ""]

    if "." in number:
        # Decimal quantities such as "1.5g"; truncate to whole bytes.
        return int(float(number) * multiplier)
    # Integer quantities stay in exact integer arithmetic.
    return int(number) * multiplier
def get_directory_size(directory_path: str) -> int:
    """
    Calculate the total size of all files within a directory.

    The directory is walked recursively. Symbolic links are not counted.
    Files whose size cannot be read and sub-directories that cannot be
    listed are reported on stderr and skipped, so the result is a
    best-effort total.

    Args:
        directory_path: The path to the directory.

    Returns:
        Total size in bytes, or 0 if the path is not a directory.
    """
    if not os.path.isdir(directory_path):
        return 0

    def _report_walk_error(error: OSError) -> None:
        # os.walk() ignores listing errors unless an onerror callback is given,
        # so without this hook unreadable sub-directories are skipped silently.
        print(f"Warning: Could not walk directory '{directory_path}': {error}", file=sys.stderr)

    total_size = 0
    for dirpath, _dirnames, filenames in os.walk(directory_path, onerror=_report_walk_error):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            # skip if it is symbolic link
            if os.path.islink(file_path):
                continue
            try:
                total_size += os.path.getsize(file_path)
            except OSError as e:
                print(f"Warning: Could not get size of file '{file_path}': {e}", file=sys.stderr)

    return total_size
def get_optimal_buffer_values(memory_limit: str) -> Tuple[int, int]:
    """
    Derive NumberOfBuffers and MaxDirtyBuffers from the container memory limit.

    The computation is:
        usable bytes    = memory limit * VIRTUOSO_MEMORY_PERCENTAGE
        NumberOfBuffers = (usable bytes * 0.66) / BYTES_PER_BUFFER
        MaxDirtyBuffers = NumberOfBuffers * 0.75
    with each step truncated to an integer.

    Args:
        memory_limit: Memory limit string in Docker format (e.g., "2g", "4096m")

    Returns:
        Tuple[int, int]: (NumberOfBuffers, MaxDirtyBuffers). If the calculation
        raises, a warning is printed to stderr and (170000, 130000) is returned.
    """
    try:
        usable_bytes = int(parse_memory_value(memory_limit) * VIRTUOSO_MEMORY_PERCENTAGE)
        buffer_count = int((usable_bytes * 0.66) / BYTES_PER_BUFFER)
        dirty_buffer_count = int(buffer_count * 0.75)
    except Exception as e:
        print(f"Warning: Error calculating buffer values: {e}. Using default values.", file=sys.stderr)
        # Fallback figures roughly suited to 1-2GB of RAM
        return 170000, 130000
    else:
        return buffer_count, dirty_buffer_count
def calculate_max_checkpoint_remap(size_bytes: int) -> int:
    """
    Compute the MaxCheckpointRemap value for a database of the given size.

    Args:
        size_bytes: Database size in bytes.

    Returns:
        int: A quarter of the number of 8192-byte units in size_bytes,
        truncated to an integer.
    """
    units_of_8k = size_bytes / 8192
    return int(units_of_8k / 4)
def get_default_memory() -> str:
    """
    Pick a default container memory limit from the host's RAM.

    Returns:
        str: Two thirds of the total RAM reported by psutil (never less than
        1 GiB) as a Docker memory string, or "2g" if detection fails.
    """
    one_gib = 1 * 1024**3
    try:
        two_thirds_of_ram = int(psutil.virtual_memory().total * (2 / 3))
        chosen_bytes = two_thirds_of_ram if two_thirds_of_ram > one_gib else one_gib
        return bytes_to_docker_mem_str(chosen_bytes)
    except Exception:
        return "2g"
def calculate_threading_config(parallel_threads=None):
    """
    Derive the thread-related settings from a core count.

    Args:
        parallel_threads: Core count to size for. When falsy (None or 0),
            os.cpu_count() is used, falling back to 1 if it is unavailable.

    Returns:
        dict: "async_queue_max_threads" (cores * 1.5, truncated),
        "threads_per_query" (cores) and "max_client_connections" (cores * 2).
    """
    if parallel_threads:
        cores = parallel_threads
    else:
        cores = os.cpu_count() or 1

    config = {}
    config["async_queue_max_threads"] = int(cores * 1.5)
    config["threads_per_query"] = cores
    config["max_client_connections"] = cores * 2
    return config
def calculate_max_query_mem(memory, number_of_buffers):
    """
    Compute the MaxQueryMem setting from the memory left over after buffers.

    Args:
        memory: Container memory limit in Docker format (e.g., "2g").
        number_of_buffers: The NumberOfBuffers value in use.

    Returns:
        80% of (memory * VIRTUOSO_MEMORY_PERCENTAGE minus the bytes taken by
        the buffers) as a Docker-style memory string, or None when that
        amount is not positive.
    """
    bytes_for_buffers = number_of_buffers * BYTES_PER_BUFFER
    usable_bytes = int(parse_memory_value(memory) * VIRTUOSO_MEMORY_PERCENTAGE)
    query_budget = int((usable_bytes - bytes_for_buffers) * 0.8)
    if query_budget <= 0:
        return None
    return bytes_to_docker_mem_str(query_budget)
def get_virt_env_vars(memory, number_of_buffers, max_dirty_buffers, parallel_threads, estimated_db_size_gb=0.0, dirs_allowed=None):
    """
    Build the VIRT_* environment variables passed to the Virtuoso container.

    Args:
        memory: Container memory limit in Docker format (e.g., "2g").
        number_of_buffers: Value for [Parameters] NumberOfBuffers.
        max_dirty_buffers: Value for [Parameters] MaxDirtyBuffers.
        parallel_threads: Core count for threading settings (None = auto).
        estimated_db_size_gb: Estimated database size in GB. When it reaches
            the MaxCheckpointRemap threshold, MaxCheckpointRemap is set for
            both the Database and TempDatabase sections.
        dirs_allowed: Comma-separated DirsAllowed value; omitted when falsy.

    Returns:
        dict: Environment variable names mapped to string values, in
        insertion order. A summary of the chosen settings is printed.
    """
    threads = calculate_threading_config(parallel_threads)
    async_threads = threads["async_queue_max_threads"]
    per_query_threads = threads["threads_per_query"]
    client_connections = threads["max_client_connections"]

    settings = {
        "VIRT_Parameters_NumberOfBuffers": str(number_of_buffers),
        "VIRT_Parameters_MaxDirtyBuffers": str(max_dirty_buffers),
        "VIRT_Parameters_AsyncQueueMaxThreads": str(async_threads),
        "VIRT_Parameters_ThreadsPerQuery": str(per_query_threads),
        "VIRT_Parameters_MaxClientConnections": str(client_connections),
        # The HTTP server thread pool is sized like the client connection limit.
        "VIRT_HTTPServer_ServerThreads": str(client_connections),
        "VIRT_Parameters_AdjustVectorSize": "0",
        "VIRT_Parameters_VectorSize": "1000",
        "VIRT_Parameters_CheckpointInterval": "1",
        "VIRT_Parameters_ThreadCleanupInterval": "1",
        "VIRT_Parameters_ResourcesCleanupInterval": "1",
    }

    query_mem = calculate_max_query_mem(memory, number_of_buffers)
    if query_mem:
        settings["VIRT_Parameters_MaxQueryMem"] = query_mem
        query_mem_label = query_mem
    else:
        query_mem_label = "N/A"

    settings["VIRT_Client_SQL_QUERY_TIMEOUT"] = "0"
    settings["VIRT_Client_SQL_TXN_TIMEOUT"] = "0"

    if estimated_db_size_gb > 0:
        estimated_bytes = int(estimated_db_size_gb * 1024**3)
        if estimated_bytes >= MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP:
            remap = calculate_max_checkpoint_remap(estimated_bytes)
            for env_name in ("VIRT_Database_MaxCheckpointRemap", "VIRT_TempDatabase_MaxCheckpointRemap"):
                settings[env_name] = str(remap)
            print(f"Info: Using estimated database size of {estimated_db_size_gb} GB to set MaxCheckpointRemap to {remap}")

    if dirs_allowed:
        settings["VIRT_Parameters_DirsAllowed"] = dirs_allowed

    print(f"Info: Threading: AsyncQueueMaxThreads={async_threads}, "
          f"ThreadsPerQuery={per_query_threads}, "
          f"MaxClientConnections={client_connections}")
    print(f"Info: MaxQueryMem={query_mem_label}, AdjustVectorSize=0, VectorSize=1000, CheckpointInterval=1, ThreadCleanupInterval=1, ResourcesCleanupInterval=1")

    return settings
def is_connection_error(stderr):
    """
    Tell whether an isql stderr text looks like a connection failure.

    Args:
        stderr: Error output text to inspect.

    Returns:
        bool: True if any entry of CONNECTION_ERROR_PATTERNS occurs in the
        text, compared case-insensitively.
    """
    lowered = stderr.lower()
    for pattern in CONNECTION_ERROR_PATTERNS:
        if pattern in lowered:
            return True
    return False
def create_isql_args(dba_password, docker_container=None):
    """
    Build the argument namespace passed to run_isql_command().

    Args:
        dba_password: Password of the "dba" user.
        docker_container: Container name to run isql in. When falsy, the
            namespace targets an "isql" executable on the host instead.

    Returns:
        argparse.Namespace: Connection settings for localhost:1111 as "dba",
        with either the docker_* fields or isql_path filled in.
    """
    via_docker = bool(docker_container)
    return argparse.Namespace(
        host="localhost",
        port=1111,
        user="dba",
        password=dba_password,
        docker_container=docker_container if via_docker else None,
        docker_path=DOCKER_EXEC_PATH if via_docker else None,
        docker_isql_path=DOCKER_ISQL_PATH_INSIDE_CONTAINER if via_docker else None,
        isql_path=None if via_docker else "isql",
    )
def _ensure_ini_section(config, section, ini_path):
    """Create *section* in *config* if it is missing and log the addition."""
    if not config.has_section(section):
        config.add_section(section)
        print(f"Info: Added [{section}] section to '{ini_path}'.")


def _normalize_dirs_allowed(value):
    """Split a comma-separated DirsAllowed value into a set of trimmed entries."""
    if value is None:
        return set()
    return {entry.strip() for entry in value.split(',') if entry.strip()}


def _sync_ini_option(config, section, option, new_value, ini_path, equivalent=None, report_previous=True):
    """
    Set one option in *config* if it differs from the desired value.

    Args:
        config: The ConfigParser being edited; *section* must already exist.
        section: Section name.
        option: Option name.
        new_value: Desired value as a string, or None to leave the option alone.
        ini_path: Path of the file, used in log messages only.
        equivalent: Optional callable(current, new) -> bool deciding whether
            the two values count as equal; defaults to string equality.
        report_previous: When True the log line shows the old and new value
            ("Updating ... from ... to ..."); otherwise only the new one
            ("Setting ... to ...").

    Returns:
        bool: True if the option was changed.
    """
    if new_value is None:
        return False

    current_value = config.get(section, option, fallback=None)
    if equivalent is not None:
        unchanged = equivalent(current_value, new_value)
    else:
        unchanged = current_value == new_value
    if unchanged:
        return False

    config.set(section, option, new_value)
    if report_previous:
        print(f"Info: Updating [{section}] {option} from '{current_value}' to '{new_value}' in '{ini_path}'.")
    else:
        print(f"Info: Setting [{section}] {option} to '{new_value}' in '{ini_path}'.")
    return True


def update_ini_memory_settings(
    ini_path: str,
    data_dir_path: str,
    number_of_buffers: int = None,
    max_dirty_buffers: int = None,
    dirs_allowed: str = None,
    async_queue_max_threads: int = None,
    threads_per_query: int = None,
    max_client_connections: int = None,
    adjust_vector_size: int = None,
    vector_size: int = None,
    checkpoint_interval: int = None,
    max_query_mem: str = None,
    http_server_threads: int = None,
    thread_cleanup_interval: int = None,
    resources_cleanup_interval: int = None,
):
    """
    Bring an existing virtuoso.ini in line with the requested settings.

    Every optional argument left as None is not touched. Independently of the
    arguments, [Client] SQL_QUERY_TIMEOUT and SQL_TXN_TIMEOUT are forced to
    '0', and MaxCheckpointRemap is set in [Database] and [TempDatabase] when
    the data directory is at least MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP
    bytes. The file is rewritten only if at least one option value changed.

    Note: the file is rewritten through configparser, which does not keep
    comments from the original file.

    Args:
        ini_path: Path to virtuoso.ini. If it does not exist the function
            logs that and returns without doing anything.
        data_dir_path: Host data directory whose size drives MaxCheckpointRemap.
        number_of_buffers: [Parameters] NumberOfBuffers.
        max_dirty_buffers: [Parameters] MaxDirtyBuffers.
        dirs_allowed: [Parameters] DirsAllowed, comma-separated; compared with
            the current value as a set, ignoring order and whitespace.
        async_queue_max_threads: [Parameters] AsyncQueueMaxThreads.
        threads_per_query: [Parameters] ThreadsPerQuery.
        max_client_connections: [Parameters] MaxClientConnections.
        adjust_vector_size: [Parameters] AdjustVectorSize.
        vector_size: [Parameters] VectorSize.
        checkpoint_interval: [Parameters] CheckpointInterval.
        max_query_mem: [Parameters] MaxQueryMem (already a string).
        http_server_threads: [HTTPServer] ServerThreads.
        thread_cleanup_interval: [Parameters] ThreadCleanupInterval.
        resources_cleanup_interval: [Parameters] ResourcesCleanupInterval.

    Returns:
        None. Parse, I/O and unexpected errors are reported on stderr and
        swallowed.
    """
    if not os.path.exists(ini_path):
        print(f"Info: virtuoso.ini not found at '{ini_path}'. Likely first run. Skipping settings update.")
        return

    print(f"Info: Checking existing virtuoso.ini at '{ini_path}' for settings update...")
    actual_db_size_bytes = get_directory_size(data_dir_path)

    # Calculate MaxCheckpointRemap if database is large enough
    calculate_remap = actual_db_size_bytes >= MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP
    calculated_remap_value = calculate_max_checkpoint_remap(actual_db_size_bytes) if calculate_remap else None

    def as_ini_value(value):
        # configparser only stores strings; None keeps meaning "not requested".
        return None if value is None else str(value)

    def sync(section, option, value, **kwargs):
        return _sync_ini_option(config, section, option, value, ini_path, **kwargs)

    config = configparser.ConfigParser(interpolation=None, strict=False)
    config.optionxform = str  # Keep case sensitivity
    try:
        # Read as UTF-8; a decoding or parse failure is handled by the except clauses below
        config.read(ini_path, encoding='utf-8')

        # The order of the steps below is the order of the log output and of
        # any newly created sections in the rewritten file.
        changed = []

        _ensure_ini_section(config, 'Parameters', ini_path)
        changed.append(sync('Parameters', 'NumberOfBuffers', as_ini_value(number_of_buffers)))

        # Ensure [Client] section has SQL timeouts set to 0
        _ensure_ini_section(config, 'Client', ini_path)
        for timeout_option in ('SQL_QUERY_TIMEOUT', 'SQL_TXN_TIMEOUT'):
            changed.append(sync('Client', timeout_option, '0', report_previous=False))

        changed.append(sync('Parameters', 'MaxDirtyBuffers', as_ini_value(max_dirty_buffers)))

        changed.append(sync(
            'Parameters', 'DirsAllowed', dirs_allowed,
            equivalent=lambda current, new: _normalize_dirs_allowed(current) == _normalize_dirs_allowed(new),
        ))

        for option, value in (
            ('AsyncQueueMaxThreads', async_queue_max_threads),
            ('ThreadsPerQuery', threads_per_query),
            ('MaxClientConnections', max_client_connections),
            ('AdjustVectorSize', adjust_vector_size),
            ('VectorSize', vector_size),
            ('CheckpointInterval', checkpoint_interval),
        ):
            changed.append(sync('Parameters', option, as_ini_value(value)))

        changed.append(sync('Parameters', 'MaxQueryMem', max_query_mem))

        if http_server_threads is not None:
            _ensure_ini_section(config, 'HTTPServer', ini_path)
            changed.append(sync('HTTPServer', 'ServerThreads', as_ini_value(http_server_threads)))

        for option, value in (
            ('ThreadCleanupInterval', thread_cleanup_interval),
            ('ResourcesCleanupInterval', resources_cleanup_interval),
        ):
            changed.append(sync('Parameters', option, as_ini_value(value)))

        # Update MaxCheckpointRemap if database is large enough
        if calculate_remap:
            calculated_remap_str = str(calculated_remap_value)
            for section in ('Database', 'TempDatabase'):
                _ensure_ini_section(config, section, ini_path)
                changed.append(sync(section, 'MaxCheckpointRemap', calculated_remap_str))
        else:
            print(f"Info: Host data directory '{data_dir_path}' size ({actual_db_size_bytes / (1024**3):.2f} GiB) is below threshold ({MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB} GiB). No changes made to MaxCheckpointRemap in virtuoso.ini.")

        if any(changed):
            # Write changes back with UTF-8 encoding
            with open(ini_path, 'w', encoding='utf-8') as configfile:
                config.write(configfile)
            print(f"Info: Successfully saved changes to '{ini_path}'.")
        else:
            print(f"Info: No changes needed in '{ini_path}'.")

    except configparser.Error as e:
        print(f"Error: Failed to parse or update virtuoso.ini at '{ini_path}': {e}", file=sys.stderr)
    except IOError as e:
        print(f"Error: Failed to read or write virtuoso.ini at '{ini_path}': {e}", file=sys.stderr)
    except Exception as e:
        print(f"Error: An unexpected error occurred while updating virtuoso.ini: {e}", file=sys.stderr)
def parse_arguments() -> argparse.Namespace:  # pragma: no cover
    """
    Parse command-line arguments for Virtuoso Docker launcher.

    The defaults of --number-of-buffers and --max-dirty-buffers depend on the
    memory limit, so --memory is extracted with a small preliminary parser
    first. All options are then registered on one parser and parsed once, so
    that -h/--help lists every option, including the two buffer options.

    Returns:
        argparse.Namespace: Parsed command-line arguments
    """
    # Preliminary parse: only find out whether (and which) --memory was given.
    # add_help=False keeps -h/--help for the full parser below.
    preliminary_parser = argparse.ArgumentParser(add_help=False)
    preliminary_parser.add_argument("--memory", default=None)
    preliminary_args, _ = preliminary_parser.parse_known_args()
    memory_specified = preliminary_args.memory is not None

    # --- Calculate default memory based on host RAM (2/3) ---
    default_memory_str = "2g"  # Fallback default
    if not memory_specified:
        try:
            total_host_ram = psutil.virtual_memory().total
            # 2/3 of total RAM, but never less than 1GB
            default_mem_bytes = max(int(total_host_ram * (2/3)), 1 * 1024 * 1024 * 1024)

            default_memory_str = bytes_to_docker_mem_str(default_mem_bytes)
            print(f"Info: Detected {total_host_ram / (1024**3):.1f} GiB total host RAM. "
                  f"Setting default container memory limit to {default_memory_str} (approx. 2/3). "
                  f"Use --memory to override.")
        except Exception as e:
            print(f"Warning: Could not auto-detect host RAM using psutil: {e}. "
                  f"Falling back to default memory limit '{default_memory_str}'.", file=sys.stderr)

    # Buffer defaults follow the memory limit that will actually be in effect.
    effective_memory = preliminary_args.memory if memory_specified else default_memory_str
    optimal_number_of_buffers, optimal_max_dirty_buffers = get_optimal_buffer_values(effective_memory)

    # Full parser with all arguments
    parser = argparse.ArgumentParser(
        description="Launch a Virtuoso database using Docker",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--name",
        default="virtuoso",
        help="Name for the Docker container"
    )
    parser.add_argument(
        "--http-port",
        type=int,
        default=8890,
        help="HTTP port to expose Virtuoso on"
    )
    parser.add_argument(
        "--isql-port",
        type=int,
        default=1111,
        help="ISQL port to expose Virtuoso on"
    )

    parser.add_argument(
        "--data-dir",
        default="./virtuoso-data",
        help="Host directory to mount as Virtuoso data directory"
    )

    parser.add_argument(
        "--mount-volume",
        action="append",
        dest="extra_volumes",
        metavar="HOST_PATH:CONTAINER_PATH",
        help="Mount an additional host directory into the container. "
             "Format: /path/on/host:/path/in/container. "
             "Can be specified multiple times."
    )

    parser.add_argument(
        "--memory",
        default=default_memory_str,
        help="Memory limit for the container (e.g., 2g, 4g). "
             f"Defaults to approx. 2/3 of host RAM if psutil is installed, otherwise '{default_memory_str}'."
    )
    parser.add_argument(
        "--cpu-limit",
        type=float,
        default=0,
        help="CPU limit for the container (0 means no limit)"
    )

    parser.add_argument(
        "--dba-password",
        default="dba",
        help="Password for the Virtuoso dba user"
    )

    parser.add_argument(
        "--force-remove",
        action="store_true",
        help="Force removal of existing container with the same name"
    )

    parser.add_argument(
        "--network",
        help="Docker network to connect the container to (must be a pre-existing network)"
    )

    parser.add_argument(
        "--wait-ready",
        action="store_true",
        help="Wait until Virtuoso is ready to accept connections"
    )
    parser.add_argument(
        "--detach",
        action="store_true",
        help="Run container in detached mode"
    )

    parser.add_argument(
        "--enable-write-permissions",
        action="store_true",
        help="Enable write permissions for 'nobody' and 'SPARQL' users. "
             "This makes the database publicly writable. "
             "Forces waiting for the container to be ready."
    )

    parser.add_argument(
        "--estimated-db-size-gb",
        type=float,
        default=0,
        help="Estimated database size in GB. If provided, MaxCheckpointRemap will be preconfigured "
             "based on this estimate rather than measuring existing data."
    )

    parser.add_argument(
        "--virtuoso-version",
        default=None,
        help="Virtuoso Docker image version/tag to use (e.g., 'latest', '7.2.11', '7.2.12'). If not specified, uses the default pinned version."
    )

    parser.add_argument(
        "--virtuoso-sha",
        default=None,
        help="Virtuoso Docker image SHA256 digest to use (e.g., 'sha256:e07868a3db9090400332eaa8ee694b8cf9bf7eebc26db6bbdc3bb92fd30ed010'). Takes precedence over --virtuoso-version."
    )

    parser.add_argument(
        "--parallel-threads",
        type=int,
        default=None,
        help="Maximum parallel threads for query execution. "
             "If not specified, uses all available CPU cores. "
             "Sets AsyncQueueMaxThreads to cores * 1.5 and ThreadsPerQuery to cores."
    )

    parser.add_argument(
        "--max-dirty-buffers",
        type=int,
        default=optimal_max_dirty_buffers,
        help="Maximum dirty buffers before checkpoint (auto-calculated based on --memory value, requires integer)"
    )
    parser.add_argument(
        "--number-of-buffers",
        type=int,
        default=optimal_number_of_buffers,
        help="Number of buffers (auto-calculated based on --memory value, requires integer)"
    )

    return parser.parse_args()
def check_docker_installed() -> bool:
    """
    Check if Docker is installed and accessible.

    Runs "docker --version" and discards its output. Any failure to start or
    run the command (executable missing, not executable, non-zero exit
    status) counts as "not installed".

    Returns:
        bool: True if Docker is installed, False otherwise
    """
    try:
        subprocess.run(
            ["docker", "--version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers FileNotFoundError (no docker on PATH) as well as
        # PermissionError (a docker file exists but cannot be executed).
        return False
    return True
def check_container_exists(container_name: str) -> bool:
    """
    Check if a Docker container with the specified name exists.

    Lists all containers (running or not) whose name matches the anchored
    filter ^name$ and looks for an exact match among the returned names. The
    comparison is done per output line, so a container whose name merely
    contains container_name is not mistaken for it.

    Args:
        container_name: Name of the container to check

    Returns:
        bool: True if container exists, False otherwise
    """
    result = subprocess.run(
        ["docker", "ps", "-a", "--filter", f"name=^{container_name}$", "--format", "{{.Names}}"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True
    )

    # One container name per line; ignore blank lines and padding.
    listed_names = {line.strip() for line in result.stdout.splitlines() if line.strip()}
    return container_name in listed_names
def remove_container(container_name: str) -> bool:
    """
    Force-remove a Docker container via "docker rm -f".

    Args:
        container_name: Name of the container to remove

    Returns:
        bool: True if the command succeeded, False if it failed with a
        subprocess error (for example a non-zero exit status)
    """
    removal_cmd = ["docker", "rm", "-f", container_name]
    try:
        # Output is captured only to keep it off the console.
        subprocess.run(removal_cmd, capture_output=True, check=True)
    except subprocess.SubprocessError:
        return False
    else:
        return True
def get_docker_image(version: str, sha: str) -> str:
    """
    Resolve the Docker image reference to run.

    Args:
        version: Image tag (e.g., 'latest', '7.2.11', '7.2.12') or None
        sha: Image digest (e.g., 'sha256:...') or None; wins over version

    Returns:
        str: "<repository>@<sha>" when a digest is given, otherwise
        "<repository>:<version>", or DEFAULT_IMAGE when neither is given
    """
    repository = "openlink/virtuoso-opensource-7"

    if sha is not None:
        return f"{repository}@{sha}"
    if version is None:
        return DEFAULT_IMAGE
    # "latest" needs no special case: it is a tag like any other.
    return f"{repository}:{version}"
def build_docker_run_command(args: argparse.Namespace) -> Tuple[List[str], List[str]]:
    """
    Build the Docker run command based on provided arguments.

    Side effect: the host data directory (args.data_dir) is created if it
    does not exist.

    Args:
        args: Command-line arguments

    Returns:
        Tuple[List[str], List[str]]:
            - Command parts for subprocess.run
            - Unique container paths intended for DirsAllowed. Note that this
              collection is a set (as it has always been), despite the
              annotation.
    """
    host_data_dir_abs = os.path.abspath(args.data_dir)
    os.makedirs(host_data_dir_abs, exist_ok=True)

    cmd = [DOCKER_EXEC_PATH, "run"]

    # Containers run in the foreground are removed when they exit
    if not args.detach:
        cmd.append("--rm")

    cmd.extend(["--name", args.name])

    # Add user mapping to run as the host user
    try:
        cmd.extend(["--user", f"{os.getuid()}:{os.getgid()}"])
    except AttributeError:
        print("Warning: os.getuid/os.getgid not available on this system (likely Windows). Skipping user mapping.", file=sys.stderr)

    cmd.extend(["-p", f"{args.http_port}:8890"])
    cmd.extend(["-p", f"{args.isql_port}:1111"])

    if args.network:
        cmd.extend(["--network", args.network])

    container_data_dir_path = DEFAULT_CONTAINER_DATA_DIR
    cmd.extend(["-v", f"{host_data_dir_abs}:{container_data_dir_path}"])

    # Start with default Virtuoso paths
    paths_to_allow_in_container = DEFAULT_DIRS_ALLOWED.copy()
    paths_to_allow_in_container.add(container_data_dir_path)

    # Mount additional volumes and allow their container paths in Virtuoso
    for volume_spec in args.extra_volumes or []:
        if ':' not in volume_spec:
            print(f"Warning: Ignoring invalid volume specification '{volume_spec}' "
                  f"(expected HOST_PATH:CONTAINER_PATH).", file=sys.stderr)
            continue
        host_path, container_spec = volume_spec.split(':', 1)
        cmd.extend(["-v", f"{os.path.abspath(host_path)}:{container_spec}"])

        # The spec may end in mount options (e.g. ":ro"); they stay part of
        # the -v argument but must not end up in DirsAllowed.
        container_path = container_spec.split(':', 1)[0]
        container_path_abs = container_path if container_path.startswith('/') else '/' + container_path
        paths_to_allow_in_container.add(container_path_abs)
        print(f"Info: Adding mounted volume path '{container_path_abs}' to DirsAllowed.")

    # Soft limit (reservation) below the hard limit leaves headroom in the container
    memory_bytes = parse_memory_value(args.memory)
    reservation_bytes = int(memory_bytes * VIRTUOSO_MEMORY_PERCENTAGE)
    reservation_str = bytes_to_docker_mem_str(reservation_bytes)
    cmd.extend(["--memory-reservation", reservation_str])
    cmd.extend(["--memory", args.memory])
    if args.cpu_limit > 0:
        cmd.extend(["--cpus", str(args.cpu_limit)])

    env_vars = {
        "DBA_PASSWORD": args.dba_password,
        "VIRT_Parameters_ResultSetMaxRows": str(DEFAULT_MAX_ROWS),
        "VIRT_SPARQL_DefaultQuery": "SELECT (COUNT(*) AS ?quadCount) WHERE { GRAPH ?g { ?s ?p ?o } }",
    }

    virt_env_vars = get_virt_env_vars(
        memory=args.memory,
        number_of_buffers=args.number_of_buffers,
        max_dirty_buffers=args.max_dirty_buffers,
        parallel_threads=args.parallel_threads,
        estimated_db_size_gb=args.estimated_db_size_gb,
        # Sorted so the generated command is the same on every run
        dirs_allowed=",".join(sorted(paths_to_allow_in_container)),
    )
    env_vars.update(virt_env_vars)

    for key, value in env_vars.items():
        cmd.extend(["-e", f"{key}={value}"])

    if args.detach:
        cmd.append("-d")

    # Append image name
    docker_image = get_docker_image(args.virtuoso_version, args.virtuoso_sha)
    cmd.append(docker_image)

    return cmd, paths_to_allow_in_container
def wait_for_virtuoso_ready(
    dba_password: str,
    docker_container: str = None,
    timeout: int = DEFAULT_WAIT_TIMEOUT,
    poll_interval: int = 3,
) -> bool:
    """
    Poll Virtuoso with an isql "status();" call until it answers.

    Connection errors are treated as "not up yet" and retried. Any other
    isql failure aborts the wait immediately.

    Args:
        dba_password: Password of the dba user.
        docker_container: Container to run isql in (None = isql on the host).
        timeout: Maximum number of seconds to wait.
        poll_interval: Seconds to sleep between attempts.

    Returns:
        bool: True once Virtuoso answers, False on timeout or on a
        non-connection isql error.
    """
    print(f"Waiting for Virtuoso to be ready (timeout: {timeout}s)...")
    # Monotonic clock: the timeout must not be affected by system clock changes.
    start_time = time.monotonic()
    isql_args = create_isql_args(dba_password, docker_container)
    next_progress_report = 0  # elapsed seconds at which to print the next progress line

    while time.monotonic() - start_time < timeout:
        try:
            success, _, stderr = run_isql_command(isql_args, sql_command="status();")
            if success:
                print("Virtuoso is ready.")
                return True
            if not is_connection_error(stderr):
                print(f"ISQL check failed: {stderr}", file=sys.stderr)
                return False
            # Report progress roughly every 10 seconds, whatever the poll interval.
            elapsed = int(time.monotonic() - start_time)
            if elapsed >= next_progress_report:
                print(f" Waiting for Virtuoso... ({elapsed}s elapsed)")
                next_progress_report = elapsed + 10
            time.sleep(poll_interval)
        except Exception as e:
            print(f"Warning: Error in readiness check: {e}", file=sys.stderr)
            time.sleep(poll_interval + 2)

    print(f"Timeout ({timeout}s) waiting for Virtuoso.", file=sys.stderr)
    return False
def run_docker_command(cmd: List[str], capture_output=False, check=True, suppress_error=False):
    """
    Run a command, echoing it first, and report failures on stderr.

    Args:
        cmd: Command and arguments.
        capture_output: Capture stdout/stderr on the result instead of
            sending them to this process's stdout/stderr.
        check: Raise CalledProcessError on a non-zero exit status.
        suppress_error: Skip the error report (the exception is still raised).

    Returns:
        subprocess.CompletedProcess: The finished process (text mode).

    Raises:
        subprocess.CalledProcessError: Non-zero exit status with check=True.
        FileNotFoundError: The executable could not be found.
    """
    print(f"Executing: {' '.join(cmd)}")

    if capture_output:
        out_target = err_target = subprocess.PIPE
    else:
        out_target, err_target = sys.stdout, sys.stderr

    try:
        return subprocess.run(
            cmd,
            stdout=out_target,
            stderr=err_target,
            text=True,
            check=check,
        )
    except subprocess.CalledProcessError as e:
        if suppress_error:
            raise
        print(f"Error executing Docker command: {e}", file=sys.stderr)
        if capture_output:
            print(f"Stderr: {e.stderr}", file=sys.stderr)
            print(f"Stdout: {e.stdout}", file=sys.stderr)
        raise
    except FileNotFoundError:
        if suppress_error:
            raise
        print("Error: 'docker' command not found. Make sure Docker is installed and in your PATH.", file=sys.stderr)
        raise
def grant_write_permissions(dba_password: str, docker_container: str = None) -> bool:
    """
    Run the two ISQL commands that enable SPARQL write access.

    Both commands are always attempted, even if the first one fails; each
    failure is reported as a warning on stderr.

    Args:
        dba_password: DBA password used to build the ISQL arguments.
        docker_container: Container name forwarded to ``create_isql_args``.

    Returns:
        True only if both commands succeeded.
    """
    print("Granting write permissions...")
    isql_args = create_isql_args(dba_password, docker_container)

    # Step 1: default RDF permissions for the 'nobody' user.
    nobody_ok, _, nobody_err = run_isql_command(
        isql_args, sql_command="DB.DBA.RDF_DEFAULT_USER_PERMS_SET('nobody', 7);"
    )
    if not nobody_ok:
        print(f" Warning: Failed to set 'nobody' permissions: {nobody_err}", file=sys.stderr)
    else:
        print(" Set permissions for 'nobody' user.")

    # Step 2: SPARQL_UPDATE role for the 'SPARQL' user.
    role_ok, _, role_err = run_isql_command(
        isql_args, sql_command="DB.DBA.USER_GRANT_ROLE('SPARQL', 'SPARQL_UPDATE');"
    )
    if not role_ok:
        print(f" Warning: Failed to grant SPARQL_UPDATE: {role_err}", file=sys.stderr)
    else:
        print(" Granted SPARQL_UPDATE role to 'SPARQL' user.")

    return nobody_ok and role_ok
def launch_virtuoso(  # pragma: no cover
    name: str = "virtuoso",
    data_dir: str = "./virtuoso-data",
    http_port: int = 8890,
    isql_port: int = 1111,
    memory: str = None,
    dba_password: str = "dba",
    detach: bool = True,
    wait_ready: bool = True,
    enable_write_permissions: bool = False,
    force_remove: bool = False,
    extra_volumes: list = None,
    network: str = None,
    cpu_limit: float = 0,
    virtuoso_version: str = None,
    virtuoso_sha: str = None,
    estimated_db_size_gb: float = 0,
    parallel_threads: int = None,
) -> None:
    """
    Launch Virtuoso Docker container.

    The function derives buffer and threading settings, writes them to the
    ``virtuoso.ini`` under ``data_dir``, deals with a pre-existing container
    of the same name, starts the container and optionally waits for it to be
    ready and grants SPARQL write permissions.

    Args:
        name: Container name
        data_dir: Host directory for Virtuoso data
        http_port: HTTP port to expose
        isql_port: ISQL port to expose
        memory: Memory limit (e.g., "4g"). Auto-calculated from host RAM if None.
        dba_password: DBA password
        detach: Run in detached mode
        wait_ready: Wait for Virtuoso to be ready
        enable_write_permissions: Enable SPARQL write permissions for 'nobody' and 'SPARQL' users
        force_remove: Force remove existing container with same name
        extra_volumes: Additional volumes to mount (list of "host:container" strings)
        network: Docker network to connect
        cpu_limit: CPU limit (0 = no limit)
        virtuoso_version: Docker image version tag
        virtuoso_sha: Docker image SHA digest (takes precedence over version)
        estimated_db_size_gb: Estimated DB size for MaxCheckpointRemap config
        parallel_threads: Max parallel threads for query execution. If None, uses all CPU cores.

    Raises:
        RuntimeError: If Docker is not installed or launch fails
    """
    if not check_docker_installed():
        raise RuntimeError("Docker command not found. Please install Docker.")

    if memory is None:
        # Default: two thirds of the host RAM, never below 1 GiB. Fall back to
        # a fixed "2g" when the host RAM cannot be determined.
        if psutil:
            try:
                total_host_ram = psutil.virtual_memory().total
                default_mem_bytes = max(int(total_host_ram * (2/3)), 1 * 1024**3)
                memory = bytes_to_docker_mem_str(default_mem_bytes)
            except Exception:
                memory = "2g"
        else:
            memory = "2g"

    number_of_buffers, max_dirty_buffers = get_optimal_buffer_values(memory)

    # build_docker_run_command() expects an argparse-style namespace, so the
    # keyword arguments are repackaged into one.
    args = argparse.Namespace(
        name=name,
        data_dir=data_dir,
        http_port=http_port,
        isql_port=isql_port,
        memory=memory,
        dba_password=dba_password,
        detach=detach,
        wait_ready=wait_ready,
        enable_write_permissions=enable_write_permissions,
        force_remove=force_remove,
        extra_volumes=extra_volumes,
        network=network,
        cpu_limit=cpu_limit,
        virtuoso_version=virtuoso_version,
        virtuoso_sha=virtuoso_sha,
        estimated_db_size_gb=estimated_db_size_gb,
        number_of_buffers=number_of_buffers,
        max_dirty_buffers=max_dirty_buffers,
        parallel_threads=parallel_threads,
    )

    host_data_dir_abs = os.path.abspath(data_dir)
    ini_file_path = os.path.join(host_data_dir_abs, "virtuoso.ini")

    docker_cmd, unique_paths_to_allow = build_docker_run_command(args)
    dirs_allowed_str = ",".join(unique_paths_to_allow) if unique_paths_to_allow else None

    threading = calculate_threading_config(parallel_threads)
    max_query_mem_value = calculate_max_query_mem(memory, number_of_buffers)

    update_ini_memory_settings(
        ini_path=ini_file_path,
        data_dir_path=host_data_dir_abs,
        number_of_buffers=number_of_buffers,
        max_dirty_buffers=max_dirty_buffers,
        dirs_allowed=dirs_allowed_str,
        async_queue_max_threads=threading["async_queue_max_threads"],
        threads_per_query=threading["threads_per_query"],
        max_client_connections=threading["max_client_connections"],
        adjust_vector_size=0,
        vector_size=1000,
        checkpoint_interval=1,
        max_query_mem=max_query_mem_value,
        http_server_threads=threading["max_client_connections"],
        thread_cleanup_interval=1,
        resources_cleanup_interval=1,
    )

    if check_container_exists(name):
        # "docker ps" (without -a) lists running containers only, so an empty
        # result means the existing container is stopped.
        result = subprocess.run(
            [DOCKER_EXEC_PATH, "ps", "--filter", f"name=^{name}$", "--format", "{{.Status}}"],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )
        is_running = "Up" in result.stdout

        if force_remove:
            print(f"Container '{name}' already exists. Forcing removal...")
            if not remove_container(name):
                raise RuntimeError(f"Failed to remove existing container '{name}'")
        elif is_running:
            raise RuntimeError(f"Container '{name}' is already running. Stop it first or use force_remove=True.")
        else:
            print(f"Container '{name}' exists but is stopped. Removing...")
            if not remove_container(name):
                raise RuntimeError(f"Failed to remove existing stopped container '{name}'")

    try:
        # Always check the exit status. With check disabled in detached mode a
        # failed "docker run -d" went unnoticed until the readiness poll timed
        # out, and the cleanup in the CalledProcessError handler below could
        # never run.
        run_docker_command(docker_cmd, check=True)

        should_wait = wait_ready or enable_write_permissions

        if detach and should_wait:
            print("Waiting for Virtuoso readiness...")
            ready = wait_for_virtuoso_ready(dba_password, docker_container=name)
            if not ready:
                raise RuntimeError("Virtuoso readiness check timed out or failed.")

        # NOTE(review): in foreground mode the run above presumably returns
        # only after the container has exited, so this grant is likely to
        # fail and only produce the warning below - confirm intended usage.
        if enable_write_permissions:
            if not grant_write_permissions(dba_password, docker_container=name):
                print("Warning: One or more commands to enable write permissions failed.", file=sys.stderr)

        print(f"Virtuoso launched successfully on http://localhost:{http_port}/sparql")

    except subprocess.CalledProcessError as e:
        # A failed detached start can leave a container behind; remove it so
        # that the next launch does not trip over the leftover name.
        if detach and check_container_exists(name):
            run_docker_command([DOCKER_EXEC_PATH, "stop", name], suppress_error=True, check=False)
            run_docker_command([DOCKER_EXEC_PATH, "rm", name], suppress_error=True, check=False)
        raise RuntimeError(f"Virtuoso launch failed: {e}") from e
    except FileNotFoundError as e:
        raise RuntimeError("Docker command not found.") from e
def main() -> int:  # pragma: no cover
    """
    CLI entry point: parse the command line and forward every option to launch_virtuoso().

    Returns:
        0 when the launch completes, 1 when launch_virtuoso() raises
        RuntimeError (the error is printed to stderr).
    """
    args = parse_arguments()

    # Parsed attributes that map one-to-one onto launch_virtuoso() keywords.
    option_names = (
        "name",
        "data_dir",
        "http_port",
        "isql_port",
        "memory",
        "dba_password",
        "detach",
        "wait_ready",
        "enable_write_permissions",
        "force_remove",
        "extra_volumes",
        "network",
        "cpu_limit",
        "virtuoso_version",
        "virtuoso_sha",
        "estimated_db_size_gb",
        "parallel_threads",
    )
    launch_kwargs = {option: getattr(args, option) for option in option_names}

    try:
        launch_virtuoso(**launch_kwargs)
    except RuntimeError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1
    return 0
if __name__ == "__main__":  # pragma: no cover
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())