Coverage for virtuoso_utilities / launch_virtuoso.py: 74%

407 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-04-14 09:16 +0000

1#!/usr/bin/env python3 

2 

3# SPDX-FileCopyrightText: 2025 Arcangelo Massari <arcangelo.massari@unibo.it> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7""" 

8Virtuoso Docker Launcher 

9 

10This script launches an OpenLink Virtuoso database instance using Docker. 

11Configuration parameters can be customized through command-line arguments. 

12""" 

13 

14import argparse 

15import configparser 

16import os 

17import re 

18import subprocess 

19import sys 

20import time 

21from typing import List, Tuple 

22 

23import psutil 

24 

25DEFAULT_WAIT_TIMEOUT = 120 

26DOCKER_EXEC_PATH = "docker" 

27DOCKER_ISQL_PATH_INSIDE_CONTAINER = "isql" 

28 

29# Default values for container configuration 

30DEFAULT_IMAGE = "openlink/virtuoso-opensource-7@sha256:e07868a3db9090400332eaa8ee694b8cf9bf7eebc26db6bbdc3bb92fd30ed010" 

31DEFAULT_CONTAINER_DATA_DIR = "/opt/virtuoso-opensource/database" 

32DEFAULT_MAX_ROWS = 100000 

33 

34VIRTUOSO_MEMORY_PERCENTAGE = 0.85 

35BYTES_PER_BUFFER = 8700 # Each buffer occupies ~8700 bytes (8K page + overhead) according to https://docs.openlinksw.com/virtuoso/ch-server/ 

36 

37from virtuoso_utilities.isql_helpers import run_isql_command 

38 

39# Minimum database size (given in GiB, then converted to bytes) that triggers MaxCheckpointRemap calculation 

40MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB = 1 

41MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP = MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB * 1024**3 

42 

43# Default directories allowed in Virtuoso 

44DEFAULT_DIRS_ALLOWED = {".", "../vad", "/usr/share/proj", "../virtuoso_input"} 

45 

46# Connection error patterns for retry logic 

47CONNECTION_ERROR_PATTERNS = [ 

48 "connection refused", 

49 "connect failed", 

50 "connection failed", 

51 "cannot connect", 

52 "no route to host", 

53] 

54 

55 

def bytes_to_docker_mem_str(num_bytes: int) -> str:
    """
    Convert a number of bytes to a Docker memory string (e.g., "85g", "512m").

    Exact multiples are expressed in the largest unit (g, m, k) that divides
    the value without remainder, so no precision is lost.

    Values that are not a multiple of 1024 are rounded down to the largest
    unit that still yields a non-zero number. Values of at least 1 GiB are
    therefore truncated to whole GiB, while smaller values fall back to MiB,
    KiB or a plain byte count instead of collapsing to "0g".

    Args:
        num_bytes: Size in bytes. Expected to be non-negative.

    Returns:
        str: The size as an integer followed by a "g", "m" or "k" suffix, or
        a bare integer (bytes) when the value is below 1 KiB.
    """
    units = ((1024**3, "g"), (1024**2, "m"), (1024, "k"))

    # Lossless case: the largest unit that divides the value exactly.
    for unit_size, suffix in units:
        if num_bytes % unit_size == 0:
            return f"{num_bytes // unit_size}{suffix}"

    # Lossy case: round down, but never to a unit so large that the result
    # becomes zero.
    for unit_size, suffix in units:
        if num_bytes >= unit_size:
            return f"{num_bytes // unit_size}{suffix}"

    # Below 1 KiB: a bare integer is the exact byte count.
    return str(num_bytes)

72 

73 

def parse_memory_value(memory_str: str) -> int:
    """
    Parse memory value from Docker memory format (e.g., "2g", "4096m") to bytes.

    The value is matched case-insensitively and surrounding whitespace is
    ignored. Accepted suffixes are "b" (bytes), "k", "m" and "g"; a value
    without a suffix is taken as bytes.

    If the string cannot be parsed, a warning is printed to stderr and the
    equivalent of 2 GiB is returned.

    Args:
        memory_str: Memory string in Docker format

    Returns:
        int: Memory size in bytes
    """
    normalized = memory_str.strip().lower()

    match = re.fullmatch(r'(\d+)([bkmg]?)', normalized)
    if not match:
        # Default to 2GB if parsing fails
        print(f"Warning: Could not parse memory string '{normalized}'. Defaulting to 2g.", file=sys.stderr)
        return 2 * 1024 * 1024 * 1024

    digits, unit = match.groups()
    multipliers = {
        "": 1,  # No unit, assume bytes
        "b": 1,
        "k": 1024,
        "m": 1024**2,
        "g": 1024**3,
    }
    return int(digits) * multipliers[unit]

103 

104 

def get_directory_size(directory_path: str) -> int:
    """
    Calculate the total size of all files within a directory.

    The directory is walked recursively. Symbolic links to files are skipped.
    Files whose size cannot be read and sub-directories that cannot be
    scanned are reported as warnings on stderr and left out of the total.

    Args:
        directory_path: The path to the directory.

    Returns:
        Total size in bytes, or 0 if ``directory_path`` is not a directory.
    """
    if not os.path.isdir(directory_path):
        return 0

    def _report_walk_error(error: OSError) -> None:
        # os.walk ignores scan errors unless an onerror callback is given,
        # so without this hook unreadable sub-directories go unreported.
        print(f"Warning: Could not walk directory '{directory_path}': {error}", file=sys.stderr)

    total_size = 0
    try:
        for dirpath, _dirnames, filenames in os.walk(directory_path, onerror=_report_walk_error):
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)
                # skip if it is symbolic link
                if os.path.islink(file_path):
                    continue
                try:
                    total_size += os.path.getsize(file_path)
                except OSError as e:
                    print(f"Warning: Could not get size of file '{file_path}': {e}", file=sys.stderr)
    except OSError as e:
        print(f"Warning: Could not walk directory '{directory_path}': {e}", file=sys.stderr)

    return total_size

132 

133 

def get_optimal_buffer_values(memory_limit: str) -> Tuple[int, int]:
    """
    Derive NumberOfBuffers and MaxDirtyBuffers from a container memory limit.

    The computation is:
    NumberOfBuffers = (MemoryInBytes * VIRTUOSO_MEMORY_PERCENTAGE * 0.66) / BYTES_PER_BUFFER
    MaxDirtyBuffers = NumberOfBuffers * 0.75

    Scaling the limit by VIRTUOSO_MEMORY_PERCENTAGE leaves headroom for
    Virtuoso process overhead so the container is less likely to be
    OOM-killed. Every intermediate result is truncated to an integer.

    Args:
        memory_limit: Memory limit string in Docker format (e.g., "2g", "4096m")

    Returns:
        Tuple[int, int]: (NumberOfBuffers, MaxDirtyBuffers). If the
        calculation raises, a warning is printed to stderr and the fixed pair
        (170000, 130000) is returned instead.
    """
    try:
        usable_bytes = int(parse_memory_value(memory_limit) * VIRTUOSO_MEMORY_PERCENTAGE)
        buffer_count = int((usable_bytes * 0.66) / BYTES_PER_BUFFER)
        dirty_buffer_count = int(buffer_count * 0.75)
    except Exception as e:
        print(f"Warning: Error calculating buffer values: {e}. Using default values.", file=sys.stderr)
        # Default values approximately suitable for 1-2GB RAM if calculation fails
        return 170000, 130000

    return buffer_count, dirty_buffer_count

167 

168 

def calculate_max_checkpoint_remap(size_bytes: int) -> int:
    """
    Compute the MaxCheckpointRemap value for a database of ``size_bytes``.

    The size is divided by 8192 (presumably the database page size - confirm)
    and then by 4; the result is truncated to an integer.

    Args:
        size_bytes: Database size in bytes.

    Returns:
        int: The truncated quotient ``size_bytes / 8192 / 4``.
    """
    units_of_8k = size_bytes / 8192
    return int(units_of_8k / 4)

171 

172 

def get_default_memory() -> str:
    """
    Return the default container memory limit as a Docker memory string.

    The default is two thirds of the total host RAM reported by psutil, but
    never less than 1 GiB. If anything goes wrong while determining it, the
    string "2g" is returned.
    """
    one_gib = 1 * 1024**3
    try:
        two_thirds_of_ram = int(psutil.virtual_memory().total * (2 / 3))
        chosen_bytes = two_thirds_of_ram if two_thirds_of_ram > one_gib else one_gib
        return bytes_to_docker_mem_str(chosen_bytes)
    except Exception:
        return "2g"

180 

181 

def calculate_threading_config(parallel_threads=None):
    """
    Derive thread-related settings from a core count.

    Args:
        parallel_threads: Core count to base the settings on. When falsy
            (None or 0), ``os.cpu_count()`` is used, falling back to 1 if that
            is unavailable.

    Returns:
        dict: ``async_queue_max_threads`` (cores * 1.5, truncated),
        ``threads_per_query`` (cores) and ``max_client_connections``
        (cores * 2).
    """
    if parallel_threads:
        core_count = parallel_threads
    else:
        core_count = os.cpu_count() or 1

    settings = {}
    settings["async_queue_max_threads"] = int(core_count * 1.5)
    settings["threads_per_query"] = core_count
    settings["max_client_connections"] = core_count * 2
    return settings

189 

190 

def calculate_max_query_mem(memory, number_of_buffers):
    """
    Compute the MaxQueryMem setting from the memory left over after buffers.

    The memory occupied by ``number_of_buffers`` buffers
    (``BYTES_PER_BUFFER`` each) is subtracted from the container memory scaled
    by ``VIRTUOSO_MEMORY_PERCENTAGE``; 80% of the remainder is used.

    Args:
        memory: Container memory limit in Docker format (e.g., "2g").
        number_of_buffers: Configured NumberOfBuffers value.

    Returns:
        The value as a Docker-style memory string, or None when the result is
        not positive.
    """
    bytes_held_by_buffers = number_of_buffers * BYTES_PER_BUFFER
    usable_bytes = int(parse_memory_value(memory) * VIRTUOSO_MEMORY_PERCENTAGE)
    query_mem_bytes = int((usable_bytes - bytes_held_by_buffers) * 0.8)
    if query_mem_bytes <= 0:
        return None
    return bytes_to_docker_mem_str(query_mem_bytes)

198 

199 

def get_virt_env_vars(memory, number_of_buffers, max_dirty_buffers, parallel_threads, estimated_db_size_gb=0.0, dirs_allowed=None):
    """
    Build the ``VIRT_*`` environment variables passed to the container.

    Args:
        memory: Container memory limit in Docker format; used for MaxQueryMem.
        number_of_buffers: Value for VIRT_Parameters_NumberOfBuffers.
        max_dirty_buffers: Value for VIRT_Parameters_MaxDirtyBuffers.
        parallel_threads: Core count handed to ``calculate_threading_config``.
        estimated_db_size_gb: Estimated database size. When it corresponds to
            at least ``MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP`` bytes,
            MaxCheckpointRemap is set for both Database and TempDatabase.
        dirs_allowed: Comma-separated DirsAllowed value; omitted when falsy.

    Returns:
        dict: Mapping of environment variable names to string values.

    Side effects:
        Prints informational summaries of the chosen settings to stdout.
    """
    thread_cfg = calculate_threading_config(parallel_threads)
    async_threads = thread_cfg["async_queue_max_threads"]
    query_threads = thread_cfg["threads_per_query"]
    client_connections = thread_cfg["max_client_connections"]

    env_vars = {
        "VIRT_Parameters_NumberOfBuffers": str(number_of_buffers),
        "VIRT_Parameters_MaxDirtyBuffers": str(max_dirty_buffers),
        "VIRT_Parameters_AsyncQueueMaxThreads": str(async_threads),
        "VIRT_Parameters_ThreadsPerQuery": str(query_threads),
        "VIRT_Parameters_MaxClientConnections": str(client_connections),
        "VIRT_HTTPServer_ServerThreads": str(client_connections),
        "VIRT_Parameters_AdjustVectorSize": "0",
        "VIRT_Parameters_VectorSize": "1000",
        "VIRT_Parameters_CheckpointInterval": "1",
        "VIRT_Parameters_ThreadCleanupInterval": "1",
        "VIRT_Parameters_ResourcesCleanupInterval": "1",
    }

    # MaxQueryMem is only emitted when a positive amount is left for queries.
    max_query_mem = calculate_max_query_mem(memory, number_of_buffers)
    if max_query_mem:
        env_vars["VIRT_Parameters_MaxQueryMem"] = max_query_mem
    query_mem_label = max_query_mem or "N/A"

    env_vars.update({
        "VIRT_Client_SQL_QUERY_TIMEOUT": "0",
        "VIRT_Client_SQL_TXN_TIMEOUT": "0",
    })

    if estimated_db_size_gb > 0:
        estimated_bytes = int(estimated_db_size_gb * 1024**3)
        if estimated_bytes >= MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP:
            remap = calculate_max_checkpoint_remap(estimated_bytes)
            for section in ("Database", "TempDatabase"):
                env_vars[f"VIRT_{section}_MaxCheckpointRemap"] = str(remap)
            print(f"Info: Using estimated database size of {estimated_db_size_gb} GB to set MaxCheckpointRemap to {remap}")

    if dirs_allowed:
        env_vars["VIRT_Parameters_DirsAllowed"] = dirs_allowed

    print(f"Info: Threading: AsyncQueueMaxThreads={async_threads}, "
          f"ThreadsPerQuery={query_threads}, "
          f"MaxClientConnections={client_connections}")
    print(f"Info: MaxQueryMem={query_mem_label}, AdjustVectorSize=0, VectorSize=1000, CheckpointInterval=1, ThreadCleanupInterval=1, ResourcesCleanupInterval=1")

    return env_vars

242 

243 

def is_connection_error(stderr):
    """
    Tell whether an error output looks like a connection failure.

    Args:
        stderr: Error text to inspect.

    Returns:
        bool: True if the lower-cased text contains any of
        ``CONNECTION_ERROR_PATTERNS``.
    """
    haystack = stderr.lower()
    for pattern in CONNECTION_ERROR_PATTERNS:
        if pattern in haystack:
            return True
    return False

247 

248 

def create_isql_args(dba_password, docker_container=None):
    """
    Build the argument namespace used for ISQL calls.

    The connection always targets localhost:1111 as user "dba".

    Args:
        dba_password: Password for the dba user.
        docker_container: Container name. When truthy, the namespace carries
            the Docker executable and in-container isql paths and
            ``isql_path`` is None; otherwise the Docker fields are None and
            ``isql_path`` is "isql".

    Returns:
        argparse.Namespace: host, port, user, password, docker_container,
        docker_path, docker_isql_path and isql_path.
    """
    connection = {
        "host": "localhost",
        "port": 1111,
        "user": "dba",
        "password": dba_password,
    }

    if not docker_container:
        return argparse.Namespace(
            **connection,
            docker_container=None,
            docker_path=None,
            docker_isql_path=None,
            isql_path="isql",
        )

    return argparse.Namespace(
        **connection,
        docker_container=docker_container,
        docker_path=DOCKER_EXEC_PATH,
        docker_isql_path=DOCKER_ISQL_PATH_INSIDE_CONTAINER,
        isql_path=None,
    )

271 

272 

def _ensure_ini_section(config, section, ini_path):
    """Add ``section`` to ``config`` if it is missing and announce the addition."""
    if not config.has_section(section):
        config.add_section(section)
        print(f"Info: Added [{section}] section to '{ini_path}'.")


def _update_ini_option(config, section, option, new_value, ini_path):
    """Set ``[section] option`` to ``new_value`` if it differs; return True when it was changed."""
    current_value = config.get(section, option, fallback=None)
    if current_value == new_value:
        return False
    config.set(section, option, new_value)
    print(f"Info: Updating [{section}] {option} from '{current_value}' to '{new_value}' in '{ini_path}'.")
    return True


def update_ini_memory_settings(
    ini_path: str,
    data_dir_path: str,
    number_of_buffers: int = None,
    max_dirty_buffers: int = None,
    dirs_allowed: str = None,
    async_queue_max_threads: int = None,
    threads_per_query: int = None,
    max_client_connections: int = None,
    adjust_vector_size: int = None,
    vector_size: int = None,
    checkpoint_interval: int = None,
    max_query_mem: str = None,
    http_server_threads: int = None,
    thread_cleanup_interval: int = None,
    resources_cleanup_interval: int = None,
):
    """
    Bring an existing virtuoso.ini in line with the requested settings.

    Does nothing (apart from an informational message) when ``ini_path`` does
    not exist. Otherwise the file is read, every setting whose argument is not
    None is compared with the stored value and updated if it differs, and the
    file is rewritten only if at least one value changed.

    Independently of the arguments:
    - ``[Client] SQL_QUERY_TIMEOUT`` and ``SQL_TXN_TIMEOUT`` are forced to "0".
    - ``MaxCheckpointRemap`` in ``[Database]`` and ``[TempDatabase]`` is set
      from the measured size of ``data_dir_path`` when that size reaches
      ``MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP``.

    Args:
        ini_path: Path to virtuoso.ini on the host.
        data_dir_path: Host data directory whose size drives MaxCheckpointRemap.
        number_of_buffers: ``[Parameters] NumberOfBuffers``.
        max_dirty_buffers: ``[Parameters] MaxDirtyBuffers``.
        dirs_allowed: Comma-separated ``[Parameters] DirsAllowed``; compared
            as a set of trimmed entries, so ordering differences alone do not
            trigger an update.
        async_queue_max_threads: ``[Parameters] AsyncQueueMaxThreads``.
        threads_per_query: ``[Parameters] ThreadsPerQuery``.
        max_client_connections: ``[Parameters] MaxClientConnections``.
        adjust_vector_size: ``[Parameters] AdjustVectorSize``.
        vector_size: ``[Parameters] VectorSize``.
        checkpoint_interval: ``[Parameters] CheckpointInterval``.
        max_query_mem: ``[Parameters] MaxQueryMem`` (written as given).
        http_server_threads: ``[HTTPServer] ServerThreads``.
        thread_cleanup_interval: ``[Parameters] ThreadCleanupInterval``.
        resources_cleanup_interval: ``[Parameters] ResourcesCleanupInterval``.

    Returns:
        None. Parse, I/O and unexpected errors are reported on stderr and not
        raised.
    """
    if not os.path.exists(ini_path):
        print(f"Info: virtuoso.ini not found at '{ini_path}'. Likely first run. Skipping settings update.")
        return

    print(f"Info: Checking existing virtuoso.ini at '{ini_path}' for settings update...")
    actual_db_size_bytes = get_directory_size(data_dir_path)

    # Calculate MaxCheckpointRemap if database is large enough
    calculate_remap = actual_db_size_bytes >= MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP
    calculated_remap_value = calculate_max_checkpoint_remap(actual_db_size_bytes) if calculate_remap else None

    config = configparser.ConfigParser(interpolation=None, strict=False)
    config.optionxform = str  # Keep case sensitivity
    made_changes = False

    def normalize_dirs(val):
        # Compare DirsAllowed as a set of trimmed, non-empty entries.
        if val is None:
            return set()
        return {entry.strip() for entry in val.split(',') if entry.strip()}

    try:
        config.read(ini_path, encoding='utf-8')

        # The statement order below determines the order of newly created
        # sections/options in the rewritten file; keep it stable.
        _ensure_ini_section(config, 'Parameters', ini_path)

        if number_of_buffers is not None:
            made_changes |= _update_ini_option(
                config, 'Parameters', 'NumberOfBuffers', str(number_of_buffers), ini_path)

        # Ensure [Client] section has SQL timeouts set to 0
        _ensure_ini_section(config, 'Client', ini_path)
        for timeout_option in ('SQL_QUERY_TIMEOUT', 'SQL_TXN_TIMEOUT'):
            if config.get('Client', timeout_option, fallback=None) != '0':
                config.set('Client', timeout_option, '0')
                print(f"Info: Setting [Client] {timeout_option} to '0' in '{ini_path}'.")
                made_changes = True

        if max_dirty_buffers is not None:
            made_changes |= _update_ini_option(
                config, 'Parameters', 'MaxDirtyBuffers', str(max_dirty_buffers), ini_path)

        if dirs_allowed is not None:
            current_dirs_allowed = config.get('Parameters', 'DirsAllowed', fallback=None)
            if normalize_dirs(current_dirs_allowed) != normalize_dirs(dirs_allowed):
                config.set('Parameters', 'DirsAllowed', dirs_allowed)
                print(f"Info: Updating [Parameters] DirsAllowed from '{current_dirs_allowed}' to '{dirs_allowed}' in '{ini_path}'.")
                made_changes = True

        integer_parameters = (
            ('AsyncQueueMaxThreads', async_queue_max_threads),
            ('ThreadsPerQuery', threads_per_query),
            ('MaxClientConnections', max_client_connections),
            ('AdjustVectorSize', adjust_vector_size),
            ('VectorSize', vector_size),
            ('CheckpointInterval', checkpoint_interval),
        )
        for option, value in integer_parameters:
            if value is not None:
                made_changes |= _update_ini_option(config, 'Parameters', option, str(value), ini_path)

        if max_query_mem is not None:
            # Written as given: MaxQueryMem is already a string such as "2g".
            made_changes |= _update_ini_option(config, 'Parameters', 'MaxQueryMem', max_query_mem, ini_path)

        if http_server_threads is not None:
            _ensure_ini_section(config, 'HTTPServer', ini_path)
            made_changes |= _update_ini_option(
                config, 'HTTPServer', 'ServerThreads', str(http_server_threads), ini_path)

        cleanup_parameters = (
            ('ThreadCleanupInterval', thread_cleanup_interval),
            ('ResourcesCleanupInterval', resources_cleanup_interval),
        )
        for option, value in cleanup_parameters:
            if value is not None:
                made_changes |= _update_ini_option(config, 'Parameters', option, str(value), ini_path)

        # Update MaxCheckpointRemap if database is large enough
        if calculate_remap:
            calculated_remap_str = str(calculated_remap_value)
            for section in ('Database', 'TempDatabase'):
                _ensure_ini_section(config, section, ini_path)
                made_changes |= _update_ini_option(
                    config, section, 'MaxCheckpointRemap', calculated_remap_str, ini_path)
        else:
            print(f"Info: Host data directory '{data_dir_path}' size ({actual_db_size_bytes / (1024**3):.2f} GiB) is below threshold ({MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB} GiB). No changes made to MaxCheckpointRemap in virtuoso.ini.")

        if made_changes:
            # Write changes back with UTF-8 encoding
            with open(ini_path, 'w', encoding='utf-8') as configfile:
                config.write(configfile)
            print(f"Info: Successfully saved changes to '{ini_path}'.")
        else:
            print(f"Info: No changes needed in '{ini_path}'.")

    except configparser.Error as e:
        print(f"Error: Failed to parse or update virtuoso.ini at '{ini_path}': {e}", file=sys.stderr)
    except IOError as e:
        print(f"Error: Failed to read or write virtuoso.ini at '{ini_path}': {e}", file=sys.stderr)
    except Exception as e:
        print(f"Error: An unexpected error occurred while updating virtuoso.ini: {e}", file=sys.stderr)

482 

483 

def parse_arguments() -> argparse.Namespace:  # pragma: no cover
    """
    Parse command-line arguments for Virtuoso Docker launcher.

    The defaults of ``--number-of-buffers`` and ``--max-dirty-buffers`` depend
    on the effective ``--memory`` value, so ``--memory`` is extracted first
    with a minimal preliminary parser. All options are then registered on the
    full parser before it parses anything, which keeps ``--help`` complete.

    When ``--memory`` is not given, the default is two thirds of the host RAM
    reported by psutil (at least 1 GiB); if detection fails, "2g" is used and
    a warning is printed to stderr.

    Returns:
        argparse.Namespace: Parsed command-line arguments
    """
    # Preliminary parse: only find out whether (and how) --memory was given.
    preliminary_parser = argparse.ArgumentParser(add_help=False)
    preliminary_parser.add_argument("--memory", default=None)
    preliminary_args, _ = preliminary_parser.parse_known_args()
    memory_specified = preliminary_args.memory is not None

    # Full parser with all arguments
    parser = argparse.ArgumentParser(
        description="Launch a Virtuoso database using Docker",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # --- Calculate default memory based on host RAM (2/3) ---
    default_memory_str = "2g"  # Fallback default
    if not memory_specified:
        try:
            total_host_ram = psutil.virtual_memory().total
            # 2/3 of total RAM, but never less than 1 GiB
            default_mem_bytes = max(int(total_host_ram * (2/3)), 1 * 1024 * 1024 * 1024)

            default_memory_str = bytes_to_docker_mem_str(default_mem_bytes)
            print(f"Info: Detected {total_host_ram / (1024**3):.1f} GiB total host RAM. "
                  f"Setting default container memory limit to {default_memory_str} (approx. 2/3). "
                  f"Use --memory to override.")
        except Exception as e:
            print(f"Warning: Could not auto-detect host RAM using psutil: {e}. "
                  f"Falling back to default memory limit '{default_memory_str}'.", file=sys.stderr)

    parser.add_argument(
        "--name",
        default="virtuoso",
        help="Name for the Docker container"
    )
    parser.add_argument(
        "--http-port",
        type=int,
        default=8890,
        help="HTTP port to expose Virtuoso on"
    )
    parser.add_argument(
        "--isql-port",
        type=int,
        default=1111,
        help="ISQL port to expose Virtuoso on"
    )

    parser.add_argument(
        "--data-dir",
        default="./virtuoso-data",
        help="Host directory to mount as Virtuoso data directory"
    )

    parser.add_argument(
        "--mount-volume",
        action="append",
        dest="extra_volumes",
        metavar="HOST_PATH:CONTAINER_PATH",
        help="Mount an additional host directory into the container. "
             "Format: /path/on/host:/path/in/container. "
             "Can be specified multiple times."
    )

    parser.add_argument(
        "--memory",
        default=default_memory_str,
        help="Memory limit for the container (e.g., 2g, 4g). "
             f"Defaults to approx. 2/3 of host RAM if psutil is installed, otherwise '{default_memory_str}'."
    )
    parser.add_argument(
        "--cpu-limit",
        type=float,
        default=0,
        help="CPU limit for the container (0 means no limit)"
    )

    parser.add_argument(
        "--dba-password",
        default="dba",
        help="Password for the Virtuoso dba user"
    )

    parser.add_argument(
        "--force-remove",
        action="store_true",
        help="Force removal of existing container with the same name"
    )

    parser.add_argument(
        "--network",
        help="Docker network to connect the container to (must be a pre-existing network)"
    )

    parser.add_argument(
        "--wait-ready",
        action="store_true",
        help="Wait until Virtuoso is ready to accept connections"
    )
    parser.add_argument(
        "--detach",
        action="store_true",
        help="Run container in detached mode"
    )

    parser.add_argument(
        "--enable-write-permissions",
        action="store_true",
        help="Enable write permissions for 'nobody' and 'SPARQL' users. "
             "This makes the database publicly writable. "
             "Forces waiting for the container to be ready."
    )

    parser.add_argument(
        "--estimated-db-size-gb",
        type=float,
        default=0,
        help="Estimated database size in GB. If provided, MaxCheckpointRemap will be preconfigured "
             "based on this estimate rather than measuring existing data."
    )

    parser.add_argument(
        "--virtuoso-version",
        default=None,
        help="Virtuoso Docker image version/tag to use (e.g., 'latest', '7.2.11', '7.2.12'). If not specified, uses the default pinned version."
    )

    parser.add_argument(
        "--virtuoso-sha",
        default=None,
        help="Virtuoso Docker image SHA256 digest to use (e.g., 'sha256:e07868a3db9090400332eaa8ee694b8cf9bf7eebc26db6bbdc3bb92fd30ed010'). Takes precedence over --virtuoso-version."
    )

    parser.add_argument(
        "--parallel-threads",
        type=int,
        default=None,
        help="Maximum parallel threads for query execution. "
             "If not specified, uses all available CPU cores. "
             "Sets AsyncQueueMaxThreads to cores * 1.5 and ThreadsPerQuery to cores."
    )

    # The buffer defaults depend on the effective memory limit. It is taken
    # from the preliminary parse instead of running the full parser here: an
    # early parse would execute the --help action before the two options
    # below exist, leaving them out of the help output.
    effective_memory = preliminary_args.memory if memory_specified else default_memory_str
    optimal_number_of_buffers, optimal_max_dirty_buffers = get_optimal_buffer_values(effective_memory)

    parser.add_argument(
        "--max-dirty-buffers",
        type=int,
        default=optimal_max_dirty_buffers,
        help="Maximum dirty buffers before checkpoint (auto-calculated based on --memory value, requires integer)"
    )
    parser.add_argument(
        "--number-of-buffers",
        type=int,
        default=optimal_number_of_buffers,
        help="Number of buffers (auto-calculated based on --memory value, requires integer)"
    )

    return parser.parse_args()

660 

661 

def check_docker_installed() -> bool:
    """
    Report whether the ``docker`` executable can be run.

    Runs ``docker --version`` with its output captured.

    Returns:
        bool: True if the command exits successfully; False if it fails or
        the executable cannot be found.
    """
    version_cmd = ["docker", "--version"]
    try:
        subprocess.run(
            version_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True
        )
    except (subprocess.SubprocessError, FileNotFoundError):
        return False
    else:
        return True

679 

680 

def check_container_exists(container_name: str) -> bool:
    """
    Report whether a Docker container with the given name exists.

    Lists all containers (running or not) with ``docker ps -a``, filtered by
    the anchored name, and looks for the name in the listing.

    Args:
        container_name: Name of the container to check

    Returns:
        bool: True if the name appears in the command output, False otherwise
    """
    listing_cmd = [
        "docker", "ps", "-a",
        "--filter", f"name=^{container_name}$",
        "--format", "{{.Names}}",
    ]
    completed = subprocess.run(
        listing_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True
    )
    listed_names = completed.stdout.strip()
    return container_name in listed_names

699 

700 

def remove_container(container_name: str) -> bool:
    """
    Force-remove a Docker container via ``docker rm -f``.

    Args:
        container_name: Name of the container to remove

    Returns:
        bool: True if the command exits successfully, False if it raises a
        ``subprocess.SubprocessError`` (e.g. a non-zero exit status)
    """
    removal_cmd = ["docker", "rm", "-f", container_name]
    try:
        subprocess.run(
            removal_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True
        )
    except subprocess.SubprocessError:
        return False
    else:
        return True

721 

722 

def get_docker_image(version: str, sha: str) -> str:
    """
    Resolve the Docker image reference from a version tag or a digest.

    Precedence: an explicit digest wins; otherwise a missing version selects
    ``DEFAULT_IMAGE``; otherwise the version is used as the image tag.

    Args:
        version: Version string (e.g., 'latest', '7.2.11', '7.2.12') or None for default
        sha: SHA256 digest string or None

    Returns:
        str: Full Docker image reference
    """
    repository = "openlink/virtuoso-opensource-7"

    if sha is not None:
        return f"{repository}@{sha}"
    if version is None:
        return DEFAULT_IMAGE
    # 'latest' needs no special case: it is simply used as the tag.
    return f"{repository}:{version}"

742 

743 

def build_docker_run_command(args: argparse.Namespace) -> Tuple[List[str], set]:
    """
    Build the Docker run command based on provided arguments.

    Creates the host data directory if it does not exist yet.

    Extra volumes are given as ``HOST_PATH:CONTAINER_PATH`` with an optional
    trailing ``:OPTIONS`` part (e.g. ``:ro``). The full spec is passed to
    ``docker run -v`` with the host path made absolute, while only the
    container path (without options) is added to DirsAllowed. Specs without a
    host or container path are skipped with a warning.

    Args:
        args: Command-line arguments

    Returns:
        Tuple[List[str], set]:
            - Command parts for subprocess.run
            - Set of unique container paths intended for DirsAllowed
    """
    host_data_dir_abs = os.path.abspath(args.data_dir)
    os.makedirs(host_data_dir_abs, exist_ok=True)

    cmd = [DOCKER_EXEC_PATH, "run"]

    # Non-detached containers are removed automatically when they exit.
    if not args.detach:
        cmd.append("--rm")

    cmd.extend(["--name", args.name])

    # Add user mapping to run as the host user
    try:
        cmd.extend(["--user", f"{os.getuid()}:{os.getgid()}"])
    except AttributeError:
        print("Warning: os.getuid/os.getgid not available on this system (likely Windows). Skipping user mapping.", file=sys.stderr)

    cmd.extend(["-p", f"{args.http_port}:8890"])
    cmd.extend(["-p", f"{args.isql_port}:1111"])

    if args.network:
        cmd.extend(["--network", args.network])

    container_data_dir_path = DEFAULT_CONTAINER_DATA_DIR
    cmd.extend(["-v", f"{host_data_dir_abs}:{container_data_dir_path}"])

    # Start with default Virtuoso paths
    paths_to_allow_in_container = set(DEFAULT_DIRS_ALLOWED)
    paths_to_allow_in_container.add(container_data_dir_path)

    # Mount additional volumes and allow their container paths
    for volume_spec in args.extra_volumes or []:
        host_path, separator, container_spec = volume_spec.partition(':')
        # Drop a trailing ":ro"/":rw"/... so mount options never end up in DirsAllowed.
        container_path = container_spec.split(':', 1)[0]
        if not separator or not host_path or not container_path:
            print(f"Warning: Ignoring malformed volume specification '{volume_spec}'. "
                  f"Expected HOST_PATH:CONTAINER_PATH.", file=sys.stderr)
            continue

        cmd.extend(["-v", f"{os.path.abspath(host_path)}:{container_spec}"])

        container_path_abs = container_path if container_path.startswith('/') else '/' + container_path
        paths_to_allow_in_container.add(container_path_abs)
        print(f"Info: Adding mounted volume path '{container_path_abs}' to DirsAllowed.")

    memory_bytes = parse_memory_value(args.memory)
    reservation_bytes = int(memory_bytes * VIRTUOSO_MEMORY_PERCENTAGE)
    reservation_str = bytes_to_docker_mem_str(reservation_bytes)
    cmd.extend(["--memory-reservation", reservation_str])
    cmd.extend(["--memory", args.memory])
    if args.cpu_limit > 0:
        cmd.extend(["--cpus", str(args.cpu_limit)])

    # NOTE(review): DBA_PASSWORD is passed via -e and is therefore visible in
    # the process list and in any log of the command line.
    env_vars = {
        "DBA_PASSWORD": args.dba_password,
        "VIRT_Parameters_ResultSetMaxRows": str(DEFAULT_MAX_ROWS),
        "VIRT_SPARQL_DefaultQuery": "SELECT (COUNT(*) AS ?quadCount) WHERE { GRAPH ?g { ?s ?p ?o } }",
    }

    virt_env_vars = get_virt_env_vars(
        memory=args.memory,
        number_of_buffers=args.number_of_buffers,
        max_dirty_buffers=args.max_dirty_buffers,
        parallel_threads=args.parallel_threads,
        estimated_db_size_gb=args.estimated_db_size_gb,
        # Sorted so the generated command is identical from run to run.
        dirs_allowed=",".join(sorted(paths_to_allow_in_container)),
    )
    env_vars.update(virt_env_vars)

    for key, value in env_vars.items():
        cmd.extend(["-e", f"{key}={value}"])

    if args.detach:
        cmd.append("-d")

    # Append image name
    docker_image = get_docker_image(args.virtuoso_version, args.virtuoso_sha)
    cmd.append(docker_image)

    return cmd, paths_to_allow_in_container

839 

840 

def wait_for_virtuoso_ready(
    dba_password: str,
    docker_container: str = None,
    timeout: int = DEFAULT_WAIT_TIMEOUT,
    poll_interval: int = 3,
) -> bool:
    """
    Poll Virtuoso with an ISQL ``status();`` call until it answers.

    Args:
        dba_password: DBA password used to build the ISQL arguments.
        docker_container: Container name forwarded to create_isql_args().
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between two attempts.

    Returns:
        True as soon as the status command succeeds. False when ISQL fails
        with anything other than a connection error, or when the timeout
        expires without a successful answer.
    """
    print(f"Waiting for Virtuoso to be ready (timeout: {timeout}s)...")
    start_time = time.time()
    isql_args = create_isql_args(dba_password, docker_container)

    # Progress is reported at most once per 10-second window. A threshold is
    # used instead of `elapsed % 10 == 0` because polls rarely land exactly on
    # a multiple of ten seconds, which made the progress output erratic.
    progress_step = 10
    next_progress_at = 0

    while time.time() - start_time < timeout:
        try:
            success, _, stderr = run_isql_command(isql_args, sql_command="status();")
            if success:
                print("Virtuoso is ready.")
                return True
            if is_connection_error(stderr):
                # Server not accepting connections yet: keep polling.
                elapsed = int(time.time() - start_time)
                if elapsed >= next_progress_at:
                    print(f" Waiting for Virtuoso... ({elapsed}s elapsed)")
                    next_progress_at = (elapsed // progress_step + 1) * progress_step
            else:
                # Any other ISQL failure is treated as definitive.
                print(f"ISQL check failed: {stderr}", file=sys.stderr)
                return False
            time.sleep(poll_interval)
        except Exception as e:
            # Best effort: report the problem and retry after a longer pause.
            print(f"Warning: Error in readiness check: {e}", file=sys.stderr)
            time.sleep(poll_interval + 2)

    print(f"Timeout ({timeout}s) waiting for Virtuoso.", file=sys.stderr)
    return False

871 

872 

def _mask_docker_secrets(cmd: List[str]) -> List[str]:
    """Return a copy of cmd that is safe to log, with the DBA password value masked."""
    return [
        "DBA_PASSWORD=***" if part.startswith("DBA_PASSWORD=") else part
        for part in cmd
    ]


def run_docker_command(cmd: List[str], capture_output=False, check=True, suppress_error=False):
    """
    Helper to run Docker commands and handle errors.

    Args:
        cmd: Full command line as a list of arguments (executed without a shell).
        capture_output: If True, stdout/stderr are captured and available on
            the returned result; otherwise they go to this process's
            sys.stdout / sys.stderr.
        check: If True, a non-zero exit status raises CalledProcessError.
        suppress_error: If True, no diagnostic is printed when the command
            fails; the exception is still re-raised.

    Returns:
        The subprocess.CompletedProcess of the executed command.

    Raises:
        subprocess.CalledProcessError: Non-zero exit status while check is True.
        FileNotFoundError: The executable could not be found.
    """
    # The command may carry "-e DBA_PASSWORD=<value>"; never echo the value
    # into logs. Only the printed text is masked, the executed command is not.
    loggable_cmd = " ".join(_mask_docker_secrets(cmd))
    print(f"Executing: {loggable_cmd}")
    try:
        return subprocess.run(
            cmd,
            stdout=subprocess.PIPE if capture_output else sys.stdout,
            stderr=subprocess.PIPE if capture_output else sys.stderr,
            text=True,
            check=check,
        )
    except subprocess.CalledProcessError as e:
        if not suppress_error:
            # str(e) embeds the raw command (password included), so the
            # diagnostic is rebuilt from the masked command line instead.
            print(
                f"Error executing Docker command: exit status {e.returncode} from: {loggable_cmd}",
                file=sys.stderr,
            )
            if capture_output:
                print(f"Stderr: {e.stderr}", file=sys.stderr)
                print(f"Stdout: {e.stdout}", file=sys.stderr)
        raise
    except FileNotFoundError:
        if not suppress_error:
            print("Error: 'docker' command not found. Make sure Docker is installed and in your PATH.", file=sys.stderr)
        raise

896 

897 

def grant_write_permissions(dba_password: str, docker_container: str = None) -> bool:
    """
    Run the two ISQL statements that enable SPARQL writes.

    Both statements are always attempted, even when the first one fails.

    Args:
        dba_password: DBA password used to build the ISQL arguments.
        docker_container: Container name forwarded to create_isql_args().

    Returns:
        True only if both statements succeeded.
    """
    print("Granting write permissions...")
    isql_args = create_isql_args(dba_password, docker_container)

    # (SQL statement, message on success, warning prefix on failure)
    steps = (
        (
            "DB.DBA.RDF_DEFAULT_USER_PERMS_SET('nobody', 7);",
            " Set permissions for 'nobody' user.",
            " Warning: Failed to set 'nobody' permissions: ",
        ),
        (
            "DB.DBA.USER_GRANT_ROLE('SPARQL', 'SPARQL_UPDATE');",
            " Granted SPARQL_UPDATE role to 'SPARQL' user.",
            " Warning: Failed to grant SPARQL_UPDATE: ",
        ),
    )

    outcomes = []
    for statement, ok_message, failure_prefix in steps:
        ok, _, error_text = run_isql_command(isql_args, sql_command=statement)
        if ok:
            print(ok_message)
        else:
            print(f"{failure_prefix}{error_text}", file=sys.stderr)
        outcomes.append(ok)

    nobody_ok, sparql_ok = outcomes
    return nobody_ok and sparql_ok

919 

920 

def _default_memory_limit() -> str:  # pragma: no cover
    """Return a Docker memory string for 2/3 of host RAM (at least 1 GiB), or "2g" if that fails."""
    try:
        total_host_ram = psutil.virtual_memory().total
        default_mem_bytes = max(int(total_host_ram * (2/3)), 1 * 1024**3)
        return bytes_to_docker_mem_str(default_mem_bytes)
    except Exception:
        # Best effort: any failure while probing the host falls back to a fixed limit.
        return "2g"


def _remove_stale_container(name: str, force_remove: bool) -> None:  # pragma: no cover
    """Remove an existing container called `name`, refusing to touch a running one unless forced."""
    if not check_container_exists(name):
        return

    result = subprocess.run(
        [DOCKER_EXEC_PATH, "ps", "--filter", f"name=^{name}$", "--format", "{{.Status}}"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
    # `docker ps` (without -a) only lists running containers.
    is_running = "Up" in result.stdout

    if force_remove:
        print(f"Container '{name}' already exists. Forcing removal...")
        if not remove_container(name):
            raise RuntimeError(f"Failed to remove existing container '{name}'")
    elif is_running:
        raise RuntimeError(f"Container '{name}' is already running. Stop it first or use force_remove=True.")
    else:
        print(f"Container '{name}' exists but is stopped. Removing...")
        if not remove_container(name):
            raise RuntimeError(f"Failed to remove existing stopped container '{name}'")


def launch_virtuoso(  # pragma: no cover
    name: str = "virtuoso",
    data_dir: str = "./virtuoso-data",
    http_port: int = 8890,
    isql_port: int = 1111,
    memory: str = None,
    dba_password: str = "dba",
    detach: bool = True,
    wait_ready: bool = True,
    enable_write_permissions: bool = False,
    force_remove: bool = False,
    extra_volumes: list = None,
    network: str = None,
    cpu_limit: float = 0,
    virtuoso_version: str = None,
    virtuoso_sha: str = None,
    estimated_db_size_gb: float = 0,
    parallel_threads: int = None,
) -> None:
    """
    Launch Virtuoso Docker container.

    The steps are, in order: derive memory/buffer settings, build the
    ``docker run`` command, write the tuned settings to ``virtuoso.ini`` in
    the data directory, remove any stale container with the same name, start
    the container and (in detached mode) optionally wait for readiness and
    grant write permissions.

    Args:
        name: Container name
        data_dir: Host directory for Virtuoso data
        http_port: HTTP port to expose
        isql_port: ISQL port to expose
        memory: Memory limit (e.g., "4g"). Auto-calculated from host RAM if None.
        dba_password: DBA password
        detach: Run in detached mode
        wait_ready: Wait for Virtuoso to be ready
        enable_write_permissions: Enable SPARQL write permissions for 'nobody' and 'SPARQL' users
        force_remove: Force remove existing container with same name
        extra_volumes: Additional volumes to mount (list of "host:container" strings)
        network: Docker network to connect
        cpu_limit: CPU limit (0 = no limit)
        virtuoso_version: Docker image version tag
        virtuoso_sha: Docker image SHA digest (takes precedence over version)
        estimated_db_size_gb: Estimated DB size for MaxCheckpointRemap config
        parallel_threads: Max parallel threads for query execution. If None, uses all CPU cores.

    Raises:
        RuntimeError: If Docker is not installed or launch fails
    """
    if not check_docker_installed():
        raise RuntimeError("Docker command not found. Please install Docker.")

    if memory is None:
        memory = _default_memory_limit()

    number_of_buffers, max_dirty_buffers = get_optimal_buffer_values(memory)

    # build_docker_run_command() consumes an argparse-style namespace.
    args = argparse.Namespace(
        name=name,
        data_dir=data_dir,
        http_port=http_port,
        isql_port=isql_port,
        memory=memory,
        dba_password=dba_password,
        detach=detach,
        wait_ready=wait_ready,
        enable_write_permissions=enable_write_permissions,
        force_remove=force_remove,
        extra_volumes=extra_volumes,
        network=network,
        cpu_limit=cpu_limit,
        virtuoso_version=virtuoso_version,
        virtuoso_sha=virtuoso_sha,
        estimated_db_size_gb=estimated_db_size_gb,
        number_of_buffers=number_of_buffers,
        max_dirty_buffers=max_dirty_buffers,
        parallel_threads=parallel_threads,
    )

    host_data_dir_abs = os.path.abspath(data_dir)
    ini_file_path = os.path.join(host_data_dir_abs, "virtuoso.ini")

    docker_cmd, unique_paths_to_allow = build_docker_run_command(args)
    dirs_allowed_str = ",".join(unique_paths_to_allow) if unique_paths_to_allow else None

    threading = calculate_threading_config(parallel_threads)
    max_query_mem_value = calculate_max_query_mem(memory, number_of_buffers)

    update_ini_memory_settings(
        ini_path=ini_file_path,
        data_dir_path=host_data_dir_abs,
        number_of_buffers=number_of_buffers,
        max_dirty_buffers=max_dirty_buffers,
        dirs_allowed=dirs_allowed_str,
        async_queue_max_threads=threading["async_queue_max_threads"],
        threads_per_query=threading["threads_per_query"],
        max_client_connections=threading["max_client_connections"],
        adjust_vector_size=0,
        vector_size=1000,
        checkpoint_interval=1,
        max_query_mem=max_query_mem_value,
        http_server_threads=threading["max_client_connections"],
        thread_cleanup_interval=1,
        resources_cleanup_interval=1,
    )

    _remove_stale_container(name, force_remove)

    try:
        # Always check the exit status. In detached mode `docker run -d`
        # returns as soon as the container is started, so a non-zero status
        # means the start itself failed. Ignoring it would skip the cleanup
        # below and report a successful launch for a container that never ran.
        run_docker_command(docker_cmd, check=True)

        should_wait = wait_ready or enable_write_permissions

        if detach and should_wait:
            print("Waiting for Virtuoso readiness...")
            ready = wait_for_virtuoso_ready(dba_password, docker_container=name)
            if not ready:
                raise RuntimeError("Virtuoso readiness check timed out or failed.")

            # Permissions can only be granted once the server answers ISQL.
            if enable_write_permissions:
                if not grant_write_permissions(dba_password, docker_container=name):
                    print("Warning: One or more commands to enable write permissions failed.", file=sys.stderr)

        print(f"Virtuoso launched successfully on http://localhost:{http_port}/sparql")

    except subprocess.CalledProcessError as e:
        # A detached start can leave a created-but-broken container behind.
        if detach and check_container_exists(name):
            run_docker_command([DOCKER_EXEC_PATH, "stop", name], suppress_error=True, check=False)
            run_docker_command([DOCKER_EXEC_PATH, "rm", name], suppress_error=True, check=False)
        raise RuntimeError(f"Virtuoso launch failed: {e}") from e
    except FileNotFoundError as e:
        raise RuntimeError("Docker command not found.") from e

1072 

1073 

def main() -> int:  # pragma: no cover
    """
    CLI entry point that parses arguments and calls launch_virtuoso().

    Returns:
        0 when the launch completes, 1 when launch_virtuoso() raises a
        RuntimeError (the message is printed to stderr).
    """
    cli_args = parse_arguments()

    # Parsed options forwarded verbatim as keyword arguments of the same name.
    forwarded_options = (
        "name",
        "data_dir",
        "http_port",
        "isql_port",
        "memory",
        "dba_password",
        "detach",
        "wait_ready",
        "enable_write_permissions",
        "force_remove",
        "extra_volumes",
        "network",
        "cpu_limit",
        "virtuoso_version",
        "virtuoso_sha",
        "estimated_db_size_gb",
        "parallel_threads",
    )
    launch_kwargs = {option: getattr(cli_args, option) for option in forwarded_options}

    try:
        launch_virtuoso(**launch_kwargs)
    except RuntimeError as error:
        print(f"Error: {error}", file=sys.stderr)
        return 1
    return 0

1104 

1105 

# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())