Coverage for virtuoso_utilities / launch_virtuoso.py: 74%

407 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-15 14:45 +0000

1#!/usr/bin/env python3 

2""" 

3Virtuoso Docker Launcher 

4 

5This script launches an OpenLink Virtuoso database instance using Docker. 

6Configuration parameters can be customized through command-line arguments. 

7""" 

8 

9import argparse 

10import configparser 

11import os 

12import re 

13import subprocess 

14import sys 

15import time 

16from typing import List, Tuple 

17 

18import psutil 

19 

# Default readiness timeout (seconds) and the executables used to reach Virtuoso.
DEFAULT_WAIT_TIMEOUT = 120
DOCKER_EXEC_PATH = "docker"
DOCKER_ISQL_PATH_INSIDE_CONTAINER = "isql"

# Default values for container configuration.
# The image is pinned by digest so the default launch is reproducible.
DEFAULT_IMAGE = (
    "openlink/virtuoso-opensource-7"
    "@sha256:e07868a3db9090400332eaa8ee694b8cf9bf7eebc26db6bbdc3bb92fd30ed010"
)
DEFAULT_CONTAINER_DATA_DIR = "/opt/virtuoso-opensource/database"
DEFAULT_MAX_ROWS = 100_000

# Fraction of the container memory limit that is budgeted for Virtuoso itself.
VIRTUOSO_MEMORY_PERCENTAGE = 0.85
# Each buffer occupies ~8700 bytes (8K page + overhead) according to
# https://docs.openlinksw.com/virtuoso/ch-server/
BYTES_PER_BUFFER = 8700

31 

32from virtuoso_utilities.isql_helpers import run_isql_command 

33 

# Minimum database size (expressed in GiB, then in bytes) that triggers the
# MaxCheckpointRemap calculation.
MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB = 1
MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP = MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB * (1 << 30)

# Directories Virtuoso is allowed to access by default.
DEFAULT_DIRS_ALLOWED = {
    ".",
    "../vad",
    "/usr/share/proj",
    "../virtuoso_input",
}

# Lower-case substrings of ISQL stderr output that indicate the server is not
# accepting connections yet (used by the retry logic).
CONNECTION_ERROR_PATTERNS = [
    "connection refused",
    "connect failed",
    "connection failed",
    "cannot connect",
    "no route to host",
]

49 

50 

def bytes_to_docker_mem_str(num_bytes: int) -> str:
    """
    Convert a number of bytes to a Docker memory string (e.g., "85g", "512m").

    Exact multiples are expressed in the largest unit (g, m, k) that divides
    the value evenly. Values that are not an exact multiple of 1024 are
    rounded down to the largest unit that still yields a non-zero number, so
    a sub-GiB value is never reported as "0g". Values below 1 KiB are
    returned as a plain byte count without a suffix.

    Args:
        num_bytes: Size in bytes.

    Returns:
        str: The size as "<integer><unit>" or, below 1 KiB, "<integer>".
    """
    units = (("g", 1024**3), ("m", 1024**2), ("k", 1024))

    # Lossless case: largest unit that divides the value exactly.
    for suffix, unit_size in units:
        if num_bytes % unit_size == 0:
            return f"{num_bytes // unit_size}{suffix}"

    # Lossy case: prefer GiB for consistency, but drop to a smaller unit
    # instead of truncating the value down to zero.
    for suffix, unit_size in units:
        if num_bytes >= unit_size:
            return f"{int(num_bytes // unit_size)}{suffix}"

    # Fewer than 1024 bytes: a bare number is interpreted as bytes.
    return str(num_bytes)

67 

68 

def parse_memory_value(memory_str: str) -> int:
    """
    Parse memory value from Docker memory format (e.g., "2g", "4096m") to bytes.

    The value is a non-negative integer optionally followed by one of the
    unit suffixes b, k, m or g (case-insensitive). Surrounding whitespace is
    ignored. A value without a suffix is taken as bytes.

    Args:
        memory_str: Memory string in Docker format

    Returns:
        int: Memory size in bytes. If the string cannot be parsed, a warning
        is printed to stderr and 2 GiB is returned.
    """
    multipliers = {
        "": 1,  # no unit: plain bytes
        "b": 1,
        "k": 1024,
        "m": 1024**2,
        "g": 1024**3,
    }

    memory_str = memory_str.lower()

    match = re.fullmatch(r'(\d+)([bkmg]?)', memory_str.strip())
    if not match:
        # Default to 2GB if parsing fails
        print(f"Warning: Could not parse memory string '{memory_str}'. Defaulting to 2g.", file=sys.stderr)
        return 2 * 1024 * 1024 * 1024

    value, unit = match.groups()
    return int(value) * multipliers[unit]

98 

99 

def get_directory_size(directory_path: str) -> int:
    """
    Sum the sizes of all regular (non-symlink) files below a directory.

    Args:
        directory_path: The path to the directory.

    Returns:
        Total size in bytes; 0 when the path is not a directory. Files whose
        size cannot be read are skipped with a warning on stderr.
    """
    if not os.path.isdir(directory_path):
        return 0

    size_in_bytes = 0
    try:
        for root, _subdirs, names in os.walk(directory_path):
            for name in names:
                file_path = os.path.join(root, name)
                # Symbolic links are not counted.
                if os.path.islink(file_path):
                    continue
                try:
                    size_in_bytes += os.path.getsize(file_path)
                except OSError as e:
                    print(f"Warning: Could not get size of file '{file_path}': {e}", file=sys.stderr)
    except OSError as e:
        print(f"Warning: Could not walk directory '{directory_path}': {e}", file=sys.stderr)

    return size_in_bytes

127 

128 

def get_optimal_buffer_values(memory_limit: str) -> Tuple[int, int]:
    """
    Derive NumberOfBuffers and MaxDirtyBuffers from a container memory limit.

    The computation is:
        usable          = MemoryInBytes * VIRTUOSO_MEMORY_PERCENTAGE
        NumberOfBuffers = (usable * 0.66) / BYTES_PER_BUFFER
        MaxDirtyBuffers = NumberOfBuffers * 0.75

    Scaling the limit by VIRTUOSO_MEMORY_PERCENTAGE first leaves headroom
    for Virtuoso process overhead and helps prevent container OOM crashes.

    Args:
        memory_limit: Memory limit string in Docker format (e.g., "2g", "4096m")

    Returns:
        Tuple[int, int]: Calculated values for NumberOfBuffers and
        MaxDirtyBuffers. If the calculation raises, a warning is printed to
        stderr and (170000, 130000) is returned.
    """
    try:
        usable_bytes = int(parse_memory_value(memory_limit) * VIRTUOSO_MEMORY_PERCENTAGE)
        buffer_count = int((usable_bytes * 0.66) / BYTES_PER_BUFFER)
        dirty_buffer_cap = int(buffer_count * 0.75)
        return buffer_count, dirty_buffer_cap
    except Exception as e:
        print(f"Warning: Error calculating buffer values: {e}. Using default values.", file=sys.stderr)
        # Default values approximately suitable for 1-2GB RAM if calculation fails
        return 170000, 130000

162 

163 

def calculate_max_checkpoint_remap(size_bytes: int) -> int:
    """
    Compute the MaxCheckpointRemap value for a database of the given size.

    Args:
        size_bytes: Database size in bytes.

    Returns:
        int: One quarter of the number of 8192-byte pages in ``size_bytes``,
        truncated to an integer.
    """
    page_count = size_bytes / 8192
    return int(page_count / 4)

166 

167 

def get_default_memory() -> str:
    """
    Pick a default container memory limit from the host's RAM.

    Returns:
        str: Two thirds of the total RAM reported by psutil (but at least
        1 GiB) as a Docker memory string, or "2g" if anything goes wrong.
    """
    one_gib = 1 * 1024**3
    try:
        host_ram_bytes = psutil.virtual_memory().total
        two_thirds_of_ram = int(host_ram_bytes * (2 / 3))
        chosen_bytes = max(two_thirds_of_ram, one_gib)
        return bytes_to_docker_mem_str(chosen_bytes)
    except Exception:
        return "2g"

175 

176 

def calculate_threading_config(parallel_threads=None):
    """
    Derive Virtuoso threading settings from a thread/core count.

    Args:
        parallel_threads: Number of threads to size for. When falsy (None or
            0), ``os.cpu_count()`` is used, falling back to 1.

    Returns:
        dict: ``async_queue_max_threads`` (1.5x the count, truncated),
        ``threads_per_query`` (the count) and ``max_client_connections``
        (2x the count).
    """
    cores = parallel_threads or os.cpu_count() or 1

    config = {}
    config["async_queue_max_threads"] = int(cores * 1.5)
    config["threads_per_query"] = cores
    config["max_client_connections"] = cores * 2
    return config

184 

185 

def calculate_max_query_mem(memory, number_of_buffers):
    """
    Compute the MaxQueryMem setting from what the buffer pool leaves over.

    Args:
        memory: Container memory limit in Docker format (e.g., "2g").
        number_of_buffers: Configured NumberOfBuffers value.

    Returns:
        A Docker-style memory string for 80% of the memory left after
        subtracting the buffer pool from the Virtuoso memory budget, or
        None when nothing is left.
    """
    pool_bytes = number_of_buffers * BYTES_PER_BUFFER
    budget_bytes = int(parse_memory_value(memory) * VIRTUOSO_MEMORY_PERCENTAGE)
    query_bytes = int((budget_bytes - pool_bytes) * 0.8)

    if query_bytes <= 0:
        return None
    return bytes_to_docker_mem_str(query_bytes)

193 

194 

def get_virt_env_vars(memory, number_of_buffers, max_dirty_buffers, parallel_threads, estimated_db_size_gb=0.0, dirs_allowed=None):
    """
    Build the VIRT_* environment variables that configure Virtuoso.

    Args:
        memory: Container memory limit in Docker format (e.g., "2g").
        number_of_buffers: Value for NumberOfBuffers.
        max_dirty_buffers: Value for MaxDirtyBuffers.
        parallel_threads: Thread count passed to calculate_threading_config.
        estimated_db_size_gb: Estimated database size; when it reaches the
            checkpoint-remap threshold, MaxCheckpointRemap is set for both
            the main and the temp database.
        dirs_allowed: Comma-separated DirsAllowed value; omitted when falsy.

    Returns:
        dict: Environment variable names mapped to string values.
    """
    thread_cfg = calculate_threading_config(parallel_threads)
    async_threads = thread_cfg["async_queue_max_threads"]
    per_query_threads = thread_cfg["threads_per_query"]
    client_connections = thread_cfg["max_client_connections"]

    env_vars = {
        "VIRT_Parameters_NumberOfBuffers": str(number_of_buffers),
        "VIRT_Parameters_MaxDirtyBuffers": str(max_dirty_buffers),
        "VIRT_Parameters_AsyncQueueMaxThreads": str(async_threads),
        "VIRT_Parameters_ThreadsPerQuery": str(per_query_threads),
        "VIRT_Parameters_MaxClientConnections": str(client_connections),
        # The HTTP server gets as many threads as there are client connections.
        "VIRT_HTTPServer_ServerThreads": str(client_connections),
        "VIRT_Parameters_AdjustVectorSize": "0",
        "VIRT_Parameters_VectorSize": "1000",
        "VIRT_Parameters_CheckpointInterval": "1",
        "VIRT_Parameters_ThreadCleanupInterval": "1",
        "VIRT_Parameters_ResourcesCleanupInterval": "1",
    }

    max_query_mem = calculate_max_query_mem(memory, number_of_buffers)
    if max_query_mem:
        env_vars["VIRT_Parameters_MaxQueryMem"] = max_query_mem
    shown_query_mem = max_query_mem or "N/A"

    env_vars["VIRT_Client_SQL_QUERY_TIMEOUT"] = "0"
    env_vars["VIRT_Client_SQL_TXN_TIMEOUT"] = "0"

    if estimated_db_size_gb > 0:
        estimated_size_bytes = int(estimated_db_size_gb * 1024**3)
        if estimated_size_bytes >= MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP:
            max_checkpoint_remap = calculate_max_checkpoint_remap(estimated_size_bytes)
            for section in ("Database", "TempDatabase"):
                env_vars[f"VIRT_{section}_MaxCheckpointRemap"] = str(max_checkpoint_remap)
            print(f"Info: Using estimated database size of {estimated_db_size_gb} GB to set MaxCheckpointRemap to {max_checkpoint_remap}")

    if dirs_allowed:
        env_vars["VIRT_Parameters_DirsAllowed"] = dirs_allowed

    print(f"Info: Threading: AsyncQueueMaxThreads={async_threads}, "
          f"ThreadsPerQuery={per_query_threads}, "
          f"MaxClientConnections={client_connections}")
    print(f"Info: MaxQueryMem={shown_query_mem}, AdjustVectorSize=0, VectorSize=1000, CheckpointInterval=1, ThreadCleanupInterval=1, ResourcesCleanupInterval=1")

    return env_vars

237 

238 

def is_connection_error(stderr):
    """
    Tell whether ISQL stderr output looks like a connection failure.

    Args:
        stderr: Error output text to inspect.

    Returns:
        bool: True if any CONNECTION_ERROR_PATTERNS entry occurs in the
        text, compared case-insensitively.
    """
    haystack = stderr.lower()
    for pattern in CONNECTION_ERROR_PATTERNS:
        if pattern in haystack:
            return True
    return False

242 

243 

def create_isql_args(dba_password, docker_container=None):
    """
    Build the argument namespace used to run ISQL as the ``dba`` user.

    Args:
        dba_password: Password for the dba user.
        docker_container: Container name. When truthy, ISQL is run through
            Docker inside that container; otherwise a local ``isql`` is used.

    Returns:
        argparse.Namespace: Connection settings (localhost:1111) plus either
        the Docker fields or ``isql_path``; the unused fields are None.
    """
    via_docker = bool(docker_container)
    return argparse.Namespace(
        host="localhost",
        port=1111,
        user="dba",
        password=dba_password,
        docker_container=docker_container if via_docker else None,
        docker_path=DOCKER_EXEC_PATH if via_docker else None,
        docker_isql_path=DOCKER_ISQL_PATH_INSIDE_CONTAINER if via_docker else None,
        isql_path=None if via_docker else "isql",
    )

266 

267 

def _ensure_ini_section(config, section, ini_path):
    """Add ``section`` to ``config`` and log it when the section is missing."""
    if not config.has_section(section):
        config.add_section(section)
        print(f"Info: Added [{section}] section to '{ini_path}'.")


def _set_ini_option(config, section, option, new_value, ini_path):
    """Set ``[section] option`` to ``new_value`` if it differs; return True when it changed."""
    current_value = config.get(section, option, fallback=None)
    if current_value == new_value:
        return False
    config.set(section, option, new_value)
    print(f"Info: Updating [{section}] {option} from '{current_value}' to '{new_value}' in '{ini_path}'.")
    return True


def _split_dirs_allowed(value):
    """Return the set of non-empty, stripped entries of a comma-separated DirsAllowed value."""
    if value is None:
        return set()
    return {entry.strip() for entry in value.split(',') if entry.strip()}


def update_ini_memory_settings(
    ini_path: str,
    data_dir_path: str,
    number_of_buffers: int = None,
    max_dirty_buffers: int = None,
    dirs_allowed: str = None,
    async_queue_max_threads: int = None,
    threads_per_query: int = None,
    max_client_connections: int = None,
    adjust_vector_size: int = None,
    vector_size: int = None,
    checkpoint_interval: int = None,
    max_query_mem: str = None,
    http_server_threads: int = None,
    thread_cleanup_interval: int = None,
    resources_cleanup_interval: int = None,
):
    """
    Bring an existing virtuoso.ini in line with the requested settings.

    Only options whose argument is not None are touched, and the file is
    rewritten only when at least one value actually changes. Independently
    of the arguments, [Client] SQL_QUERY_TIMEOUT and SQL_TXN_TIMEOUT are
    forced to '0', and MaxCheckpointRemap is set in [Database] and
    [TempDatabase] when the data directory is at least
    MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP bytes large.

    NOTE: the file is re-serialized by configparser, so comments present in
    the original ini are not preserved when changes are saved.

    Args:
        ini_path: Path to virtuoso.ini. If it does not exist nothing is done.
        data_dir_path: Directory whose total file size decides whether
            MaxCheckpointRemap is written.
        number_of_buffers: [Parameters] NumberOfBuffers.
        max_dirty_buffers: [Parameters] MaxDirtyBuffers.
        dirs_allowed: [Parameters] DirsAllowed as a comma-separated string;
            compared to the current value as a set of entries.
        async_queue_max_threads: [Parameters] AsyncQueueMaxThreads.
        threads_per_query: [Parameters] ThreadsPerQuery.
        max_client_connections: [Parameters] MaxClientConnections.
        adjust_vector_size: [Parameters] AdjustVectorSize.
        vector_size: [Parameters] VectorSize.
        checkpoint_interval: [Parameters] CheckpointInterval.
        max_query_mem: [Parameters] MaxQueryMem (already a string).
        http_server_threads: [HTTPServer] ServerThreads.
        thread_cleanup_interval: [Parameters] ThreadCleanupInterval.
        resources_cleanup_interval: [Parameters] ResourcesCleanupInterval.

    Returns:
        None. Parse and I/O errors are reported on stderr, not raised.
    """
    if not os.path.exists(ini_path):
        print(f"Info: virtuoso.ini not found at '{ini_path}'. Likely first run. Skipping settings update.")
        return

    print(f"Info: Checking existing virtuoso.ini at '{ini_path}' for settings update...")
    actual_db_size_bytes = get_directory_size(data_dir_path)

    # Calculate MaxCheckpointRemap if database is large enough
    calculate_remap = actual_db_size_bytes >= MIN_DB_SIZE_BYTES_FOR_CHECKPOINT_REMAP
    calculated_remap_value = calculate_max_checkpoint_remap(actual_db_size_bytes) if calculate_remap else None

    config = configparser.ConfigParser(interpolation=None, strict=False)
    config.optionxform = str  # Keep case sensitivity
    made_changes = False
    try:
        config.read(ini_path, encoding='utf-8')

        _ensure_ini_section(config, 'Parameters', ini_path)

        if number_of_buffers is not None:
            made_changes = _set_ini_option(
                config, 'Parameters', 'NumberOfBuffers', str(number_of_buffers), ini_path
            ) or made_changes

        # Ensure [Client] section has SQL timeouts set to 0
        _ensure_ini_section(config, 'Client', ini_path)
        for timeout_option in ('SQL_QUERY_TIMEOUT', 'SQL_TXN_TIMEOUT'):
            if config.get('Client', timeout_option, fallback=None) != '0':
                config.set('Client', timeout_option, '0')
                print(f"Info: Setting [Client] {timeout_option} to '0' in '{ini_path}'.")
                made_changes = True

        if max_dirty_buffers is not None:
            made_changes = _set_ini_option(
                config, 'Parameters', 'MaxDirtyBuffers', str(max_dirty_buffers), ini_path
            ) or made_changes

        if dirs_allowed is not None:
            current_dirs_allowed = config.get('Parameters', 'DirsAllowed', fallback=None)
            # Compare as sets so ordering/whitespace differences do not count as a change.
            if _split_dirs_allowed(current_dirs_allowed) != _split_dirs_allowed(dirs_allowed):
                config.set('Parameters', 'DirsAllowed', dirs_allowed)
                print(f"Info: Updating [Parameters] DirsAllowed from '{current_dirs_allowed}' to '{dirs_allowed}' in '{ini_path}'.")
                made_changes = True

        integer_parameters = (
            ('AsyncQueueMaxThreads', async_queue_max_threads),
            ('ThreadsPerQuery', threads_per_query),
            ('MaxClientConnections', max_client_connections),
            ('AdjustVectorSize', adjust_vector_size),
            ('VectorSize', vector_size),
            ('CheckpointInterval', checkpoint_interval),
        )
        for option, value in integer_parameters:
            if value is not None:
                made_changes = _set_ini_option(
                    config, 'Parameters', option, str(value), ini_path
                ) or made_changes

        if max_query_mem is not None:
            made_changes = _set_ini_option(
                config, 'Parameters', 'MaxQueryMem', max_query_mem, ini_path
            ) or made_changes

        if http_server_threads is not None:
            _ensure_ini_section(config, 'HTTPServer', ini_path)
            made_changes = _set_ini_option(
                config, 'HTTPServer', 'ServerThreads', str(http_server_threads), ini_path
            ) or made_changes

        cleanup_parameters = (
            ('ThreadCleanupInterval', thread_cleanup_interval),
            ('ResourcesCleanupInterval', resources_cleanup_interval),
        )
        for option, value in cleanup_parameters:
            if value is not None:
                made_changes = _set_ini_option(
                    config, 'Parameters', option, str(value), ini_path
                ) or made_changes

        # Update MaxCheckpointRemap if database is large enough
        if calculate_remap:
            calculated_remap_str = str(calculated_remap_value)
            for section in ('Database', 'TempDatabase'):
                _ensure_ini_section(config, section, ini_path)
                made_changes = _set_ini_option(
                    config, section, 'MaxCheckpointRemap', calculated_remap_str, ini_path
                ) or made_changes
        else:
            print(f"Info: Host data directory '{data_dir_path}' size ({actual_db_size_bytes / (1024**3):.2f} GiB) is below threshold ({MIN_DB_SIZE_FOR_CHECKPOINT_REMAP_GB} GiB). No changes made to MaxCheckpointRemap in virtuoso.ini.")

        if made_changes:
            # Write changes back with UTF-8 encoding
            with open(ini_path, 'w', encoding='utf-8') as configfile:
                config.write(configfile)
            print(f"Info: Successfully saved changes to '{ini_path}'.")
        else:
            print(f"Info: No changes needed in '{ini_path}'.")

    except configparser.Error as e:
        print(f"Error: Failed to parse or update virtuoso.ini at '{ini_path}': {e}", file=sys.stderr)
    except IOError as e:
        print(f"Error: Failed to read or write virtuoso.ini at '{ini_path}': {e}", file=sys.stderr)
    except Exception as e:
        print(f"Error: An unexpected error occurred while updating virtuoso.ini: {e}", file=sys.stderr)

477 

478 

def parse_arguments() -> argparse.Namespace:  # pragma: no cover
    """
    Parse command-line arguments for Virtuoso Docker launcher.

    Parsing happens in stages: a throw-away parser first checks whether
    --memory was given (so the host-RAM default is only computed, and
    announced, when needed); the main parser is then parsed once leniently
    to learn the effective --memory value, from which the defaults of the
    two buffer options are derived before the final strict parse.

    Returns:
        argparse.Namespace: Parsed command-line arguments
    """
    # Stage 1: was --memory supplied explicitly?
    probe = argparse.ArgumentParser(add_help=False)
    probe.add_argument("--memory", default=None)
    probe_args, _ = probe.parse_known_args()
    memory_specified = probe_args.memory is not None

    # Stage 2: default memory is 2/3 of host RAM (at least 1 GiB), else "2g".
    default_memory_str = "2g"  # Fallback default
    if not psutil:
        print(f"Warning: psutil not found. Cannot auto-detect host RAM. "
              f"Falling back to default memory limit '{default_memory_str}'. "
              f"Install psutil for automatic calculation.", file=sys.stderr)
    elif not memory_specified:
        try:
            total_host_ram = psutil.virtual_memory().total
            one_gib = 1 * 1024 * 1024 * 1024
            default_mem_bytes = max(int(total_host_ram * (2/3)), one_gib)

            default_memory_str = bytes_to_docker_mem_str(default_mem_bytes)
            print(f"Info: Detected {total_host_ram / (1024**3):.1f} GiB total host RAM. "
                  f"Setting default container memory limit to {default_memory_str} (approx. 2/3). "
                  f"Use --memory to override.")
        except Exception as e:
            print(f"Warning: Could not auto-detect host RAM using psutil: {e}. "
                  f"Falling back to default memory limit '{default_memory_str}'.", file=sys.stderr)
    # Otherwise the user-specified value is used silently.

    # Stage 3: the full parser.
    parser = argparse.ArgumentParser(
        description="Launch a Virtuoso database using Docker",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    add = parser.add_argument

    add("--name", default="virtuoso", help="Name for the Docker container")
    add("--http-port", type=int, default=8890, help="HTTP port to expose Virtuoso on")
    add("--isql-port", type=int, default=1111, help="ISQL port to expose Virtuoso on")

    add("--data-dir", default="./virtuoso-data",
        help="Host directory to mount as Virtuoso data directory")

    add("--mount-volume", action="append", dest="extra_volumes",
        metavar="HOST_PATH:CONTAINER_PATH",
        help="Mount an additional host directory into the container. "
             "Format: /path/on/host:/path/in/container. "
             "Can be specified multiple times.")

    add("--memory", default=default_memory_str,
        help="Memory limit for the container (e.g., 2g, 4g). "
             f"Defaults to approx. 2/3 of host RAM if psutil is installed, otherwise '{default_memory_str}'.")
    add("--cpu-limit", type=float, default=0,
        help="CPU limit for the container (0 means no limit)")

    add("--dba-password", default="dba", help="Password for the Virtuoso dba user")

    add("--force-remove", action="store_true",
        help="Force removal of existing container with the same name")

    add("--network",
        help="Docker network to connect the container to (must be a pre-existing network)")

    add("--wait-ready", action="store_true",
        help="Wait until Virtuoso is ready to accept connections")
    add("--detach", action="store_true", help="Run container in detached mode")

    add("--enable-write-permissions", action="store_true",
        help="Enable write permissions for 'nobody' and 'SPARQL' users. "
             "This makes the database publicly writable. "
             "Forces waiting for the container to be ready.")

    add("--estimated-db-size-gb", type=float, default=0,
        help="Estimated database size in GB. If provided, MaxCheckpointRemap will be preconfigured "
             "based on this estimate rather than measuring existing data.")

    add("--virtuoso-version", default=None,
        help="Virtuoso Docker image version/tag to use (e.g., 'latest', '7.2.11', '7.2.12'). If not specified, uses the default pinned version.")

    add("--virtuoso-sha", default=None,
        help="Virtuoso Docker image SHA256 digest to use (e.g., 'sha256:e07868a3db9090400332eaa8ee694b8cf9bf7eebc26db6bbdc3bb92fd30ed010'). Takes precedence over --virtuoso-version.")

    add("--parallel-threads", type=int, default=None,
        help="Maximum parallel threads for query execution. "
             "If not specified, uses all available CPU cores. "
             "Sets AsyncQueueMaxThreads to cores * 1.5 and ThreadsPerQuery to cores.")

    # Stage 4: buffer defaults depend on the effective --memory value, so
    # parse leniently once before declaring the buffer options.
    known_args, _ = parser.parse_known_args()
    default_buffers, default_dirty_buffers = get_optimal_buffer_values(known_args.memory)

    add("--max-dirty-buffers", type=int, default=default_dirty_buffers,
        help="Maximum dirty buffers before checkpoint (auto-calculated based on --memory value, requires integer)")
    add("--number-of-buffers", type=int, default=default_buffers,
        help="Number of buffers (auto-calculated based on --memory value, requires integer)")

    return parser.parse_args()

655 

656 

def check_docker_installed() -> bool:
    """
    Probe for a working ``docker`` executable by running ``docker --version``.

    Returns:
        bool: True if the command ran and exited successfully, False if it
        failed or the executable could not be found.
    """
    version_cmd = ["docker", "--version"]
    try:
        subprocess.run(
            version_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except (subprocess.SubprocessError, FileNotFoundError):
        return False
    else:
        return True

674 

675 

def check_container_exists(container_name: str) -> bool:
    """
    Ask Docker whether a container (running or stopped) has the given name.

    Args:
        container_name: Name of the container to check

    Returns:
        bool: True if the name appears in the output of ``docker ps -a``
        filtered on that exact name, False otherwise
    """
    listing_cmd = [
        "docker", "ps", "-a",
        "--filter", f"name=^{container_name}$",
        "--format", "{{.Names}}",
    ]
    completed = subprocess.run(
        listing_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    listed_names = completed.stdout.strip()
    return container_name in listed_names

694 

695 

def remove_container(container_name: str) -> bool:
    """
    Remove a Docker container, forcing removal if it is running.

    Args:
        container_name: Name of the container to remove

    Returns:
        bool: True if ``docker rm -f`` exited successfully, False otherwise
        (including when the docker executable cannot be started at all).
    """
    try:
        subprocess.run(
            ["docker", "rm", "-f", container_name],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers a missing or non-executable docker binary, which
        # subprocess reports as FileNotFoundError/PermissionError rather
        # than as a SubprocessError.
        return False
    return True

716 

717 

def get_docker_image(version: str, sha: str) -> str:
    """
    Resolve the Docker image reference to launch.

    Args:
        version: Version string (e.g., 'latest', '7.2.11', '7.2.12') or None for default
        sha: SHA256 digest string or None

    Returns:
        str: ``<repository>@<sha>`` when a digest is given (it takes
        precedence), otherwise ``<repository>:<version>``, or DEFAULT_IMAGE
        when neither is given.
    """
    repository = "openlink/virtuoso-opensource-7"

    if sha is not None:
        return f"{repository}@{sha}"
    if version is None:
        return DEFAULT_IMAGE
    # 'latest' needs no special casing: it is just another tag.
    return f"{repository}:{version}"

737 

738 

def build_docker_run_command(args: argparse.Namespace) -> Tuple[List[str], set]:
    """
    Build the Docker run command based on provided arguments.

    The host data directory is created if it does not exist yet.

    Args:
        args: Command-line arguments

    Returns:
        Tuple[List[str], set]:
            - Command parts for subprocess.run
            - Set of unique container paths intended for DirsAllowed
    """
    host_data_dir_abs = os.path.abspath(args.data_dir)
    os.makedirs(host_data_dir_abs, exist_ok=True)

    cmd = [DOCKER_EXEC_PATH, "run"]

    # A foreground container is removed automatically when it exits.
    if not args.detach:
        cmd.append("--rm")

    cmd.extend(["--name", args.name])

    # Add user mapping to run as the host user
    try:
        cmd.extend(["--user", f"{os.getuid()}:{os.getgid()}"])
    except AttributeError:
        print("Warning: os.getuid/os.getgid not available on this system (likely Windows). Skipping user mapping.", file=sys.stderr)

    cmd.extend(["-p", f"{args.http_port}:8890"])
    cmd.extend(["-p", f"{args.isql_port}:1111"])

    if args.network:
        cmd.extend(["--network", args.network])

    container_data_dir_path = DEFAULT_CONTAINER_DATA_DIR
    cmd.extend(["-v", f"{host_data_dir_abs}:{container_data_dir_path}"])

    # Start with default Virtuoso paths
    paths_to_allow_in_container = DEFAULT_DIRS_ALLOWED.copy()
    paths_to_allow_in_container.add(container_data_dir_path)

    # Mount additional volumes and allow their container paths
    for volume_spec in args.extra_volumes or []:
        host_path, separator, container_path = volume_spec.partition(':')
        if not separator:
            print(f"Warning: Ignoring malformed --mount-volume value '{volume_spec}' "
                  f"(expected HOST_PATH:CONTAINER_PATH).", file=sys.stderr)
            continue
        cmd.extend(["-v", f"{os.path.abspath(host_path)}:{container_path}"])

        # A trailing mount option (e.g. ":ro") belongs to the -v argument
        # only; it is not part of the directory Virtuoso must be allowed
        # to access.
        container_dir = container_path.partition(':')[0]
        container_path_abs = container_dir if container_dir.startswith('/') else '/' + container_dir
        paths_to_allow_in_container.add(container_path_abs)
        print(f"Info: Adding mounted volume path '{container_path_abs}' to DirsAllowed.")

    memory_bytes = parse_memory_value(args.memory)
    reservation_bytes = int(memory_bytes * VIRTUOSO_MEMORY_PERCENTAGE)
    reservation_str = bytes_to_docker_mem_str(reservation_bytes)
    cmd.extend(["--memory-reservation", reservation_str])
    cmd.extend(["--memory", args.memory])
    if args.cpu_limit > 0:
        cmd.extend(["--cpus", str(args.cpu_limit)])

    env_vars = {
        "DBA_PASSWORD": args.dba_password,
        "VIRT_Parameters_ResultSetMaxRows": str(DEFAULT_MAX_ROWS),
        "VIRT_SPARQL_DefaultQuery": "SELECT (COUNT(*) AS ?quadCount) WHERE { GRAPH ?g { ?s ?p ?o } }",
    }

    virt_env_vars = get_virt_env_vars(
        memory=args.memory,
        number_of_buffers=args.number_of_buffers,
        max_dirty_buffers=args.max_dirty_buffers,
        parallel_threads=args.parallel_threads,
        estimated_db_size_gb=args.estimated_db_size_gb,
        # Sorted so the generated command is the same on every run; plain
        # set iteration order varies between interpreter invocations.
        dirs_allowed=",".join(sorted(paths_to_allow_in_container)),
    )
    env_vars.update(virt_env_vars)

    for key, value in env_vars.items():
        cmd.extend(["-e", f"{key}={value}"])

    if args.detach:
        cmd.append("-d")

    # Append image name
    docker_image = get_docker_image(args.virtuoso_version, args.virtuoso_sha)
    cmd.append(docker_image)

    return cmd, paths_to_allow_in_container

834 

835 

def wait_for_virtuoso_ready(
    dba_password: str,
    docker_container: str = None,
    timeout: int = DEFAULT_WAIT_TIMEOUT,
    poll_interval: int = 3,
) -> bool:
    """
    Poll Virtuoso with an ISQL ``status();`` call until it answers.

    Args:
        dba_password: DBA password used to build the ISQL arguments.
        docker_container: Container name forwarded to create_isql_args();
            None is passed through unchanged.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between two attempts.

    Returns:
        True as soon as the ISQL command succeeds. False if the command
        fails with an error that is not recognised as a connection error,
        or if the timeout expires first.
    """
    print(f"Waiting for Virtuoso to be ready (timeout: {timeout}s)...")
    # Use a monotonic clock so that wall-clock adjustments (NTP, DST, manual
    # changes) can neither shorten nor extend the timeout.
    start_time = time.monotonic()
    isql_args = create_isql_args(dba_password, docker_container)

    # Report progress at most once every `progress_interval` seconds. Testing
    # `elapsed % 10 == 0` only fires when a poll happens to land on an exact
    # multiple of ten, which is erratic with a 3-second poll interval.
    progress_interval = 10
    next_progress_at = 0

    while time.monotonic() - start_time < timeout:
        try:
            success, _, stderr = run_isql_command(isql_args, sql_command="status();")
            if success:
                print("Virtuoso is ready.")
                return True
            if not is_connection_error(stderr):
                # Anything other than "server not reachable yet" is treated
                # as a definitive failure; retrying would not help.
                print(f"ISQL check failed: {stderr}", file=sys.stderr)
                return False
            elapsed = int(time.monotonic() - start_time)
            if elapsed >= next_progress_at:
                print(f" Waiting for Virtuoso... ({elapsed}s elapsed)")
                next_progress_at = elapsed + progress_interval
            time.sleep(poll_interval)
        except Exception as e:
            # Best effort: an unexpected error during one probe must not
            # abort the whole wait; back off slightly longer and retry.
            print(f"Warning: Error in readiness check: {e}", file=sys.stderr)
            time.sleep(poll_interval + 2)

    print(f"Timeout ({timeout}s) waiting for Virtuoso.", file=sys.stderr)
    return False

866 

867 

def run_docker_command(cmd: List[str], capture_output=False, check=True, suppress_error=False):
    """
    Run a Docker command line and report failures on stderr.

    Args:
        cmd: Full argument vector to execute.
        capture_output: When true, stdout/stderr are piped and available on
            the returned object; otherwise the current sys.stdout/sys.stderr
            are handed to the child process.
        check: Forwarded to subprocess.run(); a non-zero exit status then
            raises subprocess.CalledProcessError.
        suppress_error: When true, nothing is printed on failure. The
            exception is re-raised either way.

    Returns:
        The subprocess.CompletedProcess produced by subprocess.run().

    Raises:
        subprocess.CalledProcessError: Non-zero exit status while check is true.
        FileNotFoundError: The executable could not be found.
    """
    print(f"Executing: {' '.join(cmd)}")

    if capture_output:
        out_target = err_target = subprocess.PIPE
    else:
        out_target, err_target = sys.stdout, sys.stderr

    try:
        return subprocess.run(
            cmd,
            stdout=out_target,
            stderr=err_target,
            text=True,
            check=check,
        )
    except subprocess.CalledProcessError as e:
        if suppress_error:
            raise
        print(f"Error executing Docker command: {e}", file=sys.stderr)
        if capture_output:
            # The streams were only captured in this mode, so only then is
            # there anything to show.
            print(f"Stderr: {e.stderr}", file=sys.stderr)
            print(f"Stdout: {e.stdout}", file=sys.stderr)
        raise
    except FileNotFoundError:
        if suppress_error:
            raise
        print("Error: 'docker' command not found. Make sure Docker is installed and in your PATH.", file=sys.stderr)
        raise

891 

892 

def grant_write_permissions(dba_password: str, docker_container: str = None) -> bool:
    """
    Run the two ISQL statements that enable SPARQL write access.

    Both statements are always attempted, even if the first one fails; each
    outcome is reported on stdout (success) or stderr (warning).

    Args:
        dba_password: DBA password used to build the ISQL arguments.
        docker_container: Container name forwarded to create_isql_args().

    Returns:
        The result of ``first_ok and second_ok`` for the two statements.
    """
    print("Granting write permissions...")
    isql_args = create_isql_args(dba_password, docker_container)

    # (SQL statement, message on success, warning prefix on failure)
    steps = (
        (
            "DB.DBA.RDF_DEFAULT_USER_PERMS_SET('nobody', 7);",
            " Set permissions for 'nobody' user.",
            " Warning: Failed to set 'nobody' permissions: ",
        ),
        (
            "DB.DBA.USER_GRANT_ROLE('SPARQL', 'SPARQL_UPDATE');",
            " Granted SPARQL_UPDATE role to 'SPARQL' user.",
            " Warning: Failed to grant SPARQL_UPDATE: ",
        ),
    )

    outcomes = []
    for sql, ok_message, failure_prefix in steps:
        succeeded, _, err_text = run_isql_command(isql_args, sql_command=sql)
        if succeeded:
            print(ok_message)
        else:
            print(f"{failure_prefix}{err_text}", file=sys.stderr)
        outcomes.append(succeeded)

    nobody_ok, sparql_ok = outcomes
    return nobody_ok and sparql_ok

914 

915 

def launch_virtuoso(  # pragma: no cover
    name: str = "virtuoso",
    data_dir: str = "./virtuoso-data",
    http_port: int = 8890,
    isql_port: int = 1111,
    memory: str = None,
    dba_password: str = "dba",
    detach: bool = True,
    wait_ready: bool = True,
    enable_write_permissions: bool = False,
    force_remove: bool = False,
    extra_volumes: list = None,
    network: str = None,
    cpu_limit: float = 0,
    virtuoso_version: str = None,
    virtuoso_sha: str = None,
    estimated_db_size_gb: float = 0,
    parallel_threads: int = None,
) -> None:
    """
    Launch Virtuoso Docker container.

    The function updates the virtuoso.ini settings in the data directory,
    removes a pre-existing container of the same name when allowed, starts
    the container and optionally waits for readiness and grants write
    permissions.

    Args:
        name: Container name
        data_dir: Host directory for Virtuoso data
        http_port: HTTP port to expose
        isql_port: ISQL port to expose
        memory: Memory limit (e.g., "4g"). Auto-calculated from host RAM if None.
        dba_password: DBA password
        detach: Run in detached mode
        wait_ready: Wait for Virtuoso to be ready
        enable_write_permissions: Enable SPARQL write permissions for 'nobody' and 'SPARQL' users
        force_remove: Force remove existing container with same name
        extra_volumes: Additional volumes to mount (list of "host:container" strings)
        network: Docker network to connect
        cpu_limit: CPU limit (0 = no limit)
        virtuoso_version: Docker image version tag
        virtuoso_sha: Docker image SHA digest (takes precedence over version)
        estimated_db_size_gb: Estimated DB size for MaxCheckpointRemap config
        parallel_threads: Max parallel threads for query execution. If None, uses all CPU cores.

    Raises:
        RuntimeError: If Docker is not installed, an existing container
            cannot be replaced, the ``docker run`` command fails (in both
            detached and foreground mode), or the readiness check fails.
    """
    if not check_docker_installed():
        raise RuntimeError("Docker command not found. Please install Docker.")

    if memory is None:
        if psutil:
            try:
                # Default to two thirds of the host RAM, but never below 1 GiB.
                total_host_ram = psutil.virtual_memory().total
                default_mem_bytes = max(int(total_host_ram * (2/3)), 1 * 1024**3)
                memory = bytes_to_docker_mem_str(default_mem_bytes)
            except Exception:
                # Best effort: fall back to a fixed limit if the host RAM
                # cannot be determined.
                memory = "2g"
        else:
            memory = "2g"

    number_of_buffers, max_dirty_buffers = get_optimal_buffer_values(memory)

    # build_docker_run_command() expects an argparse-style namespace.
    args = argparse.Namespace(
        name=name,
        data_dir=data_dir,
        http_port=http_port,
        isql_port=isql_port,
        memory=memory,
        dba_password=dba_password,
        detach=detach,
        wait_ready=wait_ready,
        enable_write_permissions=enable_write_permissions,
        force_remove=force_remove,
        extra_volumes=extra_volumes,
        network=network,
        cpu_limit=cpu_limit,
        virtuoso_version=virtuoso_version,
        virtuoso_sha=virtuoso_sha,
        estimated_db_size_gb=estimated_db_size_gb,
        number_of_buffers=number_of_buffers,
        max_dirty_buffers=max_dirty_buffers,
        parallel_threads=parallel_threads,
    )

    host_data_dir_abs = os.path.abspath(data_dir)
    ini_file_path = os.path.join(host_data_dir_abs, "virtuoso.ini")

    docker_cmd, unique_paths_to_allow = build_docker_run_command(args)
    dirs_allowed_str = ",".join(unique_paths_to_allow) if unique_paths_to_allow else None

    threading = calculate_threading_config(parallel_threads)
    max_query_mem_value = calculate_max_query_mem(memory, number_of_buffers)

    update_ini_memory_settings(
        ini_path=ini_file_path,
        data_dir_path=host_data_dir_abs,
        number_of_buffers=number_of_buffers,
        max_dirty_buffers=max_dirty_buffers,
        dirs_allowed=dirs_allowed_str,
        async_queue_max_threads=threading["async_queue_max_threads"],
        threads_per_query=threading["threads_per_query"],
        max_client_connections=threading["max_client_connections"],
        adjust_vector_size=0,
        vector_size=1000,
        checkpoint_interval=1,
        max_query_mem=max_query_mem_value,
        http_server_threads=threading["max_client_connections"],
        thread_cleanup_interval=1,
        resources_cleanup_interval=1,
    )

    if check_container_exists(name):
        # `docker ps` without `-a` lists running containers only, so an "Up"
        # status here means the existing container is currently running.
        result = subprocess.run(
            [DOCKER_EXEC_PATH, "ps", "--filter", f"name=^{name}$", "--format", "{{.Status}}"],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )
        is_running = "Up" in result.stdout

        if force_remove:
            print(f"Container '{name}' already exists. Forcing removal...")
            if not remove_container(name):
                raise RuntimeError(f"Failed to remove existing container '{name}'")
        elif is_running:
            raise RuntimeError(f"Container '{name}' is already running. Stop it first or use force_remove=True.")
        else:
            print(f"Container '{name}' exists but is stopped. Removing...")
            if not remove_container(name):
                raise RuntimeError(f"Failed to remove existing stopped container '{name}'")

    try:
        # Always check the exit status. With `-d`, `docker run` returns as
        # soon as the container has been started, so a non-zero status means
        # the launch itself failed. Ignoring it would report success for a
        # container that never started and would leave the cleanup in the
        # CalledProcessError handler below unreachable.
        run_docker_command(docker_cmd, check=True)

        should_wait = wait_ready or enable_write_permissions

        if detach and should_wait:
            print("Waiting for Virtuoso readiness...")
            ready = wait_for_virtuoso_ready(dba_password, docker_container=name)
            if not ready:
                raise RuntimeError("Virtuoso readiness check timed out or failed.")

            # Permissions can only be granted against a running, ready server.
            if enable_write_permissions:
                if not grant_write_permissions(dba_password, docker_container=name):
                    print("Warning: One or more commands to enable write permissions failed.", file=sys.stderr)

        print(f"Virtuoso launched successfully on http://localhost:{http_port}/sparql")

    except subprocess.CalledProcessError as e:
        # A failed detached launch can leave a created-but-broken container
        # behind; remove it so that the next attempt starts clean.
        if detach and check_container_exists(name):
            run_docker_command([DOCKER_EXEC_PATH, "stop", name], suppress_error=True, check=False)
            run_docker_command([DOCKER_EXEC_PATH, "rm", name], suppress_error=True, check=False)
        raise RuntimeError(f"Virtuoso launch failed: {e}") from e
    except FileNotFoundError as e:
        raise RuntimeError("Docker command not found.") from e

1067 

1068 

def main() -> int:  # pragma: no cover
    """
    CLI entry point: parse the command line and delegate to launch_virtuoso().

    Returns:
        0 when launch_virtuoso() completes, 1 when it raises RuntimeError
        (the error message is printed on stderr).
    """
    args = parse_arguments()

    # Options forwarded one-to-one from the parsed namespace to
    # launch_virtuoso() keyword arguments of the same name.
    forwarded_options = (
        "name",
        "data_dir",
        "http_port",
        "isql_port",
        "memory",
        "dba_password",
        "detach",
        "wait_ready",
        "enable_write_permissions",
        "force_remove",
        "extra_volumes",
        "network",
        "cpu_limit",
        "virtuoso_version",
        "virtuoso_sha",
        "estimated_db_size_gb",
        "parallel_threads",
    )
    launch_kwargs = {option: getattr(args, option) for option in forwarded_options}

    try:
        launch_virtuoso(**launch_kwargs)
    except RuntimeError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1
    return 0

1099 

1100 

if __name__ == "__main__":  # pragma: no cover
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())