Coverage for virtuoso_utilities / rebuild_fulltext_index.py: 56%

108 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-04-14 09:16 +0000

1#!/usr/bin/env python3 

2 

3# SPDX-FileCopyrightText: 2025 Arcangelo Massari <arcangelo.massari@unibo.it> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7""" 

8Utility for rebuilding the Virtuoso full-text index. 

9 

10This module provides functionality to rebuild the Virtuoso RDF Quad store's 

11full-text index, which is used for optimal querying of RDF object values 

12using the bif:contains function in SPARQL queries. 

13""" 

14import argparse 

15import shutil 

16import subprocess 

17import sys 

18import time 

19from typing import Tuple 

20 

21from virtuoso_utilities.isql_helpers import run_isql_command 

22 

23 

24 

25 

def drop_fulltext_tables(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Remove the two tables backing the full-text index.

    Each ``drop table`` statement is sent through ``run_isql_command`` with
    ``ignore_errors=True``. A failed statement is tolerated when its stderr
    contains "does not exist" (case-insensitive); any other failure stops
    the loop immediately.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr). On success stdout and stderr are
        empty strings; on failure they are those of the failing statement.
    """
    print("Dropping existing full-text index tables...", file=sys.stderr)

    statements = (
        "drop table DB.DBA.VTLOG_DB_DBA_RDF_OBJ;",
        "drop table DB.DBA.RDF_OBJ_RO_FLAGS_WORDS;",
    )

    for statement in statements:
        ok, out, err = run_isql_command(args, sql_command=statement, ignore_errors=True)
        if ok:
            continue

        # A missing table is expected on first run, so it is not an error.
        if "does not exist" in err.lower():
            continue

        return False, out, err

    return True, "", ""

51 

52 

def recreate_fulltext_index(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Issue the ``DB.DBA.vt_create_text_index`` call for ``DB.DBA.RDF_OBJ``.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr) as returned by ``run_isql_command``
    """
    print("Recreating full-text index...", file=sys.stderr)

    statement = """
    DB.DBA.vt_create_text_index (
      fix_identifier_case ('DB.DBA.RDF_OBJ'),
      fix_identifier_case ('RO_FLAGS'),
      fix_identifier_case ('RO_ID'),
      0, 0, vector (), 1, '*ini*', 'UTF-8-QR');
    """

    outcome = run_isql_command(args, sql_command=statement)
    return outcome

74 

75 

def enable_batch_update(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Switch the full-text index on ``DB.DBA.RDF_OBJ`` to batch update mode.

    Sends a single ``DB.DBA.vt_batch_update (..., 'ON', 1)`` statement
    through ``run_isql_command``.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr) as returned by ``run_isql_command``
    """
    print("Enabling batch update for full-text index...", file=sys.stderr)

    statement = "DB.DBA.vt_batch_update (fix_identifier_case ('DB.DBA.RDF_OBJ'), 'ON', 1);"
    outcome = run_isql_command(args, sql_command=statement)
    return outcome

91 

92 

def refill_fulltext_index(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Repopulate the full-text index by calling ``DB.DBA.RDF_OBJ_FT_RECOVER()``.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr) as returned by ``run_isql_command``
    """
    print("Refilling full-text index (this may take a while)...", file=sys.stderr)

    statement = "DB.DBA.RDF_OBJ_FT_RECOVER();"
    outcome = run_isql_command(args, sql_command=statement)
    return outcome

108 

109 

def _container_is_running(docker_cmd: str, container: str) -> bool:
    """Return True when *container* exactly equals a name listed by ``docker ps``."""
    listing = subprocess.run(
        [docker_cmd, 'ps', '--format', '{{.Names}}'],
        capture_output=True,
        text=True,
        timeout=30
    )

    # Compare whole names, not substrings: a plain ``in`` test on the raw
    # output would accept "virt" when only "virtuoso-prod" is running.
    # A single output line may carry several comma-separated names.
    for line in listing.stdout.splitlines():
        for name in line.split(','):
            if name.strip() == container:
                return True
    return False


def _check_prerequisites(args: argparse.Namespace) -> bool:
    """Verify that the docker or isql executable (and the container) is usable."""
    if args.docker_container:
        # Honour --docker-path when the namespace provides it.
        docker_cmd = getattr(args, 'docker_path', None) or 'docker'
        if not shutil.which(docker_cmd):
            print("Error: Docker command not found in PATH", file=sys.stderr)
            return False

        try:
            running = _container_is_running(docker_cmd, args.docker_container)
        except Exception as e:
            print(f"Error checking container status: {e}", file=sys.stderr)
            return False

        if not running:
            print(f"Error: Container '{args.docker_container}' not found or not running", file=sys.stderr)
            return False
        return True

    # Honour --isql-path when the namespace provides it.
    isql_cmd = getattr(args, 'isql_path', None) or 'isql'
    if not shutil.which(isql_cmd):
        print("Error: isql command not found in PATH", file=sys.stderr)
        return False
    return True


def _restart_container_and_wait(args: argparse.Namespace) -> None:
    """Restart the Docker container and poll isql until it answers or 60s of sleeps elapse."""
    docker_cmd = getattr(args, 'docker_path', None) or 'docker'
    print("Restarting Docker container to activate text index...", file=sys.stderr)

    try:
        result = subprocess.run(
            [docker_cmd, 'restart', args.docker_container],
            capture_output=True,
            text=True,
            timeout=60
        )

        if result.returncode != 0:
            print(f"Error restarting container: {result.stderr}", file=sys.stderr)
            print("Note: Manual restart recommended for full text index activation.", file=sys.stderr)
            return

        print("Waiting for container to be ready after restart...", file=sys.stderr)

        max_wait = 60
        wait_time = 0
        while wait_time < max_wait:
            try:
                success, _, _ = run_isql_command(args, sql_command="status();", ignore_errors=True)

                if success:
                    print("Container restarted and ready!", file=sys.stderr)
                    return

            except subprocess.TimeoutExpired:
                # The server is still starting up; keep polling.
                pass

            time.sleep(2)
            wait_time += 2

        print("Container restarted but may still be initializing.", file=sys.stderr)

    except Exception as e:
        # The index itself is already rebuilt, so a restart problem is only reported.
        print(f"Error during container restart: {e}", file=sys.stderr)
        print("Note: Manual restart recommended for full text index activation.", file=sys.stderr)


def rebuild_fulltext_index(args: argparse.Namespace) -> bool:
    """
    Complete process to rebuild the Virtuoso full-text index.

    This function will:
    1. Drop existing full-text index tables
    2. Recreate the index
    3. Enable batch update
    4. Refill the index

    Before running the steps it checks that the required executable is
    available: the docker executable (``args.docker_path`` when present,
    otherwise ``docker``) plus a running container whose name exactly equals
    ``args.docker_container``, or the isql executable (``args.isql_path``
    when present, otherwise ``isql``) when no container is given.

    After the steps succeed, the container is restarted when both
    ``args.restart_container`` and ``args.docker_container`` are set;
    otherwise a note recommending a manual restart is printed.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        True if the rebuild process completed successfully, False otherwise.
        A failed or slow container restart does not turn the result into
        False, because the index has already been rebuilt at that point.
    """
    if not _check_prerequisites(args):
        return False

    # Built after the prerequisite check; executed strictly in this order.
    steps = (
        (drop_fulltext_tables, "Error dropping full-text index tables"),
        (recreate_fulltext_index, "Error recreating full-text index"),
        (enable_batch_update, "Error enabling batch update"),
        (refill_fulltext_index, "Error refilling full-text index"),
    )
    for step, failure_message in steps:
        success, _, stderr = step(args)
        if not success:
            print(f"{failure_message}: {stderr}", file=sys.stderr)
            return False

    print("Full-text index rebuild completed successfully.", file=sys.stderr)

    if args.restart_container and args.docker_container:
        _restart_container_and_wait(args)
    elif args.restart_container:
        print("Note: --restart-container requires --docker-container to be specified.", file=sys.stderr)
        print("Note: Restart the Virtuoso database manually for optimal text index performance.", file=sys.stderr)
    else:
        print("Note: Restart the Virtuoso database for optimal text index performance.", file=sys.stderr)

    return True

233 

234 

def parse_args(argv=None) -> argparse.Namespace:
    """
    Parse command-line arguments.

    Args:
        argv: Optional sequence of argument strings to parse. When None
            (the default) argparse reads the arguments from ``sys.argv``.

    Returns:
        Namespace with the connection options (host, port, user, password),
        the Docker options (docker_container, restart_container) and the
        executable paths (docker_path, docker_isql_path, isql_path).
    """
    parser = argparse.ArgumentParser(
        description="Rebuild the Virtuoso full-text index."
    )

    parser.add_argument("--host", default="localhost", help="Virtuoso host")
    parser.add_argument("--port", default="1111", help="Virtuoso port")
    parser.add_argument("--user", default="dba", help="Virtuoso username")
    parser.add_argument("--password", default="dba", help="Virtuoso password")
    parser.add_argument(
        "--docker-container",
        help="Docker container name/ID to execute isql inside"
    )
    parser.add_argument(
        "--restart-container",
        action="store_true",
        help="Restart the Docker container after rebuilding the index (recommended for full activation)"
    )

    # Add arguments required by isql_helpers
    parser.add_argument("--docker-path", default="docker", help="Path to docker executable")
    parser.add_argument("--docker-isql-path", default="isql", help="Path to isql inside container")
    parser.add_argument("--isql-path", default="isql", help="Path to isql executable")

    return parser.parse_args(argv)

261 

262 

def main() -> int:
    """Run the rebuild from the command line and return the process exit code.

    Returns:
        0 when ``rebuild_fulltext_index`` reports success, 1 otherwise.
    """
    if rebuild_fulltext_index(parse_args()):
        return 0
    return 1

268 

269 

# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())