Coverage for virtuoso_utilities / rebuild_fulltext_index.py: 56%

108 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-15 14:45 +0000

1#!/usr/bin/env python3 

2""" 

3Utility for rebuilding the Virtuoso full-text index. 

4 

5This module provides functionality to rebuild the Virtuoso RDF Quad store's 

6full-text index, which is used for optimal querying of RDF object values 

7using the bif:contains function in SPARQL queries. 

8""" 

9import argparse 

10import shutil 

11import subprocess 

12import sys 

13import time 

14from typing import Tuple 

15 

16from virtuoso_utilities.isql_helpers import run_isql_command 

17 

18 

19 

20 

def drop_fulltext_tables(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Remove the tables that back the existing full-text index.

    Each table is dropped with its own isql call. A failed drop whose error
    output contains "does not exist" is tolerated, because the tables are
    absent the first time the index is built.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr). On success both strings are
        empty; on failure they are the output of the command that failed.
    """
    print("Dropping existing full-text index tables...", file=sys.stderr)

    drop_statements = (
        "drop table DB.DBA.VTLOG_DB_DBA_RDF_OBJ;",
        "drop table DB.DBA.RDF_OBJ_RO_FLAGS_WORDS;",
    )

    for statement in drop_statements:
        ok, out, err = run_isql_command(args, sql_command=statement, ignore_errors=True)
        if ok:
            continue

        # A missing table is expected on the first run, so it is not a failure.
        # NOTE(review): confirm this substring matches the error text that
        # Virtuoso actually emits for a missing table.
        if "does not exist" in err.lower():
            continue

        return False, out, err

    return True, "", ""

46 

47 

def recreate_fulltext_index(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Create the full-text index on DB.DBA.RDF_OBJ again.

    Sends a single DB.DBA.vt_create_text_index call through isql.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr) as returned by run_isql_command
    """
    print("Recreating full-text index...", file=sys.stderr)

    create_index_sql = """
    DB.DBA.vt_create_text_index (
      fix_identifier_case ('DB.DBA.RDF_OBJ'),
      fix_identifier_case ('RO_FLAGS'),
      fix_identifier_case ('RO_ID'),
      0, 0, vector (), 1, '*ini*', 'UTF-8-QR');
    """

    return run_isql_command(args, sql_command=create_index_sql)

69 

70 

def enable_batch_update(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Switch the full-text index on DB.DBA.RDF_OBJ to batch update mode.

    Sends a single DB.DBA.vt_batch_update call through isql.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr) as returned by run_isql_command
    """
    print("Enabling batch update for full-text index...", file=sys.stderr)

    return run_isql_command(
        args,
        sql_command="DB.DBA.vt_batch_update (fix_identifier_case ('DB.DBA.RDF_OBJ'), 'ON', 1);",
    )

86 

87 

def refill_fulltext_index(args: argparse.Namespace) -> Tuple[bool, str, str]:
    """
    Repopulate the full-text index.

    Sends a single DB.DBA.RDF_OBJ_FT_RECOVER() call through isql. The
    progress message warns the user that this step may take a while.

    Args:
        args: Command-line arguments containing connection details

    Returns:
        Tuple of (success, stdout, stderr) as returned by run_isql_command
    """
    print("Refilling full-text index (this may take a while)...", file=sys.stderr)

    recover_sql = "DB.DBA.RDF_OBJ_FT_RECOVER();"
    outcome = run_isql_command(args, sql_command=recover_sql)
    return outcome

103 

104 

def _check_prerequisites(args: argparse.Namespace) -> bool:
    """Return True when the executable (and container) needed to run isql is available."""
    if not args.docker_container:
        # Honour --isql-path when given; fall back to the historical default.
        isql_path = getattr(args, "isql_path", None) or "isql"
        if not shutil.which(isql_path):
            print("Error: isql command not found in PATH", file=sys.stderr)
            return False
        return True

    # Honour --docker-path when given; fall back to the historical default.
    docker_path = getattr(args, "docker_path", None) or "docker"
    if not shutil.which(docker_path):
        print("Error: Docker command not found in PATH", file=sys.stderr)
        return False

    try:
        result = subprocess.run(
            [docker_path, "ps", "--format", "{{.Names}}"],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except (OSError, subprocess.SubprocessError) as e:
        print(f"Error checking container status: {e}", file=sys.stderr)
        return False

    if result.returncode != 0:
        # A failing "docker ps" must not be reported as a missing container.
        print(f"Error checking container status: {result.stderr.strip()}", file=sys.stderr)
        return False

    # Compare against whole names: a substring test on the raw output would
    # accept "virt" when only "virtuoso-prod" is running.
    running_names = {
        name.strip()
        for line in result.stdout.splitlines()
        for name in line.split(",")
    }
    if args.docker_container not in running_names:
        print(f"Error: Container '{args.docker_container}' not found or not running", file=sys.stderr)
        return False

    return True


def _restart_container_and_wait(args: argparse.Namespace) -> None:
    """Restart the Docker container and poll isql until it answers (best effort)."""
    docker_path = getattr(args, "docker_path", None) or "docker"
    max_wait = 60  # seconds of sleeping between readiness probes
    poll_interval = 2

    print("Restarting Docker container to activate text index...", file=sys.stderr)

    try:
        result = subprocess.run(
            [docker_path, "restart", args.docker_container],
            capture_output=True,
            text=True,
            timeout=60,
        )

        if result.returncode != 0:
            print(f"Error restarting container: {result.stderr}", file=sys.stderr)
            print("Note: Manual restart recommended for full text index activation.", file=sys.stderr)
            return

        print("Waiting for container to be ready after restart...", file=sys.stderr)

        for _ in range(max_wait // poll_interval):
            try:
                ready, _, _ = run_isql_command(args, sql_command="status();", ignore_errors=True)
                if ready:
                    print("Container restarted and ready!", file=sys.stderr)
                    return
            except subprocess.TimeoutExpired:
                # The server is still starting; keep polling.
                pass

            time.sleep(poll_interval)

        print("Container restarted but may still be initializing.", file=sys.stderr)

    except Exception as e:
        # The index is already rebuilt at this point, so a restart problem is
        # reported to the user instead of being raised.
        print(f"Error during container restart: {e}", file=sys.stderr)
        print("Note: Manual restart recommended for full text index activation.", file=sys.stderr)


def rebuild_fulltext_index(args: argparse.Namespace) -> bool:
    """
    Complete process to rebuild the Virtuoso full-text index.

    This function will:
    1. Drop existing full-text index tables
    2. Recreate the index
    3. Enable batch update
    4. Refill the index

    The first failing step aborts the process. After the rebuild, the Docker
    container is restarted when both ``restart_container`` and
    ``docker_container`` are set; otherwise a note asks the user to restart
    the Virtuoso database manually.

    Args:
        args: Command-line arguments containing connection details. Reads
            ``docker_container`` and ``restart_container``; ``docker_path``
            and ``isql_path`` are used when present and default to
            ``"docker"`` and ``"isql"``.

    Returns:
        True if the rebuild steps completed successfully (even when the
        optional container restart failed), False otherwise
    """
    if not _check_prerequisites(args):
        return False

    # Each step is paired with the message prefix printed when it fails.
    steps = (
        (drop_fulltext_tables, "Error dropping full-text index tables"),
        (recreate_fulltext_index, "Error recreating full-text index"),
        (enable_batch_update, "Error enabling batch update"),
        (refill_fulltext_index, "Error refilling full-text index"),
    )
    for step, error_prefix in steps:
        success, _, stderr = step(args)
        if not success:
            print(f"{error_prefix}: {stderr}", file=sys.stderr)
            return False

    print("Full-text index rebuild completed successfully.", file=sys.stderr)

    if args.restart_container and args.docker_container:
        _restart_container_and_wait(args)
    elif args.restart_container:
        print("Note: --restart-container requires --docker-container to be specified.", file=sys.stderr)
        print("Note: Restart the Virtuoso database manually for optimal text index performance.", file=sys.stderr)
    else:
        print("Note: Restart the Virtuoso database for optimal text index performance.", file=sys.stderr)

    return True

228 

229 

def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """
    Parse command-line arguments.

    Args:
        argv: Argument strings to parse instead of the process command
            line. When None (the default), argparse reads ``sys.argv[1:]``.

    Returns:
        Namespace with the connection options (host, port, user, password),
        the Docker options (docker_container, restart_container) and the
        executable paths (docker_path, docker_isql_path, isql_path)
    """
    parser = argparse.ArgumentParser(
        description="Rebuild the Virtuoso full-text index."
    )

    parser.add_argument("--host", default="localhost", help="Virtuoso host")
    parser.add_argument("--port", default="1111", help="Virtuoso port")
    parser.add_argument("--user", default="dba", help="Virtuoso username")
    parser.add_argument("--password", default="dba", help="Virtuoso password")
    parser.add_argument(
        "--docker-container",
        help="Docker container name/ID to execute isql inside"
    )
    parser.add_argument(
        "--restart-container",
        action="store_true",
        help="Restart the Docker container after rebuilding the index (recommended for full activation)"
    )

    # Add arguments required by isql_helpers
    parser.add_argument("--docker-path", default="docker", help="Path to docker executable")
    parser.add_argument("--docker-isql-path", default="isql", help="Path to isql inside container")
    parser.add_argument("--isql-path", default="isql", help="Path to isql executable")

    return parser.parse_args(argv)

256 

257 

def main() -> int:
    """
    Main entry point.

    Returns:
        Process exit status: 0 when the rebuild succeeded, 1 otherwise
    """
    if rebuild_fulltext_index(parse_args()):
        return 0
    return 1

263 

264 

# When executed as a script, use main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())