Coverage for heritrace/extensions.py: 99%

233 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-04-18 11:10 +0000

1# heritrace/extensions.py 

2 

3import json 

4import os 

5from datetime import datetime, timedelta 

6from typing import Dict 

7from urllib.parse import urlparse, urlunparse 

8 

9import yaml 

10from flask import Flask, current_app, g, redirect, session, url_for 

11from flask_babel import Babel 

12from flask_login import LoginManager 

13from flask_login.signals import user_loaded_from_cookie 

14from heritrace.models import User 

15from heritrace.services.resource_lock_manager import ResourceLockManager 

16from heritrace.uri_generator.uri_generator import URIGenerator 

17from heritrace.utils.filters import Filter 

18from rdflib import Graph 

19from rdflib_ocdm.counter_handler.counter_handler import CounterHandler 

20from redis import Redis 

21from SPARQLWrapper import JSON, SPARQLWrapper 

22from time_agnostic_library.support import generate_config_file 

23 

# Module-level state shared across the application. Every value starts unset
# and is filled in by the init_* / initialize_* functions in this module; the
# get_* accessors at the bottom of the module hand them out.

# Flipped to True once init_sparql_services() has finished its one-time setup.
initialization_done = False

# Triplestore endpoints and the SPARQL wrappers built on top of them.
dataset_endpoint = None
provenance_endpoint = None
sparql = None
provenance_sparql = None
dataset_is_quadstore = None

# Loaded configuration and cached artefacts.
change_tracking_config = None
display_rules = None
shacl_graph = None
form_fields_cache = None

# Service objects.
custom_filter = None
redis_client = None

37 

def init_extensions(app: Flask, babel: Babel, login_manager: LoginManager, redis: Redis):
    """
    Set up the Flask extensions and the shared module-level services.

    Args:
        app: Flask application instance
        babel: Babel extension instance
        login_manager: LoginManager instance
        redis: Redis client instance
    """
    global redis_client

    # Publish the Redis client at module level before anything else runs.
    redis_client = redis

    def select_locale():
        # Language stored in the session, defaulting to English.
        return session.get('lang', 'en')

    babel.init_app(
        app=app,
        locale_selector=select_locale,
        default_translation_directories=app.config['BABEL_TRANSLATION_DIRECTORIES'],
    )

    init_login_manager(app, login_manager)

    # Order matters: SPARQL services first, then template filters, then the
    # per-request handlers.
    for setup_step in (init_sparql_services, init_filters, init_request_handlers):
        setup_step(app)

    # Expose the extension objects as attributes of the application.
    app.babel = babel
    app.login_manager = login_manager
    app.redis_client = redis_client

75 

def init_login_manager(app, login_manager: LoginManager):
    """
    Configure the Flask-Login extension.

    Binds the login manager to ``app``, sends unauthorized users to the
    ``auth.login`` view, registers the user loader and connects a receiver to
    the ``user_loaded_from_cookie`` signal.

    Args:
        app: Flask application instance
        login_manager: LoginManager instance to configure
    """
    login_manager.init_app(app)
    login_manager.login_view = 'auth.login'
    login_manager.unauthorized_handler(lambda: redirect(url_for('auth.login')))

    @login_manager.user_loader
    def load_user(user_id):
        # The display name comes from the session; the id doubles as the ORCID.
        user_name = session.get('user_name', 'Unknown User')
        return User(id=user_id, name=user_name, orcid=user_id)

    def rotate_session_token(sender, user):
        # Mark the session as modified when the user is restored from the
        # remember-me cookie.
        session.modified = True

    # Signals hold their receivers weakly by default. This receiver is a local
    # function with no other strong reference once init_login_manager returns,
    # so with a weak connection it could be garbage-collected and silently
    # disconnected. weak=False keeps it registered.
    user_loaded_from_cookie.connect(rotate_session_token, weak=False)

90 

def initialize_change_tracking_config(app: Flask, adjusted_dataset_endpoint=None, adjusted_provenance_endpoint=None):
    """
    Initialize and return the change tracking configuration.

    If ``CHANGE_TRACKING_CONFIG`` is set in ``app.config`` and the file exists,
    it is loaded from disk. Otherwise a new configuration is generated, either
    at the configured path or at ``<instance_path>/change_tracking_config.json``
    when no path is configured. Pre-adjusted endpoints are used as given to
    avoid redundant adjustments.

    Args:
        app: Flask application instance
        adjusted_dataset_endpoint: Dataset endpoint URL already adjusted for Docker
        adjusted_provenance_endpoint: Provenance endpoint URL already adjusted for Docker

    Returns:
        dict: The loaded (or freshly generated) configuration dictionary

    Raises:
        RuntimeError: If the configuration cannot be generated, read or parsed.
            The underlying exception is attached as ``__cause__``.
    """
    config_needs_generation = False
    config_path = None
    config = None

    # Check if we have a config path in app.config
    if 'CHANGE_TRACKING_CONFIG' in app.config:
        config_path = app.config['CHANGE_TRACKING_CONFIG']
        if not os.path.exists(config_path):
            app.logger.warning(f"Change tracking configuration file not found at specified path: {config_path}")
            config_needs_generation = True
    else:
        config_needs_generation = True
        config_path = os.path.join(app.instance_path, 'change_tracking_config.json')
        os.makedirs(app.instance_path, exist_ok=True)

    if config_needs_generation:
        dataset_urls = [adjusted_dataset_endpoint] if adjusted_dataset_endpoint else []
        provenance_urls = [adjusted_provenance_endpoint] if adjusted_provenance_endpoint else []

        cache_endpoint = adjust_endpoint_url(app.config.get('CACHE_ENDPOINT', ''))
        cache_update_endpoint = adjust_endpoint_url(app.config.get('CACHE_UPDATE_ENDPOINT', ''))

        db_triplestore = app.config.get('DATASET_DB_TRIPLESTORE', '').lower()
        text_index_enabled = app.config.get('DATASET_DB_TEXT_INDEX_ENABLED', False)

        # Full-text search is enabled only for the triplestore in use.
        blazegraph_search = db_triplestore == 'blazegraph' and text_index_enabled
        fuseki_search = db_triplestore == 'fuseki' and text_index_enabled
        virtuoso_search = db_triplestore == 'virtuoso' and text_index_enabled

        graphdb_connector = ''  # TODO: Add graphdb support

        try:
            config = generate_config_file(
                config_path=config_path,
                dataset_urls=dataset_urls,
                dataset_dirs=app.config.get('DATASET_DIRS', []),
                dataset_is_quadstore=app.config.get('DATASET_IS_QUADSTORE', False),
                provenance_urls=provenance_urls,
                provenance_is_quadstore=app.config.get('PROVENANCE_IS_QUADSTORE', False),
                provenance_dirs=app.config.get('PROVENANCE_DIRS', []),
                blazegraph_full_text_search=blazegraph_search,
                fuseki_full_text_search=fuseki_search,
                virtuoso_full_text_search=virtuoso_search,
                graphdb_connector_name=graphdb_connector,
                cache_endpoint=cache_endpoint,
                cache_update_endpoint=cache_update_endpoint
            )
            app.logger.info(f"Generated new change tracking configuration at: {config_path}")
        except Exception as e:
            raise RuntimeError(f"Failed to generate change tracking configuration: {e}") from e

    # Load and validate the configuration
    try:
        if not config:
            with open(config_path, 'r', encoding='utf8') as f:
                config = json.load(f)

        # Adjust cache URLs if needed
        cache_urls = config['cache_triplestore_url']
        for key in ('endpoint', 'update_endpoint'):
            if cache_urls.get(key):
                cache_urls[key] = adjust_endpoint_url(cache_urls[key])

    except json.JSONDecodeError as e:
        raise RuntimeError(f"Invalid change tracking configuration JSON at {config_path}: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Error reading change tracking configuration at {config_path}: {e}") from e

    # Record the path actually used so later readers find the same file.
    app.config['CHANGE_TRACKING_CONFIG'] = config_path
    return config

179 

def need_initialization(app: Flask):
    """
    Tell whether the counter handler has to be (re)initialized.

    Returns False when the configured URI generator has no
    ``counter_handler`` attribute. Otherwise returns True when the cache file
    is missing, cannot be read or parsed, or records an initialization older
    than ``CACHE_VALIDITY_DAYS`` days.
    """
    generator = app.config['URI_GENERATOR']
    if not hasattr(generator, "counter_handler"):
        return False

    cache_path = app.config['CACHE_FILE']
    max_age_days = app.config['CACHE_VALIDITY_DAYS']

    if not os.path.exists(cache_path):
        return True

    try:
        with open(cache_path, 'r', encoding='utf8') as handle:
            recorded = json.load(handle)['last_initialization']

        elapsed = datetime.now() - datetime.fromisoformat(recorded)
        return elapsed > timedelta(days=max_age_days)
    except Exception:
        # Any problem with the cache file is treated as "stale".
        return True

203 

def update_cache(app: Flask):
    """
    Record the current time as the last initialization in the cache file.

    Overwrites the file at ``app.config['CACHE_FILE']`` with a JSON object
    holding the ISO-formatted timestamp and a fixed version string.
    """
    serialized = json.dumps(
        {
            'last_initialization': datetime.now().isoformat(),
            'version': '1.0',
        },
        ensure_ascii=False,
        indent=4,
    )
    with open(app.config['CACHE_FILE'], 'w', encoding='utf8') as handle:
        handle.write(serialized)

215 

def initialize_counter_handler(app: Flask):
    """
    Populate the URI generator's counter handler when initialization is due.

    Does nothing if need_initialization() reports that no work is needed.
    Otherwise lets the URI generator initialize its own counters from the
    dataset, sets one snapshot counter per entity from the provenance store,
    and refreshes the cache timestamp.
    """
    if not need_initialization(app):
        return

    uri_generator: URIGenerator = app.config['URI_GENERATOR']
    counter_handler: CounterHandler = uri_generator.counter_handler

    # Initialize the counters specific to the URI generator.
    uri_generator.initialize_counters(sparql)

    # Count the distinct snapshots recorded for each entity in the provenance.
    # NOTE(review): the original comment described counting wasDerivedFrom
    # links and adding 1 for the first snapshot; the query as written counts
    # snapshots directly, and the OPTIONAL clause does not appear to affect
    # the count. Confirm the intent.
    prov_query = """
        SELECT ?entity (COUNT(DISTINCT ?snapshot) as ?count)
        WHERE {
            ?snapshot a <http://www.w3.org/ns/prov#Entity> ;
                <http://www.w3.org/ns/prov#specializationOf> ?entity .
            OPTIONAL {
                ?snapshot <http://www.w3.org/ns/prov#wasDerivedFrom> ?prev .
            }
        }
        GROUP BY ?entity
    """

    # Run the query against the provenance store and set the snapshot counters.
    provenance_sparql.setQuery(prov_query)
    provenance_sparql.setReturnFormat(JSON)
    bindings = provenance_sparql.query().convert()["results"]["bindings"]

    for row in bindings:
        entity_uri = row["entity"]["value"]
        snapshot_total = int(row["count"]["value"])
        counter_handler.set_counter(snapshot_total, entity_uri)

    update_cache(app)

255 

def initialize_global_variables(app: Flask):
    """
    Initialize the module-level form fields cache, display rules, SHACL graph
    and quadstore flag from the application configuration.

    A configured display rules or SHACL file that does not exist is logged as
    a warning rather than treated as an error. A missing SHACL file, or an
    already populated form fields cache, ends the function early.

    Args:
        app: Flask application instance

    Raises:
        RuntimeError: If loading the display rules or building the form fields
            fails. The underlying exception is attached as ``__cause__``.
    """
    global shacl_graph, form_fields_cache, display_rules, dataset_is_quadstore

    try:
        dataset_is_quadstore = app.config.get('DATASET_IS_QUADSTORE', False)

        if app.config.get('DISPLAY_RULES_PATH'):
            if not os.path.exists(app.config['DISPLAY_RULES_PATH']):
                app.logger.warning(f"Display rules file not found at: {app.config['DISPLAY_RULES_PATH']}")
            else:
                try:
                    # Explicit encoding, matching the other file reads in this
                    # module, so the result does not depend on the platform
                    # default.
                    with open(app.config['DISPLAY_RULES_PATH'], 'r', encoding='utf8') as f:
                        display_rules = yaml.safe_load(f)['classes']
                except Exception as e:
                    app.logger.error(f"Error loading display rules: {str(e)}")
                    raise RuntimeError(f"Failed to load display rules: {str(e)}") from e

        if app.config.get('SHACL_PATH'):
            if not os.path.exists(app.config['SHACL_PATH']):
                app.logger.warning(f"SHACL file not found at: {app.config['SHACL_PATH']}")
                return

            # The form fields are built once; later calls reuse the cache.
            if form_fields_cache is not None:
                return

            try:
                shacl_graph = Graph()
                shacl_graph.parse(source=app.config['SHACL_PATH'], format="turtle")

                from heritrace.utils.shacl_utils import \
                    get_form_fields_from_shacl
                form_fields_cache = get_form_fields_from_shacl(shacl_graph, display_rules)

            except Exception as e:
                app.logger.error(f"Error initializing form fields from SHACL: {str(e)}")
                raise RuntimeError(f"Failed to initialize form fields: {str(e)}") from e

        app.logger.info("Global variables initialized successfully")

    except Exception as e:
        app.logger.error(f"Error during global variables initialization: {str(e)}")
        raise RuntimeError(f"Global variables initialization failed: {str(e)}") from e

305 

def init_sparql_services(app: Flask):
    """
    Set up the SPARQL endpoints and the services that depend on them.

    The work is done only once per process: after the first successful run the
    module-level ``initialization_done`` flag makes later calls return
    immediately.
    """
    global initialization_done, dataset_endpoint, provenance_endpoint, sparql, provenance_sparql, change_tracking_config

    if initialization_done:
        return

    # Adjust the endpoints for Docker if necessary.
    dataset_endpoint = adjust_endpoint_url(app.config['DATASET_DB_URL'])
    provenance_endpoint = adjust_endpoint_url(app.config['PROVENANCE_DB_URL'])

    # One SPARQL wrapper per endpoint.
    sparql = SPARQLWrapper(dataset_endpoint)
    provenance_sparql = SPARQLWrapper(provenance_endpoint)

    # The change tracking configuration reuses the already adjusted endpoints.
    change_tracking_config = initialize_change_tracking_config(
        app,
        adjusted_dataset_endpoint=dataset_endpoint,
        adjusted_provenance_endpoint=provenance_endpoint,
    )

    # Remaining components rely on the wrappers created above.
    initialize_counter_handler(app)
    initialize_global_variables(app)

    initialization_done = True

331 

def init_filters(app: Flask):
    """
    Create the custom Filter instance and register its Jinja template filters.

    The JSON-LD context is read from ``resources/context.json``, a path
    relative to the current working directory. Display rules are loaded from
    ``DISPLAY_RULES_PATH`` when that setting is present and non-empty;
    otherwise the filter is created without display rules.

    Args:
        app: Flask application instance
    """
    global custom_filter

    # Load context from configuration
    with open(os.path.join("resources", "context.json"), "r", encoding="utf8") as config_file:
        context = json.load(config_file)["@context"]

    # Load display rules from configuration. .get() treats a missing setting
    # like an empty one, as initialize_global_variables() does, instead of
    # raising KeyError.
    display_rules = None
    display_rules_path = app.config.get("DISPLAY_RULES_PATH")
    if display_rules_path:
        with open(display_rules_path, 'r', encoding='utf8') as f:
            display_rules = yaml.safe_load(f)['classes']

    # Create custom filter instance
    custom_filter = Filter(context, display_rules, dataset_endpoint)

    # Register template filters
    app.jinja_env.filters['human_readable_predicate'] = custom_filter.human_readable_predicate
    app.jinja_env.filters['human_readable_entity'] = custom_filter.human_readable_entity
    app.jinja_env.filters['human_readable_primary_source'] = custom_filter.human_readable_primary_source
    app.jinja_env.filters['format_datetime'] = custom_filter.human_readable_datetime
    app.jinja_env.filters['split_ns'] = custom_filter.split_ns
    app.jinja_env.filters['format_source_reference'] = custom_filter.format_source_reference
    app.jinja_env.filters['format_agent_reference'] = custom_filter.format_agent_reference

357 

def init_request_handlers(app):
    """Register the before_request and teardown_appcontext handlers."""

    def initialize_lock_manager():
        """Attach a resource lock manager to ``g`` unless one is already there."""
        if hasattr(g, 'resource_lock_manager'):
            return
        g.resource_lock_manager = ResourceLockManager(redis_client)

    def close_redis_connection(error):
        """Drop the resource lock manager from ``g`` when the app context ends."""
        if not hasattr(g, 'resource_lock_manager'):
            return
        del g.resource_lock_manager

    app.before_request(initialize_lock_manager)
    app.teardown_appcontext(close_redis_connection)

372 

def adjust_endpoint_url(url: str) -> str:
    """
    Adjust endpoint URLs to work properly within Docker containers.

    When running in Docker, a URL whose host is exactly ``localhost``,
    ``127.0.0.1`` or ``0.0.0.0`` is rewritten to point at
    ``host.docker.internal``. The port and any ``user:password@`` prefix are
    kept. Every other URL, including an empty one, is returned unchanged.

    Args:
        url: The endpoint URL to adjust

    Returns:
        The adjusted URL if running in Docker, original URL otherwise
    """
    if not url or not running_in_docker():
        return url

    parsed_url = urlparse(url)

    # Split the network location into optional credentials, host and port.
    # The host is compared exactly: a substring test would also rewrite hosts
    # such as "mylocalhost.example.org" or "127.0.0.10".
    userinfo, at_sign, host_port = parsed_url.netloc.rpartition('@')
    host, colon, port = host_port.partition(':')

    if host.lower() not in ('localhost', '127.0.0.1', '0.0.0.0'):
        return url

    new_netloc = f'{userinfo}{at_sign}host.docker.internal{colon}{port}'
    return urlunparse(parsed_url._replace(netloc=new_netloc))


def running_in_docker() -> bool:
    """Check if the application is running inside a Docker container.

    Detection relies on the presence of the ``/.dockerenv`` file.
    """
    return os.path.exists('/.dockerenv')

401 

def get_dataset_endpoint() -> str:
    """Return the dataset endpoint URL (``None`` until init_sparql_services() has set it)."""
    return dataset_endpoint

405 

def get_sparql() -> SPARQLWrapper:
    """Return the SPARQL wrapper for the dataset endpoint (``None`` until init_sparql_services() has set it)."""
    return sparql

409 

def get_provenance_endpoint() -> str:
    """Return the provenance endpoint URL (``None`` until init_sparql_services() has set it)."""
    return provenance_endpoint

413 

def get_provenance_sparql() -> SPARQLWrapper:
    """Return the SPARQL wrapper for the provenance endpoint (``None`` until init_sparql_services() has set it)."""
    return provenance_sparql

417 

def get_counter_handler() -> CounterHandler:
    """
    Return the CounterHandler held by the URIGenerator in the app config.

    Raises:
        RuntimeError: If no URI generator is configured for the current
            application, or it has no ``counter_handler`` attribute.
    """
    uri_generator: URIGenerator = current_app.config.get('URI_GENERATOR')

    if not uri_generator or not hasattr(uri_generator, 'counter_handler'):
        # Covers a generator that is not configured or not initialized yet.
        current_app.logger.error("CounterHandler not found in URIGenerator config.")
        raise RuntimeError("CounterHandler is not available. Initialization might have failed.")

    return uri_generator.counter_handler

430 

def get_custom_filter() -> Filter:
    """Return the custom filter instance (``None`` until init_filters() has created it)."""
    return custom_filter

434 

def get_change_tracking_config() -> Dict:
    """Return the change tracking configuration (``None`` until init_sparql_services() has loaded it)."""
    return change_tracking_config

438 

def get_display_rules() -> Dict:
    """Return the module-level display rules (``None`` if none have been loaded)."""
    return display_rules

442 

def get_form_fields() -> Dict:
    """Return the cached form fields (``None`` if they have not been built)."""
    return form_fields_cache

446 

def get_dataset_is_quadstore() -> bool:
    """Return the quadstore flag (``None`` until initialize_global_variables() has set it)."""
    return dataset_is_quadstore

450 

def get_shacl_graph() -> Graph:
    """Return the SHACL shapes graph (``None`` if no SHACL file has been parsed)."""
    return shacl_graph