Coverage for heritrace/extensions.py: 99%
233 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-04-18 11:10 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-04-18 11:10 +0000
1# heritrace/extensions.py
3import json
4import os
5from datetime import datetime, timedelta
6from typing import Dict
7from urllib.parse import urlparse, urlunparse
9import yaml
10from flask import Flask, current_app, g, redirect, session, url_for
11from flask_babel import Babel
12from flask_login import LoginManager
13from flask_login.signals import user_loaded_from_cookie
14from heritrace.models import User
15from heritrace.services.resource_lock_manager import ResourceLockManager
16from heritrace.uri_generator.uri_generator import URIGenerator
17from heritrace.utils.filters import Filter
18from rdflib import Graph
19from rdflib_ocdm.counter_handler.counter_handler import CounterHandler
20from redis import Redis
21from SPARQLWrapper import JSON, SPARQLWrapper
22from time_agnostic_library.support import generate_config_file
# Module-level shared state. Everything below starts unset and is filled in
# by the init_* / initialize_* functions of this module; the get_* accessors
# at the bottom of the file simply return these values.

# Guard flag checked by init_sparql_services() so its setup runs only once.
initialization_done = False

# Endpoint URLs and the SPARQLWrapper clients built from them.
dataset_endpoint = None
provenance_endpoint = None
sparql = None
provenance_sparql = None

# Change tracking configuration dictionary.
change_tracking_config = None

# Values assigned by initialize_global_variables().
shacl_graph = None
form_fields_cache = None
display_rules = None
dataset_is_quadstore = None

# Template filter helper and Redis client.
custom_filter = None
redis_client = None
def init_extensions(app: Flask, babel: Babel, login_manager: LoginManager, redis: Redis):
    """
    Wire the Flask extensions and the module-level shared objects to the app.

    The steps run in a fixed order: Babel, login manager, SPARQL services,
    template filters, request handlers. Finally the extension instances are
    attached to the application object.

    Args:
        app: Flask application instance
        babel: Babel extension instance
        login_manager: LoginManager instance
        redis: Redis client instance
    """
    global redis_client
    redis_client = redis

    def _select_locale():
        # The language is read from the session, defaulting to English.
        return session.get('lang', 'en')

    translation_directories = app.config['BABEL_TRANSLATION_DIRECTORIES']
    babel.init_app(
        app=app,
        locale_selector=_select_locale,
        default_translation_directories=translation_directories,
    )

    init_login_manager(app, login_manager)

    # SPARQL services are set up before the filters because init_filters()
    # reads the module-level dataset endpoint.
    init_sparql_services(app)
    init_filters(app)
    init_request_handlers(app)

    # Expose the extension instances on the application object.
    app.babel = babel
    app.login_manager = login_manager
    app.redis_client = redis_client
def init_login_manager(app, login_manager: LoginManager):
    """
    Configure the Flask-Login extension.

    Registers the login view, an unauthorized handler that redirects to the
    login page, the user loader, and a receiver for the
    ``user_loaded_from_cookie`` signal.

    Args:
        app: Flask application instance
        login_manager: LoginManager instance to configure
    """
    login_manager.init_app(app)
    login_manager.login_view = 'auth.login'
    login_manager.unauthorized_handler(lambda: redirect(url_for('auth.login')))

    @login_manager.user_loader
    def load_user(user_id):
        # The display name is kept in the session; the user id doubles as ORCID.
        user_name = session.get('user_name', 'Unknown User')
        return User(id=user_id, name=user_name, orcid=user_id)

    def rotate_session_token(sender, user):
        # Mark the session as modified when the user is restored from the cookie.
        session.modified = True

    # Signals keep only a weak reference to receivers by default. Because
    # rotate_session_token is a nested function, it would be garbage-collected
    # (and silently disconnected) as soon as this function returns, so a
    # strong reference is requested explicitly.
    user_loaded_from_cookie.connect(rotate_session_token, weak=False)
def initialize_change_tracking_config(app: Flask, adjusted_dataset_endpoint=None, adjusted_provenance_endpoint=None):
    """
    Initialize and return the change tracking configuration.

    If ``CHANGE_TRACKING_CONFIG`` is configured and the file exists, it is
    loaded from disk. If it is configured but missing, or not configured at
    all, a new configuration is generated (in the latter case inside the
    application instance folder). Uses pre-adjusted endpoints if provided to
    avoid redundant adjustments.

    Args:
        app: Flask application instance
        adjusted_dataset_endpoint: Dataset endpoint URL already adjusted for Docker
        adjusted_provenance_endpoint: Provenance endpoint URL already adjusted for Docker

    Returns:
        dict: The loaded configuration dictionary

    Raises:
        RuntimeError: If the configuration cannot be generated, read or parsed.
            The original exception is attached as ``__cause__``.
    """
    config = None

    # A missing, None or empty setting is treated as "not configured".
    configured_path = app.config.get('CHANGE_TRACKING_CONFIG')
    if configured_path:
        config_path = configured_path
        config_needs_generation = not os.path.exists(config_path)
        if config_needs_generation:
            app.logger.warning(f"Change tracking configuration file not found at specified path: {config_path}")
    else:
        config_needs_generation = True
        config_path = os.path.join(app.instance_path, 'change_tracking_config.json')
        os.makedirs(app.instance_path, exist_ok=True)

    if config_needs_generation:
        dataset_urls = [adjusted_dataset_endpoint] if adjusted_dataset_endpoint else []
        provenance_urls = [adjusted_provenance_endpoint] if adjusted_provenance_endpoint else []

        cache_endpoint = adjust_endpoint_url(app.config.get('CACHE_ENDPOINT', ''))
        cache_update_endpoint = adjust_endpoint_url(app.config.get('CACHE_UPDATE_ENDPOINT', ''))

        # Full-text search is enabled only for the configured triplestore.
        db_triplestore = app.config.get('DATASET_DB_TRIPLESTORE', '').lower()
        text_index_enabled = app.config.get('DATASET_DB_TEXT_INDEX_ENABLED', False)

        blazegraph_search = db_triplestore == 'blazegraph' and text_index_enabled
        fuseki_search = db_triplestore == 'fuseki' and text_index_enabled
        virtuoso_search = db_triplestore == 'virtuoso' and text_index_enabled

        graphdb_connector = ''  # TODO: Add graphdb support

        try:
            config = generate_config_file(
                config_path=config_path,
                dataset_urls=dataset_urls,
                dataset_dirs=app.config.get('DATASET_DIRS', []),
                dataset_is_quadstore=app.config.get('DATASET_IS_QUADSTORE', False),
                provenance_urls=provenance_urls,
                provenance_is_quadstore=app.config.get('PROVENANCE_IS_QUADSTORE', False),
                provenance_dirs=app.config.get('PROVENANCE_DIRS', []),
                blazegraph_full_text_search=blazegraph_search,
                fuseki_full_text_search=fuseki_search,
                virtuoso_full_text_search=virtuoso_search,
                graphdb_connector_name=graphdb_connector,
                cache_endpoint=cache_endpoint,
                cache_update_endpoint=cache_update_endpoint
            )
            app.logger.info(f"Generated new change tracking configuration at: {config_path}")
        except Exception as e:
            raise RuntimeError(f"Failed to generate change tracking configuration: {str(e)}") from e

    # Load (when nothing was generated) and adjust the configuration
    try:
        if not config:
            with open(config_path, 'r', encoding='utf8') as f:
                config = json.load(f)

        # Adjust cache URLs if needed
        cache_settings = config['cache_triplestore_url']
        for key in ('endpoint', 'update_endpoint'):
            if cache_settings.get(key):
                cache_settings[key] = adjust_endpoint_url(cache_settings[key])

    except json.JSONDecodeError as e:
        raise RuntimeError(f"Invalid change tracking configuration JSON at {config_path}: {str(e)}") from e
    except Exception as e:
        raise RuntimeError(f"Error reading change tracking configuration at {config_path}: {str(e)}") from e

    # Remember the path that is actually in use.
    app.config['CHANGE_TRACKING_CONFIG'] = config_path
    return config
def need_initialization(app: Flask):
    """
    Tell whether the counter handler has to be (re)initialized.

    Returns False when the configured URI generator has no
    ``counter_handler`` attribute. Otherwise returns True when the cache file
    is missing, cannot be read or parsed, or when the recorded
    initialization is older than ``CACHE_VALIDITY_DAYS`` days.

    Args:
        app: Flask application instance

    Returns:
        bool: True if initialization is needed, False otherwise
    """
    if not hasattr(app.config['URI_GENERATOR'], "counter_handler"):
        return False

    cache_path = app.config['CACHE_FILE']
    validity_days = app.config['CACHE_VALIDITY_DAYS']

    if os.path.exists(cache_path):
        try:
            with open(cache_path, 'r', encoding='utf8') as handle:
                recorded = json.load(handle)['last_initialization']
            elapsed = datetime.now() - datetime.fromisoformat(recorded)
            return elapsed > timedelta(days=validity_days)
        except Exception:
            # Any problem with the cache content counts as "stale".
            return True

    return True
def update_cache(app: Flask):
    """
    Record the current time as the last initialization in the cache file.

    The file at ``app.config['CACHE_FILE']`` is overwritten with a JSON
    object holding the ISO timestamp and a format version.

    Args:
        app: Flask application instance
    """
    cache_path = app.config['CACHE_FILE']
    payload = {
        'last_initialization': datetime.now().isoformat(),
        'version': '1.0',
    }
    serialized = json.dumps(payload, ensure_ascii=False, indent=4)
    with open(cache_path, 'w', encoding='utf8') as handle:
        handle.write(serialized)
def initialize_counter_handler(app: Flask):
    """
    Initialize the counter handler used for URI generation, if needed.

    Does nothing when need_initialization() reports that the cached
    initialization is still valid. Otherwise the URI generator counters are
    initialized from the dataset, the per-entity snapshot counters are set
    from the provenance endpoint, and the cache file is refreshed.

    Args:
        app: Flask application instance
    """
    if not need_initialization(app):
        return

    uri_generator: URIGenerator = app.config['URI_GENERATOR']
    counter_handler: CounterHandler = uri_generator.counter_handler

    # Initialize the counters that are specific to the URI generator
    uri_generator.initialize_counters(sparql)

    # Count the distinct provenance snapshots of every entity. The OPTIONAL
    # wasDerivedFrom pattern does not filter anything, so the first snapshot
    # (which has no wasDerivedFrom) is counted as well.
    prov_query = """
        SELECT ?entity (COUNT(DISTINCT ?snapshot) as ?count)
        WHERE {
            ?snapshot a <http://www.w3.org/ns/prov#Entity> ;
                <http://www.w3.org/ns/prov#specializationOf> ?entity .
            OPTIONAL {
                ?snapshot <http://www.w3.org/ns/prov#wasDerivedFrom> ?prev .
            }
        }
        GROUP BY ?entity
    """

    # Run the query against the provenance endpoint and set the snapshot counters
    provenance_sparql.setQuery(prov_query)
    provenance_sparql.setReturnFormat(JSON)
    bindings = provenance_sparql.query().convert()["results"]["bindings"]

    for row in bindings:
        entity_uri = row["entity"]["value"]
        snapshot_total = int(row["count"]["value"])
        counter_handler.set_counter(snapshot_total, entity_uri)

    update_cache(app)
def initialize_global_variables(app: Flask):
    """
    Initialize the module-level dataset flag, display rules, SHACL graph and
    form fields cache from the application configuration.

    A configured but missing display rules or SHACL file only produces a
    warning. The SHACL graph is parsed only while the form fields cache is
    still unset.

    Args:
        app: Flask application instance

    Raises:
        RuntimeError: If the display rules or the SHACL shapes cannot be
            loaded. The original exception is attached as ``__cause__``.
    """
    global shacl_graph, form_fields_cache, display_rules, dataset_is_quadstore

    try:
        dataset_is_quadstore = app.config.get('DATASET_IS_QUADSTORE', False)

        if app.config.get('DISPLAY_RULES_PATH'):
            if not os.path.exists(app.config['DISPLAY_RULES_PATH']):
                app.logger.warning(f"Display rules file not found at: {app.config['DISPLAY_RULES_PATH']}")
            else:
                try:
                    # Explicit encoding so the result does not depend on the
                    # platform default (matches the JSON reads in this module).
                    with open(app.config['DISPLAY_RULES_PATH'], 'r', encoding='utf8') as f:
                        display_rules = yaml.safe_load(f)['classes']
                except Exception as e:
                    app.logger.error(f"Error loading display rules: {str(e)}")
                    raise RuntimeError(f"Failed to load display rules: {str(e)}") from e

        if app.config.get('SHACL_PATH'):
            if not os.path.exists(app.config['SHACL_PATH']):
                app.logger.warning(f"SHACL file not found at: {app.config['SHACL_PATH']}")
                return

            # Form fields were already computed: nothing left to do.
            if form_fields_cache is not None:
                return

            try:
                shacl_graph = Graph()
                shacl_graph.parse(source=app.config['SHACL_PATH'], format="turtle")

                # Imported here rather than at module level, as in the original code.
                from heritrace.utils.shacl_utils import \
                    get_form_fields_from_shacl
                form_fields_cache = get_form_fields_from_shacl(shacl_graph, display_rules)

            except Exception as e:
                app.logger.error(f"Error initializing form fields from SHACL: {str(e)}")
                raise RuntimeError(f"Failed to initialize form fields: {str(e)}") from e

        app.logger.info("Global variables initialized successfully")

    except Exception as e:
        # Inner RuntimeErrors are logged and wrapped once more here, so the
        # final message always starts with the same prefix.
        app.logger.error(f"Error during global variables initialization: {str(e)}")
        raise RuntimeError(f"Global variables initialization failed: {str(e)}") from e
def init_sparql_services(app: Flask):
    """
    Set up the SPARQL endpoints and the services that depend on them.

    The work is done only once per process: later calls return immediately
    because the module-level ``initialization_done`` flag is already set.

    Args:
        app: Flask application instance
    """
    global initialization_done, dataset_endpoint, provenance_endpoint, sparql, provenance_sparql, change_tracking_config

    if initialization_done:
        return

    # Endpoint URLs, adjusted for Docker if necessary
    dataset_endpoint = adjust_endpoint_url(app.config['DATASET_DB_URL'])
    provenance_endpoint = adjust_endpoint_url(app.config['PROVENANCE_DB_URL'])

    # One SPARQL wrapper per endpoint
    sparql = SPARQLWrapper(dataset_endpoint)
    provenance_sparql = SPARQLWrapper(provenance_endpoint)

    # The change tracking configuration reuses the already adjusted endpoints
    change_tracking_config = initialize_change_tracking_config(
        app,
        adjusted_dataset_endpoint=dataset_endpoint,
        adjusted_provenance_endpoint=provenance_endpoint,
    )

    # Remaining components
    initialize_counter_handler(app)
    initialize_global_variables(app)

    initialization_done = True
def init_filters(app: Flask):
    """
    Create the custom Filter instance and register its template filters.

    The JSON-LD context is read from ``resources/context.json`` (relative to
    the current working directory). Display rules are read from
    ``DISPLAY_RULES_PATH`` when that setting is present and the file exists;
    a configured but missing file only produces a warning, consistently with
    initialize_global_variables().

    Args:
        app: Flask application instance
    """
    global custom_filter

    # Load context from configuration
    with open(os.path.join("resources", "context.json"), "r", encoding="utf8") as config_file:
        context = json.load(config_file)["@context"]

    # Load display rules from configuration. `.get` is used so that an
    # unset DISPLAY_RULES_PATH does not raise KeyError.
    filter_display_rules = None
    rules_path = app.config.get("DISPLAY_RULES_PATH")
    if rules_path:
        if os.path.exists(rules_path):
            with open(rules_path, 'r', encoding='utf8') as f:
                filter_display_rules = yaml.safe_load(f)['classes']
        else:
            app.logger.warning(f"Display rules file not found at: {rules_path}")

    # Create custom filter instance
    custom_filter = Filter(context, filter_display_rules, dataset_endpoint)

    # Register template filters (template name -> Filter method)
    app.jinja_env.filters.update({
        'human_readable_predicate': custom_filter.human_readable_predicate,
        'human_readable_entity': custom_filter.human_readable_entity,
        'human_readable_primary_source': custom_filter.human_readable_primary_source,
        'format_datetime': custom_filter.human_readable_datetime,
        'split_ns': custom_filter.split_ns,
        'format_source_reference': custom_filter.format_source_reference,
        'format_agent_reference': custom_filter.format_agent_reference,
    })
def init_request_handlers(app):
    """
    Register the before_request and teardown_appcontext hooks on the app.

    Args:
        app: Flask application instance
    """

    @app.before_request
    def initialize_lock_manager():
        """Attach a ResourceLockManager to ``g`` unless one is already there."""
        if 'resource_lock_manager' not in g:
            g.resource_lock_manager = ResourceLockManager(redis_client)

    @app.teardown_appcontext
    def close_redis_connection(error):
        """Drop the lock manager from ``g`` when the application context ends."""
        g.pop('resource_lock_manager', None)
def adjust_endpoint_url(url: str) -> str:
    """
    Adjust endpoint URLs to work properly within Docker containers.

    When running in Docker, a URL whose host is exactly ``localhost``,
    ``127.0.0.1`` or ``0.0.0.0`` is rewritten to point at
    ``host.docker.internal``. Credentials, port, path, query and fragment
    are preserved. Hosts that merely contain one of those names
    (e.g. ``mylocalhost.example.org``) are left untouched.

    Args:
        url: The endpoint URL to adjust

    Returns:
        The adjusted URL if running in Docker, original URL otherwise
    """
    # Empty/None values have nothing to adjust.
    if not url or not running_in_docker():
        return url

    local_hosts = ('localhost', '127.0.0.1', '0.0.0.0')
    parsed_url = urlparse(url)

    try:
        hostname = parsed_url.hostname
        port = parsed_url.port
    except ValueError:
        # The port is not a valid number: leave the URL as it is.
        return url

    if hostname not in local_hosts:
        return url

    # Keep an optional "user:password@" prefix in front of the new host.
    userinfo, separator, _ = parsed_url.netloc.rpartition('@')
    new_netloc = f'{userinfo}{separator}host.docker.internal'
    if port is not None:
        new_netloc = f'{new_netloc}:{port}'

    return urlunparse(parsed_url._replace(netloc=new_netloc))


def running_in_docker() -> bool:
    """Check if the application is running inside a Docker container.

    The check looks for the ``/.dockerenv`` marker file.
    """
    return os.path.exists('/.dockerenv')
def get_dataset_endpoint() -> str:
    """Return the module-level dataset endpoint URL.

    The value is None until init_sparql_services() has assigned it.
    """
    return dataset_endpoint
def get_sparql() -> SPARQLWrapper:
    """Return the module-level SPARQL wrapper for the dataset endpoint.

    The value is None until init_sparql_services() has assigned it.
    """
    return sparql
def get_provenance_endpoint() -> str:
    """Return the module-level provenance endpoint URL.

    The value is None until init_sparql_services() has assigned it.
    """
    return provenance_endpoint
def get_provenance_sparql() -> SPARQLWrapper:
    """Return the module-level SPARQL wrapper for the provenance endpoint.

    The value is None until init_sparql_services() has assigned it.
    """
    return provenance_sparql
def get_counter_handler() -> CounterHandler:
    """
    Return the CounterHandler held by the URIGenerator in the app config.

    The URIGenerator is looked up under ``URI_GENERATOR`` in the
    configuration of the current application.

    Raises:
        RuntimeError: If no URIGenerator is configured or it has no
            ``counter_handler`` attribute.
    """
    uri_generator: URIGenerator = current_app.config.get('URI_GENERATOR')

    if not uri_generator or not hasattr(uri_generator, 'counter_handler'):
        # Not initialized yet, or not configured at all
        current_app.logger.error("CounterHandler not found in URIGenerator config.")
        raise RuntimeError("CounterHandler is not available. Initialization might have failed.")

    return uri_generator.counter_handler
def get_custom_filter() -> Filter:
    """Return the module-level Filter instance.

    The value is None until init_filters() has created it.
    """
    return custom_filter
def get_change_tracking_config() -> Dict:
    """Return the module-level change tracking configuration.

    The value is None until init_sparql_services() has assigned it.
    """
    return change_tracking_config
def get_display_rules() -> Dict:
    """Return the module-level display rules.

    The value is None unless initialize_global_variables() has loaded the
    rules from the configured file.
    """
    return display_rules
def get_form_fields() -> Dict:
    """Return the module-level form fields cache.

    The value is None unless initialize_global_variables() has built it from
    the SHACL shapes graph.
    """
    return form_fields_cache
def get_dataset_is_quadstore() -> bool:
    """Return the module-level flag telling whether the dataset is a quadstore.

    The value is None until initialize_global_variables() has read it from
    the application configuration.
    """
    return dataset_is_quadstore
def get_shacl_graph() -> Graph:
    """Return the module-level SHACL shapes graph.

    The value is None unless initialize_global_variables() has parsed the
    configured SHACL file.
    """
    return shacl_graph