Coverage for heritrace/utils/shacl_utils.py: 100%
82 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-06-24 11:39 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-06-24 11:39 +0000
from typing import List, Optional, Tuple

from flask import Flask
from rdflib import RDF, Graph, URIRef

from heritrace.extensions import get_shacl_graph
from heritrace.utils.display_rules_utils import get_class_priority
from heritrace.utils.shacl_display import (apply_display_rules,
                                           extract_shacl_form_fields,
                                           order_form_fields,
                                           process_nested_shapes)
def get_form_fields_from_shacl(shacl: Graph, display_rules: List[dict], app: Flask):
    """
    Build the ordered form-field mapping described by a SHACL graph.

    Args:
        shacl: The SHACL graph to analyze
        display_rules: The display rules configuration
        app: Flask application instance, forwarded to the extraction helpers

    Returns:
        The result of ``order_form_fields``: a mapping whose keys are
        (class, shape) tuples and whose values are the form fields of that
        entity type. An empty ``dict`` is returned when ``shacl`` is falsy.
    """
    if not shacl:
        return {}

    # Raw form fields, straight from the SHACL shapes.
    fields_by_entity = extract_shacl_form_fields(shacl, display_rules, app=app)

    # Attach the nested shape to every field that points at a node shape.
    # One set is shared by all calls (presumably so a shape is not expanded
    # twice -- confirm against process_nested_shapes).
    visited_shapes = set()
    for fields_by_predicate in fields_by_entity.values():
        for field_infos in fields_by_predicate.values():
            for info in field_infos:
                if not info.get("nodeShape"):
                    continue
                info["nestedShape"] = process_nested_shapes(
                    shacl,
                    display_rules,
                    info["nodeShape"],
                    app=app,
                    processed_shapes=visited_shapes,
                )

    # Display rules are optional; skip the pass when none are configured.
    if display_rules:
        fields_by_entity = apply_display_rules(shacl, fields_by_entity, display_rules)

    # Final ordering is always applied, even with empty display rules.
    return order_form_fields(fields_by_entity, display_rules)
def determine_shape_for_classes(class_list: List[str]) -> Optional[str]:
    """
    Determine the most appropriate SHACL shape for a list of class URIs.

    Every shape that declares one of the given classes as its
    ``sh:targetClass`` is collected, and the pair with the best priority is
    chosen by ``_find_highest_priority_shape``.

    Args:
        class_list: List of class URIs to find shapes for

    Returns:
        The most appropriate shape URI based on priority, or None if the
        SHACL graph is unavailable/empty or no shapes are found
    """
    shacl_graph = get_shacl_graph()
    if not shacl_graph:
        return None

    # The class URI is supplied through initBindings instead of being spliced
    # into the query text, so a value containing '>' or whitespace can neither
    # break SPARQL parsing nor change the meaning of the query.
    query_string = """
        SELECT DISTINCT ?shape WHERE {
            ?shape <http://www.w3.org/ns/shacl#targetClass> ?targetClass .
        }
    """

    class_shape_pairs = []
    for class_uri in class_list:
        results = shacl_graph.query(
            query_string,
            initBindings={"targetClass": URIRef(class_uri)},
        )
        class_shape_pairs.extend((class_uri, str(row.shape)) for row in results)

    return _find_highest_priority_shape(class_shape_pairs)
def determine_shape_for_entity_triples(entity_triples_iter) -> Optional[str]:
    """
    Determine the most appropriate SHACL shape for an entity based on its triples.

    Uses both class priority and heuristic property matching to distinguish
    between shapes with the same target class but different properties
    (e.g., SpecialIssueShape vs IssueShape).

    Args:
        entity_triples_iter: Iterator of triples (subject, predicate, object) for the entity

    Returns:
        The most appropriate shape URI, or None if the SHACL graph is
        unavailable/empty, the entity has no rdf:type, or no shapes are found
    """
    shacl_graph = get_shacl_graph()
    if not shacl_graph:
        return None

    rdf_type = str(RDF.type)
    entity_classes = []
    entity_properties = set()

    # Every predicate (rdf:type included) counts as a property of the entity;
    # the objects of rdf:type triples are its classes.
    for _, predicate, obj in entity_triples_iter:
        predicate_uri = str(predicate)
        if predicate_uri == rdf_type:
            entity_classes.append(str(obj))
        entity_properties.add(predicate_uri)

    if not entity_classes:
        return None

    # The class URI comes from entity data, so it is bound through
    # initBindings instead of being interpolated into the SPARQL text: a value
    # containing '>' or whitespace can neither break parsing nor alter the query.
    query_string = """
        SELECT DISTINCT ?shape WHERE {
            ?shape <http://www.w3.org/ns/shacl#targetClass> ?targetClass .
        }
    """

    candidate_shapes = []
    for class_uri in entity_classes:
        results = shacl_graph.query(
            query_string,
            initBindings={"targetClass": URIRef(class_uri)},
        )
        candidate_shapes.extend((class_uri, str(row.shape)) for row in results)

    if not candidate_shapes:
        return None

    # A single candidate needs no scoring.
    if len(candidate_shapes) == 1:
        return candidate_shapes[0][1]

    shape_scores = {}
    for class_uri, shape_uri in candidate_shapes:
        shape_properties = _get_shape_properties(shacl_graph, shape_uri)
        property_matches = len(entity_properties.intersection(shape_properties))
        priority = get_class_priority((class_uri, shape_uri))

        # Combined score: (property_matches, -priority).
        # More property matches is better; among equal matches the lower
        # priority number wins, hence the negation.
        shape_scores[shape_uri] = (property_matches, -priority)

    # On equal scores max() keeps the shape that was scored first.
    return max(shape_scores, key=shape_scores.get)
def _find_highest_priority_shape(class_shape_pairs: List[Tuple[str, str]]) -> Optional[str]:
    """
    Pick the shape whose (class_uri, shape) pair has the best priority.

    "Best" means the numerically smallest value returned by
    ``get_class_priority``; when several pairs share that value, the first
    one encountered is kept.

    Args:
        class_shape_pairs: List of tuples (class_uri, shape)

    Returns:
        The shape with the highest priority, or None if the list is empty
    """
    best_shape, best_priority = None, float("inf")

    for class_uri, shape in class_shape_pairs:
        candidate_priority = get_class_priority((class_uri, shape))
        # Strict comparison: an equal priority never displaces an earlier pair.
        if candidate_priority < best_priority:
            best_shape, best_priority = shape, candidate_priority

    return best_shape
def _get_shape_properties(shacl_graph: Graph, shape_uri: str) -> set:
    """
    Collect the property paths declared by a SHACL shape.

    Runs a SPARQL query that follows ``sh:property`` from the shape to its
    property shapes and reads each one's ``sh:path``.

    Args:
        shacl_graph: The SHACL graph
        shape_uri: URI of the shape to analyze

    Returns:
        Set of property URIs (as strings) defined in the shape
    """
    query_string = f"""
        PREFIX sh: <http://www.w3.org/ns/shacl#>
        SELECT DISTINCT ?property WHERE {{
            <{shape_uri}> sh:property ?propertyShape .
            ?propertyShape sh:path ?property .
        }}
    """

    # Each result row exposes the bound ?property variable as an attribute.
    return {str(row.property) for row in shacl_graph.query(query_string)}