Coverage for heritrace/utils/shacl_utils.py: 100%

82 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-06-24 11:39 +0000

1from typing import List, Optional, Tuple 

2 

3from flask import Flask 

4from heritrace.extensions import get_shacl_graph 

5from heritrace.utils.display_rules_utils import get_class_priority 

6from heritrace.utils.shacl_display import (apply_display_rules, 

7 extract_shacl_form_fields, 

8 order_form_fields, 

9 process_nested_shapes) 

10from rdflib import RDF, Graph 

11 

12 

def get_form_fields_from_shacl(shacl: Graph, display_rules: List[dict], app: Flask):
    """
    Build the form-field description for every entity type declared in the SHACL shapes.

    The work is a four-stage pipeline: extract the raw fields, resolve nested
    node shapes, apply the display rules (only when some are configured) and
    finally order the result.

    Args:
        shacl: The SHACL graph
        display_rules: The display rules configuration
        app: Flask application instance

    Returns:
        The mapping produced by ``order_form_fields`` (keys are (class, shape)
        tuples, values are dictionaries of form fields with their properties),
        or an empty dict when ``shacl`` is falsy.
    """
    if not shacl:
        return dict()

    # Stage 1: raw form fields straight from the SHACL shapes.
    fields_by_entity = extract_shacl_form_fields(shacl, display_rules, app=app)

    # Stage 2: expand every field that points at a nested node shape.
    # The same set is handed to every call so the helper can track the shapes
    # it has already processed across the whole traversal.
    visited_shapes = set()
    for fields_by_predicate in fields_by_entity.values():
        for field_entries in fields_by_predicate.values():
            for entry in field_entries:
                node_shape = entry.get("nodeShape")
                if not node_shape:
                    continue
                entry["nestedShape"] = process_nested_shapes(
                    shacl,
                    display_rules,
                    node_shape,
                    app=app,
                    processed_shapes=visited_shapes,
                )

    # Stage 3: display rules are optional; skip the step when none are given.
    if display_rules:
        fields_by_entity = apply_display_rules(shacl, fields_by_entity, display_rules)

    # Stage 4: final ordering according to the display rules.
    return order_form_fields(fields_by_entity, display_rules)

53 

54 

def determine_shape_for_classes(class_list: List[str]) -> Optional[str]:
    """
    Determine the most appropriate SHACL shape for a list of class URIs.

    Every shape that declares one of the classes as its ``sh:targetClass`` is
    collected as a (class_uri, shape) pair; the pair with the best priority
    decides the result.

    Args:
        class_list: List of class URIs to find shapes for

    Returns:
        The most appropriate shape URI based on priority, or None if the SHACL
        graph is unavailable/empty, the class list is empty, or no shapes are
        found
    """
    # Function-scope import so this block does not depend on the module-level
    # rdflib import line being extended.
    from rdflib import URIRef

    shacl_graph = get_shacl_graph()
    if not shacl_graph or not class_list:
        return None

    # The class is supplied through initBindings rather than interpolated into
    # the query text: a URI containing '>', whitespace or quotes can then
    # neither break the SPARQL parser nor change the meaning of the query.
    query_string = """
        SELECT DISTINCT ?shape WHERE {
            ?shape <http://www.w3.org/ns/shacl#targetClass> ?targetClass .
        }
    """

    all_shacl_shapes = []
    for class_uri in class_list:
        results = shacl_graph.query(
            query_string,
            initBindings={"targetClass": URIRef(class_uri)},
        )
        all_shacl_shapes.extend((class_uri, str(row.shape)) for row in results)

    return _find_highest_priority_shape(all_shacl_shapes)

85 

86 

def determine_shape_for_entity_triples(entity_triples_iter) -> Optional[str]:
    """
    Determine the most appropriate SHACL shape for an entity based on its triples.

    Uses both class priority and heuristic property matching to distinguish
    between shapes with the same target class but different properties
    (e.g., SpecialIssueShape vs IssueShape).

    Args:
        entity_triples_iter: Iterator of triples (subject, predicate, object) for the entity

    Returns:
        The most appropriate shape URI, or None if the SHACL graph is
        unavailable/empty, the entity has no rdf:type, or no shape targets any
        of its classes
    """
    # Function-scope import so this block does not depend on the module-level
    # rdflib import line being extended.
    from rdflib import URIRef

    shacl_graph = get_shacl_graph()
    if not shacl_graph:
        return None

    rdf_type = str(RDF.type)
    entity_classes = []
    entity_properties = set()

    # Single pass over the triples: every predicate counts as a property of
    # the entity (rdf:type included), and rdf:type objects are its classes.
    for _, predicate, obj in entity_triples_iter:
        predicate_uri = str(predicate)
        if predicate_uri == rdf_type:
            entity_classes.append(str(obj))
        entity_properties.add(predicate_uri)

    if not entity_classes:
        return None

    # The class is supplied through initBindings rather than interpolated into
    # the query text: class URIs come from entity data, and a value containing
    # '>', whitespace or quotes must not be able to break or alter the query.
    query_string = """
        SELECT DISTINCT ?shape WHERE {
            ?shape <http://www.w3.org/ns/shacl#targetClass> ?targetClass .
        }
    """

    candidate_shapes = []
    for class_uri in entity_classes:
        results = shacl_graph.query(
            query_string,
            initBindings={"targetClass": URIRef(class_uri)},
        )
        candidate_shapes.extend((class_uri, str(row.shape)) for row in results)

    if not candidate_shapes:
        return None

    # Only one candidate: nothing to rank.
    if len(candidate_shapes) == 1:
        return candidate_shapes[0][1]

    best_shape = None
    best_score = None

    for class_uri, shape_uri in candidate_shapes:
        shape_properties = _get_shape_properties(shacl_graph, shape_uri)
        property_matches = len(entity_properties.intersection(shape_properties))
        priority = get_class_priority((class_uri, shape_uri))

        # Combined score: (property_matches, -priority)
        # Higher property matches is better, lower priority number is better
        combined_score = (property_matches, -priority)

        # Track the best score over ALL (class, shape) pairs. A shape that
        # targets several of the entity's classes is therefore judged by its
        # best pairing, not by whichever pairing happened to come last.
        # Strict '>' keeps the first-seen candidate on ties.
        if best_score is None or combined_score > best_score:
            best_score = combined_score
            best_shape = shape_uri

    return best_shape

153 

154 

def _find_highest_priority_shape(class_shape_pairs: List[Tuple[str, str]]) -> Optional[str]:
    """
    Pick the shape whose (class_uri, shape) pair has the best priority.

    "Best" means the numerically lowest value returned by
    ``get_class_priority``; when several pairs share that value the first one
    encountered wins.

    Args:
        class_shape_pairs: List of tuples (class_uri, shape)

    Returns:
        The shape with the highest priority, or None if the list is empty
        (or no pair has a priority strictly below infinity)
    """
    winner = None
    best_rank = float('inf')

    for class_uri, shape in class_shape_pairs:
        rank = get_class_priority((class_uri, shape))
        # Strict comparison: an equal rank never displaces an earlier winner.
        if rank < best_rank:
            best_rank, winner = rank, shape

    return winner

176 

177 

def _get_shape_properties(shacl_graph: Graph, shape_uri: str) -> set:
    """
    Collect the property paths declared by a SHACL shape.

    Runs a SPARQL query for every ``sh:path`` reachable through the shape's
    ``sh:property`` constraints and returns the results as strings.

    Args:
        shacl_graph: The SHACL graph
        shape_uri: URI of the shape to analyze

    Returns:
        Set of property URIs defined in the shape (empty when the query
        yields no rows)
    """
    sparql = f"""
        PREFIX sh: <http://www.w3.org/ns/shacl#>
        SELECT DISTINCT ?property WHERE {{
            <{shape_uri}> sh:property ?propertyShape .
            ?propertyShape sh:path ?property .
        }}
    """

    # The set also collapses any duplicate rows the query might return.
    return {str(binding.property) for binding in shacl_graph.query(sparql)}