Coverage for oc_meta/lib/finder.py: 80%

1from time import sleep

2from typing import Dict, List, Tuple

4import yaml

5from dateutil import parser

6from oc_meta.plugins.editor import MetaEditor

7from oc_ocdm.graph import GraphEntity

8from oc_ocdm.graph.graph_entity import GraphEntity

9from oc_ocdm.prov.prov_entity import ProvEntity

10from oc_ocdm.support import get_count, get_resource_number

11from rdflib import RDF, XSD, Graph, Literal, URIRef

12from SPARQLWrapper import JSON, POST, SPARQLWrapper

13from time_agnostic_library.agnostic_entity import AgnosticEntity

16class ResourceFinder:

def __init__(self, ts_url, base_iri:str, local_g: Graph = None, settings: dict = None, meta_config_path: str = None):
    '''
    Set up the SPARQL endpoint wrapper and the state used by the retrieve_* methods.

    :params ts_url: the endpoint URL handed to SPARQLWrapper
    :params base_iri: the base IRI of the dataset. A single trailing slash, if present, is dropped.
    :type base_iri: str
    :params local_g: the graph the retrieve_* methods read from. A new empty graph is created when omitted.
    :type local_g: Graph
    :params settings: a settings dictionary. The keys read here are 'blazegraph_full_text_search' and 'virtuoso_full_text_search'; both default to False.
    :type settings: dict
    :params meta_config_path: a configuration path, stored as is and later passed to MetaEditor
    :type meta_config_path: str
    '''
    self.ts = SPARQLWrapper(ts_url)
    self.ts.setMethod(POST)
    # endswith() also copes with an empty base IRI, where base_iri[-1] would raise
    self.base_iri = base_iri[:-1] if base_iri.endswith('/') else base_iri
    # Defaults are built per instance: a Graph()/dict() default in the signature
    # is created once and would be shared by every instance that relies on it.
    self.local_g = Graph() if local_g is None else local_g
    self.ids_in_local_g = set()
    self.meta_config_path = meta_config_path
    self.meta_settings = dict() if settings is None else settings
    self.blazegraph_full_text_search = settings['blazegraph_full_text_search'] if settings and 'blazegraph_full_text_search' in settings else False
    self.virtuoso_full_text_search = settings['virtuoso_full_text_search'] if settings and 'virtuoso_full_text_search' in settings else False

def __query(self, query, return_format = JSON):
    """
    Execute a SPARQL query with retries and exponential backoff.

    The query is attempted up to 5 times. After each failed attempt except
    the last, the method sleeps for 5 * 2**attempt seconds (5, 10, 20, 40).

    :params query: the SPARQL query text
    :params return_format: the return format set on the SPARQLWrapper instance (JSON by default)
    :returns: the value returned by SPARQLWrapper.queryAndConvert()
    :raises Exception: when the last attempt fails; the message contains the last error and the query, and the original exception is chained as the cause
    """
    self.ts.setReturnFormat(return_format)
    self.ts.setQuery(query)
    max_retries = 5  # Number of attempts
    base_wait = 5  # Base waiting time, in seconds

    for attempt in range(max_retries):
        try:
            return self.ts.queryAndConvert()
        except Exception as e:
            if attempt < max_retries - 1:
                # Not the last attempt: wait with exponential backoff and retry
                sleep(base_wait * (2 ** attempt))
            else:
                # Last attempt failed: log the error and raise, keeping the
                # original exception as the cause so its traceback is not lost
                error_msg = f"Failed to execute SPARQL query after {max_retries} attempts: {str(e)}\nQuery: {query}"
                print(error_msg)
                raise Exception(error_msg) from e

50 # _______________________________BR_________________________________ #

def retrieve_br_from_id(self, schema: str, value: str) -> List[Tuple[str, str, list]]:
    '''
    Given an identifier, it retrieves bibliographic resources associated with that identifier, related titles and other identifiers MetaIDs and literal values.
    Only the local graph is read.

    :params schema: an identifier schema
    :type schema: str
    :params value: an identifier literal value
    :type value: str
    :returns List[Tuple[str, str, list]]: -- it returns a list of three elements tuples. The first element is the MetaID of a resource associated with the input ID. The second element is a title of that resource, if present. The third element is a list of MetaID-ID tuples related to identifiers associated with that resource.
    '''
    schema_uri = URIRef(GraphEntity.DATACITE + schema)
    value = value.replace('\\', '\\\\')
    result_list = []
    identifier_uri = None

    # Search for both string-typed and untyped literals and stop at the first
    # identifier entity that has the requested literal value and scheme
    for literal_value in [Literal(value, datatype=XSD.string), Literal(value)]:
        for starting_triple in self.local_g.triples((None, GraphEntity.iri_has_literal_value, literal_value)):
            for known_id_triple in self.local_g.triples((starting_triple[0], None, None)):
                if known_id_triple[1] == GraphEntity.iri_uses_identifier_scheme and known_id_triple[2] == schema_uri:
                    identifier_uri = known_id_triple[0]
                    break
            if identifier_uri:
                break
        if identifier_uri:
            break
    if identifier_uri:
        # NOTE(review): this list is created once and the same object is put in
        # every result tuple, so identifiers of all matching resources end up
        # together -- confirm that this is intended.
        metaid_id_list = [(identifier_uri.replace(f'{self.base_iri}/id/', ''), f'{schema}:{value}')]
        for triple in self.local_g.triples((None, GraphEntity.iri_has_identifier, identifier_uri)):
            title = ''
            res = triple[0]
            for res_triple in self.local_g.triples((res, None, None)):
                if res_triple[1] == GraphEntity.iri_title:
                    title = str(res_triple[2])
                elif res_triple[1] == GraphEntity.iri_has_identifier and res_triple[2] != identifier_uri:
                    # Reset for every identifier: otherwise an identifier lacking its
                    # scheme or literal would raise UnboundLocalError or silently
                    # reuse the values of the previous identifier
                    id_schema = ''
                    id_literal_value = ''
                    for id_triple in self.local_g.triples((res_triple[2], None, None)):
                        if id_triple[1] == GraphEntity.iri_uses_identifier_scheme:
                            id_schema = str(id_triple[2]).replace(GraphEntity.DATACITE, '')
                        elif id_triple[1] == GraphEntity.iri_has_literal_value:
                            id_literal_value = str(id_triple[2])
                    if id_schema and id_literal_value:
                        full_id = f'{id_schema}:{id_literal_value}'
                        metaid_id_tuple = (res_triple[2].replace(f'{self.base_iri}/id/', ''), full_id)
                        metaid_id_list.append(metaid_id_tuple)
            result_list.append((res.replace(f'{self.base_iri}/br/', ''), title, metaid_id_list))

    return result_list

def retrieve_br_from_meta(self, metaid: str) -> Tuple[str, List[Tuple[str, str]], bool]:
    '''
    Given a MetaID, it retrieves the title of the bibliographic resource having that MetaID and other identifiers of that entity.
    Only the local graph is read.

    :params metaid: a MetaID
    :type metaid: str
    :returns Tuple[str, List[Tuple[str, str]], bool]: -- it returns a tuple of three elements. The first element is the resource's title associated with the input MetaID. The second element is a list of MetaID-ID tuples related to identifiers associated with that entity. The third element is True if the local graph contains at least one triple about the resource, False otherwise (in which case the first two elements are '' and []).
    '''
    metaid_uri = f'{self.base_iri}/br/{metaid}'
    title = ''
    identifiers = []
    it_exists = False

    for triple in self.local_g.triples((URIRef(metaid_uri), None, None)):
        it_exists = True
        if triple[1] == GraphEntity.iri_title:
            title = str(triple[2])
        elif triple[1] == GraphEntity.iri_has_identifier:
            id_scheme = ''
            literal_value = ''
            identifier = triple[2]
            for triple_inner in self.local_g.triples((identifier, None, None)):
                if triple_inner[1] == GraphEntity.iri_uses_identifier_scheme:
                    id_scheme = str(triple_inner[2]).replace(GraphEntity.DATACITE, '')
                elif triple_inner[1] == GraphEntity.iri_has_literal_value:
                    literal_value = str(triple_inner[2])
            # Ensure both id_scheme and literal_value are found before appending
            if id_scheme and literal_value:
                full_id = f'{id_scheme}:{literal_value}'
                identifiers.append((str(identifier).replace(self.base_iri + '/id/', ''), full_id))

    # When no triple was found, title and identifiers still hold '' and []
    return title, identifiers, it_exists

130

131 # _______________________________ID_________________________________ #

132

def retrieve_metaid_from_id(self, schema: str, value: str) -> str:
    '''
    Look up, in the local graph, the MetaID of the identifier entity having the given schema and literal value.

    :params schema: an identifier schema
    :type schema: str
    :params value: an identifier literal value
    :type value: str
    :returns str: -- the MetaID of the first matching identifier entity, or None when the local graph contains no match.
    '''
    scheme_iri = URIRef(GraphEntity.DATACITE + schema)
    escaped_value = value.replace('\\', '\\\\')
    id_prefix = f'{self.base_iri}/id/'

    # The value is tried first as an xsd:string literal, then as an untyped one
    candidates = (Literal(escaped_value, datatype=XSD.string), Literal(escaped_value))
    for candidate in candidates:
        for value_match in self.local_g.triples((None, GraphEntity.iri_has_literal_value, candidate)):
            id_node = value_match[0]
            for id_statement in self.local_g.triples((id_node, None, None)):
                uses_scheme = id_statement[1] == GraphEntity.iri_uses_identifier_scheme
                if uses_scheme and id_statement[2] == scheme_iri:
                    return id_statement[0].replace(id_prefix, '')

    # Nothing in the local graph carries this identifier
    return None

155

def retrieve_metaid_from_merged_entity(self, metaid_uri:str, prov_config:str) -> str:
    '''
    It looks for MetaId in the provenance. If the input entity was deleted due to a merge, this function returns the target entity. Otherwise, it returns None.

    :params metaid_uri: a MetaId URI
    :type metaid_uri: str
    :params prov_config: the path of the configuration file required by time-agnostic-library
    :type prov_config: str
    :returns str: -- It returns the MetaID associated with the target entity after a merge. If there was no merge, or the entity has fewer than two snapshots, it returns None.
    '''
    with open(prov_config, 'r', encoding='utf8') as f:
        prov_config_dict = yaml.safe_load(f)
    agnostic_meta = AgnosticEntity(res=metaid_uri, config=prov_config_dict, related_entities_history=False)
    agnostic_meta_history = agnostic_meta.get_history(include_prov_metadata=True)
    meta_history_data = agnostic_meta_history[0][metaid_uri]
    if not meta_history_data:
        return None

    meta_history_metadata = agnostic_meta_history[1][metaid_uri]
    snapshots_newest_first = sorted(
        meta_history_metadata.items(),
        key=lambda x: parser.parse(x[1]['generatedAtTime']).replace(tzinfo=None),
        reverse=True
    )
    # With a single snapshot there is no penultimate one to derive from;
    # indexing [1] would raise IndexError instead of reporting "no merge"
    if len(snapshots_newest_first) < 2:
        return None
    penultimate_snapshot = snapshots_newest_first[1][0]

    query_if_it_was_merged = f'''
        SELECT DISTINCT ?se
        WHERE {{
            ?se a <{ProvEntity.iri_entity}>;
                <{ProvEntity.iri_was_derived_from}> <{penultimate_snapshot}>.
        }}
    '''
    results = self.__query(query_if_it_was_merged)['results']['bindings']
    # The entity was merged to another one if a snapshot of a different
    # entity derives from the penultimate snapshot
    for binding in results:
        snapshot_uri: str = binding['se']['value']
        if metaid_uri not in snapshot_uri:
            merged_entity = snapshot_uri.split('/prov/')[0]
            return get_count(merged_entity)
    return None

195

196 # _______________________________RA_________________________________ #

def retrieve_ra_from_meta(self, metaid: str) -> Tuple[str, List[Tuple[str, str]], bool]:
    '''
    Given a MetaID, it retrieves the name and id of the responsible agent associated with it, whether it is an author or a publisher.
    Only the local graph is read. The output has the following format:

        ('NAME', [('METAID_OF_THE_IDENTIFIER', 'LITERAL_VALUE')], EXISTS)
        ('American Medical Association (ama)', [('4274', 'crossref:10')], True)

    :params metaid: a responsible agent's MetaID
    :type metaid: str
    :returns Tuple[str, List[Tuple[str, str]], bool]: -- it returns a tuple, where the first element is the responsible agent's name, the second element is a list containing its identifiers' MetaIDs and literal values, and the third element is True if the local graph contains at least one triple about the agent, False otherwise.
    '''
    metaid_uri = f'{self.base_iri}/ra/{metaid}'
    family_name = ''
    given_name = ''
    name = ''
    identifiers = []
    it_exists = False

    for triple in self.local_g.triples((URIRef(metaid_uri), None, None)):
        it_exists = True
        if triple[1] == GraphEntity.iri_family_name:
            family_name = str(triple[2])
        elif triple[1] == GraphEntity.iri_given_name:
            given_name = str(triple[2])
        elif triple[1] == GraphEntity.iri_name:
            name = str(triple[2])
        elif triple[1] == GraphEntity.iri_has_identifier:
            identifier = triple[2]
            id_scheme = ''
            literal_value = ''
            for triple_inner in self.local_g.triples((identifier, None, None)):
                if triple_inner[1] == GraphEntity.iri_uses_identifier_scheme:
                    id_scheme = str(triple_inner[2]).replace(GraphEntity.DATACITE, '')
                elif triple_inner[1] == GraphEntity.iri_has_literal_value:
                    literal_value = str(triple_inner[2])
            # Identifiers lacking either the scheme or the literal value are skipped
            if id_scheme and literal_value:
                full_id = f'{id_scheme}:{literal_value}'
                identifiers.append((str(identifier).replace(self.base_iri + '/id/', ''), full_id))

    full_name = self._construct_full_name(name, family_name, given_name)

    return full_name, identifiers, it_exists

240

def retrieve_ra_from_id(self, schema: str, value: str, publisher: bool) -> List[Tuple[str, str, list]]:
    '''
    Given an identifier, it retrieves responsible agents associated with that identifier, related names and other identifiers MetaIDs and literal values.
    Only the local graph is read. The output has the following format: ::

        [(METAID, NAME, [(METAID_OF_THE_IDENTIFIER, LITERAL_VALUE)])]
        [('3309', 'American Medical Association (ama)', [('4274', 'crossref:10')])]

    :params schema: an identifier schema
    :type schema: str
    :params value: an identifier literal value
    :type value: str
    :params publisher: True if the identifier is associated with a publisher, False otherwise. It is not read by this method.
    :type publisher: bool
    :returns List[Tuple[str, str, list]]: -- it returns a list of three elements tuples. The first element is the MetaID of a responsible agent associated with the input ID. The second element is the name of that responsible agent, if present. The third element is a list of MetaID-ID tuples related to identifiers associated with that responsible agent.
    '''
    schema_uri = URIRef(GraphEntity.DATACITE + schema)
    value = value.replace('\\', '\\\\')
    result_list = []
    identifier_uri = None

    # Search for both string-typed and untyped literals and stop at the first
    # identifier entity that has the requested literal value and scheme
    for literal_value in [Literal(value, datatype=XSD.string), Literal(value)]:
        for starting_triple in self.local_g.triples((None, GraphEntity.iri_has_literal_value, literal_value)):
            for known_id_triple in self.local_g.triples((starting_triple[0], None, None)):
                if known_id_triple[1] == GraphEntity.iri_uses_identifier_scheme and known_id_triple[2] == schema_uri:
                    identifier_uri = known_id_triple[0]
                    break
            if identifier_uri:
                break
        if identifier_uri:
            break
    if identifier_uri:
        # NOTE(review): this list is created once and the same object is put in
        # every result tuple, so identifiers of all matching agents end up
        # together -- confirm that this is intended.
        metaid_id_list = [(identifier_uri.replace(f'{self.base_iri}/id/', ''), f'{schema}:{value}')]
        for triple in self.local_g.triples((None, GraphEntity.iri_has_identifier, identifier_uri)):
            name = ''
            family_name = ''
            given_name = ''
            res = triple[0]
            for res_triple in self.local_g.triples((res, None, None)):
                if res_triple[1] == GraphEntity.iri_name:
                    name = str(res_triple[2])
                elif res_triple[1] == GraphEntity.iri_family_name:
                    family_name = str(res_triple[2])
                elif res_triple[1] == GraphEntity.iri_given_name:
                    given_name = str(res_triple[2])
                elif res_triple[1] == GraphEntity.iri_has_identifier and res_triple[2] != identifier_uri:
                    # Reset for every identifier: otherwise an identifier lacking its
                    # scheme or literal would raise UnboundLocalError or silently
                    # reuse the values of the previous identifier
                    id_schema = ''
                    id_literal_value = ''
                    for id_triple in self.local_g.triples((res_triple[2], None, None)):
                        if id_triple[1] == GraphEntity.iri_uses_identifier_scheme:
                            id_schema = str(id_triple[2]).replace(GraphEntity.DATACITE, '')
                        elif id_triple[1] == GraphEntity.iri_has_literal_value:
                            id_literal_value = str(id_triple[2])
                    if id_schema and id_literal_value:
                        full_id = f'{id_schema}:{id_literal_value}'
                        metaid_id_tuple = (res_triple[2].replace(f'{self.base_iri}/id/', ''), full_id)
                        metaid_id_list.append(metaid_id_tuple)

            full_name = self._construct_full_name(name, family_name, given_name)
            result_list.append((res.replace(f'{self.base_iri}/ra/', ''), full_name, metaid_id_list))

    return result_list

299

def _construct_full_name(self, name: str, family_name: str, given_name: str) -> str:
    '''
    Build the display name of a responsible agent from its name parts.

    :params name: the agent's full name; it is used only when both other parts are empty
    :type name: str
    :params family_name: the agent's family name
    :type family_name: str
    :params given_name: the agent's given name
    :type given_name: str
    :returns str: -- `name` alone, or 'FAMILY, GIVEN' with the missing side left empty ('FAMILY,' or ', GIVEN'). An empty string is returned when `name` is combined with any other part, or when all parts are empty.
    '''
    if name:
        # A full name mixed with family/given parts is treated as unusable
        return '' if (family_name or given_name) else name
    if family_name and given_name:
        return f'{family_name}, {given_name}'
    if family_name:
        return f'{family_name},'
    if given_name:
        return f', {given_name}'
    return ''

311

def retrieve_ra_sequence_from_br_meta(self, metaid: str, col_name: str) -> List[Dict[str, tuple]]:
    '''
    Given a bibliographic resource's MetaID and a field name, it returns its agent roles and responsible agents in the correct order according to the specified field.
    The agent roles are read from the local graph. While ordering them, the method also repairs the chain through MetaEditor: duplicated agent roles and secondary chains are deleted, self-referencing and trailing 'next' links are removed, and missing 'next' links are added.
    The output has the following format: ::

        [
            {METAID_AR_1: (NAME_RA_1, [(METAID_ID_RA_1, LITERAL_VALUE_ID_RA_1)], METAID_RA_1)},
            {METAID_AR_2: (NAME_RA_2, [(METAID_ID_RA_2, LITERAL_VALUE_ID_RA_2)], METAID_RA_2)},
            {METAID_AR_N: (NAME_RA_N, [(METAID_ID_RA_N, LITERAL_VALUE_ID_RA_N)], METAID_RA_N)},
        ]
        [
            {'5343': ('Hodge, James G.', [], '3316')},
            {'5344': ('Anderson, Evan D.', [], '3317')},
            {'5345': ('Kirsch, Thomas D.', [], '3318')},
            {'5346': ('Kelen, Gabor D.', [('4278', 'orcid:0000-0002-3236-8286')], '3319')}
        ]

    :params metaid: a MetaID
    :type metaid: str
    :params col_name: the field name: 'author' or 'editor'; any other value selects the publisher role
    :type col_name: str
    :returns: List[Dict[str, tuple]] -- the output is a list of single-key dictionaries. Each key is the MetaID of an agent role; each value is a three-elements tuple made of the responsible agent's name, the list of MetaID-ID tuples of its identifiers, and the responsible agent's MetaID. An empty list is returned when the resource has no agent role of the requested kind.
    '''
    if col_name == 'author':
        role = GraphEntity.iri_author
    elif col_name == 'editor':
        role = GraphEntity.iri_editor
    else:
        role = GraphEntity.iri_publisher

    metaid_uri = URIRef(f'{self.base_iri}/br/{str(metaid)}')
    dict_ar = dict()
    changes_made = False

    # Collect, for every agent role of the requested kind, its 'next' role and its responsible agent
    for triple in self.local_g.triples((metaid_uri, GraphEntity.iri_is_document_context_for, None)):
        for ar_triple in self.local_g.triples((triple[2], None, None)):
            if ar_triple[2] == role:
                role_value = str(triple[2]).replace(f'{self.base_iri}/ar/', '')
                next_role = ''
                # NOTE(review): `ra` is not reset per agent role, so a role without
                # an is_held_by triple reuses the previous value (or raises on the
                # first role) -- confirm how such roles should be handled.
                for relevant_ar_triple in self.local_g.triples((triple[2], None, None)):
                    if relevant_ar_triple[1] == GraphEntity.iri_has_next:
                        next_role = str(relevant_ar_triple[2]).replace(f'{self.base_iri}/ar/', '')
                    elif relevant_ar_triple[1] == GraphEntity.iri_is_held_by:
                        ra = str(relevant_ar_triple[2]).replace(f'{self.base_iri}/ra/', '')
                dict_ar[role_value] = {'next': next_role, 'ra': ra}

    # Detect and handle duplicated RA
    ra_to_ars = {}
    for ar, details in dict_ar.items():
        ra = details['ra']
        if ra not in ra_to_ars:
            ra_to_ars[ra] = []
        ra_to_ars[ra].append(ar)

    # Identify and delete duplicate ARs
    ar_to_delete_list = []
    for ra, ars in ra_to_ars.items():
        if len(ars) > 1:
            # Keep the first AR and delete the rest
            for ar_to_delete in ars[1:]:
                meta_editor = MetaEditor(meta_config=self.meta_config_path, resp_agent='https://w3id.org/oc/meta/prov/pa/1', save_queries=True)
                meta_editor.delete(res=f"{self.base_iri}/ar/{ar_to_delete}")
                ar_to_delete_list.append(ar_to_delete)
                changes_made = True

    for ar in ar_to_delete_list:
        del dict_ar[ar]

    # Check for ARs that have themselves as 'next' and remove the 'next' relationship
    for ar, details in dict_ar.items():
        if details['next'] == ar:
            meta_editor = MetaEditor(meta_config=self.meta_config_path, resp_agent='https://w3id.org/oc/meta/prov/pa/1', save_queries=True)
            meta_editor.delete(res=f"{self.base_iri}/ar/{ar}", property=str(GraphEntity.iri_has_next))
            dict_ar[ar]['next'] = ''
            changes_made = True

    # Remove invalid 'next' references (only in the in-memory dictionary)
    for role, details in list(dict_ar.items()):
        if details['next'] and details['next'] not in dict_ar:
            dict_ar[role]['next'] = ''
            changes_made = True

    # Find the start_role by excluding all roles that are "next" for others from the set of all roles.
    all_roles = set(dict_ar.keys())
    roles_with_next = set(details['next'] for details in dict_ar.values() if details['next'])
    start_role_candidates = all_roles - roles_with_next

    # Handle the edge cases for start role determination
    MAX_ITERATIONS = 1000  # Maximum number of iterations allowed
    SAFETY_TIMER = 3600  # Safety timer of 1 hour (in seconds)

    if len(all_roles) == 0:
        return []
    elif len(start_role_candidates) != 1:
        # If more than one start candidate exists or none exist in a multi-role situation, resolve automatically
        chains = []
        for start_candidate in start_role_candidates:
            current_role = start_candidate
            chain = []
            visited_roles = set()
            iteration_count = 0
            while current_role and current_role not in visited_roles and iteration_count < MAX_ITERATIONS:
                visited_roles.add(current_role)
                ra_info = self.retrieve_ra_from_meta(dict_ar[current_role]['ra'])[0:2]
                ra_tuple = ra_info + (dict_ar[current_role]['ra'],)
                chain.append({current_role: ra_tuple})
                current_role = dict_ar[current_role]['next']
                iteration_count += 1

            if iteration_count == MAX_ITERATIONS:
                print(f"Possible infinite loop detected for BR: {metaid}")
                print("Starting safety timer. Please stop the process if needed.")
                sleep(SAFETY_TIMER)
                return []  # Return an empty list once the timer expires

            chains.append(chain)
        # Sort chains by length, then by the lowest sequential number of the starting role
        chains.sort(key=lambda chain: (-len(chain), get_resource_number(f'{self.base_iri}/ar/{list(chain[0].keys())[0]}')))
        try:
            ordered_ar_list = chains[0]
        except Exception as e:
            # No chain could be built (e.g. no start candidate): dump the state and re-raise
            print(f"\nProcessing BR: {metaid} for column: {col_name}")
            print(f"Initial dict_ar: {dict_ar}")
            print(f"All roles: {all_roles}")
            print(f"Start role candidates: {start_role_candidates}")
            print(f"Roles with next: {roles_with_next}")
            print(f"Error occurred while sorting or selecting chains: {str(e)}")
            print(f"Chains at time of error: {chains}")
            raise
        # Only the first chain is kept: the agent roles of the other chains are deleted
        for chain in chains[1:]:
            for ar_dict in chain:
                for ar in ar_dict.keys():
                    meta_editor = MetaEditor(meta_config=self.meta_config_path, resp_agent='https://w3id.org/oc/meta/prov/pa/1', save_queries=True)
                    meta_editor.delete(res=f"{self.base_iri}/ar/{ar}")
                    changes_made = True
    else:
        start_role = start_role_candidates.pop()
        # Follow the "next" chain from the start_role to construct an ordered list.
        ordered_ar_list = []
        current_role = start_role
        # A cycle can still be reachable from a unique start role (a -> b -> c -> b):
        # stop at the first role seen twice instead of looping forever. The dangling
        # 'next' of the last role is removed further below.
        visited_roles = set()
        while current_role and current_role not in visited_roles:
            visited_roles.add(current_role)
            ra_info = self.retrieve_ra_from_meta(dict_ar[current_role]['ra'])[0:2]
            ra_tuple = ra_info + (dict_ar[current_role]['ra'],)
            ordered_ar_list.append({current_role: ra_tuple})
            current_role = dict_ar[current_role]['next']

    final_chain = [list(ar_dict.keys())[0] for ar_dict in ordered_ar_list]

    # Fill gaps in the AR chain
    for i in range(len(final_chain) - 1):
        current_ar = final_chain[i]
        next_ar = final_chain[i + 1]
        if dict_ar[current_ar]['next'] != next_ar:
            meta_editor = MetaEditor(meta_config=self.meta_config_path, resp_agent='https://w3id.org/oc/meta/prov/pa/1', save_queries=True)
            meta_editor.update_property(
                res=f"{self.base_iri}/ar/{current_ar}",
                property=str(GraphEntity.iri_has_next),
                new_value=URIRef(f"{self.base_iri}/ar/{next_ar}")
            )
            dict_ar[current_ar]['next'] = next_ar
            changes_made = True

    # Ensure the last AR doesn't have a 'next' relationship
    last_ar = final_chain[-1]
    if dict_ar[last_ar]['next']:
        meta_editor = MetaEditor(meta_config=self.meta_config_path, resp_agent='https://w3id.org/oc/meta/prov/pa/1', save_queries=True)
        # The property is passed as a plain string, like in the other delete call above
        meta_editor.delete(res=f"{self.base_iri}/ar/{last_ar}", property=str(GraphEntity.iri_has_next))
        dict_ar[last_ar]['next'] = ''
        changes_made = True

    if changes_made:
        print(f"\nChanges made to AR chain for BR: {metaid}")

    return ordered_ar_list

489

def retrieve_re_from_br_meta(self, metaid:str) -> Tuple[str, str]:
    '''
    Look up, in the local graph, the resource embodiment of a bibliographic resource and its page interval.
    The output has the following format: ::

        (METAID, PAGES)
        ('2011', '391-397')

    :params metaid: a bibliographic resource's MetaID
    :type metaid: str
    :returns: Tuple[str, str] -- the MetaID of the resource embodiment and its pages' interval. When only one of the two page boundaries is known it is used for both ends; when none is known the interval is an empty string. None is returned when the resource has no embodiment.
    '''
    br_uri = URIRef(f'{self.base_iri}/br/{str(metaid)}')
    embodiment_id = None
    first_page = None
    last_page = None
    for br_triple in self.local_g.triples((br_uri, GraphEntity.iri_embodiment, None)):
        embodiment = br_triple[2]
        embodiment_id = embodiment.replace(f'{self.base_iri}/re/', '')
        for detail in self.local_g.triples((embodiment, None, None)):
            predicate = detail[1]
            if predicate == GraphEntity.iri_starting_page:
                first_page = str(detail[2])
            elif predicate == GraphEntity.iri_ending_page:
                last_page = str(detail[2])

    if not embodiment_id:
        return None

    # A missing boundary is replaced by the other one
    interval_start = first_page or last_page
    interval_end = last_page or first_page
    pages = f'{interval_start}-{interval_end}' if interval_start else ''
    return embodiment_id, pages

523

def retrieve_br_info_from_meta(self, metaid: str) -> dict:
    '''
    Given a bibliographic resource's MetaID, it returns all the information about that resource.
    Only the local graph is read. The output has the following format: ::

        {
            'pub_date': PUB_DATE,
            'type': TYPE,
            'page': (METAID, PAGES),
            'issue': ISSUE,
            'volume': VOLUME,
            'venue': VENUE
        }
        {
            'pub_date': '2006-02-27',
            'type': 'journal article',
            'page': ('2011', '391-397'),
            'issue': '4',
            'volume': '166',
            'venue': 'Archives Of Internal Medicine [omid:br/4387]'
        }

    :param metaid: a bibliographic resource's MetaID, or its full URI
    :type metaid: str
    :returns: dict -- the output is a dictionary including the publication date, type, page, issue, volume, and venue of the specified bibliographic resource. Fields that are not found are empty strings; 'page' holds whatever retrieve_re_from_br_meta returns (None when there is no embodiment).
    '''

    venue_iris = [
        GraphEntity.iri_archival_document,
        GraphEntity.iri_journal,
        GraphEntity.iri_book,
        GraphEntity.iri_book_series,
        GraphEntity.iri_series,
        GraphEntity.iri_academic_proceedings,
        GraphEntity.iri_proceedings_series,
        GraphEntity.iri_reference_book,
        GraphEntity.iri_series,

        GraphEntity.iri_expression
    ]

    def extract_identifiers(entity_uri):
        # The OMID comes first, followed by every identifier having both a scheme and a literal value
        identifiers = [f"omid:{entity_uri.replace(f'{self.base_iri}/', '')}"]
        for id_triple in self.local_g.triples((entity_uri, GraphEntity.iri_has_identifier, None)):
            id_obj = id_triple[2]
            scheme = value = None
            for detail_triple in self.local_g.triples((id_obj, None, None)):
                if detail_triple[1] == GraphEntity.iri_uses_identifier_scheme:
                    scheme = str(detail_triple[2])
                elif detail_triple[1] == GraphEntity.iri_has_literal_value:
                    value = str(detail_triple[2])
            if scheme and value:
                scheme = scheme.replace(GraphEntity.DATACITE, '')
                identifiers.append(f"{scheme}:{value}")
        return identifiers

    metaid = str(metaid)
    metaid_uri = URIRef(f'{self.base_iri}/br/{metaid}') if self.base_iri not in metaid else URIRef(metaid)
    # retrieve_re_from_br_meta prepends '<base_iri>/br/' itself, so it must get the
    # bare MetaID even when this method was called with a full URI
    short_metaid = str(metaid_uri).replace(f'{self.base_iri}/br/', '')
    res_dict = {
        'pub_date': '',
        'type': '',
        'page': self.retrieve_re_from_br_meta(short_metaid),
        'issue': '',
        'volume': '',
        'venue': ''
    }

    for triple in self.local_g.triples((metaid_uri, None, None)):
        predicate, obj = triple[1], triple[2]

        if predicate == GraphEntity.iri_has_publication_date:
            res_dict['pub_date'] = str(obj)
        elif predicate == RDF.type and obj != GraphEntity.iri_expression:
            res_dict['type'] = self._type_it(obj)
        elif predicate == GraphEntity.iri_has_sequence_identifier:
            # The resource itself is an issue or a volume: its own sequence identifier is the number
            for inner_triple in self.local_g.triples((metaid_uri, None, None)):
                inner_obj = inner_triple[2]
                if inner_obj == GraphEntity.iri_journal_issue:
                    res_dict['issue'] = str(triple[2])
                elif inner_obj == GraphEntity.iri_journal_volume:
                    res_dict['volume'] = str(triple[2])
        elif predicate == GraphEntity.iri_part_of:
            # First container: it may be an issue, a volume or the venue itself
            for vvi_triple in self.local_g.triples((obj, None, None)):
                vvi_obj = vvi_triple[2]
                if vvi_obj == GraphEntity.iri_journal_issue:
                    for inner_vvi_triple in self.local_g.triples((obj, None, None)):
                        if inner_vvi_triple[1] == GraphEntity.iri_has_sequence_identifier:
                            res_dict['issue'] = str(inner_vvi_triple[2])
                elif vvi_obj == GraphEntity.iri_journal_volume:
                    for inner_vvi_triple in self.local_g.triples((obj, None, None)):
                        if inner_vvi_triple[1] == GraphEntity.iri_has_sequence_identifier:
                            res_dict['volume'] = str(inner_vvi_triple[2])
                elif vvi_obj in venue_iris:
                    for inner_vvi_triple in self.local_g.triples((obj, None, None)):
                        if inner_vvi_triple[1] == GraphEntity.iri_title:
                            venue_title = str(inner_vvi_triple[2])
                            venue_ids = extract_identifiers(obj)
                            res_dict['venue'] = f"{venue_title} [{' '.join(venue_ids)}]"

                if vvi_triple[1] == GraphEntity.iri_part_of:
                    # Second container: it may be a volume or the venue
                    for vi_triple in self.local_g.triples((vvi_obj, None, None)):
                        vi_obj = vi_triple[2]
                        if vi_obj == GraphEntity.iri_journal_volume:
                            for inner_vvi_triple in self.local_g.triples((vvi_obj, None, None)):
                                if inner_vvi_triple[1] == GraphEntity.iri_has_sequence_identifier:
                                    res_dict['volume'] = str(inner_vvi_triple[2])
                        elif vi_obj in venue_iris:
                            for inner_vvi_triple in self.local_g.triples((vvi_obj, None, None)):
                                if inner_vvi_triple[1] == GraphEntity.iri_title:
                                    venue_title = str(inner_vvi_triple[2])
                                    venue_ids = extract_identifiers(vvi_obj)
                                    res_dict['venue'] = f"{venue_title} [{' '.join(venue_ids)}]"

                        if vi_triple[1] == GraphEntity.iri_part_of:
                            # Third container: whatever has a title here is taken as the venue
                            for venue_triple in self.local_g.triples((vi_obj, None, None)):
                                if venue_triple[1] == GraphEntity.iri_title:
                                    venue_title = str(venue_triple[2])
                                    venue_ids = extract_identifiers(vi_obj)
                                    res_dict['venue'] = f"{venue_title} [{' '.join(venue_ids)}]"
    return res_dict

644

@staticmethod
def _type_it(br_type: URIRef) -> str:
    '''
    Translate the type IRI of a bibliographic resource into its textual label.

    :params br_type: the type IRI of a bibliographic resource
    :type br_type: URIRef
    :returns str: -- the label associated with the type, or an empty string when the type is not among the known ones.
    '''
    labels_by_iri = (
        (GraphEntity.iri_archival_document, 'archival document'),
        (GraphEntity.iri_book, 'book'),
        (GraphEntity.iri_book_chapter, 'book chapter'),
        (GraphEntity.iri_part, 'book part'),
        (GraphEntity.iri_expression_collection, 'book section'),
        (GraphEntity.iri_book_series, 'book series'),
        (GraphEntity.iri_book_set, 'book set'),
        (GraphEntity.iri_data_file, 'data file'),
        (GraphEntity.iri_thesis, 'dissertation'),
        (GraphEntity.iri_journal, 'journal'),
        (GraphEntity.iri_journal_article, 'journal article'),
        (GraphEntity.iri_journal_issue, 'journal issue'),
        (GraphEntity.iri_journal_volume, 'journal volume'),
        (GraphEntity.iri_proceedings_paper, 'proceedings article'),
        (GraphEntity.iri_academic_proceedings, 'proceedings'),
        (GraphEntity.iri_reference_book, 'reference book'),
        (GraphEntity.iri_reference_entry, 'reference entry'),
        (GraphEntity.iri_series, 'series'),
        (GraphEntity.iri_report_document, 'report'),
        (GraphEntity.iri_specification_document, 'standard'),
    )
    label = ''
    # Every entry is checked, so the last matching one wins
    for known_iri, known_label in labels_by_iri:
        if br_type == known_iri:
            label = known_label
    return label

689

def retrieve_publisher_from_br_metaid(self, metaid:str):
    '''
    Build, from the local graph, the textual description of the publishers of a bibliographic resource.
    Publisher roles are looked for on the resource itself and on the resources it is part of, up to two levels above it.

    :params metaid: a bibliographic resource's MetaID
    :type metaid: str
    :returns str: -- the publishers joined by '; '. Each one is rendered as 'NAME [omid:ra/... SCHEMA:VALUE ...]', or as the bracketed identifiers alone when no name is found. An empty string is returned when no publisher is found.
    '''
    br_uri = URIRef(f'{self.base_iri}/br/{metaid}')
    publishers = set()

    def collect_publisher_roles(resource, hops_left):
        # Gather the agent roles of `resource` having the publisher role, then climb to its containers
        for resource_triple in self.local_g.triples((resource, None, None)):
            predicate, target = resource_triple[1], resource_triple[2]
            if predicate == GraphEntity.iri_is_document_context_for:
                for role_triple in self.local_g.triples((target, None, None)):
                    if role_triple[2] == GraphEntity.iri_publisher:
                        publishers.add(target)
            elif predicate == GraphEntity.iri_part_of and hops_left > 0:
                collect_publisher_roles(target, hops_left - 1)

    collect_publisher_roles(br_uri, 2)

    formatted_publishers = []
    for role_uri in publishers:
        agent_ids = []
        agent_name = None
        for role_triple in self.local_g.triples((role_uri, None, None)):
            if role_triple[1] != GraphEntity.iri_is_held_by:
                continue
            agent_uri = role_triple[2]
            agent_ids.append(agent_uri.replace(f'{self.base_iri}/', 'omid:'))
            for agent_triple in self.local_g.triples((agent_uri, None, None)):
                # Scheme and literal are reset for every triple of the agent
                id_scheme = None
                id_literal = None
                if agent_triple[1] == GraphEntity.iri_name:
                    agent_name = agent_triple[2]
                elif agent_triple[1] == GraphEntity.iri_has_identifier:
                    for id_triple in self.local_g.triples((agent_triple[2], None, None)):
                        if id_triple[1] == GraphEntity.iri_uses_identifier_scheme:
                            id_scheme = id_triple[2].replace(f'{str(GraphEntity.DATACITE)}', '')
                        elif id_triple[1] == GraphEntity.iri_has_literal_value:
                            id_literal = id_triple[2]
                if id_scheme is not None and id_literal is not None:
                    agent_ids.append(f'{id_scheme}:{id_literal}')
        joined_ids = " ".join(agent_ids)
        formatted_publishers.append(f'{agent_name} [{joined_ids}]' if agent_name is not None else f'[{joined_ids}]')
    return '; '.join(formatted_publishers)

738

def get_everything_about_res(self, metavals: set, identifiers: set, vvis: set, max_depth: int = 10) -> None:
    """
    Load into the local graph the triples reachable from the given entities.

    Starting subjects come from three sources: MetaIDs (turned into IRIs under
    ``base_iri``), identifiers (resolved to their owners through the triplestore)
    and venue/volume/issue tuples (resolved to the matching volume or issue, plus
    the venue itself). The triples of each subject are fetched in batches and added
    to ``self.local_g``; IRI objects are then expanded in turn, except those reached
    through ``rdf:type``, ``with_role`` or ``uses_identifier_scheme``.

    :params metavals: MetaIDs such as ``omid:br/1``
    :type metavals: set
    :params identifiers: identifiers in the ``scheme:value`` form
    :type identifiers: set
    :params vvis: tuples ``(volume, issue, venue_metaid, venue_ids_tuple)``
    :type vvis: set
    :params max_depth: deepest expansion level, the starting subjects being level 0; a falsy value disables the limit
    :type max_depth: int
    :returns: None -- the result is the content added to ``self.local_g``
    """
    BATCH_SIZE = 10

    def batch_process(input_set, batch_size):
        """Generator to split input data into smaller batches if batch_size is not None."""
        if batch_size is None:
            yield input_set
        else:
            for i in range(0, len(input_set), batch_size):
                yield input_set[i:i + batch_size]

    def escape_literal(value):
        """Escape a value for use inside a double-quoted SPARQL string literal."""
        # Backslash first, so that the escapes added afterwards are not doubled.
        # Raw line feeds and carriage returns are not allowed inside "..." literals.
        return (
            value.replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\n', '\\n')
            .replace('\r', '\\r')
        )

    def literal_variants(value):
        """Spell a value both as a plain literal and as an explicit xsd:string literal."""
        # Both spellings are searched, as the hand-written queries did; presumably
        # the triplestore does not treat them as the same term.
        escaped = escape_literal(value)
        return f'"{escaped}"', f'"{escaped}"^^<{XSD.string}>'

    def union_of(patterns):
        """Wrap each graph pattern in braces and join the groups with UNION."""
        return ' UNION '.join(f'{{ {pattern} }}' for pattern in patterns)

    def collect_subjects(query, subjects):
        """Run a ``SELECT ?s`` query and add every ``?s`` binding to subjects."""
        result = self.__query(query)
        for row in result['results']['bindings']:
            subjects.add(str(row['s']['value']))

    def process_batch(subjects, cur_depth):
        """Fetch the triples of the subjects level by level, up to the specified depth."""
        # Subjects already expanded are never queried again: this avoids repeating
        # the same request at every level and guarantees termination on cyclic data.
        visited = set()
        frontier = set(subjects)
        while frontier and not (max_depth and cur_depth > max_depth):
            visited |= frontier
            next_subjects = set()
            for batch in batch_process(sorted(frontier), BATCH_SIZE):
                values = ' '.join(f"<{s}>" for s in batch)
                query = f'''
                    SELECT ?s ?p ?o
                    WHERE {{
                        VALUES ?s {{ {values} }}
                        ?s ?p ?o.
                    }}'''
                result = self.__query(query)
                if not result:
                    continue
                for row in result['results']['bindings']:
                    s = URIRef(row['s']['value'])
                    p = URIRef(row['p']['value'])
                    o_binding = row['o']
                    if o_binding['type'] == 'uri':
                        o = URIRef(o_binding['value'])
                    else:
                        o_datatype = URIRef(o_binding['datatype']) if 'datatype' in o_binding else None
                        o = Literal(lexical_or_value=o_binding['value'], datatype=o_datatype)
                    self.local_g.add((s, p, o))
                    if isinstance(o, URIRef) and p not in {RDF.type, GraphEntity.iri_with_role, GraphEntity.iri_uses_identifier_scheme}:
                        next_subjects.add(str(o))
            # Once every batch of this level is done, move on to the next depth level.
            frontier = next_subjects - visited
            cur_depth += 1

    def get_initial_subjects_from_metavals(metavals):
        """Convert metavals to a set of subjects."""
        return {f"{self.base_iri}/{mid.replace('omid:', '')}" for mid in metavals}

    def get_initial_subjects_from_identifiers(identifiers):
        """Convert identifiers to a set of subjects based on batch queries."""
        subjects = set()
        for batch in batch_process(list(identifiers), BATCH_SIZE):
            if not batch:
                continue

            if self.blazegraph_full_text_search:
                # Full-text index lookup: one query per identifier
                for identifier in batch:
                    scheme, literal = identifier.split(":", 1)
                    escaped_identifier = escape_literal(literal)
                    query = f'''
                        PREFIX bds: <http://www.bigdata.com/rdf/search#>
                        SELECT ?s WHERE {{
                            ?literal bds:search "{escaped_identifier}" ;
                                bds:matchAllTerms "true" ;
                                ^<{GraphEntity.iri_has_literal_value}> ?id.
                            ?id <{GraphEntity.iri_uses_identifier_scheme}> <{GraphEntity.DATACITE + scheme}>;
                                ^<{GraphEntity.iri_has_identifier}> ?s .
                        }}
                    '''
                    collect_subjects(query, subjects)
            elif self.virtuoso_full_text_search:
                # Exact match on both literal spellings: one UNION query per batch
                union_blocks = []
                for identifier in batch:
                    parts = identifier.split(':', maxsplit=1)
                    scheme, literal = parts[0], parts[1]
                    plain_literal, typed_literal = literal_variants(literal)
                    union_blocks.append(f"""
                        {{
                            {{
                                ?id <{GraphEntity.iri_has_literal_value}> {plain_literal} .
                            }}
                            UNION
                            {{
                                ?id <{GraphEntity.iri_has_literal_value}> {typed_literal} .
                            }}
                            ?id <{GraphEntity.iri_uses_identifier_scheme}> <{GraphEntity.DATACITE + scheme}> .
                            ?s <{GraphEntity.iri_has_identifier}> ?id .
                        }}
                    """)
                union_query = " UNION ".join(union_blocks)
                query = f'''
                    SELECT ?s WHERE {{
                        {union_query}
                    }}
                '''
                collect_subjects(query, subjects)
            else:
                # Generic triplestore: one VALUES query per batch, compared on the string value
                identifiers_values = []
                for identifier in batch:
                    parts = identifier.split(':', maxsplit=1)
                    scheme, literal = parts[0], parts[1]
                    identifiers_values.append(f'(<{GraphEntity.DATACITE + scheme}> "{escape_literal(literal)}")')
                identifiers_values_str = " ".join(identifiers_values)
                query = f'''
                    SELECT DISTINCT ?s WHERE {{
                        VALUES (?scheme ?literal) {{ {identifiers_values_str} }}
                        ?id <{GraphEntity.iri_uses_identifier_scheme}> ?scheme .
                        ?id <{GraphEntity.iri_has_literal_value}> ?literalValue .
                        FILTER(str(?literalValue) = str(?literal))
                        ?s <{GraphEntity.iri_has_identifier}> ?id .
                    }}
                '''
                collect_subjects(query, subjects)
        return subjects

    def build_vvi_query(venue_uri, volume, issue):
        """Build the query locating an issue (optionally inside a volume) or a volume of a venue."""
        if issue and volume:
            # Issue inside a specific volume: every combination of literal spellings
            patterns = [
                f'''
                    ?volume a <{GraphEntity.iri_journal_volume}> ;
                        <{GraphEntity.iri_part_of}> <{venue_uri}> ;
                        <{GraphEntity.iri_has_sequence_identifier}> {volume_literal} .
                    ?s a <{GraphEntity.iri_journal_issue}> ;
                        <{GraphEntity.iri_part_of}> ?volume ;
                        <{GraphEntity.iri_has_sequence_identifier}> {issue_literal} .
                '''
                for issue_literal in literal_variants(issue)
                for volume_literal in literal_variants(volume)
            ]
        else:
            # Issue directly under the venue, or volume of the venue
            entity_type = GraphEntity.iri_journal_issue if issue else GraphEntity.iri_journal_volume
            patterns = [
                f'''
                    ?s a <{entity_type}> ;
                        <{GraphEntity.iri_part_of}> <{venue_uri}> ;
                        <{GraphEntity.iri_has_sequence_identifier}> {sequence_literal} .
                '''
                for sequence_literal in literal_variants(issue if issue else volume)
            ]
        return f'SELECT ?s WHERE {{ {union_of(patterns)} }}'

    def get_initial_subjects_from_vvis(vvis):
        """Convert vvis to a set of subjects based on batch queries, handling venue ID to metaid conversion."""
        subjects = set()

        for volume, issue, venue_metaid, venue_ids_tuple in vvis:
            venues_to_search = set()

            if venue_metaid:
                venues_to_search.add(venue_metaid)

            if venue_ids_tuple:
                venue_id_subjects = get_initial_subjects_from_identifiers(venue_ids_tuple)
                subjects.update(venue_id_subjects)

                # Convert venue URIs to metaid format for VVI search
                for venue_uri in venue_id_subjects:
                    if '/br/' in venue_uri:
                        metaid = venue_uri.replace(f'{self.base_iri}/br/', '')
                        venues_to_search.add(f"omid:br/{metaid}")

            # Neither volume nor issue: nothing to look up for this tuple. Checking
            # here, before any string handling, also covers None values.
            if not issue and not volume:
                continue

            # Search for VVI structures for each venue
            for venue_metaid_to_search in venues_to_search:
                venue_uri = f"{self.base_iri}/{venue_metaid_to_search.replace('omid:', '')}"
                collect_subjects(build_vvi_query(venue_uri, volume, issue), subjects)
                # Also add the venue itself as a subject
                subjects.add(venue_uri)

        return subjects

    initial_subjects = set()

    if metavals:
        initial_subjects.update(get_initial_subjects_from_metavals(metavals))

    if identifiers:
        initial_subjects.update(get_initial_subjects_from_identifiers(identifiers))

    if vvis:
        initial_subjects.update(get_initial_subjects_from_vvis(vvis))

    process_batch(initial_subjects, 0)

985

def get_subgraph(self, res: str, graphs_dict: dict) -> Graph|None:
    """
    Return the graph of the local triples having ``res`` as subject, caching it in ``graphs_dict``.

    :params res: the subject to look up in the local graph
    :type res: str
    :params graphs_dict: cache of the graphs already built, keyed by subject; updated in place
    :type graphs_dict: dict
    :returns: Graph|None -- the cached or newly built graph, None when the local graph holds no triple for ``res``
    """
    if res in graphs_dict:
        return graphs_dict[res]

    entity_graph = Graph()
    for matched_triple in self.local_g.triples((res, None, None)):
        entity_graph.add(matched_triple)

    # Empty results are not cached, so a later call looks the subject up again.
    if len(entity_graph) == 0:
        return None
    graphs_dict[res] = entity_graph
    return entity_graph

995

def retrieve_venue_from_local_graph(self, meta_id: str) -> Dict[str, Dict[str, str]]:
    """
    Rebuild the volume/issue structure of a venue from the triples of the local graph,
    without querying the triplestore.

    :params meta_id: a MetaID
    :type meta_id: str
    :returns: Dict[str, Dict[str, str]] -- ``{'issue': {seq: {'id': ...}}, 'volume': {seq: {'id': ..., 'issue': {seq: {'id': ...}}}}}``
    """
    graph = self.local_g
    br_prefix = f'{self.base_iri}/br/'
    venue_uri = URIRef(f'{self.base_iri}/br/{meta_id}')
    venue_key = str(venue_uri)

    volume_seq_by_id = {}
    venue_volumes = {}
    venue_issues = {}

    # Volumes: every journal volume of the local graph that is part of this venue
    for volume_entity, _, _ in graph.triples((None, RDF.type, GraphEntity.iri_journal_volume)):
        for _ in graph.triples((volume_entity, GraphEntity.iri_part_of, venue_uri)):
            volume_id = str(volume_entity).replace(br_prefix, '')
            for _, _, seq_node in graph.triples((volume_entity, GraphEntity.iri_has_sequence_identifier, None)):
                volume_seq = str(seq_node)
                volume_seq_by_id[volume_id] = volume_seq
                venue_volumes[volume_seq] = {
                    'id': volume_id,
                    'issue': {}
                }

    # Issues: attached to one of the volumes above, or directly to the venue
    for issue_entity, _, _ in graph.triples((None, RDF.type, GraphEntity.iri_journal_issue)):
        issue_id = str(issue_entity).replace(br_prefix, '')

        # When several values exist, the last one iterated wins
        issue_seq = None
        for _, _, seq_node in graph.triples((issue_entity, GraphEntity.iri_has_sequence_identifier, None)):
            issue_seq = str(seq_node)

        parent = None
        for _, _, parent_node in graph.triples((issue_entity, GraphEntity.iri_part_of, None)):
            parent = str(parent_node)

        if not issue_seq or not parent:
            continue

        parent_id = parent.replace(br_prefix, '')
        if parent_id in volume_seq_by_id:
            # The container is a volume of this venue
            venue_volumes[volume_seq_by_id[parent_id]]['issue'][issue_seq] = {'id': issue_id}
        elif parent == venue_key:
            # The container is the venue itself
            venue_issues[issue_seq] = {'id': issue_id}

    return {
        'issue': venue_issues,
        'volume': venue_volumes
    }