Coverage for oc_meta/core/creator.py: 92%

437 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-07-14 14:06 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright 2019 Silvio Peroni <essepuntato@gmail.com> 

4# Copyright 2019-2020 Fabio Mariani <fabio.mariani555@gmail.com> 

5# Copyright 2021 Simone Persiani <iosonopersia@gmail.com> 

6# Copyright 2021-2022 Arcangelo Massari <arcangelo.massari@unibo.it> 

7# 

8# Permission to use, copy, modify, and/or distribute this software for any purpose 

9# with or without fee is hereby granted, provided that the above copyright notice 

10# and this permission notice appear in all copies. 

11# 

12# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

13# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

14# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

15# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

16# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

17# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

18# SOFTWARE. 

19 

20from __future__ import annotations 

21 

22import re 

23from typing import List 

24 

25from oc_meta.core.curator import get_edited_br_metaid 

26from oc_meta.lib.finder import ResourceFinder 

27from oc_meta.lib.master_of_regex import ( 

28 comma_and_spaces, 

29 name_and_ids, 

30 one_or_more_spaces, 

31 semicolon_in_people_field, 

32) 

33from rdflib import Graph, URIRef 

34 

35from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler 

36from oc_ocdm.graph import GraphSet 

37from oc_ocdm.graph.entities.bibliographic import BibliographicResource 

38from oc_ocdm.graph.entities.bibliographic_entity import BibliographicEntity 

39from oc_ocdm.support import create_date 

40 

41 

class Creator(object):
    """Turn the rows in ``data`` into entities of an ``oc_ocdm`` ``GraphSet``.

    Each row is a mapping with the keys ``id``, ``title``, ``author``,
    ``pub_date``, ``venue``, ``volume``, ``issue``, ``page``, ``type``,
    ``publisher`` and ``editor``.  The identifiers of the entities to create
    are not minted here: they are read from the indexes passed to the
    constructor (``ra_index``, ``br_index``, ``re_index_csv``,
    ``ar_index_csv``, ``vi_index``) and from the ``omid:`` identifiers found
    in the row fields.
    """

    # Row ``type`` value -> name of the method called on the row's
    # bibliographic resource.  Types that are not listed (e.g. "edited book",
    # "monograph") leave the resource untyped.
    _BR_TYPE_FACTORIES = {
        "abstract": "create_abstract",
        "archival document": "create_archival_document",
        "audio document": "create_audio_document",
        "book": "create_book",
        "book chapter": "create_book_chapter",
        "book part": "create_book_part",
        "book section": "create_book_section",
        "book series": "create_book_series",
        "book set": "create_book_set",
        "computer program": "create_computer_program",
        "data file": "create_dataset",
        "dataset": "create_dataset",
        "data management plan": "create_data_management_plan",
        "dissertation": "create_dissertation",
        "editorial": "create_editorial",
        "journal": "create_journal",
        "journal article": "create_journal_article",
        "journal editorial": "create_journal_editorial",
        "journal issue": "create_issue",
        "journal volume": "create_volume",
        "newspaper": "create_newspaper",
        "newspaper article": "create_newspaper_article",
        "newspaper issue": "create_newspaper_issue",
        "peer review": "create_peer_review",
        "preprint": "create_preprint",
        "presentation": "create_presentation",
        "proceedings": "create_proceedings",
        "proceedings article": "create_proceedings_article",
        "reference book": "create_reference_book",
        "reference entry": "create_reference_entry",
        "report": "create_report",
        "report series": "create_report_series",
        "retraction notice": "create_retraction_notice",
        "standard": "create_standard",
        "series": "create_series",
        "web content": "create_web_content",
    }

    def __init__(
        self,
        data: list,
        endpoint: str,
        base_iri: str,
        counter_handler: RedisCounterHandler,
        supplier_prefix: str,
        resp_agent: str,
        ra_index: dict,
        br_index: dict,
        re_index_csv: dict,
        ar_index_csv: dict,
        vi_index: dict,
        preexisting_entities: set,
        everything_everywhere_allatonce: Graph,
        settings: dict = None,
        meta_config_path: str = None,
    ):
        """Store the inputs and pre-compute the lookup indexes.

        :param data: rows to process in :meth:`creator`.
        :param endpoint: passed to ``ResourceFinder`` as ``ts_url``.
        :param base_iri: prefix prepended to every short entity id
            (``br/...``, ``ra/...``, ``ar/...``, ``re/...``, ``id/...``).
        :param counter_handler: handed to ``GraphSet`` as custom counter handler.
        :param supplier_prefix: handed to ``GraphSet``.
        :param resp_agent: responsible agent passed to every ``add_*`` call.
        :param ra_index: rows with ``id``/``meta`` keys, see :meth:`indexer_id`.
        :param br_index: rows with ``id``/``meta`` keys, see :meth:`indexer_id`.
        :param re_index_csv: rows with ``br``/``re`` keys, see :meth:`index_re`.
        :param ar_index_csv: rows with ``meta``/``author``/``editor``/
            ``publisher`` keys, see :meth:`index_ar`.
        :param vi_index: nested venue -> volume/issue mapping, used as is.
        :param preexisting_entities: short ids of the entities for which a
            preexisting graph must be fetched through the finder.
        :param everything_everywhere_allatonce: passed to ``ResourceFinder``
            as ``local_g``.
        :param settings: forwarded to ``ResourceFinder``.
        :param meta_config_path: forwarded to ``ResourceFinder``.
        """
        self.url = base_iri
        self.setgraph = GraphSet(
            self.url,
            supplier_prefix=supplier_prefix,
            wanted_label=False,
            custom_counter_handler=counter_handler,
        )
        self.resp_agent = resp_agent
        self.finder = ResourceFinder(
            ts_url=endpoint,
            base_iri=base_iri,
            local_g=everything_everywhere_allatonce,
            settings=settings,
            meta_config_path=meta_config_path,
        )

        self.ra_id_schemas = {"crossref", "orcid", "viaf", "wikidata", "ror"}
        self.br_id_schemas = {
            "arxiv",
            "doi",
            "issn",
            "isbn",
            "jid",
            "openalex",
            "pmid",
            "pmcid",
            "url",
            "wikidata",
            "wikipedia",
        }
        self.schemas = self.ra_id_schemas.union(self.br_id_schemas)

        self.ra_index = self.indexer_id(ra_index)
        self.br_index = self.indexer_id(br_index)
        self.re_index = self.index_re(re_index_csv)
        self.ar_index = self.index_ar(ar_index_csv)
        self.vi_index = vi_index
        self.preexisting_entities = preexisting_entities
        self.preexisting_graphs = dict()
        self.data = data
        self.counter_handler = counter_handler

    def creator(self, source=None):
        """Process every row of ``self.data`` and return the populated GraphSet.

        :param source: stored as ``self.src`` and passed as ``source`` to every
            ``add_*`` call on the GraphSet.
        :return: ``self.setgraph``.
        """
        self.src = source
        for row in self.data:
            # Per-row state shared by the *_action methods below.
            self.row_meta = ""
            self.venue_meta = ""
            self.type = row["type"]
            self.venue_graph = None
            self.vol_graph = None
            self.issue_graph = None
            # id_action must run first: it creates self.br_graph, which all
            # the following actions attach data to.
            self.id_action(row["id"])
            self.vvi_action(row["venue"], row["volume"], row["issue"])
            self.title_action(row["title"])
            self.author_action(row["author"])
            self.pub_date_action(row["pub_date"])
            self.page_action(row["page"])
            self.type_action(self.type)
            self.publisher_action(row["publisher"])
            self.editor_action(row["editor"], row)
        return self.setgraph

    @staticmethod
    def index_re(id_index):
        """Map each row's ``br`` value to its ``re`` value."""
        return {row["br"]: row["re"] for row in id_index}

    @staticmethod
    def index_ar(id_index):
        """Build ``{meta: {"author": {...}, "editor": {...}, "publisher": {...}}}``.

        Each inner dictionary is produced by parsing the corresponding row
        field, so it maps the second element of every couple to the first one.
        """
        index = dict()
        for row in id_index:
            index[row["meta"]] = {
                role: Creator.__ar_worker(row[role])
                for role in ("author", "editor", "publisher")
            }
        return index

    @staticmethod
    def __ar_worker(s: str) -> dict:
        """Parse ``"a1, b1; a2, b2"`` into ``{"b1": "a1", "b2": "a2"}``.

        An empty/falsy string yields an empty dictionary.
        """
        if not s:
            return dict()
        ar_dict = dict()
        for couple in s.split("; "):
            parts = couple.split(", ")
            ar_dict[parts[1]] = parts[0]
        return ar_dict

    def indexer_id(self, csv_index):
        """Build ``{schema: {literal value: meta}}`` from ``id``/``meta`` rows.

        Every schema in ``self.schemas`` gets an (initially empty) entry.  The
        schema is whatever precedes the first ``:`` of the row ``id``; only
        that prefix is removed, so values that themselves contain
        ``"<schema>:"`` are kept intact.  Rows whose schema is unknown are
        ignored.
        """
        index = {schema: dict() for schema in self.schemas}
        for row in csv_index:
            schema, separator, value = row["id"].partition(":")
            if separator and schema in index:
                index[schema][value] = row["meta"]
        return index

    def _add_entity(self, adder, short_id: str):
        """Create (or reload) the entity ``short_id`` through ``adder``.

        ``adder`` is one of the GraphSet ``add_*`` bound methods.  The
        preexisting graph is requested from the finder only when ``short_id``
        is listed in ``self.preexisting_entities``; otherwise ``None`` is
        passed.
        """
        url = URIRef(self.url + short_id)
        if short_id in self.preexisting_entities:
            preexisting_graph = self.finder.get_subgraph(url, self.preexisting_graphs)
        else:
            preexisting_graph = None
        return adder(
            self.resp_agent,
            source=self.src,
            res=url,
            preexisting_graph=preexisting_graph,
        )

    @staticmethod
    def _set_person_name(agent, full_name: str) -> None:
        """Store ``full_name`` on ``agent``.

        A name containing a comma is split into family name (before the
        comma) and given name (after it); the given name is stored only when
        it is not blank.  A name without comma is stored as a plain name.
        """
        if "," in full_name:
            name_parts = re.split(comma_and_spaces, full_name)
            given_name = name_parts[1]
            family_name = name_parts[0]
            if given_name.strip():
                agent.has_given_name(given_name)
            agent.has_family_name(family_name)
        else:
            agent.has_name(full_name)

    def id_action(self, ids):
        """Create the row's bibliographic resource and attach its identifiers.

        Sets ``self.row_meta`` and ``self.br_graph`` from the ``omid:``
        identifier found in ``ids``.
        """
        idslist = re.split(one_or_more_spaces, ids)
        # publication id
        for identifier in idslist:
            if "omid:" in identifier:
                identifier = identifier.replace("omid:", "")
                self.row_meta = identifier.replace("br/", "")
                self.br_graph = self._add_entity(self.setgraph.add_br, identifier)
        for identifier in idslist:
            self.id_creator(self.br_graph, identifier, ra=False)

    def title_action(self, title):
        """Set the title of the row's resource, if any."""
        if title:
            self.br_graph.has_title(title)

    def author_action(self, authors):
        """Create the author agents and their roles, chained in field order."""
        if not authors:
            return
        aut_role_list = list()
        for aut in re.split(semicolon_in_people_field, authors):
            aut_and_ids = re.search(name_and_ids, aut)
            aut_id_list = aut_and_ids.group(2).split(" ")
            for identifier in aut_id_list:
                if "omid:" in identifier:
                    identifier = str(identifier).replace("omid:", "")
                    aut_meta = identifier.replace("ra/", "")
                    pub_aut = self._add_entity(self.setgraph.add_ra, identifier)
                    self._set_person_name(pub_aut, aut_and_ids.group(1))
            # lists of authors' IDs
            for identifier in aut_id_list:
                self.id_creator(pub_aut, identifier, ra=True)
            # Author ROLE
            ar_id = "ar/" + str(self.ar_index[self.row_meta]["author"][aut_meta])
            pub_aut_role = self._add_entity(self.setgraph.add_ar, ar_id)
            pub_aut_role.create_author()
            self.br_graph.has_contributor(pub_aut_role)
            pub_aut_role.is_held_by(pub_aut)
            aut_role_list.append(pub_aut_role)
            if len(aut_role_list) > 1:
                # Link the previously appended role to the current one.
                aut_role_list[-2].has_next(pub_aut_role)

    def pub_date_action(self, pub_date):
        """Set the publication date from a ``-``-separated string of integers."""
        if pub_date:
            datelist = [int(part) for part in pub_date.split("-")]
            str_date = create_date(datelist)
            self.br_graph.has_pub_date(str_date)

    def vvi_action(self, venue, vol, issue):
        """Create venue, volume and issue resources and link them to the row.

        Sets ``self.venue_meta``, ``self.venue_graph``, ``self.vol_graph`` and
        ``self.issue_graph``.  Volume and issue are created only for journal
        articles, volumes and issues; their ids are read from
        ``self.vi_index``.
        """
        if venue:
            venue_and_ids = re.search(name_and_ids, venue)
            venue_ids_list = venue_and_ids.group(2).split()
            for identifier in venue_ids_list:
                if "omid:" in identifier:
                    ven_id = str(identifier).replace("omid:", "")
                    self.venue_meta = ven_id.replace("br/", "")
                    venue_title = venue_and_ids.group(1)
                    self.venue_graph = self._add_entity(self.setgraph.add_br, ven_id)
                    try:
                        venue_type = self.get_venue_type(self.type, venue_ids_list)
                    except UnboundLocalError:
                        # NOTE(review): get_venue_type as defined in this class
                        # always binds its result; handler kept for overrides.
                        error_message = f"[INFO:Creator] I found the venue {venue} for the resource of type {self.type}, but I don't know how to handle it"
                        raise UnboundLocalError(error_message)
                    if venue_type:
                        venue_type = venue_type.replace(" ", "_")
                        getattr(self.venue_graph, f"create_{venue_type}")()
                    self.venue_graph.has_title(venue_title)
            for identifier in venue_ids_list:
                self.id_creator(self.venue_graph, identifier, ra=False)
            if self.type in {"journal article", "journal volume", "journal issue"}:
                if vol:
                    vol_meta = self.vi_index[self.venue_meta]["volume"][vol]["id"]
                    self.vol_graph = self._add_entity(
                        self.setgraph.add_br, "br/" + vol_meta
                    )
                    self.vol_graph.create_volume()
                    self.vol_graph.has_number(vol)
                if issue:
                    # Issues are indexed under their volume when there is one,
                    # directly under the venue otherwise.
                    if vol:
                        issue_meta = self.vi_index[self.venue_meta]["volume"][vol][
                            "issue"
                        ][issue]["id"]
                    else:
                        issue_meta = self.vi_index[self.venue_meta]["issue"][issue][
                            "id"
                        ]
                    self.issue_graph = self._add_entity(
                        self.setgraph.add_br, "br/" + issue_meta
                    )
                    self.issue_graph.create_issue()
                    self.issue_graph.has_number(issue)
        # Containment chain: row resource -> issue -> volume -> venue,
        # skipping the levels that are missing.
        if venue and vol and issue:
            self.br_graph.is_part_of(self.issue_graph)
            self.issue_graph.is_part_of(self.vol_graph)
            self.vol_graph.is_part_of(self.venue_graph)
        elif venue and vol and not issue:
            self.br_graph.is_part_of(self.vol_graph)
            self.vol_graph.is_part_of(self.venue_graph)
        elif venue and not vol and not issue:
            self.br_graph.is_part_of(self.venue_graph)
        elif venue and not vol and issue:
            self.br_graph.is_part_of(self.issue_graph)
            self.issue_graph.is_part_of(self.venue_graph)

    @classmethod
    def get_venue_type(cls, br_type: str, venue_ids: list) -> str:
        """Infer the venue type from the contained resource type and venue ids.

        :param br_type: type of the resource contained in the venue.
        :param venue_ids: venue identifiers in ``schema:value`` form.
        :return: the venue type, or ``""`` when it is unknown or undecidable.
        """
        schemas = {venue_id.split(":", maxsplit=1)[0] for venue_id in venue_ids}
        venue_type = ""
        if br_type in {"journal article", "journal volume", "journal issue"}:
            venue_type = "journal"
        elif br_type in {"book chapter", "book part", "book section", "book track"}:
            venue_type = "book"
        elif br_type in {"book", "edited book", "monograph", "reference book"}:
            venue_type = "book series"
        elif br_type == "proceedings article":
            venue_type = "proceedings"
        elif br_type in {"proceedings", "report", "standard", "series"}:
            venue_type = "series"
        elif br_type == "reference entry":
            venue_type = "reference book"
        elif br_type == "report series":
            venue_type = "report series"
        # Any other type (including empty, "dataset" and "data file") leaves
        # the venue untyped.

        # Check the type based on the identifier scheme, but only when the
        # venue has at least one identifier other than its OMID.
        has_external_id = any(
            identifier for identifier in venue_ids if not identifier.startswith("omid:")
        )
        if has_external_id:
            has_issn = "issn" in schemas
            has_isbn = "isbn" in schemas
            if venue_type in {"journal", "book series", "series", "report series"}:
                if has_isbn or not has_issn:
                    # It is undecidable
                    venue_type = ""
            elif venue_type in {"book", "proceedings"}:
                if has_issn or not has_isbn:
                    venue_type = ""
            elif venue_type == "reference book":
                if has_issn and has_isbn:
                    venue_type = ""
                elif has_issn:
                    venue_type = "journal"
                # ISBN only, or neither ISSN nor ISBN: stays "reference book".
        return venue_type

    def page_action(self, page):
        """Create the resource embodiment holding the page information."""
        if page:
            re_id = "re/" + str(self.re_index[self.row_meta])
            form = self._add_entity(self.setgraph.add_re, re_id)
            # The same page string is handed to both setters.
            form.has_starting_page(page)
            form.has_ending_page(page)
            self.br_graph.has_format(form)

    def type_action(self, entity_type):
        """Assign ``entity_type`` to the row's resource; unknown types are ignored."""
        factory_name = self._BR_TYPE_FACTORIES.get(entity_type)
        if factory_name is not None:
            getattr(self.br_graph, factory_name)()

    def publisher_action(self, publisher):
        """Create the publisher agents and their roles, chained in field order."""
        if not publisher:
            return
        pub_role_list = list()
        for pub in re.split(semicolon_in_people_field, publisher):
            publ_and_ids = re.search(name_and_ids, pub)
            publ_id_list = publ_and_ids.group(2).split()
            for identifier in publ_id_list:
                if "omid:" in identifier:
                    identifier = str(identifier).replace("omid:", "")
                    pub_meta = identifier.replace("ra/", "")
                    publ_name = publ_and_ids.group(1)
                    publ = self._add_entity(self.setgraph.add_ra, identifier)
                    publ.has_name(publ_name)
            for identifier in publ_id_list:
                self.id_creator(publ, identifier, ra=True)
            # publisherRole
            ar_id = "ar/" + str(self.ar_index[self.row_meta]["publisher"][pub_meta])
            # The preexisting graph is fetched only for preexisting roles,
            # like for every other entity.
            publ_role = self._add_entity(self.setgraph.add_ar, ar_id)
            publ_role.create_publisher()
            self.br_graph.has_contributor(publ_role)
            publ_role.is_held_by(publ)
            pub_role_list.append(publ_role)
            if len(pub_role_list) > 1:
                pub_role_list[-2].has_next(publ_role)

    def editor_action(self, editor, row):
        """Create the editor agents and their roles, chained in field order.

        The role is attached to whichever resource among the row's resource,
        its issue, its volume and its venue has the meta id returned by
        ``get_edited_br_metaid`` for this row.
        """
        if not editor:
            return
        edit_role_list = list()
        for ed in re.split(semicolon_in_people_field, editor):
            ed_and_ids = re.search(name_and_ids, ed)
            ed_id_list = ed_and_ids.group(2).split(" ")
            for identifier in ed_id_list:
                if "omid:" in identifier:
                    identifier = str(identifier).replace("omid:", "")
                    ed_meta = identifier.replace("ra/", "")
                    pub_ed = self._add_entity(self.setgraph.add_ra, identifier)
                    self._set_person_name(pub_ed, ed_and_ids.group(1))
            # lists of editor's IDs
            for identifier in ed_id_list:
                self.id_creator(pub_ed, identifier, ra=True)
            # editorRole
            br_key = get_edited_br_metaid(row, self.row_meta, self.venue_meta)
            ar_id = "ar/" + str(self.ar_index[br_key]["editor"][ed_meta])
            pub_ed_role = self._add_entity(self.setgraph.add_ar, ar_id)
            pub_ed_role.create_editor()
            br_graphs: List[BibliographicResource] = [
                self.br_graph,
                self.issue_graph,
                self.vol_graph,
                self.venue_graph,
            ]
            for graph in br_graphs:
                if br_key == self.__res_metaid(graph):
                    graph.has_contributor(pub_ed_role)
            pub_ed_role.is_held_by(pub_ed)
            edit_role_list.append(pub_ed_role)
            if len(edit_role_list) > 1:
                edit_role_list[-2].has_next(pub_ed_role)

    def __res_metaid(self, graph: BibliographicResource):
        """Return the resource IRI stripped of ``<base>br/``, or ``None`` if ``graph`` is falsy."""
        if graph:
            return graph.res.replace(f"{self.url}br/", "")

    def id_creator(self, graph: BibliographicEntity, identifier: str, ra: bool) -> None:
        """Create the identifier entity for ``identifier`` and attach it to ``graph``.

        :param graph: entity receiving the identifier.
        :param identifier: ``schema:value`` string.
        :param ra: ``True`` to use the responsible-agent schemas and index,
            ``False`` to use the bibliographic-resource ones.
        :raises KeyError: if the value is not in the selected index.

        Identifiers whose schema (the text before the first ``:``) is not
        among the selected schemas -- ``omid:`` ones included -- are ignored.
        """
        # Skip temporary identifiers - they should not be saved in the final dataset
        if identifier.startswith("temp:"):
            return

        # Split once on the first colon: the value may itself contain colons
        # or even the name of another schema (e.g. "url:doi.org/...").
        schema, separator, value = identifier.partition(":")
        known_schemas = self.ra_id_schemas if ra else self.br_id_schemas
        if not separator or schema not in known_schemas:
            return

        index = self.ra_index if ra else self.br_index
        res = index[schema][value]
        new_id = self._add_entity(self.setgraph.add_id, f"id/{res}")
        getattr(new_id, f"create_{schema}")(value)
        graph.has_identifier(new_id)