Coverage for oc_meta/core/creator.py: 93%

466 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-12-20 08:55 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright 2019 Silvio Peroni <essepuntato@gmail.com> 

4# Copyright 2019-2020 Fabio Mariani <fabio.mariani555@gmail.com> 

5# Copyright 2021 Simone Persiani <iosonopersia@gmail.com> 

6# Copyright 2021-2022 Arcangelo Massari <arcangelo.massari@unibo.it> 

7# 

8# Permission to use, copy, modify, and/or distribute this software for any purpose 

9# with or without fee is hereby granted, provided that the above copyright notice 

10# and this permission notice appear in all copies. 

11# 

12# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

13# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

14# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

15# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

16# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

17# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

18# SOFTWARE. 

19 

20from __future__ import annotations 

21 

22import re 

23from typing import List 

24 

25from oc_meta.core.curator import get_edited_br_metaid 

26from oc_meta.lib.finder import ResourceFinder 

27from oc_meta.lib.master_of_regex import ( 

28 comma_and_spaces, 

29 name_and_ids, 

30 one_or_more_spaces, 

31 semicolon_in_people_field, 

32) 

33from rdflib import URIRef 

34 

35from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler 

36from oc_ocdm.graph import GraphSet 

37from oc_ocdm.graph.entities.bibliographic import BibliographicResource 

38from oc_ocdm.graph.entities.bibliographic_entity import BibliographicEntity 

39from oc_ocdm.support import create_date 

40 

41 

class Creator(object):
    """
    Turn a list of row dictionaries into entities of an oc_ocdm ``GraphSet``.

    Each row is expected to expose the keys ``id``, ``title``, ``author``,
    ``pub_date``, ``venue``, ``volume``, ``issue``, ``page``, ``type``,
    ``publisher`` and ``editor``. Every entity mentioned in a row must already
    carry an ``omid:`` identifier; the MetaIDs of identifiers, agent roles and
    resource embodiments are resolved through the indexes given to the
    constructor.
    """

    # Maps the value of the "type" field to the name of the method that must
    # be called on the bibliographic resource. 'edited book', 'monograph',
    # 'proceedings series' and 'standard series' are deliberately left
    # unmapped (their handlers were commented out in the original chain).
    _BR_TYPE_METHODS = {
        "abstract": "create_abstract",
        "archival document": "create_archival_document",
        "audio document": "create_audio_document",
        "book": "create_book",
        "book chapter": "create_book_chapter",
        "book part": "create_book_part",
        "book section": "create_book_section",
        "book series": "create_book_series",
        "book set": "create_book_set",
        "computer program": "create_computer_program",
        "data file": "create_dataset",
        "dataset": "create_dataset",
        "data management plan": "create_data_management_plan",
        "dissertation": "create_dissertation",
        "editorial": "create_editorial",
        "journal": "create_journal",
        "journal article": "create_journal_article",
        "journal editorial": "create_journal_editorial",
        "journal issue": "create_issue",
        "journal volume": "create_volume",
        "newspaper": "create_newspaper",
        "newspaper article": "create_newspaper_article",
        "newspaper issue": "create_newspaper_issue",
        "peer review": "create_peer_review",
        "preprint": "create_preprint",
        "presentation": "create_presentation",
        "proceedings": "create_proceedings",
        "proceedings article": "create_proceedings_article",
        "reference book": "create_reference_book",
        "reference entry": "create_reference_entry",
        "report": "create_report",
        "report series": "create_report_series",
        "retraction notice": "create_retraction_notice",
        "standard": "create_standard",
        "series": "create_series",
        "web content": "create_web_content",
    }

    # Default type of the venue, given the type of the resource it contains.
    # Types that are not listed (e.g. 'dataset', 'data file', '') give "".
    _VENUE_TYPE_BY_BR_TYPE = {
        "journal article": "journal",
        "journal volume": "journal",
        "journal issue": "journal",
        "book chapter": "book",
        "book part": "book",
        "book section": "book",
        "book track": "book",
        "book": "book series",
        "edited book": "book series",
        "monograph": "book series",
        "reference book": "book series",
        "proceedings article": "proceedings",
        "proceedings": "series",
        "report": "series",
        "standard": "series",
        "series": "series",
        "reference entry": "reference book",
        "report series": "report series",
    }

    def __init__(
        self,
        data: list,
        finder: ResourceFinder,
        base_iri: str,
        counter_handler: RedisCounterHandler,
        supplier_prefix: str,
        resp_agent: str,
        ra_index: dict,
        br_index: dict,
        re_index_csv: dict,
        ar_index_csv: dict,
        vi_index: dict,
        silencer: list = None,
    ):
        """
        Store the input rows and build the lookup indexes.

        Args:
            data: rows to process (see the class docstring for the keys)
            finder: object exposing ``prebuilt_subgraphs``, ``get_subgraph``
                and ``local_g`` for the entities that already exist
            base_iri: prefix of every entity URI
            counter_handler: counter handler forwarded to the ``GraphSet``
            supplier_prefix: supplier prefix forwarded to the ``GraphSet``
            resp_agent: responsible agent passed to every ``add_*`` call
            ra_index: rows with ``id`` and ``meta`` keys for agents' identifiers
            br_index: rows with ``id`` and ``meta`` keys for resources' identifiers
            re_index_csv: rows with ``br`` and ``re`` keys
            ar_index_csv: rows with ``meta``, ``author``, ``editor`` and
                ``publisher`` keys
            vi_index: nested dictionary venue -> volume/issue -> ``id``
            silencer: role names ('author', 'editor', 'publisher') that must
                not be touched on preexisting resources already having them
        """
        self.url = base_iri
        self.setgraph = GraphSet(
            self.url,
            supplier_prefix=supplier_prefix,
            wanted_label=False,
            custom_counter_handler=counter_handler,
        )
        self.resp_agent = resp_agent
        self.finder = finder

        self.ra_id_schemas = {"crossref", "orcid", "viaf", "wikidata", "ror"}
        self.br_id_schemas = {
            "arxiv",
            "doi",
            "issn",
            "isbn",
            "jid",
            "openalex",
            "pmid",
            "pmcid",
            "url",
            "wikidata",
            "wikipedia",
        }
        self.schemas = self.ra_id_schemas.union(self.br_id_schemas)

        self.ra_index = self.indexer_id(ra_index)
        self.br_index = self.indexer_id(br_index)
        self.re_index = self.index_re(re_index_csv)
        self.ar_index = self.index_ar(ar_index_csv)
        self.vi_index = vi_index
        self.data = data
        self.counter_handler = counter_handler
        self.silencer = silencer or []

    def _has_existing_roles(self, br_uri: URIRef) -> dict:
        """
        Check if BR has existing author/editor/publisher roles in local_g.

        Args:
            br_uri: URI of the bibliographic resource

        Returns:
            Dictionary with keys 'author', 'editor', 'publisher' indicating presence of existing roles
        """
        has_roles = {"author": False, "editor": False, "publisher": False}
        role_keys = {
            URIRef("http://purl.org/spar/pro/author"): "author",
            URIRef("http://purl.org/spar/pro/editor"): "editor",
            URIRef("http://purl.org/spar/pro/publisher"): "publisher",
        }
        pro_isDocumentContextFor = URIRef("http://purl.org/spar/pro/isDocumentContextFor")
        pro_withRole = URIRef("http://purl.org/spar/pro/withRole")

        local_g = self.finder.local_g
        for _, _, ar_uri in local_g.triples((br_uri, pro_isDocumentContextFor, None)):
            for _, _, role in local_g.triples((ar_uri, pro_withRole, None)):
                role_key = role_keys.get(role)
                if role_key is not None:
                    has_roles[role_key] = True

        return has_roles

    def creator(self, source=None):
        """
        Process every row of ``self.data`` and return the populated graph set.

        Args:
            source: value forwarded as ``source`` to every ``add_*`` call

        Returns:
            The ``GraphSet`` holding all the entities created or updated
        """
        self.src = source
        for row in self.data:
            self.row_meta = ""
            self.venue_meta = ""
            # Every field is read up-front so that a malformed row fails
            # before any entity is touched.
            ids = row["id"]
            title = row["title"]
            authors = row["author"]
            pub_date = row["pub_date"]
            venue = row["venue"]
            vol = row["volume"]
            issue = row["issue"]
            page = row["page"]
            self.type = row["type"]
            publisher = row["publisher"]
            editor = row["editor"]
            self.venue_graph = None
            self.vol_graph = None
            self.issue_graph = None
            self.id_action(ids)
            self.vvi_action(venue, vol, issue)
            self.title_action(title)

            br_uri = URIRef(f"{self.url}br/{self.row_meta}")
            skip_author = False
            skip_publisher = False
            skip_editor = False

            # A silenced role is skipped only when the resource already
            # exists AND already has at least one role of that kind.
            if br_uri in self.finder.prebuilt_subgraphs:
                existing_roles = self._has_existing_roles(br_uri)
                skip_author = "author" in self.silencer and existing_roles["author"]
                skip_publisher = "publisher" in self.silencer and existing_roles["publisher"]
                skip_editor = "editor" in self.silencer and existing_roles["editor"]

            if not skip_author:
                self.author_action(authors)

            self.pub_date_action(pub_date)
            self.page_action(page)
            self.type_action(self.type)

            if not skip_publisher:
                self.publisher_action(publisher)

            if not skip_editor:
                self.editor_action(editor, row)
        return self.setgraph

    @staticmethod
    def index_re(id_index):
        """Return a dictionary mapping each row's ``br`` value to its ``re`` value."""
        return {row["br"]: row["re"] for row in id_index}

    @staticmethod
    def index_ar(id_index):
        """
        Return, for each row's ``meta`` value, the parsed ``author``,
        ``editor`` and ``publisher`` fields (see ``__ar_worker``).
        """
        index = dict()
        for row in id_index:
            index[row["meta"]] = {
                "author": Creator.__ar_worker(row["author"]),
                "editor": Creator.__ar_worker(row["editor"]),
                "publisher": Creator.__ar_worker(row["publisher"]),
            }
        return index

    @staticmethod
    def __ar_worker(s: str) -> dict:
        """
        Parse a string of ``"; "``-separated couples, each made of two
        ``", "``-separated values, into a dictionary keyed by the second
        value of each couple. An empty string gives an empty dictionary.
        """
        ar_dict = dict()
        if s:
            for couple in s.split("; "):
                values = couple.split(", ")
                ar_dict[values[1]] = values[0]
        return ar_dict

    def indexer_id(self, csv_index):
        """
        Group identifiers by schema.

        Args:
            csv_index: rows with an ``id`` key (``<schema>:<value>``) and a
                ``meta`` key

        Returns:
            Dictionary ``{schema: {value: meta}}`` with one (possibly empty)
            entry for each schema in ``self.schemas``
        """
        index = {schema: dict() for schema in self.schemas}
        for row in csv_index:
            # Only the leading "<schema>:" is a prefix: the value itself may
            # legitimately contain the same text again, so it is never
            # removed with str.replace.
            schema, separator, value = row["id"].partition(":")
            if separator and schema in index:
                index[schema][value] = row["meta"]
        return index

    def _preexisting_graph(self, url: URIRef):
        """Return the finder's subgraph of ``url`` if it is prebuilt, else None."""
        if url in self.finder.prebuilt_subgraphs:
            return self.finder.get_subgraph(url)
        return None

    def _add_br(self, url: URIRef):
        """Add (or reload) the bibliographic resource identified by ``url``."""
        return self.setgraph.add_br(
            self.resp_agent,
            source=self.src,
            res=url,
            preexisting_graph=self._preexisting_graph(url),
        )

    def _add_role(self, ar_meta):
        """Add (or reload) the agent role whose MetaID is ``ar_meta``."""
        url_ar = URIRef(self.url + "ar/" + str(ar_meta))
        return self.setgraph.add_ar(
            self.resp_agent,
            source=self.src,
            res=url_ar,
            preexisting_graph=self._preexisting_graph(url_ar),
        )

    @staticmethod
    def _set_person_name(agent, full_name: str) -> None:
        """
        Set family/given name when ``full_name`` contains a comma,
        otherwise set it as a plain name.
        """
        if "," in full_name:
            name_parts = re.split(comma_and_spaces, full_name)
            given_name = name_parts[1]
            family_name = name_parts[0]
            if given_name.strip():
                agent.has_given_name(given_name)
            agent.has_family_name(family_name)
        else:
            agent.has_name(full_name)

    def _add_agent(self, agent_field: str, split_name: bool):
        """
        Add the responsible agent described by ``agent_field`` together with
        all its identifiers.

        Args:
            agent_field: text matched by the ``name_and_ids`` pattern
            split_name: True to split the name on the comma (people),
                False to store it as is (publishers)

        Returns:
            Tuple ``(agent entity, agent MetaID)``

        Raises:
            ValueError: if none of the identifiers is an OMID. Without this
                check the agent of the previous iteration would be silently
                reused (or an UnboundLocalError raised on the first one).
        """
        name_match = re.search(name_and_ids, agent_field)
        agent_name = name_match.group(1)
        identifiers = name_match.group(2).split()
        agent = None
        agent_meta = None
        for identifier in identifiers:
            if "omid:" in identifier:
                omid = str(identifier).replace("omid:", "")
                url = URIRef(self.url + omid)
                agent_meta = omid.replace("ra/", "")
                agent = self.setgraph.add_ra(
                    self.resp_agent,
                    source=self.src,
                    res=url,
                    preexisting_graph=self._preexisting_graph(url),
                )
                if split_name:
                    self._set_person_name(agent, agent_name)
                else:
                    agent.has_name(agent_name)
        if agent is None:
            raise ValueError(
                f"No OMID found among the identifiers of the responsible agent '{agent_field}'"
            )
        for identifier in identifiers:
            self.id_creator(agent, identifier, ra=True)
        return agent, agent_meta

    def id_action(self, ids):
        """
        Add the row's bibliographic resource and its identifiers.

        Sets ``self.row_meta`` and ``self.br_graph`` from the ``omid:``
        identifier found in the space-separated ``ids`` string.
        """
        idslist = re.split(one_or_more_spaces, ids)
        # publication id
        for identifier in idslist:
            if "omid:" in identifier:
                omid = identifier.replace("omid:", "")
                self.row_meta = omid.replace("br/", "")
                self.br_graph = self._add_br(URIRef(self.url + omid))
        for identifier in idslist:
            self.id_creator(self.br_graph, identifier, ra=False)

    def title_action(self, title):
        """Set the title of the row's resource, if any."""
        if title:
            self.br_graph.has_title(title)

    def author_action(self, authors):
        """
        Add every author of the row, its author role and the chain of roles.

        Args:
            authors: authors separated according to
                ``semicolon_in_people_field``; may be empty
        """
        if not authors:
            return
        previous_role = None
        for aut in re.split(semicolon_in_people_field, authors):
            pub_aut, aut_meta = self._add_agent(aut, split_name=True)
            # Author ROLE
            pub_aut_role = self._add_role(self.ar_index[self.row_meta]["author"][aut_meta])
            pub_aut_role.create_author()
            self.br_graph.has_contributor(pub_aut_role)
            pub_aut_role.is_held_by(pub_aut)
            # The previous role is tracked directly instead of being searched
            # with list.index(), which is quadratic on long author lists.
            if previous_role is not None:
                previous_role.has_next(pub_aut_role)
            previous_role = pub_aut_role

    def pub_date_action(self, pub_date):
        """
        Set the publication date of the row's resource, if any.

        Args:
            pub_date: dash-separated integers (e.g. year, year-month or
                year-month-day); a non-integer part raises ValueError
        """
        if pub_date:
            date_parts = [int(part) for part in pub_date.split("-")]
            self.br_graph.has_pub_date(create_date(date_parts))

    def vvi_action(self, venue, vol, issue):
        """
        Add venue, volume and issue of the row and link them to the resource.

        Volume and issue entities are created only when ``self.type`` is
        'journal article', 'journal volume' or 'journal issue'; their MetaIDs
        are read from ``self.vi_index``.
        """
        if not venue:
            return
        venue_and_ids = re.search(name_and_ids, venue)
        venue_title = venue_and_ids.group(1)
        venue_ids_list = venue_and_ids.group(2).split()
        for identifier in venue_ids_list:
            if "omid:" in identifier:
                ven_id = str(identifier).replace("omid:", "")
                self.venue_meta = ven_id.replace("br/", "")
                self.venue_graph = self._add_br(URIRef(self.url + ven_id))
        try:
            venue_type = self.get_venue_type(self.type, venue_ids_list)
        except UnboundLocalError:
            # Defensive: get_venue_type as defined in this class always
            # binds its result, so this only guards against overrides.
            error_message = f"[INFO:Creator] I found the venue {venue} for the resource of type {self.type}, but I don't know how to handle it"
            raise UnboundLocalError(error_message)
        if venue_type:
            venue_type = venue_type.replace(" ", "_")
            getattr(self.venue_graph, f"create_{venue_type}")()
        self.venue_graph.has_title(venue_title)
        for identifier in venue_ids_list:
            self.id_creator(self.venue_graph, identifier, ra=False)

        if self.type in {"journal article", "journal volume", "journal issue"}:
            if vol:
                vol_meta = self.vi_index[self.venue_meta]["volume"][vol]["id"]
                self.vol_graph = self._add_br(URIRef(self.url + "br/" + vol_meta))
                self.vol_graph.create_volume()
                self.vol_graph.has_number(vol)
            if issue:
                if vol:
                    issue_meta = self.vi_index[self.venue_meta]["volume"][vol]["issue"][issue]["id"]
                else:
                    issue_meta = self.vi_index[self.venue_meta]["issue"][issue]["id"]
                self.issue_graph = self._add_br(URIRef(self.url + "br/" + issue_meta))
                self.issue_graph.create_issue()
                self.issue_graph.has_number(issue)

        # Containment chain: resource -> issue -> volume -> venue, skipping
        # the levels that are not specified in the row.
        if vol and issue:
            self.br_graph.is_part_of(self.issue_graph)
            self.issue_graph.is_part_of(self.vol_graph)
            self.vol_graph.is_part_of(self.venue_graph)
        elif vol:
            self.br_graph.is_part_of(self.vol_graph)
            self.vol_graph.is_part_of(self.venue_graph)
        elif issue:
            self.br_graph.is_part_of(self.issue_graph)
            self.issue_graph.is_part_of(self.venue_graph)
        else:
            self.br_graph.is_part_of(self.venue_graph)

    @classmethod
    def get_venue_type(cls, br_type: str, venue_ids: list) -> str:
        """
        Infer the type of a venue.

        Args:
            br_type: type of the resource contained in the venue
            venue_ids: identifiers of the venue (``<schema>:<value>``)

        Returns:
            The venue type, or an empty string when it is unknown or when the
            identifier schemas contradict the type suggested by ``br_type``
        """
        schemas = {venue_id.split(":", maxsplit=1)[0] for venue_id in venue_ids}
        venue_type = cls._VENUE_TYPE_BY_BR_TYPE.get(br_type, "")
        has_issn = "issn" in schemas
        has_isbn = "isbn" in schemas
        # Check the type based on the identifier scheme
        has_external_id = any(
            identifier for identifier in venue_ids if not identifier.startswith("omid:")
        )
        if has_external_id:
            if venue_type in {"journal", "book series", "series", "report series"}:
                if has_isbn or not has_issn:
                    # It is undecidable
                    venue_type = ""
            elif venue_type in {"book", "proceedings"}:
                if has_issn or not has_isbn:
                    venue_type = ""
            elif venue_type == "reference book":
                if has_issn and has_isbn:
                    venue_type = ""
                elif has_issn:
                    venue_type = "journal"
                # with an ISBN only, or with neither, it stays a reference book
        return venue_type

    def page_action(self, page):
        """
        Add the resource embodiment holding the pages of the row's resource.

        The same ``page`` string is passed to both ``has_starting_page`` and
        ``has_ending_page``.
        """
        if page:
            url_re = URIRef(self.url + "re/" + str(self.re_index[self.row_meta]))
            form = self.setgraph.add_re(
                self.resp_agent,
                source=self.src,
                res=url_re,
                preexisting_graph=self._preexisting_graph(url_re),
            )
            form.has_starting_page(page)
            form.has_ending_page(page)
            self.br_graph.has_format(form)

    def type_action(self, entity_type):
        """
        Assign the type to the row's resource.

        Types that are not in ``_BR_TYPE_METHODS`` (including the empty
        string) leave the resource untouched.
        """
        method_name = self._BR_TYPE_METHODS.get(entity_type)
        if method_name is not None:
            getattr(self.br_graph, method_name)()

    def publisher_action(self, publisher):
        """
        Add every publisher of the row, its publisher role and the chain of roles.

        Args:
            publisher: publishers separated according to
                ``semicolon_in_people_field``; may be empty
        """
        if not publisher:
            return
        previous_role = None
        for pub in re.split(semicolon_in_people_field, publisher):
            publ, pub_meta = self._add_agent(pub, split_name=False)
            # publisherRole
            publ_role = self._add_role(self.ar_index[self.row_meta]["publisher"][pub_meta])
            publ_role.create_publisher()
            self.br_graph.has_contributor(publ_role)
            publ_role.is_held_by(publ)
            if previous_role is not None:
                previous_role.has_next(publ_role)
            previous_role = publ_role

    def editor_action(self, editor, row):
        """
        Add every editor of the row, its editor role and the chain of roles.

        The role is attached to whichever of resource, issue, volume and
        venue has the MetaID returned by ``get_edited_br_metaid``.

        Args:
            editor: editors separated according to
                ``semicolon_in_people_field``; may be empty
            row: the row being processed
        """
        if not editor:
            return
        previous_role = None
        for ed in re.split(semicolon_in_people_field, editor):
            pub_ed, ed_meta = self._add_agent(ed, split_name=True)
            # editorRole
            br_key = get_edited_br_metaid(row, self.row_meta, self.venue_meta)
            pub_ed_role = self._add_role(self.ar_index[br_key]["editor"][ed_meta])
            pub_ed_role.create_editor()
            br_graphs: List[BibliographicResource] = [
                self.br_graph,
                self.issue_graph,
                self.vol_graph,
                self.venue_graph,
            ]
            for graph in br_graphs:
                if br_key == self.__res_metaid(graph):
                    graph.has_contributor(pub_ed_role)
            pub_ed_role.is_held_by(pub_ed)
            if previous_role is not None:
                previous_role.has_next(pub_ed_role)
            previous_role = pub_ed_role

    def __res_metaid(self, graph: BibliographicResource):
        """Return the MetaID of ``graph``, or None if ``graph`` is falsy."""
        if graph:
            return graph.res.replace(f"{self.url}br/", "")

    def id_creator(self, graph: BibliographicEntity, identifier: str, ra: bool) -> None:
        """
        Add the identifier entity for ``identifier`` and attach it to ``graph``.

        Identifiers starting with ``temp:`` and identifiers whose schema is
        not among the allowed ones (``ra_id_schemas`` or ``br_id_schemas``,
        hence also ``omid:``) are ignored.

        Args:
            graph: entity the identifier belongs to
            identifier: string in the form ``<schema>:<value>``
            ra: True for a responsible agent, False for a bibliographic resource

        Raises:
            KeyError: if the value is missing from the relevant index
        """
        # Skip temporary identifiers - they should not be saved in the final dataset
        if identifier.startswith("temp:"):
            return

        if ra:
            allowed_schemas, index = self.ra_id_schemas, self.ra_index
        else:
            allowed_schemas, index = self.br_id_schemas, self.br_index

        # The schema is exactly what precedes the first colon. Matching with
        # startswith() on every schema and stripping with replace() would
        # corrupt values containing "<schema>:" and could match a second
        # schema on the already stripped value (e.g. "url:arxiv.org/...").
        schema, separator, value = identifier.partition(":")
        if not separator or schema not in allowed_schemas:
            return

        url = URIRef(self.url + "id/" + index[schema][value])
        new_id = self.setgraph.add_id(
            self.resp_agent,
            source=self.src,
            res=url,
            preexisting_graph=self._preexisting_graph(url),
        )
        getattr(new_id, f"create_{schema}")(value)
        graph.has_identifier(new_id)