Coverage for oc_ocdm / metadata / metadata_set.py: 75%

73 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-28 18:52 +0000

1#!/usr/bin/python 

2 

3# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7# -*- coding: utf-8 -*- 

8from __future__ import annotations 

9 

10from typing import TYPE_CHECKING, cast 

11 

12from oc_ocdm.counter_handler.counter_handler import CounterHandler 

13from oc_ocdm.counter_handler.filesystem_counter_handler import FilesystemCounterHandler 

14from oc_ocdm.counter_handler.in_memory_counter_handler import InMemoryCounterHandler 

15from oc_ocdm.metadata.entities.dataset import Dataset 

16from oc_ocdm.metadata.entities.distribution import Distribution 

17from oc_ocdm.support.support import get_count, is_dataset, get_short_name 

18 

19if TYPE_CHECKING: 

20 from typing import Dict, Tuple, ClassVar 

21 

22from rdflib import Graph, URIRef 

23 

24from oc_ocdm.metadata.metadata_entity import MetadataEntity 

25from oc_ocdm.abstract_set import AbstractSet 

26 

27 

class MetadataSet(AbstractSet[MetadataEntity]):
    """Factory and registry for the metadata entities (datasets and distributions).

    ``res_to_entity`` maps each resource URI to its metadata entity. This class
    only reads and prunes that mapping; new entities receive this set as a
    constructor argument (presumably they register themselves there -- confirm
    against ``MetadataEntity``).
    """

    # Maps each internal short name to the word used when building entity labels.
    labels: ClassVar[Dict[str, str]] = {
        "_dataset_": "dataset",
        "di": "distribution"
    }

    def __init__(self, base_iri: str, info_dir: str = "", wanted_label: bool = True) -> None:
        """Initialise an empty set.

        :param base_iri: base IRI handed to every created entity; a trailing
            ``/`` is appended when missing (an empty string becomes ``"/"``).
        :param info_dir: when non-empty, counters are handled by a
            ``FilesystemCounterHandler`` built on this value; otherwise an
            ``InMemoryCounterHandler`` is used.
        :param wanted_label: whether ``_add_metadata`` builds a label for
            entities created without an explicit ``res``.
        """
        super().__init__()
        # The following variable maps a URIRef with the related metadata entity
        self.res_to_entity: Dict[URIRef, MetadataEntity] = {}
        # ``endswith`` (unlike indexing with [-1]) does not fail on an empty string.
        self.base_iri: str = base_iri if base_iri.endswith('/') else base_iri + '/'
        self.wanted_label: bool = wanted_label

        self.counter_handler: CounterHandler
        if info_dir:
            self.counter_handler = FilesystemCounterHandler(info_dir)
        else:
            self.counter_handler = InMemoryCounterHandler()

    def get_entity(self, res: URIRef) -> MetadataEntity | None:
        """Return the entity registered under ``res``, or ``None`` if there is none."""
        return self.res_to_entity.get(res)

    def add_dataset(self, dataset_name: str, resp_agent: str, source: str | None = None, res: URIRef | None = None,
                    preexisting_graph: Graph | None = None) -> Dataset:
        """Return the ``Dataset`` for ``res``, creating it when not already registered.

        :raises ValueError: if ``res`` is given and ``is_dataset(res)`` is false.
        """
        if res is not None:
            if not is_dataset(res):
                raise ValueError(f"Given res: <{res}> is inappropriate for a Dataset entity.")
            if res in self.res_to_entity:
                return cast(Dataset, self.res_to_entity[res])
        # Here we use a fictitious short name for Dataset, since the OCDM document doesn't specify
        # any particular short name for this type of entity. It's only used internally to distinguish
        # between different metadata entities but it's meaningless outside of this scope.
        cur_g, count, label = self._add_metadata("_dataset_", dataset_name, res)
        return Dataset(cur_g, self.base_iri, dataset_name, self,
                       MetadataEntity.iri_dataset, res,
                       resp_agent, source, count, label, "_dataset_",
                       preexisting_graph)

    def add_di(self, dataset_name: str, resp_agent: str, source: str | None = None,
               res: URIRef | None = None, preexisting_graph: Graph | None = None) -> Distribution:
        """Return the ``Distribution`` for ``res``, creating it when not already registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"di"``.
        """
        if res is not None:
            if get_short_name(res) != "di":
                raise ValueError(f"Given res: <{res}> is inappropriate for a Distribution entity.")
            if res in self.res_to_entity:
                return cast(Distribution, self.res_to_entity[res])
        cur_g, count, label = self._add_metadata("di", dataset_name, res)
        return Distribution(cur_g, self.base_iri, dataset_name, self,
                            MetadataEntity.iri_datafile, res,
                            resp_agent, source, count, label, "di",
                            preexisting_graph)

    def _add_metadata(self, short_name: str, dataset_name: str,
                      res: URIRef | None = None) -> Tuple[Graph, str | None, str | None]:
        """Prepare the graph, counter value and label for a new metadata entity.

        :return: ``(graph, count, label)``. When ``res`` is given, ``count`` and
            ``label`` are both ``None`` and, for non-dataset entities, the stored
            counter is raised to the number found in ``res`` if that is larger.
            When ``res`` is ``None``, non-dataset entities get the next counter
            value (as a string) and a label is built if ``wanted_label`` is set.
        """
        cur_g: Graph = Graph()
        self._set_ns(cur_g)

        count: str | None = None
        label: str | None = None

        if res is not None:
            if short_name != '_dataset_':  # Datasets don't have a counter associated with them...
                res_count: int
                try:
                    res_count = int(get_count(res))
                except ValueError:
                    # A non-numeric count can never exceed the stored counter.
                    res_count = -1
                if res_count > self.counter_handler.read_metadata_counter(short_name, dataset_name):
                    self.counter_handler.set_metadata_counter(res_count, short_name, dataset_name)
            return cur_g, count, label

        if short_name != '_dataset_':  # Datasets don't have a counter associated with them...
            count = str(self.counter_handler.increment_metadata_counter(short_name, dataset_name))

        if self.wanted_label:
            # NOTE(review): for datasets ``count`` is still None at this point, so the
            # label contains the text "None"; kept as-is to preserve existing output.
            label = f"{self.labels[short_name]} {count} [{short_name}/{count}]"

        return cur_g, count, label

    def commit_changes(self) -> None:
        """Commit every registered entity and drop those flagged ``to_be_deleted``.

        The flag is read after the entity's own ``commit_changes()`` call, as in
        the original statement order.
        """
        # Iterate over a snapshot: removing keys while iterating the live dict view
        # raises "RuntimeError: dictionary changed size during iteration".
        for res, entity in list(self.res_to_entity.items()):
            entity.commit_changes()
            if entity.to_be_deleted:
                del self.res_to_entity[res]

    @staticmethod
    def _set_ns(g: Graph) -> None:
        """Bind the ``dcterms``, ``dcat`` and ``void`` prefixes on ``g``."""
        g.namespace_manager.bind("dcterms", MetadataEntity.DCTERMS)
        g.namespace_manager.bind("dcat", MetadataEntity.DCAT)
        g.namespace_manager.bind("void", MetadataEntity.VOID)

    def get_dataset(self) -> tuple[Dataset, ...]:
        """Return all registered entities that are ``Dataset`` instances."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, Dataset))

    def get_di(self) -> tuple[Distribution, ...]:
        """Return all registered entities that are ``Distribution`` instances."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, Distribution))

120 

121 def get_di(self) -> tuple[Distribution, ...]: 

122 return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, Distribution))