Coverage for oc_ocdm / metadata / metadata_set.py: 75%
73 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-28 18:52 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-28 18:52 +0000
1#!/usr/bin/python
3# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelo.massari@unibo.it>
4#
5# SPDX-License-Identifier: ISC
7# -*- coding: utf-8 -*-
8from __future__ import annotations
10from typing import TYPE_CHECKING, cast
12from oc_ocdm.counter_handler.counter_handler import CounterHandler
13from oc_ocdm.counter_handler.filesystem_counter_handler import FilesystemCounterHandler
14from oc_ocdm.counter_handler.in_memory_counter_handler import InMemoryCounterHandler
15from oc_ocdm.metadata.entities.dataset import Dataset
16from oc_ocdm.metadata.entities.distribution import Distribution
17from oc_ocdm.support.support import get_count, is_dataset, get_short_name
19if TYPE_CHECKING:
20 from typing import Dict, Tuple, ClassVar
22from rdflib import Graph, URIRef
24from oc_ocdm.metadata.metadata_entity import MetadataEntity
25from oc_ocdm.abstract_set import AbstractSet
class MetadataSet(AbstractSet[MetadataEntity]):
    """Factory and registry for metadata entities (datasets and distributions).

    The set keeps a mapping from each resource URI to its metadata entity and
    owns the counter handler used to number newly created distributions.
    """

    # Labels
    labels: ClassVar[Dict[str, str]] = {
        "_dataset_": "dataset",
        "di": "distribution"
    }

    def __init__(self, base_iri: str, info_dir: str = "", wanted_label: bool = True) -> None:
        """Initialise an empty metadata set.

        Args:
            base_iri: Base IRI of the entities; a trailing ``/`` is appended
                when it is missing.
            info_dir: When non-empty, counters are handled by a
                ``FilesystemCounterHandler`` built from this value; otherwise
                an ``InMemoryCounterHandler`` is used.
            wanted_label: Whether ``_add_metadata`` should generate a label
                for newly created entities.
        """
        super().__init__()
        # The following variable maps a URIRef with the related metadata entity
        self.res_to_entity: Dict[URIRef, MetadataEntity] = {}
        self.base_iri: str = base_iri
        # ``endswith`` (rather than indexing ``[-1]``) also copes with an empty string.
        if not self.base_iri.endswith('/'):
            self.base_iri += '/'
        self.wanted_label: bool = wanted_label

        self.counter_handler: CounterHandler
        if info_dir:
            self.counter_handler = FilesystemCounterHandler(info_dir)
        else:
            self.counter_handler = InMemoryCounterHandler()

    def get_entity(self, res: URIRef) -> MetadataEntity | None:
        """Return the entity registered under ``res``, or ``None`` if there is none."""
        return self.res_to_entity.get(res)

    def add_dataset(self, dataset_name: str, resp_agent: str, source: str | None = None, res: URIRef | None = None,
                    preexisting_graph: Graph | None = None) -> Dataset:
        """Create a ``Dataset`` entity, or return the one already registered under ``res``.

        Args:
            dataset_name: Name of the dataset, forwarded to the entity and to
                the counter handler.
            resp_agent: Responsible agent, forwarded to the entity.
            source: Optional source, forwarded to the entity.
            res: Optional URI of an existing resource.
            preexisting_graph: Optional graph, forwarded to the entity.

        Returns:
            The already registered entity when ``res`` is a key of
            ``res_to_entity``, otherwise a newly constructed ``Dataset``.

        Raises:
            ValueError: If ``res`` is given and ``is_dataset(res)`` is false.
        """
        if res is not None:
            if not is_dataset(res):
                raise ValueError(f"Given res: <{res}> is inappropriate for a Dataset entity.")
            if res in self.res_to_entity:
                return cast(Dataset, self.res_to_entity[res])
        # Here we use a fictitious short name for Dataset, since the OCDM document doesn't specify
        # any particular short name for this type of entity. It's only used internally to distinguish
        # between different metadata entities but it's meaningless outside of this scope.
        cur_g, count, label = self._add_metadata("_dataset_", dataset_name, res)
        return Dataset(cur_g, self.base_iri, dataset_name, self,
                       MetadataEntity.iri_dataset, res,
                       resp_agent, source, count, label, "_dataset_",
                       preexisting_graph)

    def add_di(self, dataset_name: str, resp_agent: str, source: str | None = None,
               res: URIRef | None = None, preexisting_graph: Graph | None = None) -> Distribution:
        """Create a ``Distribution`` entity, or return the one already registered under ``res``.

        Args:
            dataset_name: Name of the dataset, forwarded to the entity and to
                the counter handler.
            resp_agent: Responsible agent, forwarded to the entity.
            source: Optional source, forwarded to the entity.
            res: Optional URI of an existing resource.
            preexisting_graph: Optional graph, forwarded to the entity.

        Returns:
            The already registered entity when ``res`` is a key of
            ``res_to_entity``, otherwise a newly constructed ``Distribution``.

        Raises:
            ValueError: If ``res`` is given and ``get_short_name(res)`` is not ``"di"``.
        """
        if res is not None:
            if get_short_name(res) != "di":
                raise ValueError(f"Given res: <{res}> is inappropriate for a Distribution entity.")
            if res in self.res_to_entity:
                return cast(Distribution, self.res_to_entity[res])
        cur_g, count, label = self._add_metadata("di", dataset_name, res)
        return Distribution(cur_g, self.base_iri, dataset_name, self,
                            MetadataEntity.iri_datafile, res,
                            resp_agent, source, count, label, "di",
                            preexisting_graph)

    def _add_metadata(self, short_name: str, dataset_name: str,
                      res: URIRef | None = None) -> Tuple[Graph, str | None, str | None]:
        """Prepare the graph, counter value and label for a new metadata entity.

        Args:
            short_name: Internal short name of the entity type (a key of ``labels``).
            dataset_name: Dataset name used to address the metadata counter.
            res: Optional URI of an existing resource.

        Returns:
            A ``(graph, count, label)`` tuple. When ``res`` is given, ``count``
            and ``label`` are both ``None``.
        """
        cur_g: Graph = Graph()
        self._set_ns(cur_g)

        count: str | None = None
        label: str | None = None

        if res is not None:
            if short_name != '_dataset_':  # Datasets don't have a counter associated with them...
                try:
                    res_count = int(get_count(res))
                except ValueError:
                    # The count extracted from the URI is not an integer: never raise the counter.
                    res_count = -1
                # Keep the stored counter at least as high as the number found in the URI.
                if res_count > self.counter_handler.read_metadata_counter(short_name, dataset_name):
                    self.counter_handler.set_metadata_counter(res_count, short_name, dataset_name)
            return cur_g, count, label

        if short_name != '_dataset_':  # Datasets don't have a counter associated with them...
            count = str(self.counter_handler.increment_metadata_counter(short_name, dataset_name))

        if self.wanted_label:
            # NOTE(review): for datasets ``count`` is still None here, so the label
            # contains the literal text "None" -- confirm this is intended.
            label = f"{self.labels[short_name]} {count} [{short_name}/{count}]"

        return cur_g, count, label

    def commit_changes(self):
        """Commit every registered entity and unregister those flagged for deletion."""
        # Iterate over a snapshot: removing keys from the dict while iterating
        # over its live view raises "dictionary changed size during iteration".
        for res, entity in list(self.res_to_entity.items()):
            entity.commit_changes()
            if entity.to_be_deleted:
                del self.res_to_entity[res]

    @staticmethod
    def _set_ns(g: Graph) -> None:
        """Bind the ``dcterms``, ``dcat`` and ``void`` prefixes on ``g``."""
        g.namespace_manager.bind("dcterms", MetadataEntity.DCTERMS)
        g.namespace_manager.bind("dcat", MetadataEntity.DCAT)
        g.namespace_manager.bind("void", MetadataEntity.VOID)

    def get_dataset(self) -> tuple[Dataset, ...]:
        """Return all registered entities that are ``Dataset`` instances."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, Dataset))

    def get_di(self) -> tuple[Distribution, ...]:
        """Return all registered entities that are ``Distribution`` instances."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, Distribution))