Coverage for oc_ocdm/metadata/metadata_set.py: 62%

85 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-05-30 22:05 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com> 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any purpose 

6# with or without fee is hereby granted, provided that the above copyright notice 

7# and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

15# SOFTWARE. 

16from __future__ import annotations 

17 

18from typing import TYPE_CHECKING 

19 

20from oc_ocdm.counter_handler.counter_handler import CounterHandler 

21from oc_ocdm.counter_handler.filesystem_counter_handler import FilesystemCounterHandler 

22from oc_ocdm.counter_handler.in_memory_counter_handler import InMemoryCounterHandler 

23from oc_ocdm.metadata.entities.dataset import Dataset 

24from oc_ocdm.metadata.entities.distribution import Distribution 

25from oc_ocdm.support.support import get_count, is_dataset, get_short_name 

26 

27if TYPE_CHECKING: 

28 from typing import Dict, Optional, Tuple, ClassVar 

29 

30from rdflib import Graph, URIRef 

31 

32from oc_ocdm.metadata.metadata_entity import MetadataEntity 

33from oc_ocdm.abstract_set import AbstractSet 

34 

35 

class MetadataSet(AbstractSet):
    """Factory and registry for the metadata entities (datasets and distributions).

    The set keeps a mapping from each resource IRI to its ``MetadataEntity`` and owns
    the counter handler used to number newly created distributions.

    NOTE(review): nothing in this class inserts into ``res_to_entity``; presumably the
    entity constructors register themselves through the ``self`` reference they are
    given -- confirm against ``MetadataEntity``.
    """

    # Maps the internal short name of an entity type to the human-readable word
    # used when composing entity labels (see ``_add_metadata``).
    labels: ClassVar[Dict[str, str]] = {
        "_dataset_": "dataset",
        "di": "distribution"
    }

    def __init__(self, base_iri: str, info_dir: str = "", wanted_label: bool = True) -> None:
        """Initialise an empty metadata set.

        :param base_iri: base IRI of the entities; a trailing ``/`` is appended when missing.
        :param info_dir: when neither ``None`` nor empty, counters are handled by a
            ``FilesystemCounterHandler`` built on this value; otherwise an
            ``InMemoryCounterHandler`` is used.
        :param wanted_label: whether ``_add_metadata`` should compose a label for
            newly created entities.
        """
        super().__init__()
        # The following variable maps a URIRef with the related metadata entity
        self.res_to_entity: Dict[URIRef, MetadataEntity] = {}
        self.base_iri: str = base_iri
        # ``endswith`` (rather than indexing ``[-1]``) also copes with an empty string.
        if not self.base_iri.endswith('/'):
            self.base_iri += '/'
        self.wanted_label: bool = wanted_label

        self.counter_handler: CounterHandler
        if info_dir is not None and info_dir != "":
            self.counter_handler = FilesystemCounterHandler(info_dir)
        else:
            self.counter_handler = InMemoryCounterHandler()

    def get_entity(self, res: URIRef) -> Optional[MetadataEntity]:
        """Return the entity registered under ``res``, or ``None`` when there is none."""
        return self.res_to_entity.get(res)

    def add_dataset(self, dataset_name: str, resp_agent: str, source: Optional[str] = None,
                    res: Optional[URIRef] = None,
                    preexisting_graph: Optional[Graph] = None) -> Dataset:
        """Create a ``Dataset`` entity, or return the one already registered under ``res``.

        :param dataset_name: name of the dataset, forwarded to the ``Dataset`` constructor.
        :param resp_agent: forwarded unchanged to the ``Dataset`` constructor.
        :param source: forwarded unchanged to the ``Dataset`` constructor.
        :param res: optional IRI of the entity; it must satisfy ``is_dataset``.
        :param preexisting_graph: forwarded unchanged to the ``Dataset`` constructor.
        :raises ValueError: if ``res`` is given and ``is_dataset(res)`` is false.
        :return: the registered entity when ``res`` is already known, otherwise a new one.
        """
        if res is not None:
            if not is_dataset(res):
                raise ValueError(f"Given res: <{res}> is inappropriate for a Dataset entity.")
            if res in self.res_to_entity:
                return self.res_to_entity[res]
        # Here we use a fictitious short name for Dataset, since the OCDM document doesn't specify
        # any particular short name for this type of entity. It's only used internally to distinguish
        # between different metadata entities but it's meaningless outside of this scope.
        cur_g, count, label = self._add_metadata("_dataset_", dataset_name, res)
        return Dataset(cur_g, self.base_iri, dataset_name, self, res,
                       MetadataEntity.iri_dataset, resp_agent,
                       source, count, label, "_dataset_", preexisting_graph)

    def add_di(self, dataset_name: str, resp_agent: str, source: Optional[str] = None,
               res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> Distribution:
        """Create a ``Distribution`` entity, or return the one already registered under ``res``.

        :param dataset_name: name of the dataset, forwarded to the ``Distribution``
            constructor and used as part of the counter key.
        :param resp_agent: forwarded unchanged to the ``Distribution`` constructor.
        :param source: forwarded unchanged to the ``Distribution`` constructor.
        :param res: optional IRI of the entity; its short name must be ``"di"``.
        :param preexisting_graph: forwarded unchanged to the ``Distribution`` constructor.
        :raises ValueError: if ``res`` is given and ``get_short_name(res)`` is not ``"di"``.
        :return: the registered entity when ``res`` is already known, otherwise a new one.
        """
        if res is not None:
            if get_short_name(res) != "di":
                raise ValueError(f"Given res: <{res}> is inappropriate for a Distribution entity.")
            if res in self.res_to_entity:
                return self.res_to_entity[res]
        cur_g, count, label = self._add_metadata("di", dataset_name, res)
        return Distribution(cur_g, self.base_iri, dataset_name, self, res,
                            MetadataEntity.iri_datafile, resp_agent,
                            source, count, label, "di", preexisting_graph)

    def _add_metadata(self, short_name: str, dataset_name: str,
                      res: Optional[URIRef] = None) -> Tuple[Graph, Optional[str], Optional[str]]:
        """Prepare the graph, counter value and label for a new metadata entity.

        When ``res`` is given, no new count or label is produced: the stored counter is
        only raised to the number found in ``res`` if that number is larger. When ``res``
        is ``None``, the counter is incremented (except for datasets) and, if labels are
        wanted, a label is composed.

        :param short_name: ``"_dataset_"`` or ``"di"``; a key of ``labels``.
        :param dataset_name: second key passed to the counter handler.
        :param res: optional IRI of an entity that already has an identifier.
        :return: ``(graph, count, label)``; ``count`` and ``label`` are ``None`` when
            ``res`` is given.
        """
        cur_g: Graph = Graph()
        self._set_ns(cur_g)

        count: Optional[str] = None
        label: Optional[str] = None

        if res is not None:
            if short_name != '_dataset_':  # Datasets don't have a counter associated with them...
                try:
                    res_count: int = int(get_count(res))
                except ValueError:
                    # An unparsable count can never exceed the stored counter.
                    res_count = -1
                if res_count > self.counter_handler.read_metadata_counter(short_name, dataset_name):
                    self.counter_handler.set_metadata_counter(res_count, short_name, dataset_name)
            return cur_g, count, label

        if short_name != '_dataset_':  # Datasets don't have a counter associated with them...
            count = str(self.counter_handler.increment_metadata_counter(short_name, dataset_name))

        if self.wanted_label:
            # NOTE(review): for datasets ``count`` is still None here, so the label
            # contains the literal text "None" -- kept as is, confirm whether intended.
            label = f"{self.labels[short_name]} {count} [{short_name}/{count}]"

        return cur_g, count, label

    def commit_changes(self):
        """Commit every registered entity and drop those flagged for deletion.

        The loop walks a snapshot of the mapping: removing keys from a dictionary while
        iterating over its live view raises ``RuntimeError``.
        """
        for res, entity in list(self.res_to_entity.items()):
            entity.commit_changes()
            if entity.to_be_deleted:
                del self.res_to_entity[res]

    @staticmethod
    def _set_ns(g: Graph) -> None:
        """Bind the ``dcterms``, ``dcat`` and ``void`` prefixes on the graph ``g``."""
        g.namespace_manager.bind("dcterms", MetadataEntity.DCTERMS)
        g.namespace_manager.bind("dcat", MetadataEntity.DCAT)
        g.namespace_manager.bind("void", MetadataEntity.VOID)

    def get_dataset(self) -> Tuple[Dataset, ...]:
        """Return all registered ``Dataset`` entities, in registration order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, Dataset))

    def get_di(self) -> Tuple[Distribution, ...]:
        """Return all registered ``Distribution`` entities, in registration order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, Distribution))