Coverage for oc_meta / run / infodir / check.py: 0%
53 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-03 17:25 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-03 17:25 +0000
1import argparse
2import os
3import zipfile
4from multiprocessing import Pool, cpu_count
6from oc_ocdm.support import get_prefix, get_resource_number, get_short_name
7from rdflib import Dataset, URIRef
8from rdflib.namespace import PROV, RDF
9from redis import Redis
10from rich_argparse import RichHelpFormatter
11from tqdm import tqdm
def process_zip_file(args):
    """Check one provenance zip archive against Redis and collect missing keys.

    Every regular member of the archive is parsed as JSON-LD. For each subject
    typed ``prov:Entity`` the described entity URI is taken as the text before
    ``/prov/se/`` and the Redis key
    ``<short_name>:<supplier_prefix>:<resource_number>:se`` is looked up. Keys
    that do not exist are printed and collected.

    Args:
        args: A ``(zip_file, redis_host, redis_port, redis_db)`` tuple. The
            values are packed into one argument so the function can be mapped
            over a process pool.

    Returns:
        A list of dicts, one per missing key, with the entries ``"URI"``,
        ``"Prov URI"`` and ``"Chiave Redis attesa"``.
    """
    zip_file, redis_host, redis_port, redis_db = args
    redis_client = Redis(host=redis_host, port=redis_port, db=redis_db)
    missing_entities = []

    try:
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            for file_name in zip_ref.namelist():
                # Directory entries carry no payload; feeding their empty
                # content to the JSON-LD parser would only raise.
                if file_name.endswith('/'):
                    continue

                with zip_ref.open(file_name) as entity_file:
                    g = Dataset(default_union=True)
                    g.parse(data=entity_file.read(), format='json-ld')

                for s, _, _ in g.triples((None, RDF.type, PROV.Entity)):
                    prov_entity_uri = str(s)
                    # The snapshot URI is "<entity URI>/prov/se/<n>".
                    entity_uri = prov_entity_uri.split('/prov/se/')[0]
                    entity_uri_ref = URIRef(entity_uri)
                    supplier_prefix = get_prefix(entity_uri_ref)
                    short_name = get_short_name(entity_uri_ref)
                    resource_number = get_resource_number(entity_uri_ref)

                    expected_key = f"{short_name}:{supplier_prefix}:{resource_number}:se"

                    if not redis_client.exists(expected_key):
                        print("\nEntità mancante trovata:")
                        print(f"URI: {entity_uri}")
                        print(f"Prov URI: {prov_entity_uri}")
                        print(f"Chiave Redis attesa: {expected_key}")
                        print("---")

                        missing_entities.append({
                            "URI": entity_uri,
                            "Prov URI": prov_entity_uri,
                            "Chiave Redis attesa": expected_key
                        })
    finally:
        # A client is created per archive, so release its connection
        # explicitly instead of waiting for garbage collection.
        redis_client.close()

    return missing_entities
def explore_provenance_files(root_path, redis_host, redis_port, redis_db):
    """Check every provenance zip under ``root_path`` against Redis.

    The tree is walked with ``os.walk``; a file is selected when its name ends
    with ``.zip`` and the path of its containing directory contains the
    substring ``prov``. Each selected archive is handed to
    ``process_zip_file`` in a worker process, and the total number of missing
    entities is printed at the end.

    Args:
        root_path: Directory to walk.
        redis_host: Host of the Redis server, forwarded to each worker.
        redis_port: Port of the Redis server, forwarded to each worker.
        redis_db: Redis database number, forwarded to each worker.

    Returns:
        The flattened list of missing-entity dicts reported by all workers.
    """
    # NOTE(review): the 'prov' test is a substring match on the whole directory
    # path, so a root path that itself contains "prov" selects every zip.
    prov_zip_files = [
        os.path.join(dir_path, file_name)
        for dir_path, _, file_names in os.walk(root_path)
        if 'prov' in dir_path
        for file_name in file_names
        if file_name.endswith('.zip')
    ]

    args_list = [(zip_file, redis_host, redis_port, redis_db) for zip_file in prov_zip_files]

    # Use every available core, but never more workers than archives;
    # Pool requires at least one process.
    num_processes = max(1, min(cpu_count(), len(args_list)))
    with Pool(processes=num_processes) as pool:
        results = list(tqdm(pool.imap(process_zip_file, args_list), total=len(args_list), desc="Processing provenance zip files"))

    all_missing_entities = [item for sublist in results for item in sublist]

    print(f"\nTotale entità mancanti trovate: {len(all_missing_entities)}")

    # Hand the findings back so callers can use them, not just the printed count.
    return all_missing_entities
def main():
    """Parse the command line and run the provenance check on the given directory.

    Accepts one positional ``directory`` argument plus the optional
    ``--redis-host``, ``--redis-port`` and ``--redis-db`` settings, then
    forwards them to ``explore_provenance_files``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=RichHelpFormatter,
        description="Verifica la presenza di entità di provenance in Redis.",
    )
    parser.add_argument("directory", type=str, help="Il percorso della directory da esplorare")

    # (flag, type, default, help) for each Redis connection option.
    redis_options = (
        ("--redis-host", str, "localhost", "L'host del server Redis"),
        ("--redis-port", int, 6379, "La porta del server Redis"),
        ("--redis-db", int, 6, "Il numero del database Redis da utilizzare"),
    )
    for flag, value_type, default_value, help_text in redis_options:
        parser.add_argument(flag, type=value_type, default=default_value, help=help_text)

    options = parser.parse_args()

    explore_provenance_files(
        options.directory,
        options.redis_host,
        options.redis_port,
        options.redis_db,
    )
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()