Coverage for oc_meta / run / infodir / check.py: 0%

53 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-03 17:25 +0000

1import argparse 

2import os 

3import zipfile 

4from multiprocessing import Pool, cpu_count 

5 

6from oc_ocdm.support import get_prefix, get_resource_number, get_short_name 

7from rdflib import Dataset, URIRef 

8from rdflib.namespace import PROV, RDF 

9from redis import Redis 

10from rich_argparse import RichHelpFormatter 

11from tqdm import tqdm 

12 

13 

def process_zip_file(args):
    """Check one provenance zip archive against Redis and collect missing entities.

    Every regular member of the archive is parsed as JSON-LD. For each subject
    typed ``prov:Entity``, the entity URI is taken to be the part of the
    provenance URI that precedes ``/prov/se/``; from it the key
    ``<short_name>:<supplier_prefix>:<resource_number>:se`` is built and looked
    up in Redis. Entities whose key does not exist are printed and collected.

    Args:
        args: A ``(zip_file, redis_host, redis_port, redis_db)`` tuple. The
            arguments are packed in one tuple so the function can be mapped
            over a process pool.

    Returns:
        A list of dicts, one per missing entity, with the keys ``"URI"``,
        ``"Prov URI"`` and ``"Chiave Redis attesa"``.
    """
    zip_file, redis_host, redis_port, redis_db = args
    redis_client = Redis(host=redis_host, port=redis_port, db=redis_db)
    missing_entities = []

    try:
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            for member in zip_ref.infolist():
                # Directory entries have no payload; feeding their empty
                # content to the JSON-LD parser would fail.
                if member.is_dir():
                    continue

                with zip_ref.open(member) as entity_file:
                    payload = entity_file.read()

                g = Dataset(default_union=True)
                g.parse(data=payload, format='json-ld')

                for s, _, _ in g.triples((None, RDF.type, PROV.Entity)):
                    prov_entity_uri = str(s)
                    # Keep only what precedes the '/prov/se/' segment.
                    entity_uri = prov_entity_uri.split('/prov/se/')[0]
                    entity_uri_ref = URIRef(entity_uri)
                    supplier_prefix = get_prefix(entity_uri_ref)
                    short_name = get_short_name(entity_uri_ref)
                    resource_number = get_resource_number(entity_uri_ref)

                    expected_key = f"{short_name}:{supplier_prefix}:{resource_number}:se"

                    if not redis_client.exists(expected_key):
                        print("\nEntità mancante trovata:")
                        print(f"URI: {entity_uri}")
                        print(f"Prov URI: {prov_entity_uri}")
                        print(f"Chiave Redis attesa: {expected_key}")
                        print("---")

                        missing_entities.append({
                            "URI": entity_uri,
                            "Prov URI": prov_entity_uri,
                            "Chiave Redis attesa": expected_key
                        })
    finally:
        # One client is opened per archive; release its connections even if
        # reading or parsing the archive raises.
        redis_client.close()

    return missing_entities

49 

def explore_provenance_files(root_path, redis_host, redis_port, redis_db):
    """Find provenance zip archives under ``root_path`` and check them against Redis.

    The directory tree is walked and every ``.zip`` file whose directory path
    contains the substring ``prov`` is handed to ``process_zip_file`` through
    a process pool. The total number of missing entities is printed at the end.

    Args:
        root_path: Root directory to walk.
        redis_host: Redis host, forwarded to each worker.
        redis_port: Redis port, forwarded to each worker.
        redis_db: Redis database number, forwarded to each worker.

    Returns:
        The flattened list of missing-entity dicts returned by the workers.
    """
    prov_zip_files = [
        os.path.join(dir_path, file_name)
        for dir_path, _, file_names in os.walk(root_path)
        if 'prov' in dir_path
        for file_name in file_names
        if file_name.endswith('.zip')
    ]

    args_list = [(zip_file, redis_host, redis_port, redis_db) for zip_file in prov_zip_files]

    # Use all available cores, but never more workers than archives; Pool
    # requires at least one process.
    num_processes = max(1, min(cpu_count(), len(args_list)))
    with Pool(processes=num_processes) as pool:
        results = list(tqdm(pool.imap(process_zip_file, args_list), total=len(args_list), desc="Processing provenance zip files"))

    all_missing_entities = [item for sublist in results for item in sublist]

    print(f"\nTotale entità mancanti trovate: {len(all_missing_entities)}")

    return all_missing_entities

63 

def main():
    """Parse the command-line options and run the provenance check."""
    cli = argparse.ArgumentParser(
        formatter_class=RichHelpFormatter,
        description="Verifica la presenza di entità di provenance in Redis.",
    )

    # Positional argument: directory to walk.
    cli.add_argument(
        "directory",
        help="Il percorso della directory da esplorare",
        type=str,
    )

    # Redis connection settings.
    cli.add_argument(
        "--redis-host",
        help="L'host del server Redis",
        default="localhost",
        type=str,
    )
    cli.add_argument(
        "--redis-port",
        help="La porta del server Redis",
        default=6379,
        type=int,
    )
    cli.add_argument(
        "--redis-db",
        help="Il numero del database Redis da utilizzare",
        default=6,
        type=int,
    )

    options = cli.parse_args()

    explore_provenance_files(
        root_path=options.directory,
        redis_host=options.redis_host,
        redis_port=options.redis_port,
        redis_db=options.redis_db,
    )

76 

# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()