Coverage for src / piccione / upload / on_figshare.py: 81%
118 statements
« prev ^ index » next — coverage.py v7.13.0, created at 2026-03-21 11:49 +0000
1# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelo.massari@unibo.it>
2#
3# SPDX-License-Identifier: ISC
5import argparse
6import hashlib
7import json
8import os
9import time
11import requests
12import yaml
13from requests.exceptions import HTTPError
14from tqdm import tqdm
16BASE_URL = "https://api.figshare.com/v2/account/articles"
17CHUNK_SIZE = 1048576
def get_file_check_data(file_name, chunk_size=None):
    """Compute the MD5 hex digest and total byte size of a file.

    Reads the file in fixed-size chunks so arbitrarily large files are
    hashed with constant memory.

    Args:
        file_name: Path of the file to hash.
        chunk_size: Bytes to read per iteration. Defaults to the
            module-level ``CHUNK_SIZE`` (1 MiB) when omitted, preserving
            the original behavior for existing callers.

    Returns:
        A ``(md5_hexdigest, size_in_bytes)`` tuple.
    """
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    md5 = hashlib.md5()
    size = 0
    with open(file_name, "rb") as fin:
        data = fin.read(chunk_size)
        while data:
            size += len(data)
            md5.update(data)
            data = fin.read(chunk_size)
    return md5.hexdigest(), size
def issue_request(method, url, token, data=None, binary=False):
    """Send an authenticated request to the Figshare API, retrying transient failures.

    Retries indefinitely with exponential backoff (capped at 60s) on 5xx
    responses and on network timeouts / connection errors. Non-retried HTTP
    errors (e.g. 4xx) are printed with the response body and re-raised.

    Args:
        method: HTTP method name, e.g. "GET", "POST", "PUT".
        url: Full request URL.
        token: Figshare personal access token (sent as "token <value>").
        data: Request payload; JSON-encoded unless ``binary`` is True.
        binary: When True, send ``data`` as-is (used for raw file-part bodies).

    Returns:
        The decoded JSON body when the response parses as JSON; otherwise the
        raw response bytes.

    Raises:
        requests.exceptions.HTTPError: for HTTP error statuses that are not
            retried (anything ``raise_for_status`` flags below 500).
    """
    headers = {"Authorization": "token " + token}
    if data is not None and not binary:
        data = json.dumps(data)

    attempt = 0
    while True:
        attempt += 1
        try:
            # (connect, read) timeout: generous read budget for large uploads.
            response = requests.request(method, url, headers=headers, data=data, timeout=(30, 300))
            if response.status_code >= 500:
                # Server-side failure: back off exponentially, capped at 60s.
                print(f"[ERROR] Server error {response.status_code}: {response.text[:200]}")
                wait = min(2 ** (attempt - 1), 60)
                print(f"Retrying in {wait}s...")
                time.sleep(wait)
                continue
            response.raise_for_status()
            try:
                return json.loads(response.content)
            except ValueError:
                # Body is not JSON (e.g. empty PUT response) — return raw bytes.
                return response.content
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
            # Transient network problem: same backoff schedule, then retry.
            print(f"[ERROR] Network error: {e}")
            wait = min(2 ** (attempt - 1), 60)
            print(f"Retrying in {wait}s...")
            time.sleep(wait)
        except HTTPError as e:
            # Client-side HTTP error (4xx): surface the body and propagate.
            print(f"[ERROR] HTTP error: {e}")
            print("Body:", response.text)
            raise
def upload_parts(file_info, file_path, token):
    """Upload every part of ``file_path`` to its Figshare upload URL.

    Fetches the part list from the upload service, then streams each byte
    range from the local file while advancing a byte-level progress bar.
    """
    parts = issue_request(method="GET", url=file_info["upload_url"], token=token)["parts"]
    print(f"\nUploading {os.path.basename(file_path)}:")

    # Each part covers the inclusive byte range [startOffset, endOffset].
    part_sizes = [p["endOffset"] - p["startOffset"] + 1 for p in parts]
    with open(file_path, "rb") as stream, tqdm(
        total=sum(part_sizes), unit="B", unit_scale=True, unit_divisor=1024
    ) as progress:
        for part, size in zip(parts, part_sizes):
            upload_part(file_info, stream, part, token)
            progress.update(size)
def upload_part(file_info, stream, part, token):
    """PUT one byte range of ``stream`` to the part-specific upload URL."""
    # Merge file info with part info so both upload_url and partNo are available.
    merged = {**file_info, **part}
    part_url = "{upload_url}/{partNo}".format(**merged)

    start, end = part["startOffset"], part["endOffset"]
    stream.seek(start)
    chunk = stream.read(end - start + 1)
    issue_request(method="PUT", url=part_url, data=chunk, binary=True, token=token)
    print(" Uploaded part {partNo} from {startOffset} to {endOffset}".format(**part))
def get_existing_files(article_id, token):
    """Return the files already attached to a Figshare article.

    Args:
        article_id: Numeric Figshare article identifier.
        token: Figshare personal access token.

    Returns:
        Mapping of file name to ``{"id": ..., "md5": ...}`` where ``md5`` is
        the server-computed digest, used to detect unchanged files.

    Raises:
        requests.exceptions.HTTPError: if the listing request fails.
    """
    url = f"{BASE_URL}/{article_id}/files"
    headers = {"Authorization": f"token {token}"}
    # Timeout matches issue_request's (connect, read) budget; without it a
    # stalled connection would hang the whole upload run indefinitely.
    response = requests.get(url, headers=headers, timeout=(30, 300))
    response.raise_for_status()
    return {
        f["name"]: {"id": f["id"], "md5": f["computed_md5"]}
        for f in response.json()
    }
def delete_file(article_id, file_id, token):
    """Delete a single file from a Figshare article.

    Args:
        article_id: Numeric Figshare article identifier.
        file_id: Identifier of the file to remove.
        token: Figshare personal access token.

    Raises:
        requests.exceptions.HTTPError: if the delete request fails.
    """
    url = f"{BASE_URL}/{article_id}/files/{file_id}"
    headers = {"Authorization": f"token {token}"}
    # Timeout matches issue_request's (connect, read) budget; without it a
    # stalled connection would hang indefinitely.
    response = requests.delete(url, headers=headers, timeout=(30, 300))
    response.raise_for_status()
def create_file(article_id, file_name, file_path, token):
    """Register a new file on a Figshare article and fetch its upload metadata.

    Computes the local file's MD5 and size, POSTs them to create the file
    record, then GETs the returned location to obtain the upload details
    (including ``upload_url``).

    Args:
        article_id: Numeric Figshare article identifier.
        file_name: Name to register the file under (basename is used).
        file_path: Local path of the file to be uploaded.
        token: Figshare personal access token.

    Returns:
        The file-info dict returned by the Figshare API for the new record.

    Raises:
        requests.exceptions.HTTPError: if either request fails.
    """
    url = f"{BASE_URL}/{article_id}/files"
    headers = {"Authorization": f"token {token}"}
    md5, size = get_file_check_data(file_path)
    data = {"name": os.path.basename(file_name), "md5": md5, "size": size}
    # Timeouts match issue_request's (connect, read) budget; the bare calls
    # previously had none and could hang forever on a stalled connection.
    post_response = requests.post(url, headers=headers, json=data, timeout=(30, 300))
    post_response.raise_for_status()
    get_response = requests.get(
        post_response.json()["location"], headers=headers, timeout=(30, 300)
    )
    get_response.raise_for_status()
    return get_response.json()
def complete_upload(article_id, file_id, token):
    """Signal to Figshare that every part of ``file_id`` has been uploaded."""
    completion_url = "/".join([BASE_URL, str(article_id), "files", str(file_id)])
    issue_request(method="POST", url=completion_url, token=token)
    print(f" Upload completion confirmed for file {file_id}")
def main(config_path):
    """Synchronize the configured local files with a Figshare article.

    Loads the YAML config (token, article id, file list), then for each file:
    skips it when the remote MD5 matches, deletes and re-uploads it when the
    MD5 differs, and uploads it fresh when it is not on the article yet.

    Args:
        config_path: Path to a YAML file with ``TOKEN``, ``ARTICLE_ID`` and
            ``files_to_upload`` keys.
    """
    with open(config_path) as cfg_file:
        cfg = yaml.safe_load(cfg_file)

    token = cfg["TOKEN"]
    article_id = cfg["ARTICLE_ID"]
    files_to_upload = cfg["files_to_upload"]

    print(f"Starting upload of {len(files_to_upload)} files to Figshare...")
    existing_files = get_existing_files(article_id, token)
    print(f"Found {len(existing_files)} existing files in article")

    for file_path in tqdm(files_to_upload, desc="Total progress", unit="file"):
        file_name = os.path.basename(file_path)
        local_md5, _ = get_file_check_data(file_path)

        remote = existing_files.get(file_name)
        if remote is not None:
            if remote["md5"] == local_md5:
                # Identical content already on the article: nothing to do.
                print(f"\n[SKIP] {file_name} (already uploaded, MD5 matches)")
                continue
            # Same name but different content: remove before re-uploading.
            print(f"\n[REPLACE] {file_name} (MD5 mismatch, deleting old version)")
            delete_file(article_id, remote["id"], token)

        print(f"\nPreparing {file_name}...")
        file_info = create_file(article_id, file_name, file_path, token)
        upload_parts(file_info, file_path, token)
        complete_upload(article_id, file_info["id"], token)
        print(f"[OK] {file_name} completed")

    print("\nAll files uploaded successfully to Figshare!")
if __name__ == "__main__":  # pragma: no cover
    # CLI entry point: the only positional argument is the YAML config path.
    cli = argparse.ArgumentParser(description="Upload files to Figshare.")
    cli.add_argument("config", help="Path to the YAML configuration file.")
    main(cli.parse_args().config)