Coverage for src/piccione/upload/on_figshare.py: 81%
118 statements
« prev ^ index » next — coverage.py v7.13.0, created at 2026-02-28 16:52 +0000
1import argparse
2import hashlib
3import json
4import os
5import time
7import requests
8import yaml
9from requests.exceptions import HTTPError
10from tqdm import tqdm
# Figshare REST API endpoint for articles owned by the authenticated account.
BASE_URL = "https://api.figshare.com/v2/account/articles"
# Size in bytes of each read chunk when hashing local files (1 MiB).
CHUNK_SIZE = 1048576
def get_file_check_data(file_name, chunk_size=None):
    """Compute the MD5 hex digest and total size in bytes of a file.

    The file is read in fixed-size chunks so arbitrarily large files can
    be hashed without loading them fully into memory.

    Args:
        file_name: Path of the file to inspect.
        chunk_size: Bytes read per iteration. Defaults to the module-level
            ``CHUNK_SIZE`` (1 MiB); exposed as a parameter so callers and
            tests can tune the read granularity.

    Returns:
        Tuple ``(md5_hexdigest, size_in_bytes)``.
    """
    if chunk_size is None:
        # Late binding keeps the module constant as the effective default
        # while leaving the signature backward-compatible.
        chunk_size = CHUNK_SIZE
    with open(file_name, "rb") as fin:
        md5 = hashlib.md5()
        size = 0
        data = fin.read(chunk_size)
        while data:
            size += len(data)
            md5.update(data)
            data = fin.read(chunk_size)
    return md5.hexdigest(), size
def issue_request(method, url, token, data=None, binary=False):
    """Send an authenticated request to the Figshare API with retries.

    Retries indefinitely with exponential backoff (doubling, capped at
    60 seconds) on network errors and 5xx server responses. Non-5xx HTTP
    errors (e.g. 4xx) are printed and re-raised to the caller.

    Args:
        method: HTTP verb, e.g. "GET", "POST", "PUT".
        url: Full request URL.
        token: Figshare personal access token.
        data: Optional payload. JSON-encoded unless ``binary`` is True.
        binary: When True, send ``data`` unmodified (raw bytes for part uploads).

    Returns:
        The parsed JSON body when the response is valid JSON, otherwise
        the raw response bytes.

    Raises:
        requests.exceptions.HTTPError: For HTTP error statuses below 500.
    """
    headers = {"Authorization": "token " + token}
    if data is not None and not binary:
        data = json.dumps(data)

    attempt = 0
    while True:
        attempt += 1
        try:
            # (connect, read) timeouts; the long read timeout accommodates
            # large chunk uploads over slow links.
            response = requests.request(method, url, headers=headers, data=data, timeout=(30, 300))
            if response.status_code >= 500:
                # Server-side failure: back off exponentially (1, 2, 4, ... capped at 60s).
                print(f"[ERROR] Server error {response.status_code}: {response.text[:200]}")
                wait = min(2 ** (attempt - 1), 60)
                print(f"Retrying in {wait}s...")
                time.sleep(wait)
                continue
            response.raise_for_status()
            try:
                return json.loads(response.content)
            except ValueError:
                # Body is not JSON (e.g. empty part-upload response): return raw bytes.
                return response.content
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
            # Transient network problem: same backoff as the 5xx branch.
            print(f"[ERROR] Network error: {e}")
            wait = min(2 ** (attempt - 1), 60)
            print(f"Retrying in {wait}s...")
            time.sleep(wait)
        except HTTPError as e:
            # Raised by raise_for_status() above, so `response` is always bound here.
            print(f"[ERROR] HTTP error: {e}")
            print("Body:", response.text)
            raise
def upload_parts(file_info, file_path, token):
    """Upload every part the Figshare upload service lists for a file.

    Fetches the part table from ``file_info["upload_url"]``, then streams
    each byte range from the local file, updating a byte-level progress bar.
    """
    part_table = issue_request(method="GET", url=file_info["upload_url"], token=token)
    print(f"\nUploading {os.path.basename(file_path)}:")

    parts = part_table["parts"]
    # Offsets are inclusive, hence the +1 per part.
    part_sizes = [p["endOffset"] - p["startOffset"] + 1 for p in parts]

    with open(file_path, "rb") as src, tqdm(
        total=sum(part_sizes), unit="B", unit_scale=True, unit_divisor=1024
    ) as progress:
        for part, nbytes in zip(parts, part_sizes):
            upload_part(file_info, src, part, token)
            progress.update(nbytes)
def upload_part(file_info, stream, part, token):
    """PUT one byte range of *stream* to its numbered Figshare part URL."""
    # Merge file-level and part-level fields so the URL template can pull
    # upload_url and partNo from a single mapping.
    merged = {**file_info, **part}
    part_url = "{upload_url}/{partNo}".format(**merged)
    start, end = part["startOffset"], part["endOffset"]
    stream.seek(start)
    chunk = stream.read(end - start + 1)  # offsets are inclusive
    issue_request(method="PUT", url=part_url, data=chunk, binary=True, token=token)
    print(" Uploaded part {partNo} from {startOffset} to {endOffset}".format(**part))
def get_existing_files(article_id, token):
    """Return the files already attached to a Figshare article.

    Args:
        article_id: Figshare article id.
        token: Figshare personal access token.

    Returns:
        Mapping of file name to ``{"id": ..., "md5": ...}`` using the
        server-side ``computed_md5`` for change detection.

    Raises:
        requests.exceptions.HTTPError: If the listing request fails.
    """
    url = f"{BASE_URL}/{article_id}/files"
    headers = {"Authorization": f"token {token}"}
    # Explicit (connect, read) timeout, consistent with issue_request;
    # a requests call without a timeout can hang indefinitely.
    response = requests.get(url, headers=headers, timeout=(30, 300))
    response.raise_for_status()
    return {f["name"]: {"id": f["id"], "md5": f["computed_md5"]} for f in response.json()}
def delete_file(article_id, file_id, token):
    """Delete one file from a Figshare article.

    Args:
        article_id: Figshare article id.
        file_id: Id of the file to remove.
        token: Figshare personal access token.

    Raises:
        requests.exceptions.HTTPError: If the delete request fails.
    """
    url = f"{BASE_URL}/{article_id}/files/{file_id}"
    headers = {"Authorization": f"token {token}"}
    # Explicit timeout, consistent with issue_request; avoids an
    # indefinite hang on a stalled connection.
    response = requests.delete(url, headers=headers, timeout=(30, 300))
    response.raise_for_status()
def create_file(article_id, file_name, file_path, token):
    """Register a new file on a Figshare article and fetch its upload metadata.

    Args:
        article_id: Target Figshare article id.
        file_name: Name to register the file under (its basename is used).
        file_path: Local path, used to compute the MD5 and size sent to Figshare.
        token: Figshare personal access token.

    Returns:
        dict describing the created file (includes "upload_url" and "id").

    Raises:
        requests.exceptions.HTTPError: If either API call fails.
    """
    url = f"{BASE_URL}/{article_id}/files"
    headers = {"Authorization": f"token {token}"}
    md5, size = get_file_check_data(file_path)
    data = {"name": os.path.basename(file_name), "md5": md5, "size": size}
    # Explicit timeouts on both calls, consistent with issue_request;
    # requests without a timeout can block forever.
    post_response = requests.post(url, headers=headers, json=data, timeout=(30, 300))
    post_response.raise_for_status()
    # The POST returns a "location" URL that must be fetched to obtain
    # the per-file upload metadata.
    get_response = requests.get(post_response.json()["location"], headers=headers, timeout=(30, 300))
    get_response.raise_for_status()
    return get_response.json()
def complete_upload(article_id, file_id, token):
    """Tell Figshare that all parts of a file have been uploaded."""
    completion_url = f"{BASE_URL}/{article_id}/files/{file_id}"
    issue_request(method="POST", url=completion_url, token=token)
    print(f" Upload completion confirmed for file {file_id}")
def main(config_path):
    """Upload the files listed in a YAML config to a Figshare article.

    The config must define ``TOKEN``, ``ARTICLE_ID`` and ``files_to_upload``.
    Files whose MD5 already matches the remote copy are skipped; files with
    a mismatched remote copy are deleted and re-uploaded.
    """
    with open(config_path) as cfg_file:
        cfg = yaml.safe_load(cfg_file)

    token = cfg["TOKEN"]
    article_id = cfg["ARTICLE_ID"]
    targets = cfg["files_to_upload"]

    print(f"Starting upload of {len(targets)} files to Figshare...")
    remote = get_existing_files(article_id, token)
    print(f"Found {len(remote)} existing files in article")

    for path in tqdm(targets, desc="Total progress", unit="file"):
        name = os.path.basename(path)
        local_md5, _ = get_file_check_data(path)

        entry = remote.get(name)
        if entry is not None:
            if entry["md5"] == local_md5:
                # Identical content already on the server: nothing to do.
                print(f"\n[SKIP] {name} (already uploaded, MD5 matches)")
                continue
            # Same name, different content: replace the stale remote copy.
            print(f"\n[REPLACE] {name} (MD5 mismatch, deleting old version)")
            delete_file(article_id, entry["id"], token)

        print(f"\nPreparing {name}...")
        file_info = create_file(article_id, name, path, token)
        upload_parts(file_info, path, token)
        complete_upload(article_id, file_info["id"], token)
        print(f"[OK] {name} completed")

    print("\nAll files uploaded successfully to Figshare!")
if __name__ == "__main__":  # pragma: no cover
    # CLI entry point: takes a single positional argument, the path to the
    # YAML configuration file (TOKEN, ARTICLE_ID, files_to_upload).
    parser = argparse.ArgumentParser(description="Upload files to Figshare.")
    parser.add_argument("config", help="Path to the YAML configuration file.")
    args = parser.parse_args()
    main(args.config)