Coverage for src / piccione / upload / on_figshare.py: 81%
135 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-05-27 20:21 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-05-27 20:21 +0000
1# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelo.massari@unibo.it>
2#
3# SPDX-License-Identifier: ISC
5import argparse
6import hashlib
7import json
8import time
9from pathlib import Path
10from typing import BinaryIO, TypedDict, cast
12import requests
13import yaml
14from requests.exceptions import HTTPError
15from rich.console import Console
16from tqdm import tqdm
# Shared rich console through which this module prints its status messages.
console = Console()

# Common prefix of every Figshare v2 account-article endpoint used below.
BASE_URL = "https://api.figshare.com/v2/account/articles"
# Number of bytes read per iteration when hashing a local file (1 MiB).
CHUNK_SIZE = 1024 * 1024
# Responses with a status code at or above this value are retried.
HTTP_INTERNAL_SERVER_ERROR = 500
25class FigshareFileInfo(TypedDict):
26 upload_url: str
27 id: int | str
class FigsharePart(TypedDict):
    """One entry of the part listing returned by the upload URL."""

    # Part number appended to the upload URL when the part is sent.
    partNo: int
    # Offset of the first byte of the part within the local file.
    startOffset: int
    # Offset of the last byte of the part (inclusive).
    endOffset: int
class FigsharePartsResponse(TypedDict):
    """Shape of the upload URL response, as far as this module reads it."""

    # Parts the local file has to be split into, in the order they are sent.
    parts: list[FigsharePart]
40class FigshareExistingFile(TypedDict):
41 id: int | str
42 md5: str
45def get_file_check_data(file_name: str | Path) -> tuple[str, int]:
46 with Path(file_name).open("rb") as fin:
47 md5 = hashlib.md5(usedforsecurity=False)
48 size = 0
49 data = fin.read(CHUNK_SIZE)
50 while data:
51 size += len(data)
52 md5.update(data)
53 data = fin.read(CHUNK_SIZE)
54 return md5.hexdigest(), size
57def issue_request(
58 method: str,
59 url: str,
60 token: str,
61 data: str | bytes | dict[str, object] | None = None,
62 *,
63 binary: bool = False,
64) -> dict[str, object] | bytes:
65 headers = {"Authorization": "token " + token}
66 if data is not None and not binary:
67 data = json.dumps(data)
69 attempt = 0
70 while True:
71 attempt += 1
72 try:
73 response = requests.request(method, url, headers=headers, data=data, timeout=(30, 300))
74 if response.status_code >= HTTP_INTERNAL_SERVER_ERROR:
75 console.print(f"[ERROR] Server error {response.status_code}: {response.text[:200]}")
76 wait = min(2 ** (attempt - 1), 60)
77 console.print(f"Retrying in {wait}s...")
78 time.sleep(wait)
79 continue
80 response.raise_for_status()
81 try:
82 return json.loads(response.content)
83 except ValueError:
84 return response.content
85 except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
86 console.print(f"[ERROR] Network error: {e}")
87 wait = min(2 ** (attempt - 1), 60)
88 console.print(f"Retrying in {wait}s...")
89 time.sleep(wait)
90 except HTTPError as e:
91 console.print(f"[ERROR] HTTP error: {e}")
92 if e.response is not None:
93 console.print("Body:", e.response.text)
94 raise
def upload_parts(file_info: FigshareFileInfo, file_path: str | Path, token: str) -> None:
    """Upload every part of a local file, showing a byte-level progress bar.

    The part layout is fetched from ``file_info["upload_url"]``; each listed
    part is then sent in turn through ``upload_part``.

    Raises:
        TypeError: If the part listing response is not a JSON object.
    """
    listing = issue_request(method="GET", url=file_info["upload_url"], token=token)
    if not isinstance(listing, dict):
        msg = "Expected dict response"
        raise TypeError(msg)

    local_file = Path(file_path)
    console.print(f"\nUploading {local_file.name}:")

    parts = cast("FigsharePartsResponse", listing)["parts"]
    # Offsets are inclusive on both ends, hence the +1.
    part_lengths = [part["endOffset"] - part["startOffset"] + 1 for part in parts]

    with (
        local_file.open("rb") as stream,
        tqdm(total=sum(part_lengths), unit="B", unit_scale=True, unit_divisor=1024) as progress,
    ):
        for part, length in zip(parts, part_lengths):
            upload_part(file_info, stream, part, token)
            progress.update(length)
def upload_part(file_info: FigshareFileInfo, stream: BinaryIO, part: FigsharePart, token: str) -> None:
    """Read one part's byte range from ``stream`` and PUT it to its part URL."""
    part_url = f"{file_info['upload_url']}/{part['partNo']}"

    first_byte = part["startOffset"]
    stream.seek(first_byte)
    # endOffset is inclusive, so the range holds end - start + 1 bytes.
    payload = stream.read(part["endOffset"] - first_byte + 1)

    issue_request(method="PUT", url=part_url, data=payload, binary=True, token=token)
    console.print(" Uploaded part {partNo} from {startOffset} to {endOffset}".format(**part))
def get_existing_files(article_id: str, token: str) -> dict[str, FigshareExistingFile]:
    """Return the files already attached to an article, keyed by file name.

    The listing endpoint is paginated, so a single request only returns the
    first page. Pages are fetched until a short page is received, so files
    beyond the first page are not mistaken for missing ones.

    Args:
        article_id: Identifier of the Figshare article.
        token: API token sent in the ``Authorization`` header.

    Returns:
        A mapping from file name to the remote file's ``id`` and ``md5``
        (taken from the ``computed_md5`` field of the listing).

    Raises:
        requests.exceptions.HTTPError: If any page request returns an error
            status.
    """
    url = f"{BASE_URL}/{article_id}/files"
    headers = {"Authorization": f"token {token}"}
    page_size = 100

    existing: dict[str, FigshareExistingFile] = {}
    seen_ids: set[object] = set()
    page = 1
    while True:
        response = requests.get(
            url,
            headers=headers,
            params={"page": page, "page_size": page_size},
            timeout=30,
        )
        response.raise_for_status()
        batch = response.json()

        new_ids = {f["id"] for f in batch} - seen_ids
        seen_ids |= new_ids
        for f in batch:
            existing[f["name"]] = {"id": f["id"], "md5": f["computed_md5"]}

        # A short page is the last one. A full page that brings nothing new
        # means the paging parameters were ignored; stop instead of looping.
        if len(batch) < page_size or not new_ids:
            return existing
        page += 1
def delete_file(article_id: str, file_id: str, token: str) -> None:
    """Delete one file from an article, raising on an HTTP error status."""
    response = requests.delete(
        f"{BASE_URL}/{article_id}/files/{file_id}",
        headers={"Authorization": f"token {token}"},
        timeout=30,
    )
    response.raise_for_status()
def create_file(article_id: str, file_name: str, file_path: str | Path, token: str) -> FigshareFileInfo:
    """Register a new file on an article and return its file record.

    The local file's MD5 and size are posted together with the base name of
    ``file_name``. The record is then fetched from the ``location`` URL
    returned by that POST.

    Raises:
        requests.exceptions.HTTPError: If either request returns an error
            status.
    """
    auth = {"Authorization": f"token {token}"}
    checksum, byte_count = get_file_check_data(file_path)
    metadata = {"name": Path(file_name).name, "md5": checksum, "size": byte_count}

    created = requests.post(f"{BASE_URL}/{article_id}/files", headers=auth, json=metadata, timeout=30)
    created.raise_for_status()

    record = requests.get(created.json()["location"], headers=auth, timeout=30)
    record.raise_for_status()
    return record.json()
def complete_upload(article_id: str, file_id: str, token: str) -> None:
    """POST to the file's URL to mark its upload as complete, then log it."""
    issue_request(
        method="POST",
        url=f"{BASE_URL}/{article_id}/files/{file_id}",
        token=token,
    )
    console.print(f" Upload completion confirmed for file {file_id}")
def main(config_path: str | Path) -> None:
    """Upload the files listed in a YAML configuration to a Figshare article.

    The configuration must provide ``TOKEN``, ``ARTICLE_ID`` and
    ``files_to_upload``. For each listed file:

    * if a remote file with the same name and MD5 exists, it is skipped;
    * if a remote file with the same name but a different MD5 exists, the
      remote file is deleted and the local one uploaded in its place;
    * otherwise the file is uploaded.

    Args:
        config_path: Path to the YAML configuration file.
    """
    # Binary mode lets PyYAML detect the encoding itself instead of relying
    # on the platform's default text encoding.
    with Path(config_path).open("rb") as f:
        config = yaml.safe_load(f)

    token = config["TOKEN"]
    article_id = config["ARTICLE_ID"]
    files_to_upload = config["files_to_upload"]

    console.print(f"Starting upload of {len(files_to_upload)} files to Figshare...")
    existing_files = get_existing_files(article_id, token)
    console.print(f"Found {len(existing_files)} existing files in article")

    for file_path in tqdm(files_to_upload, desc="Total progress", unit="file"):
        file_name = Path(file_path).name

        remote = existing_files.get(file_name)
        if remote is not None:
            # Hash only when there is a remote file to compare against:
            # create_file hashes the file again, so hashing up front would
            # read every new file twice.
            local_md5, _ = get_file_check_data(file_path)
            if remote["md5"] == local_md5:
                console.print(f"\n[SKIP] {file_name} (already uploaded, MD5 matches)")
                continue
            console.print(f"\n[REPLACE] {file_name} (MD5 mismatch, deleting old version)")
            delete_file(article_id, str(remote["id"]), token)

        console.print(f"\nPreparing {file_name}...")
        file_info = create_file(article_id, file_name, file_path, token)
        upload_parts(file_info, file_path, token)
        complete_upload(article_id, str(file_info["id"]), token)
        console.print(f"[OK] {file_name} completed")

    console.print("\nAll files uploaded successfully to Figshare!")
if __name__ == "__main__":  # pragma: no cover
    # Command-line entry point: the only argument is the YAML config path.
    cli = argparse.ArgumentParser(description="Upload files to Figshare.")
    cli.add_argument("config", help="Path to the YAML configuration file.")
    main(cli.parse_args().config)