Coverage for src / piccione / upload / on_figshare.py: 81%

118 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-03-21 11:49 +0000

1# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

2# 

3# SPDX-License-Identifier: ISC 

4 

5import argparse 

6import hashlib 

7import json 

8import os 

9import time 

10 

11import requests 

12import yaml 

13from requests.exceptions import HTTPError 

14from tqdm import tqdm 

15 

# Figshare v2 API endpoint for the authenticated account's articles.
BASE_URL = "https://api.figshare.com/v2/account/articles"
# Files are read and hashed in 1 MiB (2**20 byte) chunks.
CHUNK_SIZE = 1048576

18 

19 

def get_file_check_data(file_name, chunk_size=None):
    """Compute the MD5 checksum and total size of a file.

    The file is read incrementally so arbitrarily large files can be
    hashed without loading them fully into memory.

    Args:
        file_name: Path to the file to inspect.
        chunk_size: Number of bytes per read. Defaults to the
            module-level ``CHUNK_SIZE`` (1 MiB) when ``None``.

    Returns:
        A ``(md5_hexdigest, size_in_bytes)`` tuple.
    """
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    md5 = hashlib.md5()
    size = 0
    with open(file_name, "rb") as fin:
        # iter(callable, sentinel) yields chunks until read() returns b"".
        for data in iter(lambda: fin.read(chunk_size), b""):
            size += len(data)
            md5.update(data)
    return md5.hexdigest(), size

30 

31 

def _backoff(attempt, max_retries):
    """Sleep with capped exponential backoff, or abort once max_retries is hit.

    Raises:
        RuntimeError: When ``max_retries`` is not None and ``attempt`` exceeds it.
    """
    if max_retries is not None and attempt > max_retries:
        raise RuntimeError(f"Giving up after {max_retries} retries")
    # 1s, 2s, 4s, ... capped at 60s between attempts.
    wait = min(2 ** (attempt - 1), 60)
    print(f"Retrying in {wait}s...")
    time.sleep(wait)


def issue_request(method, url, token, data=None, binary=False, max_retries=None):
    """Send an authenticated request to the Figshare API, retrying transient failures.

    Retries on 5xx responses and on network timeouts/connection errors with
    capped exponential backoff. Non-retryable HTTP errors (4xx) are re-raised
    after printing the response body.

    Args:
        method: HTTP method name, e.g. "GET", "POST" or "PUT".
        url: Full URL to request.
        token: Figshare personal access token.
        data: Request payload; JSON-encoded unless ``binary`` is True.
        binary: When True, ``data`` is sent as-is (raw bytes).
        max_retries: Maximum number of retry attempts for transient failures.
            ``None`` (the default) retries indefinitely, preserving the
            original behaviour.

    Returns:
        The decoded JSON response body, or the raw bytes when the body is
        not valid JSON.

    Raises:
        HTTPError: For non-retryable HTTP errors.
        RuntimeError: When ``max_retries`` is exceeded.
    """
    headers = {"Authorization": "token " + token}
    if data is not None and not binary:
        data = json.dumps(data)

    attempt = 0
    while True:
        attempt += 1
        try:
            response = requests.request(method, url, headers=headers, data=data, timeout=(30, 300))
            if response.status_code >= 500:
                print(f"[ERROR] Server error {response.status_code}: {response.text[:200]}")
                _backoff(attempt, max_retries)
                continue
            response.raise_for_status()
            try:
                return json.loads(response.content)
            except ValueError:
                # Some endpoints (e.g. part PUTs) return non-JSON bodies.
                return response.content
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
            print(f"[ERROR] Network error: {e}")
            _backoff(attempt, max_retries)
        except HTTPError as e:
            # raise_for_status() fired, so `response` is bound here.
            print(f"[ERROR] HTTP error: {e}")
            print("Body:", response.text)
            raise

62 

63 

def upload_parts(file_info, file_path, token):
    """Upload every part of a file to its Figshare upload endpoint.

    Fetches the part layout from the upload service, then streams each
    byte range from disk while advancing a byte-level progress bar.
    """
    layout = issue_request(method="GET", url=file_info["upload_url"], token=token)
    print(f"\nUploading {os.path.basename(file_path)}:")

    parts = layout["parts"]
    total_bytes = 0
    for part in parts:
        total_bytes += part["endOffset"] - part["startOffset"] + 1

    with open(file_path, "rb") as stream:
        with tqdm(total=total_bytes, unit="B", unit_scale=True, unit_divisor=1024) as progress:
            for part in parts:
                upload_part(file_info, stream, part, token)
                progress.update(part["endOffset"] - part["startOffset"] + 1)

81 

82 

def upload_part(file_info, stream, part, token):
    """PUT a single byte range of the file to its part-specific URL."""
    merged = {**file_info, **part}
    url = "{upload_url}/{partNo}".format(**merged)
    start = part["startOffset"]
    end = part["endOffset"]
    stream.seek(start)
    payload = stream.read(end - start + 1)
    issue_request(method="PUT", url=url, data=payload, binary=True, token=token)
    print(" Uploaded part {partNo} from {startOffset} to {endOffset}".format(**part))

91 

92 

def get_existing_files(article_id, token):
    """Return the files already attached to a Figshare article.

    Args:
        article_id: Numeric Figshare article identifier.
        token: Figshare personal access token.

    Returns:
        Dict mapping file name to ``{"id": ..., "md5": ...}`` for each
        file currently on the article.

    Raises:
        requests.HTTPError: If the API call fails.
    """
    url = f"{BASE_URL}/{article_id}/files"
    headers = {"Authorization": f"token {token}"}
    # Timeout added for consistency with issue_request's (connect, read)
    # timeout, so a hung connection cannot block the run forever.
    response = requests.get(url, headers=headers, timeout=(30, 300))
    response.raise_for_status()
    return {f["name"]: {"id": f["id"], "md5": f["computed_md5"]} for f in response.json()}

99 

100 

def delete_file(article_id, file_id, token):
    """Delete a file from a Figshare article.

    Args:
        article_id: Numeric Figshare article identifier.
        file_id: Identifier of the file to remove.
        token: Figshare personal access token.

    Raises:
        requests.HTTPError: If the deletion request fails.
    """
    url = f"{BASE_URL}/{article_id}/files/{file_id}"
    headers = {"Authorization": f"token {token}"}
    # Timeout added for consistency with issue_request; prevents hangs.
    response = requests.delete(url, headers=headers, timeout=(30, 300))
    response.raise_for_status()

106 

107 

def create_file(article_id, file_name, file_path, token):
    """Register a new file on a Figshare article and return its upload record.

    Computes the local MD5 and size, POSTs the metadata to create a file
    placeholder, then GETs the placeholder's location to obtain the full
    file record (which includes the upload URL consumed by upload_parts).

    Args:
        article_id: Numeric Figshare article identifier.
        file_name: Name to register the file under (basename is used).
        file_path: Local path used to compute the checksum and size.
        token: Figshare personal access token.

    Returns:
        The JSON file record returned by the Figshare API.

    Raises:
        requests.HTTPError: If either API call fails.
    """
    url = f"{BASE_URL}/{article_id}/files"
    headers = {"Authorization": f"token {token}"}
    md5, size = get_file_check_data(file_path)
    data = {"name": os.path.basename(file_name), "md5": md5, "size": size}
    # Timeouts added for consistency with issue_request; prevent hangs.
    post_response = requests.post(url, headers=headers, json=data, timeout=(30, 300))
    post_response.raise_for_status()
    get_response = requests.get(post_response.json()["location"], headers=headers, timeout=(30, 300))
    get_response.raise_for_status()
    return get_response.json()

118 

119 

def complete_upload(article_id, file_id, token):
    """Tell Figshare that every part of a file has been uploaded."""
    endpoint = f"{BASE_URL}/{article_id}/files/{file_id}"
    issue_request(method="POST", url=endpoint, token=token)
    print(f" Upload completion confirmed for file {file_id}")

124 

125 

def main(config_path):
    """Run the full Figshare upload workflow described by a YAML config.

    The config must provide TOKEN, ARTICLE_ID and files_to_upload. Files
    whose MD5 already matches the remote copy are skipped; files with a
    mismatched MD5 are deleted remotely and re-uploaded.
    """
    with open(config_path) as handle:
        config = yaml.safe_load(handle)

    token = config["TOKEN"]
    article_id = config["ARTICLE_ID"]
    targets = config["files_to_upload"]

    print(f"Starting upload of {len(targets)} files to Figshare...")
    remote = get_existing_files(article_id, token)
    print(f"Found {len(remote)} existing files in article")

    for path in tqdm(targets, desc="Total progress", unit="file"):
        name = os.path.basename(path)
        local_md5, _ = get_file_check_data(path)

        entry = remote.get(name)
        if entry is not None:
            if entry["md5"] == local_md5:
                print(f"\n[SKIP] {name} (already uploaded, MD5 matches)")
                continue
            print(f"\n[REPLACE] {name} (MD5 mismatch, deleting old version)")
            delete_file(article_id, entry["id"], token)

        print(f"\nPreparing {name}...")
        record = create_file(article_id, name, path, token)
        upload_parts(record, path, token)
        complete_upload(article_id, record["id"], token)
        print(f"[OK] {name} completed")

    print("\nAll files uploaded successfully to Figshare!")

156 

157 

if __name__ == "__main__": # pragma: no cover
    # CLI entry point: a single positional argument gives the path to the
    # YAML configuration consumed by main().
    parser = argparse.ArgumentParser(description="Upload files to Figshare.")
    parser.add_argument("config", help="Path to the YAML configuration file.")
    args = parser.parse_args()
    main(args.config)