Coverage for src / piccione / upload / on_figshare.py: 81%

118 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-02-28 16:52 +0000

1import argparse 

2import hashlib 

3import json 

4import os 

5import time 

6 

7import requests 

8import yaml 

9from requests.exceptions import HTTPError 

10from tqdm import tqdm 

11 

# Figshare v2 REST endpoint for the authenticated account's articles.
BASE_URL = "https://api.figshare.com/v2/account/articles"
# Files are read/hashed/uploaded in 1 MiB (2**20 byte) chunks.
CHUNK_SIZE = 1048576

14 

15 

def get_file_check_data(file_name):
    """Compute the MD5 hex digest and total byte size of *file_name*.

    The file is consumed in ``CHUNK_SIZE`` pieces so arbitrarily large
    files can be hashed without loading them fully into memory.

    Returns:
        tuple[str, int]: ``(md5_hexdigest, size_in_bytes)``.
    """
    digest = hashlib.md5()
    total_bytes = 0
    with open(file_name, "rb") as handle:
        # iter() with a b"" sentinel stops exactly when read() returns empty,
        # matching the original `while data:` loop.
        for block in iter(lambda: handle.read(CHUNK_SIZE), b""):
            total_bytes += len(block)
            digest.update(block)
    return digest.hexdigest(), total_bytes

26 

27 

def issue_request(method, url, token, data=None, binary=False, *, max_retries=8):
    """Send an authenticated request to the Figshare API, retrying transient failures.

    Server errors (HTTP 5xx) and network timeouts/connection errors are retried
    with exponential backoff capped at 60 seconds. Client errors (4xx) are never
    retried. Previously the loop retried *forever*; it now gives up after
    ``max_retries`` retried attempts (keyword-only, so existing callers are
    unaffected).

    Args:
        method: HTTP verb, e.g. "GET", "POST", "PUT".
        url: Full request URL.
        token: Figshare personal access token.
        data: Payload; JSON-encoded unless ``binary`` is true.
        binary: When true, send ``data`` as raw bytes (used for part uploads).
        max_retries: Maximum number of retried attempts before raising.

    Returns:
        The decoded JSON body, or the raw response bytes when not valid JSON.

    Raises:
        HTTPError: For 4xx responses, or 5xx once retries are exhausted.
        requests.exceptions.Timeout / ConnectionError: When network errors
            persist past ``max_retries``.
    """
    headers = {"Authorization": "token " + token}
    if data is not None and not binary:
        data = json.dumps(data)

    attempt = 0
    while True:
        attempt += 1
        try:
            response = requests.request(method, url, headers=headers, data=data, timeout=(30, 300))
            if response.status_code >= 500:
                print(f"[ERROR] Server error {response.status_code}: {response.text[:200]}")
                if attempt > max_retries:
                    # Fix: previously this retried forever on persistent 5xx.
                    response.raise_for_status()
                wait = min(2 ** (attempt - 1), 60)
                print(f"Retrying in {wait}s...")
                time.sleep(wait)
                continue
            response.raise_for_status()
            try:
                return json.loads(response.content)
            except ValueError:
                # Not JSON (e.g. empty body on PUT) -- hand back raw bytes.
                return response.content
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
            print(f"[ERROR] Network error: {e}")
            if attempt > max_retries:
                # Fix: previously network errors were retried without bound.
                raise
            wait = min(2 ** (attempt - 1), 60)
            print(f"Retrying in {wait}s...")
            time.sleep(wait)
        except HTTPError as e:
            print(f"[ERROR] HTTP error: {e}")
            print("Body:", response.text)
            raise

58 

59 

def upload_parts(file_info, file_path, token):
    """Upload every part of *file_path* listed by the Figshare upload service.

    Fetches the part layout from the file's ``upload_url``, then streams each
    byte range to the service while driving a byte-level progress bar.
    """
    upload_url = file_info["upload_url"]
    part_listing = issue_request(method="GET", url=upload_url, token=token)
    print(f"\nUploading {os.path.basename(file_path)}:")

    parts = part_listing["parts"]
    # Offsets are inclusive, hence the +1 per part.
    total_size = sum(p["endOffset"] - p["startOffset"] + 1 for p in parts)

    with open(file_path, "rb") as stream, tqdm(
        total=total_size, unit="B", unit_scale=True, unit_divisor=1024
    ) as progress:
        for part in parts:
            upload_part(file_info, stream, part, token)
            progress.update(part["endOffset"] - part["startOffset"] + 1)

77 

78 

def upload_part(file_info, stream, part, token):
    """Send one byte range of *stream* to its per-part Figshare URL.

    The part URL is built from the file's ``upload_url`` plus the part number;
    offsets are inclusive, so the chunk length is ``end - start + 1``.
    """
    merged = {**file_info, **part}
    url = "{upload_url}/{partNo}".format(**merged)
    start = part["startOffset"]
    end = part["endOffset"]
    stream.seek(start)
    chunk = stream.read(end - start + 1)
    issue_request(method="PUT", url=url, data=chunk, binary=True, token=token)
    print(" Uploaded part {partNo} from {startOffset} to {endOffset}".format(**part))

87 

88 

def get_existing_files(article_id, token):
    """Return the files already attached to the article.

    Args:
        article_id: Figshare article identifier.
        token: Figshare personal access token.

    Returns:
        dict: ``{file_name: {"id": file_id, "md5": computed_md5}}``.

    Raises:
        requests.HTTPError: If the listing request fails.
    """
    url = f"{BASE_URL}/{article_id}/files"
    headers = {"Authorization": f"token {token}"}
    # Fix: request previously had no timeout and could hang indefinitely;
    # use the same (connect, read) timeout as issue_request.
    response = requests.get(url, headers=headers, timeout=(30, 300))
    response.raise_for_status()
    return {f["name"]: {"id": f["id"], "md5": f["computed_md5"]} for f in response.json()}

95 

96 

def delete_file(article_id, file_id, token):
    """Delete *file_id* from the Figshare article.

    Raises:
        requests.HTTPError: If the deletion request fails.
    """
    url = f"{BASE_URL}/{article_id}/files/{file_id}"
    headers = {"Authorization": f"token {token}"}
    # Fix: request previously had no timeout and could hang indefinitely;
    # use the same (connect, read) timeout as issue_request.
    response = requests.delete(url, headers=headers, timeout=(30, 300))
    response.raise_for_status()

102 

103 

def create_file(article_id, file_name, file_path, token):
    """Register a new file on the article and return its upload descriptor.

    Posts the file's name, MD5 and size to Figshare, then fetches the created
    file record (which contains the ``upload_url`` used by ``upload_parts``).

    Args:
        article_id: Figshare article identifier.
        file_name: Name to register the file under (basename is taken).
        file_path: Local path whose contents are hashed and sized.
        token: Figshare personal access token.

    Returns:
        dict: The Figshare file record, including ``upload_url`` and ``id``.

    Raises:
        requests.HTTPError: If either API call fails.
    """
    url = f"{BASE_URL}/{article_id}/files"
    headers = {"Authorization": f"token {token}"}
    md5, size = get_file_check_data(file_path)
    data = {"name": os.path.basename(file_name), "md5": md5, "size": size}
    # Fix: both requests previously had no timeout and could hang indefinitely;
    # use the same (connect, read) timeout as issue_request.
    post_response = requests.post(url, headers=headers, json=data, timeout=(30, 300))
    post_response.raise_for_status()
    get_response = requests.get(post_response.json()["location"], headers=headers, timeout=(30, 300))
    get_response.raise_for_status()
    return get_response.json()

114 

115 

def complete_upload(article_id, file_id, token):
    """Notify Figshare that every part of *file_id* has been uploaded."""
    endpoint = f"{BASE_URL}/{article_id}/files/{file_id}"
    issue_request(method="POST", url=endpoint, token=token)
    print(f" Upload completion confirmed for file {file_id}")

120 

121 

def main(config_path):
    """Upload every file listed in the YAML config at *config_path* to Figshare.

    For each file: skip it when an identically-named remote file has a matching
    MD5, replace it (delete then re-upload) when the MD5 differs, and otherwise
    upload it from scratch via create -> upload parts -> complete.
    """
    with open(config_path) as stream:
        settings = yaml.safe_load(stream)

    token = settings["TOKEN"]
    article_id = settings["ARTICLE_ID"]
    files_to_upload = settings["files_to_upload"]

    print(f"Starting upload of {len(files_to_upload)} files to Figshare...")
    existing_files = get_existing_files(article_id, token)
    print(f"Found {len(existing_files)} existing files in article")

    for file_path in tqdm(files_to_upload, desc="Total progress", unit="file"):
        file_name = os.path.basename(file_path)
        local_md5, _ = get_file_check_data(file_path)

        remote = existing_files.get(file_name)
        if remote is not None:
            if remote["md5"] == local_md5:
                print(f"\n[SKIP] {file_name} (already uploaded, MD5 matches)")
                continue
            print(f"\n[REPLACE] {file_name} (MD5 mismatch, deleting old version)")
            delete_file(article_id, remote["id"], token)

        print(f"\nPreparing {file_name}...")
        file_info = create_file(article_id, file_name, file_path, token)
        upload_parts(file_info, file_path, token)
        complete_upload(article_id, file_info["id"], token)
        print(f"[OK] {file_name} completed")

    print("\nAll files uploaded successfully to Figshare!")

152 

153 

if __name__ == "__main__":  # pragma: no cover
    # CLI entry point: the single positional argument is the YAML config path.
    cli = argparse.ArgumentParser(description="Upload files to Figshare.")
    cli.add_argument("config", help="Path to the YAML configuration file.")
    main(cli.parse_args().config)