Coverage for src/piccione/download/from_figshare.py

1#!/usr/bin/python

2# -*- coding: utf-8 -*-

5# Permission to use, copy, modify, and/or distribute this software for any purpose

6# with or without fee is hereby granted, provided that the above copyright notice

7# and this permission notice appear in all copies.

9# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS

15# SOFTWARE.

17"""

18Download files from a Figshare article using the Figshare API.

20This script downloads all files associated with a Figshare article ID.

21It uses the public Figshare API which works reliably unlike direct wget/curl

22on Figshare URLs.

23"""

25import argparse

26import hashlib

27import sys

28from pathlib import Path

30import requests

31from tqdm import tqdm

# Root of the Figshare v2 API; endpoint paths are appended to it.
BASE_URL = "https://api.figshare.com/v2"

# Chunk size passed to the streaming download loop.
CHUNK_SIZE = 8192

def get_article_metadata(article_id):
    """Retrieve article metadata, with its complete file list, from the Figshare API.

    Args:
        article_id: Figshare article identifier, interpolated into the API URLs.

    Returns:
        The decoded JSON article record, with its ``'files'`` entry replaced by
        the list collected from every page of the article's ``/files`` endpoint.

    Raises:
        requests.HTTPError: If any API request returns an error status.
        requests.Timeout: If the server stays silent for longer than the timeout.
    """
    timeout = 60  # seconds; without a timeout a stalled connection hangs forever
    page_size = 1000  # maximum page size per the Figshare API docs (TODO confirm)

    response = requests.get(f"{BASE_URL}/articles/{article_id}", timeout=timeout)
    response.raise_for_status()
    article_data = response.json()

    # Figshare API has a default limit of 10 files, so the file list is fetched
    # separately, page by page, until a page comes back shorter than requested.
    files_url = f"{BASE_URL}/articles/{article_id}/files"
    files = []
    page = 1
    while True:
        files_response = requests.get(
            files_url,
            params={"page": page, "page_size": page_size},
            timeout=timeout,
        )
        files_response.raise_for_status()
        batch = files_response.json()
        files.extend(batch)
        if len(batch) < page_size:
            break
        page += 1

    article_data['files'] = files
    return article_data

def download_file(download_url, output_path, expected_size, expected_md5=None):
    """Download a file from URL with progress bar and optional MD5 verification.

    The data is streamed into a ``.part`` file next to ``output_path`` and only
    renamed to ``output_path`` once the transfer (and the checksum, when one is
    given) has succeeded, so an incomplete or corrupt download never appears
    under the final name.

    Args:
        download_url: URL to fetch.
        output_path: Destination file path (``str`` or ``Path``).
        expected_size: Size in bytes, used as the progress bar total.
        expected_md5: Hex MD5 digest to verify against; falsy skips the check.

    Raises:
        requests.HTTPError: If the server returns an error status.
        requests.Timeout: If the server stays silent for longer than the timeout.
        ValueError: If the MD5 of the downloaded data differs from ``expected_md5``.
    """
    final_path = Path(output_path)
    partial_path = final_path.with_name(final_path.name + ".part")
    md5_hash = hashlib.md5()

    try:
        # Using the response as a context manager releases the connection even
        # when the transfer fails part-way through.
        with requests.get(download_url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f, tqdm(
                total=expected_size, unit='B', unit_scale=True, unit_divisor=1024
            ) as pbar:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    if not chunk:
                        continue
                    f.write(chunk)
                    md5_hash.update(chunk)
                    pbar.update(len(chunk))

        actual_md5 = md5_hash.hexdigest()
        if expected_md5 and actual_md5 != expected_md5:
            raise ValueError(f"MD5 mismatch: expected {expected_md5}, got {actual_md5}")

        partial_path.replace(final_path)
    finally:
        # After a successful rename the partial file is gone; otherwise remove
        # whatever an interrupted or rejected download left behind.
        if partial_path.exists():
            partial_path.unlink()

    if expected_md5:
        print(f" MD5 checksum verified: {actual_md5}")

def main():  # pragma: no cover
    """Command-line entry point: download every file of a Figshare article.

    Parses the article ID and output directory from the command line, fetches
    the article metadata, lists the files, and downloads them one by one.

    Returns:
        0 when all files were downloaded; 1 when the article has no files or a
        file name in the metadata cannot be mapped safely into the output
        directory.
    """
    parser = argparse.ArgumentParser(
        description="Download files from a Figshare article"
    )
    parser.add_argument(
        "article_id",
        type=int,
        help="Figshare article ID"
    )
    parser.add_argument(
        "-o", "--output-dir",
        type=Path,
        default=Path("."),
        help="Output directory for downloaded files (default: current directory)"
    )

    args = parser.parse_args()

    args.output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Fetching metadata for article {args.article_id}...")
    metadata = get_article_metadata(args.article_id)

    files = metadata.get("files", [])
    if not files:
        print("No files found in this article")
        return 1

    print(f"\nFound {len(files)} file(s) to download:")
    for f in files:
        size_mb = f['size'] / (1024 * 1024)
        print(f" - {f['name']} ({size_mb:.2f} MB)")

    print(f"\nDownloading to: {args.output_dir.absolute()}\n")

    for file_info in files:
        filename = file_info['name']
        download_url = file_info['download_url']
        size = file_info['size']
        md5 = file_info.get('supplied_md5')

        # The name comes from the remote API. Keep only its final path
        # component so an absolute name or one containing ".." cannot place
        # the file outside the chosen output directory.
        safe_name = Path(filename).name
        if safe_name in ("", ".", ".."):
            print(f"Refusing unsafe file name: {filename!r}", file=sys.stderr)
            return 1

        output_path = args.output_dir / safe_name

        print(f"Downloading {filename}...")
        download_file(download_url, output_path, size, md5)
        print(f" Saved to {output_path}\n")

    print("All files downloaded successfully")
    return 0

123

124

# Run the CLI when executed as a script, using main()'s return value as the
# process exit status.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())

Coverage for src / piccione / download / from_figshare.py: 100%

33 statements