blob: c83158d608212e2e1b96de7cd54fa7048bb17faf [file] [log] [blame]
Milan Verespej9dfa7642019-05-07 14:50:20 +02001#! /usr/bin/env python
2# -*- coding: utf-8 -*-
3
4# COPYRIGHT NOTICE STARTS HERE
5
6# Copyright 2019 © Samsung Electronics Co., Ltd.
7#
8# Licensed under the Apache License, Version 2.0 (the "License");
9# you may not use this file except in compliance with the License.
10# You may obtain a copy of the License at
11#
12# http://www.apache.org/licenses/LICENSE-2.0
13#
14# Unless required by applicable law or agreed to in writing, software
15# distributed under the License is distributed on an "AS IS" BASIS,
16# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17# See the License for the specific language governing permissions and
18# limitations under the License.
19
20# COPYRIGHT NOTICE ENDS HERE
21
22
23import argparse
24import concurrent.futures
25import logging
26import os
27import sys
28from retrying import retry
29
30import base
31
32log = logging.getLogger(__name__)
33
@retry(stop_max_attempt_number=5, wait_fixed=2000)
def get_file(file_uri):
    """
    Get file from the Internet.

    The request is wrapped by the ``retry`` decorator configured with
    stop_max_attempt_number=5 and wait_fixed=2000.
    :param file_uri: address of file; 'http://' is prepended when the address
                     does not already start with an http(s) scheme
    :return: byte content of file
    """
    # Check for the complete scheme prefix instead of the bare letters 'http':
    # a scheme-less address such as 'httpd.apache.org/file' also starts with
    # 'http' and would otherwise be requested without any scheme.
    # URI schemes are case-insensitive, hence the lower().
    if not file_uri.lower().startswith(('http://', 'https://')):
        file_uri = 'http://' + file_uri
    file_req = base.make_get_request(file_uri)
    return file_req.content
45
46
def download_file(file_uri, dst_dir):
    """
    Download http file and save it to file.

    The file is saved under dst_dir at the path formed by the address with a
    leading '<scheme>://' (if any) removed. When downloading or saving fails,
    a file found at the destination path is removed, the error is logged and
    the exception is re-raised.
    :param file_uri: http address of file
    :param dst_dir: directory where file will be saved
    :raises Exception: whatever get_file or base.save_to_file raised
    """
    log.info('Downloading: {}'.format(file_uri))
    # Strip only a leading scheme. Splitting on every '//' would reduce an
    # address containing a doubled slash in its path to its last segment and
    # save the file at a location missing() never checks.
    scheme, separator, remainder = file_uri.partition('://')
    if separator and '/' not in scheme:
        relative_path = remainder
    else:
        # No scheme, or the '://' belongs to a later part of the address.
        relative_path = file_uri
    dst_path = '{}/{}'.format(dst_dir, relative_path.lstrip('/'))
    try:
        file_content = get_file(file_uri)
        base.save_to_file(dst_path, file_content)
    except Exception as err:
        # A failed download must not leave a file at the destination,
        # otherwise it would be treated as already downloaded next time.
        if os.path.isfile(dst_path):
            os.remove(dst_path)
        log.error('Error downloading: {}: {}'.format(file_uri, err))
        # Bare raise propagates the active exception with its traceback.
        raise
    log.info('Downloaded: {}'.format(file_uri))
64
65
def missing(file_set, dst_dir):
    """
    Select the files which are not present in destination directory.
    :param file_set: collection of file paths relative to dst_dir
    :param dst_dir: directory in which the files are looked up
    :return: set of entries for which no regular file exists under dst_dir
    """
    absent = set()
    for entry in file_set:
        expected_path = '{}/{}'.format(dst_dir, entry)
        if os.path.isfile(expected_path):
            continue
        absent.add(entry)
    return absent
68
69
def download(data_list, dst_dir, check, progress, workers=None):
    """
    Download files specified in data list.

    Files from the list that already exist in the destination directory are
    skipped. In check mode nothing is downloaded; only the output of
    base.simple_check_table is logged.
    :param data_list: path to file with list
    :param dst_dir: destination directory
    :param check: boolean check mode
    :param progress: progressbar.ProgressBar to monitor progress
    :param workers: workers to use for parallel execution
    :return: None
    :raises RuntimeError: if at least one file was not downloaded
    """
    file_set = base.load_list(data_list)
    missing_files = missing(file_set, dst_dir)

    if check:
        log.info(base.simple_check_table(file_set, missing_files))
        return

    # Everything from the list that is already on disk is not downloaded again.
    skipping = file_set - missing_files

    base.start_progress(progress, len(file_set), skipping, log)

    error_count = base.run_concurrent(workers, progress, download_file, missing_files, dst_dir)

    base.finish_progress(progress, error_count, log)
    if error_count > 0:
        message = '{} files were not downloaded. Check log for specific failures.'.format(error_count)
        log.error(message)
        # Carry the message in the exception so callers other than the CLI
        # can see what went wrong.
        raise RuntimeError(message)
Milan Verespej9dfa7642019-05-07 14:50:20 +020098
Milan Verespej9dfa7642019-05-07 14:50:20 +020099
def run_cli():
    """
    Command line entry point.

    Parses the arguments, configures logging to stdout and starts the
    download. Exits with status 1 when download raises RuntimeError.
    """
    cli = argparse.ArgumentParser(description='Download http files from list')
    cli.add_argument('file_list', metavar='file-list',
                     help='File with list of http files to download')
    cli.add_argument('--output-dir', '-o', default=os.getcwd(),
                     help='Destination directory for saving')
    # store_true already defaults to False
    cli.add_argument('--check', '-c', action='store_true',
                     help='Check mode')
    cli.add_argument('--debug', action='store_true',
                     help='Turn on debug output')
    cli.add_argument('--workers', type=int, default=None,
                     help='Set maximum workers for parallel download (default: cores * 5)')

    options = cli.parse_args()

    # Plain messages by default; debug mode keeps the default record format.
    log_config = {'stream': sys.stdout}
    if options.debug:
        log_config['level'] = logging.DEBUG
    else:
        log_config['level'] = logging.INFO
        log_config['format'] = '%(message)s'
    logging.basicConfig(**log_config)

    # No progress bar is created in check mode.
    if options.check:
        progress = None
    else:
        progress = base.init_progress('http files')

    try:
        download(options.file_list, options.output_dir, options.check, progress, options.workers)
    except RuntimeError:
        sys.exit(1)
Milan Verespej9dfa7642019-05-07 14:50:20 +0200129
130
# Start the command line interface only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    run_cli()
133