#! /usr/bin/env python
# -*- coding: utf-8 -*-

# COPYRIGHT NOTICE STARTS HERE

# Copyright 2019 © Samsung Electronics Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# COPYRIGHT NOTICE ENDS HERE
21
22import argparse
23import datetime
24import logging
25import os
26import sys
27import timeit
28
29import requests
30from retrying import retry
31
32import http_file
33from concurrent_downloader import ConcurrentDownloader
34
35log = logging.getLogger(__name__)
36
37
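# Note: ConcurrentDownloader (see concurrent_downloader.py) is assumed to parse the
# given file list into self._data_list (mapping each uri to its destination
# directory), track not-yet-downloaded items in self._missing and call
# _download_item() from a worker pool; only the usage visible in this module is
# described here.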
class HttpDownloader(ConcurrentDownloader):
    def __init__(self, *list_args, workers=None):
        super().__init__('http files', *list_args, workers=workers)

    @property
    def check_table(self):
        """
        Table with information about which items from the lists are downloaded
        """
        self.missing()
        header = ['Name', 'Downloaded']
        return self._check_table(header, {'Name': 'l'},
                                 ((item, item not in self._missing) for item
                                  in self._data_list))

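    # raise_for_status() below raises requests.exceptions.HTTPError for 4xx/5xx
    # responses, so failed requests surface as exceptions and can be retried by
    # the @retry decorator on _get_file().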
    @staticmethod
    def _make_get_request(url):
        """
        Run http get request
        :param url: url to request
        :return: requests.Response
        """
        req = requests.get(url)
        req.raise_for_status()
        return req

    def _is_missing(self, item):
        """
        Check if item is missing (not downloaded)
        :param item: item to check
        :return: boolean
        """
        return not os.path.isfile(
            '{}/{}'.format(self._data_list[item], item.rsplit('//')[-1]))

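    # The retrying decorator re-runs _get_file() on any exception, up to 5 attempts
    # with a fixed 2000 ms (2 s) wait between attempts.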
    @retry(stop_max_attempt_number=5, wait_fixed=2000)
    def _get_file(self, file_uri):
        """
        Get http file from uri
        :param file_uri: uri of the file
        :return: file content
        """
        if not file_uri.startswith('http'):
            file_uri = 'http://' + file_uri
        file_req = self._make_get_request(file_uri)
        return file_req.content

    def _download_item(self, item):
        """
        Download http file
        :param item: http file to be downloaded (tuple: (uri, dst_dir))
        """
        log.info('Downloading: {}'.format(item[0]))
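        # The portion of the uri after the last '//' (normally host + path) is kept
        # as the file's relative path under the destination directory.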
        dst_path = '{}/{}'.format(item[1], item[0].rsplit('//')[-1])
        try:
            f = http_file.HttpFile(item[0], self._get_file(item[0]), dst_path)
            f.save_to_file()
        except Exception as err:
            log.exception('Error downloading: {}: {}'.format(item[0], err))
            if os.path.isfile(dst_path):
                os.remove(dst_path)
            raise err
        log.info('Downloaded: {}'.format(f.name))


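# Example invocation (script and file names below are illustrative only):
#   python http_downloader.py http_file_list.txt -o ./downloads --workers 10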
def run_cli():
    """
    Run as cli tool
    """
    parser = argparse.ArgumentParser(description='Download http files from list')
    parser.add_argument('file_list', metavar='file-list',
                        help='File with list of http files to download')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Destination directory for saving')
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check mode')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=None,
                        help='Set maximum workers for parallel download (default: cores * 5)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    downloader = HttpDownloader([args.file_list, args.output_dir], workers=args.workers)

    if args.check:
        log.info('Check mode. No download will be executed.')
        log.info(downloader.check_table)
        sys.exit(0)

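    # download() (inherited from ConcurrentDownloader) is expected to raise
    # RuntimeError when some items could not be downloaded; translate that into
    # a non-zero exit code.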
    timer_start = timeit.default_timer()
    try:
        downloader.download()
    except RuntimeError:
        sys.exit(1)
    finally:
        log.info('Downloading finished in {}'.format(
            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))


if __name__ == '__main__':
    run_cli()