blob: c83158d608212e2e1b96de7cd54fa7048bb17faf [file] [log] [blame]
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# COPYRIGHT NOTICE STARTS HERE
# Copyright 2019 © Samsung Electronics Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# COPYRIGHT NOTICE ENDS HERE
import argparse
import concurrent.futures
import logging
import os
import sys
from retrying import retry
import base
# Module-level logger named after this module; the functions in this file log through it.
log = logging.getLogger(__name__)
@retry(stop_max_attempt_number=5, wait_fixed=2000)
def get_file(file_uri):
    """
    Get file from the Internet.

    The call is wrapped by ``retry`` configured with
    ``stop_max_attempt_number=5`` and ``wait_fixed=2000``, so a failing
    request is re-attempted before the exception reaches the caller.

    :param file_uri: address of file; ``http://`` is prepended when the
                     address does not already start with ``http://`` or
                     ``https://``
    :return: ``content`` attribute of the response returned by
             ``base.make_get_request`` (byte content of file)
    """
    # Check for the complete scheme prefix instead of a bare 'http': a
    # scheme-less address whose host merely begins with "http"
    # (e.g. "httpbin.org/get") must still get a scheme prepended.
    if not file_uri.startswith(('http://', 'https://')):
        file_uri = 'http://' + file_uri
    file_req = base.make_get_request(file_uri)
    return file_req.content
def download_file(file_uri, dst_dir):
    """
    Fetch one http file and store it below the destination directory.

    The target path is ``dst_dir`` followed by the part of the address after
    its last ``//``. When fetching or saving fails, a file already present at
    the target path is removed, the failure is logged and the exception is
    raised again for the caller.

    :param file_uri: http address of file
    :param dst_dir: directory where file will be saved
    """
    log.info('Downloading: {}'.format(file_uri))
    # Everything after the last '//' (i.e. the address without its scheme).
    relative_path = file_uri.rsplit('//')[-1]
    dst_path = '{}/{}'.format(dst_dir, relative_path)
    try:
        base.save_to_file(dst_path, get_file(file_uri))
    except Exception as err:
        # Do not leave a partially written file behind.
        if os.path.isfile(dst_path):
            os.remove(dst_path)
        log.error('Error downloading: {}: {}'.format(file_uri, err))
        raise err
    else:
        log.info('Downloaded: {}'.format(file_uri))
def missing(file_set, dst_dir):
    """
    Select the listed files that are not present in the destination directory.

    Each entry is mapped to its on-disk location the same way download_file
    does: the part of the address after its last ``//`` is appended to
    ``dst_dir``. Entries written with an explicit ``http://`` / ``https://``
    scheme are therefore recognised once they have been downloaded.

    :param file_set: iterable of file addresses
    :param dst_dir: directory where downloaded files are stored
    :return: set of entries from file_set that have no file in dst_dir
    """
    return {
        file_uri for file_uri in file_set
        if not os.path.isfile('{}/{}'.format(dst_dir, file_uri.rsplit('//')[-1]))
    }
def download(data_list, dst_dir, check, progress, workers=None):
    """
    Download files specified in data list.

    Files from the list that already exist in the destination directory are
    skipped. In check mode only the table produced by
    ``base.simple_check_table`` is logged and nothing is downloaded.

    :param data_list: path to file with list
    :param dst_dir: destination directory
    :param check: boolean check mode
    :param progress: progressbar.ProgressBar to monitor progress
    :param workers: workers to use for parallel execution
    :return: None
    :raises RuntimeError: when at least one file was not downloaded
    """
    file_set = base.load_list(data_list)
    missing_files = missing(file_set, dst_dir)

    if check:
        log.info(base.simple_check_table(file_set, missing_files))
        return

    # Files already present on disk are reported to the progress bar as skipped.
    skipping = file_set - missing_files
    base.start_progress(progress, len(file_set), skipping, log)
    error_count = base.run_concurrent(workers, progress, download_file, missing_files, dst_dir)
    base.finish_progress(progress, error_count, log)

    if error_count > 0:
        message = '{} files were not downloaded. Check log for specific failures.'.format(error_count)
        log.error(message)
        # Carry the message in the exception too, so callers that do not read
        # the log still learn what failed.
        raise RuntimeError(message)
def run_cli():
    """
    Command line entry point.

    Parses the arguments, configures logging to stdout, and runs download.
    The process exits with status 1 when download raises RuntimeError.
    """
    cli = argparse.ArgumentParser(description='Download http files from list')
    cli.add_argument('file_list', metavar='file-list',
                     help='File with list of http files to download')
    cli.add_argument('--output-dir', '-o', default=os.getcwd(),
                     help='Destination directory for saving')
    cli.add_argument('--check', '-c', action='store_true', default=False,
                     help='Check mode')
    cli.add_argument('--debug', action='store_true', default=False,
                     help='Turn on debug output')
    cli.add_argument('--workers', type=int, default=None,
                     help='Set maximum workers for parallel download (default: cores * 5)')
    options = cli.parse_args()

    # Debug mode keeps the default record format; normal mode prints bare messages.
    log_config = {'stream': sys.stdout, 'level': logging.DEBUG}
    if not options.debug:
        log_config.update(level=logging.INFO, format='%(message)s')
    logging.basicConfig(**log_config)

    # No progress bar is created in check mode.
    progress = None if options.check else base.init_progress('http files')

    try:
        download(options.file_list, options.output_dir, options.check, progress, options.workers)
    except RuntimeError:
        sys.exit(1)
# Start the command line interface only when this file is executed as a script.
if __name__ == '__main__':
    run_cli()