Milan Verespej | 6e372ca | 2019-07-08 12:42:30 +0200 | [diff] [blame] | 1 | #! /usr/bin/env python3 |
Milan Verespej | 455be47 | 2019-05-23 14:21:19 +0200 | [diff] [blame] | 2 | # -*- coding: utf-8 -*- |
| 3 | |
| 4 | # COPYRIGHT NOTICE STARTS HERE |
| 5 | |
| 6 | # Copyright 2019 © Samsung Electronics Co., Ltd. |
| 7 | # |
| 8 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 9 | # you may not use this file except in compliance with the License. |
| 10 | # You may obtain a copy of the License at |
| 11 | # |
| 12 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 13 | # |
| 14 | # Unless required by applicable law or agreed to in writing, software |
| 15 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 17 | # See the License for the specific language governing permissions and |
| 18 | # limitations under the License. |
| 19 | |
| 20 | # COPYRIGHT NOTICE ENDS HERE |
| 21 | |
| 22 | import argparse |
Milan Verespej | 11e84f5 | 2019-06-18 18:34:26 +0200 | [diff] [blame] | 23 | import datetime |
Milan Verespej | 455be47 | 2019-05-23 14:21:19 +0200 | [diff] [blame] | 24 | import logging |
| 25 | import sys |
Milan Verespej | 455be47 | 2019-05-23 14:21:19 +0200 | [diff] [blame] | 26 | import timeit |
| 27 | |
Milan Verespej | 11e84f5 | 2019-06-18 18:34:26 +0200 | [diff] [blame] | 28 | import docker_downloader |
| 29 | import git_downloader |
| 30 | import http_downloader |
| 31 | import npm_downloader |
| 32 | import pypi_downloader |
| 33 | import rpm_downloader |
Milan Verespej | 455be47 | 2019-05-23 14:21:19 +0200 | [diff] [blame] | 34 | |
# Module-level logger named after this module; run_cli() attaches the
# file and console handlers to the root logger.
log = logging.getLogger(name=__name__)
| 36 | |
Milan Verespej | 11e84f5 | 2019-06-18 18:34:26 +0200 | [diff] [blame] | 37 | |
def parse_args():
    """
    Build the command line parser and parse the process arguments
    :return: parsed arguments; parser.error() is invoked when none of the
             list options (--docker, --npm, --http, --rpm, --git, --pypi)
             was supplied
    """
    list_options = ('docker', 'npm', 'http', 'rpm', 'git', 'pypi')
    # Options that always take exactly "<list> <dir-name>".
    # Registration order is kept because it defines the order in --help.
    list_and_dir_options = (
        ('--http', 'Http type list and directory to save downloaded files'),
        ('--npm', 'npm type list and directory to save downloaded files'),
        ('--rpm', 'rpm type list and directory to save downloaded files'),
        ('--git', 'git repo type list and directory to save downloaded files'),
        ('--pypi', 'pypi packages type list and directory to save downloaded files'),
    )

    parser = argparse.ArgumentParser(description='Download data from lists')
    lists = parser.add_argument_group()

    # Docker is the only list type whose directory argument is optional
    lists.add_argument('--docker', action='append', nargs='+', default=[],
                       metavar=('list', 'dir-name'),
                       help='Docker type list. If second argument is specified '
                            'it is treated as directory where images will be saved '
                            'otherwise only pull operation is executed this can\'t '
                            'be mixed between multiple docker list specifications. '
                            'if one of the list does not have directory specified '
                            'all lists are only pulled!!!')
    for flag, help_text in list_and_dir_options:
        lists.add_argument(flag, action='append', nargs=2, default=[],
                           metavar=('list', 'dir-name'),
                           help=help_text)

    parser.add_argument('--npm-registry', default='https://registry.npmjs.org',
                        help='npm registry to use (default: https://registry.npmjs.org)')
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check what is missing. No download.')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')

    parsed = parser.parse_args()

    # At least one list has to be given, otherwise there is nothing to do
    if any(getattr(parsed, name) for name in list_options):
        return parsed

    parser.error('One of --docker, --npm, --http, --rpm, --git or --pypi must be specified')
| 82 | |
| 83 | |
def log_start(item_type):
    """
    Emit the info message announcing that a download is starting
    :param item_type: type of resources about to be downloaded
    :return: None
    """
    message = 'Starting download of {}.'.format(item_type)
    log.info(message)
| 91 | |
| 92 | |
def handle_download(downloader, check_mode, errorred_lists, start_time):
    """
    Run one downloader, or only print its check table in check mode
    :param downloader: downloader to use
    :param check_mode: run in check mode (boolean); nothing is downloaded then
    :param errorred_lists: list of data types of failed lists; the type of
                           this downloader is appended when download() raises
                           RuntimeError
    :param start_time: timeit.default_timer() right before download
    :return: timeit.default_timer() at the end of download
    """
    if not check_mode:
        log_start(downloader.list_type)
        try:
            downloader.download()
        except RuntimeError:
            # Remember the failure and carry on with the remaining list types
            errorred_lists.append(downloader.list_type)
    else:
        print(downloader.check_table)

    # The interval is logged in both modes; its return value starts the next one
    finished_at = log_time_interval(start_time, downloader.list_type)
    return finished_at
| 111 | |
| 112 | |
def handle_command_download(downloader_class, check_mode, errorred_lists, start_time, *args):
    """
    Handle download of resources where shell command is used

    The downloader is instantiated here (not by the caller) so that a
    FileNotFoundError raised during its construction, or during the
    download itself, is logged instead of aborting the whole run.
    :param downloader_class: Class of command_downloader.CommandDownloader to use
    :param check_mode: run in check mode (boolean)
    :param errorred_lists: list of data types of failed lists
    :param start_time: timeit.default_timer() right before download
    :param args: arguments for downloader class initialization
    :return: timeit.default_timer() at the end of download
    """
    try:
        downloader = downloader_class(*args)
        return handle_download(downloader, check_mode, errorred_lists, start_time)
    except FileNotFoundError as err:
        # downloader_class already is the class; type() of a class is its
        # metaclass, which made the message always say "type".
        classname = getattr(downloader_class, '__name__', repr(downloader_class))
        log.exception('Error initializing: {}: {}'.format(classname, err))
        # NOTE(review): this failure is only logged, it is not added to
        # errorred_lists, so it does not influence the exit code - confirm
        # that this is intended.
    return timeit.default_timer()
| 130 | |
| 131 | |
def log_time_interval(start, resource_type=''):
    """
    Log the time elapsed since start
    :param start: timeit.default_timer() when interval started
    :param resource_type: type of data that was downloaded; leave empty to
                          log the total time of the whole execution
    :return: timeit.default_timer() after logging
    """
    elapsed = datetime.timedelta(seconds=timeit.default_timer() - start)
    if not resource_type:
        log.info('Execution ended. Total elapsed time {}'.format(elapsed))
    else:
        log.info('Download of {} took {}\n'.format(resource_type, elapsed))
    return timeit.default_timer()
Milan Verespej | 455be47 | 2019-05-23 14:21:19 +0200 | [diff] [blame] | 146 | |
| 147 | |
def run_cli():
    """
    Command line entry point

    Parses arguments, sets up logging to a timestamped file in the current
    directory and to stdout, then processes the requested list types in
    the order docker, http, npm, rpm, git, pypi.
    Exits with status 1 when run under Python 2 or when any list type was
    recorded as failed.
    :return: None
    """
    if sys.version_info.major < 3:
        log.error('Unfortunately Python 2 is not supported for data download.')
        sys.exit(1)
    args = parse_args()

    # File logging goes through basicConfig; it has to run before the console
    # handler is attached, because basicConfig does nothing once the root
    # logger already has a handler.
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=level,
                        filename='download_data-{}.log'.format(timestamp),
                        format="%(asctime)s: %(filename)s: %(levelname)s: %(message)s")

    # Console output carries the bare message only
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(logging.Formatter('%(message)s'))
    logging.getLogger().addHandler(stdout_handler)

    failed_types = []
    run_started = step_started = timeit.default_timer()

    if args.check:
        log.info('Check mode. No download will be executed.')

    if args.docker:
        # Images are saved only if every docker list came with a directory
        save_images = all(len(spec) == 2 for spec in args.docker)
        docker = docker_downloader.DockerDownloader(save_images, *args.docker, workers=3)
        step_started = handle_download(docker, args.check, failed_types, step_started)

    if args.http:
        http = http_downloader.HttpDownloader(*args.http)
        step_started = handle_download(http, args.check, failed_types, step_started)

    if args.npm:
        npm = npm_downloader.NpmDownloader(args.npm_registry, *args.npm)
        step_started = handle_download(npm, args.check, failed_types, step_started)

    if args.rpm:
        step_started = handle_command_download(rpm_downloader.RpmDownloader, args.check,
                                               failed_types, step_started, *args.rpm)

    if args.git:
        step_started = handle_command_download(git_downloader.GitDownloader, args.check,
                                               failed_types, step_started, *args.git)

    if args.pypi:
        # Last step, so the returned timer value is not needed any more
        handle_command_download(pypi_downloader.PyPiDownloader, args.check,
                                failed_types, step_started, *args.pypi)

    if not args.check:
        log_time_interval(run_started)

    if failed_types:
        log.error('Errors encountered while processing these types:'
                  '\n{}'.format('\n'.join(failed_types)))
        sys.exit(1)
| 206 | |
| 207 | |
# Start the command line interface only when the file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    run_cli()