Milan Verespej | 8786632 | 2019-04-18 14:37:51 +0200 | [diff] [blame] | 1 | #! /usr/bin/env python |
| 2 | # -*- coding: utf-8 -*- |
| 3 | |
| 4 | # COPYRIGHT NOTICE STARTS HERE |
| 5 | |
| 6 | # Copyright 2019 © Samsung Electronics Co., Ltd. |
| 7 | # |
| 8 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 9 | # you may not use this file except in compliance with the License. |
| 10 | # You may obtain a copy of the License at |
| 11 | # |
| 12 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 13 | # |
| 14 | # Unless required by applicable law or agreed to in writing, software |
| 15 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 17 | # See the License for the specific language governing permissions and |
| 18 | # limitations under the License. |
| 19 | |
| 20 | # COPYRIGHT NOTICE ENDS HERE |
| 21 | |
| 22 | |
| 23 | import argparse |
| 24 | import concurrent.futures |
| 25 | import docker |
| 26 | import itertools |
| 27 | import json |
| 28 | import logging |
| 29 | import os |
| 30 | import prettytable |
| 31 | import sys |
| 32 | import threading |
| 33 | from retrying import retry |
| 34 | |
| 35 | import base |
| 36 | |
# Module-level logger named after this module; handlers and level are
# configured by run_cli() via logging.basicConfig.
log = logging.getLogger(__name__)
| 38 | |
| 39 | |
def image_filename(image_name):
    """
    Build the tar archive file name used when an image is saved to disk.

    Every ':' and '/' in the image reference is turned into '_' so the
    result is a single flat file name, and '.tar' is appended.
    :param image_name: Name of the image from list
    :return: Filename of the image
    """
    flat_name = image_name
    for separator in (':', '/'):
        flat_name = flat_name.replace(separator, '_')
    return flat_name + '.tar'
| 47 | |
| 48 | |
def image_registry_name(image_name):
    """
    Get the name as shown in local registry. Some strings are not part of the
    name when the default registry is used, e.g. docker.io

    The leading 'docker.io/' and then the leading 'library/' are dropped, and
    ':latest' is appended when the last path component carries no tag.
    :param image_name: name of the image from the list
    :return: name of the image as it is shown by docker
    """
    name = image_name

    # Strip only the leading occurrence of each default prefix; str.replace
    # would also remove matches further inside the repository path.
    for prefix in ('docker.io/', 'library/'):
        if name.startswith(prefix):
            name = name[len(prefix):]

    # Only the last path component can carry a tag; a colon earlier in the
    # name belongs to a registry port (e.g. localhost:5000/image).
    if ':' not in name.rsplit('/', 1)[-1]:
        name = '{}:latest'.format(name)

    return name
| 68 | |
| 69 | |
def not_pulled_images(docker_client, target_list):
    """
    Find the requested images that are absent from the local docker daemon.

    Collects the tags of every image reported by the client and keeps the
    entries of target_list whose registry name is not among those tags.
    :param docker_client: docker.client.DockerClient
    :param target_list: list of images to look for
    :return: (set) images that are not present on local system
    """
    local_tags = set()
    for local_image in docker_client.images.list():
        local_tags.update(local_image.tags)
    return {wanted for wanted in target_list
            if image_registry_name(wanted) not in local_tags}
| 80 | |
| 81 | |
def not_saved(target_images, target_dir):
    """
    Find the images whose tar archive is missing from the target directory.
    :param target_images: List of images to check for
    :param target_dir: Directory where those images should be
    :return: (set) Images that are missing from target directory
    """
    absent = set()
    for image in target_images:
        archive_path = '/'.join((target_dir, image_filename(image)))
        if not os.path.isfile(archive_path):
            absent.add(image)
    return absent
| 91 | |
| 92 | |
def missing(docker_client, target_list, save, target_dir):
    """
    Collect the images that are not available locally.

    The saved-archive check is only performed when save is true; otherwise
    'not_saved' is an empty set.
    :param docker_client: docker.client.DockerClient for communication with docker
    :param target_list: list of desired images
    :param save: (boolean) check for saved images
    :param target_dir: target directory for saved images
    :return: Dictionary of missing images ('not_pulled', 'not_saved')
    """
    not_pulled = not_pulled_images(docker_client, target_list)
    if save:
        unsaved = not_saved(target_list, target_dir)
    else:
        unsaved = set()
    return {'not_pulled': not_pulled, 'not_saved': unsaved}
| 104 | |
| 105 | |
def merge_dict_sets(dictionary):
    """
    Merge all values of a dictionary of sets into a single set.
    :param dictionary: dictionary whose values are sets
    :return: (set) new set with the union of all values; empty set for an
             empty dictionary
    """
    # Start from an empty set so that an empty dictionary yields set()
    # instead of the TypeError raised by set.union() without arguments.
    return set().union(*dictionary.values())
| 108 | |
| 109 | |
def check_table(check_list, missing, save):
    """
    Build a table with the pulled/saved state of every image in check_list.
    :param check_list: images to be listed (rows are sorted by image name)
    :param missing: dictionary of missing images ('not_pulled', 'not_saved')
    :param save: (boolean) report the saved state; when false the 'Saved'
                 column contains 'Not checked'
    :return: prettytable.PrettyTable with columns 'Image', 'Pulled', 'Saved'
    """
    table = prettytable.PrettyTable(['Image', 'Pulled', 'Saved'])
    table.align['Image'] = 'l'
    for image in sorted(check_list):
        is_pulled = image not in missing['not_pulled']
        if save:
            # an image that is not pulled is reported as not saved, because
            # it will be saved again after pulling anyway
            saved_state = is_pulled and image not in missing['not_saved']
        else:
            saved_state = 'Not checked'
        table.add_row([image, is_pulled, saved_state])
    return table
| 123 | |
| 124 | |
@retry(stop_max_attempt_number=5, wait_fixed=5000)
def pull_image(docker_client, image_name):
    """
    Pull docker image, defaulting to the 'latest' tag when none is given.

    The retry decorator re-runs the whole function on failure
    (stop_max_attempt_number=5, wait_fixed=5000).
    :param docker_client: docker.client.DockerClient for communication with docker
    :param image_name: name of the image to be pulled
    :return: pulled image (image object)
    :raises docker.errors.APIError: after unsuccessful retries
    """
    # a tag can only be present in the part after the last '/'
    last_component = image_name.rpartition('/')[2]
    if ':' not in last_component:
        image_name = '{}:latest'.format(image_name)
    try:
        pulled = docker_client.images.pull(image_name)
    except docker.errors.APIError as err:
        log.warning('Failed: {}: {}. Retrying...'.format(image_name, err))
        raise err
    log.info('Image {} pulled'.format(image_name))
    return pulled
| 143 | |
| 144 | |
def save_image(image_name, image, output_dir, docker_client=None):
    """
    Save image to tar.

    The output directory is created when missing. When image is not a docker
    image object, the image is looked up by name through docker_client. A
    partially written archive is removed when saving fails.
    :param output_dir: path to destination directory
    :param image: image object from pull_image function
    :param image_name: name of the image from list
    :param docker_client: docker.client.DockerClient for communication with docker
                          (needed only when image is not an image object)
    :return: None
    """
    dst = '{}/{}'.format(output_dir, image_filename(image_name))
    try:
        os.makedirs(output_dir)
    except OSError:
        # Several workers may try to create the directory at the same time;
        # only fail when it still does not exist afterwards.
        if not os.path.isdir(output_dir):
            raise
    if not isinstance(image, docker.models.images.Image):
        image = docker_client.images.get(image_name)
    try:
        with open(dst, 'wb') as f:
            for chunk in image.save(named=image_registry_name(image_name)):
                f.write(chunk)
    except Exception:
        # Drop the incomplete archive so it is not mistaken for a saved
        # image. The file may not exist at all when open() itself failed,
        # and a failing remove must not hide the original error.
        if os.path.exists(dst):
            os.remove(dst)
        raise
    else:
        log.info('Image {} saved as {}'.format(image_name, dst))
| 167 | |
| 168 | |
def download_docker_image(image, save, output_dir, docker_client):
    """
    Pull a docker image and optionally save it to disk.

    Any failure is logged with its traceback and re-raised to the caller.
    :param image: image to be downloaded
    :param save: boolean - save image to disk or skip saving
    :param output_dir: directory where image will be saved
    :param docker_client: docker.client.DockerClient for communication with docker
    :return: None
    """
    log.info('Downloading image: {}'.format(image))
    try:
        fetched = pull_image(docker_client, image)
        if not save:
            return
        save_image(image, fetched, output_dir)
    except Exception as err:
        log.exception('Error downloading {}: {}'.format(image, err))
        raise err
| 185 | |
| 186 | |
def download(image_list, save, output_dir, check_mode, progress, workers=3):
    """
    Download images from list
    :param image_list: list of images to be downloaded (passed to base.load_list)
    :param save: whether images should be saved to disk
    :param output_dir: directory where images will be saved
    :param check_mode: only check for missing images. No download
    :param progress: progress object handed to the base progress helpers
                     to show how far download is
    :param workers: number of workers passed to base.run_concurrent
    :return: None
    :raises docker.errors.DockerException: when the docker client cannot be created
    :raises RuntimeError: when at least one image failed to download
    """
    try:
        # big timeout in case of massive images like pnda-mirror-container:5.0.0 (11.4GB)
        docker_client = docker.client.DockerClient(version='auto', timeout=300)
    except docker.errors.DockerException:
        log.exception('Error creating docker client. Check if is docker installed and running'
                      ' or if you have right permissions.')
        raise

    # NOTE(review): the set difference below assumes base.load_list returns a set
    target_images = base.load_list(image_list)
    missing_images = missing(docker_client, target_images, save, output_dir)

    if check_mode:
        log.info(check_table(target_images, missing_images, save))
        return

    # images that are neither waiting for pull nor for save are skipped
    skipping = target_images - merge_dict_sets(missing_images)

    base.start_progress(progress, len(target_images), skipping, log)

    # if pulling and save is True. Save every pulled image to assure parity
    error_count = base.run_concurrent(workers, progress, download_docker_image,
                                      missing_images['not_pulled'],
                                      save, output_dir, docker_client)
    # only save those that are pulled already but not saved
    error_count += base.run_concurrent(workers, progress, save_image,
                                       missing_images['not_saved'] - missing_images['not_pulled'],
                                       None, output_dir, docker_client)

    base.finish_progress(progress, error_count, log)
    if error_count > 0:
        log.error('{} images were not downloaded'.format(error_count))
        # re-check the local state to report what is still missing
        missing_images = missing(docker_client, target_images, save, output_dir)
        log.info(check_table(merge_dict_sets(missing_images), missing_images, save))
        # carry a message so that callers logging the exception do not
        # print an empty line
        raise RuntimeError('Download of docker images finished with {} error(s)'
                           .format(error_count))
Milan Verespej | 8786632 | 2019-04-18 14:37:51 +0200 | [diff] [blame] | 230 | |
| 231 | |
def run_cli():
    """
    Command line entry point: parse arguments, configure logging and run
    the download.

    Exits with status 1 when a docker error occurs or when some images
    could not be downloaded.
    :return: None
    """
    parser = argparse.ArgumentParser(description='Download docker images from list')
    parser.add_argument('image_list', metavar='image-list',
                        help='File with list of images to download.')
    parser.add_argument('--save', '-s', action='store_true', default=False,
                        help='Save images (without it only pull is executed)')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Download destination')
    # the leading space of the second literal keeps the two sentences apart
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check what is missing. No download.'
                             ' Use with combination with -s to check saved images as well.')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=3,
                        help='Set maximum workers for parallel download (default: 3)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    # no progress reporting in check mode, nothing is downloaded there
    progress = base.init_progress('Docker images') if not args.check else None
    try:
        download(args.image_list, args.save, args.output_dir, args.check,
                 progress, args.workers)
    except docker.errors.DockerException:
        log.exception('Irrecoverable error detected.')
        sys.exit(1)
    except RuntimeError as err:
        log.exception(err)
        # failed downloads must be visible to the calling shell/script too
        sys.exit(1)
Milan Verespej | 8786632 | 2019-04-18 14:37:51 +0200 | [diff] [blame] | 264 | |
| 265 | |
# Run the command line interface only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    run_cli()
| 268 | |