Refactor and improve docker image downloading
Issue-ID: OOM-1803
Change-Id: I4e648d49835faa60165725d1ca4ec22ba1e3e12b
Signed-off-by: Milan Verespej <m.verespej@partner.samsung.com>
diff --git a/build/download/docker_downloader.py b/build/download/docker_downloader.py
new file mode 100755
index 0000000..13323d3
--- /dev/null
+++ b/build/download/docker_downloader.py
@@ -0,0 +1,242 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# COPYRIGHT NOTICE STARTS HERE
+
+# Copyright 2019 © Samsung Electronics Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# COPYRIGHT NOTICE ENDS HERE
+
+import argparse
+import datetime
+import itertools
+import logging
+import os
+import sys
+import timeit
+
+import docker
+from retrying import retry
+
+from concurrent_downloader import ConcurrentDownloader
+
+log = logging.getLogger(__name__)
+
+
+class DockerDownloader(ConcurrentDownloader):
+ def __init__(self, save, *list_args, workers=3):
+ self._save = save
+ try:
+ # big timeout in case of massive images like pnda-mirror-container:5.0.0 (11.4GB)
+ self._docker_client = docker.client.DockerClient(version='auto', timeout=300)
+ except docker.errors.DockerException as err:
+ log.exception(
+ 'Error creating docker client. Check if is docker installed and running'
+ ' or if you have right permissions.')
+ raise err
+ self._pulled_images = set(itertools.chain.from_iterable((image.tags for image
+ in self._docker_client.images.list())))
+ list_args = ([*x, None] if len(x) < 2 else x for x in list_args)
+ super().__init__('docker images', *list_args, workers=workers)
+
+ @staticmethod
+ def image_registry_name(image_name):
+ """
+ Get the name as shown in local registry. Since some strings are not part of name
+ when using default registry e.g. docker.io
+ :param image_name: name of the image from the list
+ :return: name of the image as it is shown by docker
+ """
+ name = image_name
+
+ if name.startswith('docker.io/'):
+ name = name.replace('docker.io/', '')
+
+ if name.startswith('library/'):
+ name = name.replace('library/', '')
+
+ if ':' not in name.rsplit('/')[-1]:
+ name = '{}:latest'.format(name)
+
+ return name
+
+ @property
+ def check_table(self):
+ """
+ Table showing information of which images are pulled/saved
+ """
+ self.missing()
+ return self._table(self._data_list)
+
+ @property
+ def fail_table(self):
+ """
+ Table showing information about state of download of images
+ that encountered problems while downloading
+ """
+ return self._table(self.missing())
+
+ @staticmethod
+ def _image_filename(image_name):
+ """
+ Get a name of a file where image will be saved.
+ :param image_name: Name of the image from list
+ :return: Filename of the image
+ """
+ return '{}.tar'.format(image_name.replace(':', '_').replace('/', '_'))
+
+ def _table(self, images):
+ """
+ Get table in format for images
+ :param images: images to put into table
+ :return: check table format with specified images
+ """
+ header = ['Name', 'Pulled', 'Saved']
+ data = []
+ for item in images:
+ if item not in self._missing:
+ data.append((item, True, True if self._save else 'N/A'))
+ else:
+ data.append((item, self._missing[item]['pulled'], self._missing[item]['saved']))
+ return self._check_table(header, {'Name': 'l'}, data)
+
+ def _is_pulled(self, image):
+ return self.image_registry_name(image) in self._pulled_images
+
+ def _is_saved(self, image):
+ dst = '{}/{}'.format(self._data_list[image], self._image_filename(image))
+ return os.path.isfile(dst)
+
+ def _is_missing(self, item):
+ """
+ Missing docker images are checked slightly differently.
+ """
+ pass
+
+ def missing(self):
+ """
+ Get dictionary of images not present locally.
+ """
+ missing = dict()
+ for image, dst in self._data_list.items():
+ pulled = self._is_pulled(image)
+ if self._save:
+ # if pulling and save is True. Save every pulled image to assure parity
+ saved = False if not pulled else self._is_saved(image)
+ else:
+ saved = 'N/A'
+ if not pulled or not saved:
+ missing[image] = {'dst': dst, 'pulled': pulled, 'saved': saved}
+ self._missing = missing
+ return self._missing
+
+ @retry(stop_max_attempt_number=5, wait_fixed=5000)
+ def _pull_image(self, image_name):
+ """
+ Pull docker image.
+ :param image_name: name of the image to be pulled
+ :return: pulled image (image object)
+ :raises docker.errors.APIError: after unsuccessful retries
+ """
+ if ':' not in image_name.rsplit('/')[-1]:
+ image_name = '{}:latest'.format(image_name)
+ try:
+ image = self._docker_client.images.pull(image_name)
+ log.info('Image {} pulled'.format(image_name))
+ return image
+ except docker.errors.APIError as err:
+ log.warning('Failed: {}: {}. Retrying...'.format(image_name, err))
+ raise err
+
+ def _save_image(self, image_name, image, output_dir):
+ """
+ Save image to tar.
+ :param output_dir: path to destination directory
+ :param image: image object from pull_image function
+ :param image_name: name of the image from list
+ """
+ dst = '{}/{}'.format(output_dir, self._image_filename(image_name))
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ try:
+ with open(dst, 'wb') as f:
+ for chunk in image.save(named=self.image_registry_name(image_name)):
+ f.write(chunk)
+ log.info('Image {} saved as {}'.format(image_name, dst))
+ except Exception as err:
+ if os.path.isfile(dst):
+ os.remove(dst)
+ raise err
+
+ def _download_item(self, image):
+ """ Pull and save docker image from specified docker registry
+ :param image: image to be downloaded
+ """
+ image_name, image_dict = image
+ log.info('Downloading image: {}'.format(image_name))
+ try:
+ if image_dict['pulled']:
+ image_to_save = self._docker_client.images.get(image_name)
+ else:
+ image_to_save = self._pull_image(image_name)
+ if self._save:
+ self._save_image(image_name, image_to_save, image_dict['dst'])
+ except Exception as err:
+ log.exception('Error downloading {}: {}'.format(image_name, err))
+ raise err
+
+
+def run_cli():
+ parser = argparse.ArgumentParser(description='Download docker images from list')
+ parser.add_argument('image_list', metavar='image-list',
+ help='File with list of images to download.')
+ parser.add_argument('--save', '-s', action='store_true', default=False,
+ help='Save images (without it only pull is executed)')
+ parser.add_argument('--output-dir', '-o', default=os.getcwd(),
+ help='Download destination')
+ parser.add_argument('--check', '-c', action='store_true', default=False,
+ help='Check what is missing. No download.'
+ 'Use with combination with -s to check saved images as well.')
+ parser.add_argument('--debug', action='store_true', default=False,
+ help='Turn on debug output')
+ parser.add_argument('--workers', type=int, default=3,
+ help='Set maximum workers for parallel download (default: 3)')
+
+ args = parser.parse_args()
+
+ if args.debug:
+ logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
+ else:
+ logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
+
+ downloader = DockerDownloader(args.save, [args.file_list, args.output_dir], workers=args.workers)
+
+ if args.check:
+ log.info('Check mode. No download will be executed.')
+ log.info(downloader.check_table)
+ sys.exit(0)
+
+ timer_start = timeit.default_timer()
+ try:
+ downloader.download()
+ except RuntimeError:
+ sys.exit(1)
+ finally:
+ log.info('Downloading finished in {}'.format(
+ datetime.timedelta(seconds=timeit.default_timer() - timer_start)))
+
+
+if __name__ == '__main__':
+ run_cli()