Refactor http files download Original download scripts got out of hand. This series of commits improves style of code (code duplicates, etc.) Issue-ID: OOM-1803 Change-Id: I7b82c1711d27fe450430fbe6d962a450301b0be0 Signed-off-by: Milan Verespej <m.verespej@partner.samsung.com>

commit: 2e1328a8867190f203043fb5758dc8117ba3d673 [log] [tgz]
author: Milan Verespej <m.verespej@partner.samsung.com> Tue Jun 18 13:40:08 2019 +0200
committer: Milan Verespej <m.verespej@partner.samsung.com> Tue Jun 18 18:41:41 2019 +0200
tree: 485de0595f1dcd275f8c7d9d14a1137bbe3e04f6
parent: f2f06700b7b9ad99c9c182fc01ee5cc0782ead78 [diff] [blame]
diff --git a/build/download/http_downloader.py b/build/download/http_downloader.py
new file mode 100644
index 0000000..69adc4d
--- /dev/null
+++ b/build/download/http_downloader.py

@@ -0,0 +1,144 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#   COPYRIGHT NOTICE STARTS HERE
+
+#   Copyright 2019 © Samsung Electronics Co., Ltd.
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+#   COPYRIGHT NOTICE ENDS HERE
+
+import argparse
+import datetime
+import logging
+import os
+import sys
+import timeit
+
+import requests
+from retrying import retry
+
+import http_file
+from concurrent_downloader import ConcurrentDownloader
+
+# Module-level logger named after this module; run_cli() sets up the output
+# through logging.basicConfig
+log = logging.getLogger(__name__)
+
+
+class HttpDownloader(ConcurrentDownloader):
+    def __init__(self, *list_args, workers=None):
+        super().__init__('http files', *list_args, workers=workers)
+
+    @property
+    def check_table(self):
+        """
+        Table with information what items from lists are downloaded
+        """
+        self.missing()
+        header = ['Name', 'Downloaded']
+        return self._check_table(header, {'Name': 'l'},
+                                 ((item, item not in self._missing) for item
+                                  in self._data_list))
+
+    @staticmethod
+    def _make_get_request(url):
+        """
+        Run http get request
+        :param url: url to reqeuest
+        :return: requests.Response
+        """
+        req = requests.get(url)
+        req.raise_for_status()
+        return req
+
+    def _is_missing(self, item):
+        """
+        Check if item is missing (not downloaded)
+        :param item: item to check
+        :return: boolean
+        """
+        return not os.path.isfile(
+            '{}/{}'.format(self._data_list[item], item.rsplit('//')[-1]))
+
+    @retry(stop_max_attempt_number=5, wait_fixed=2000)
+    def _get_file(self, file_uri):
+        """
+        Get http file from uri
+        :param file_uri: uri of the file
+        :return: file content
+        """
+        if not file_uri.startswith('http'):
+            file_uri = 'http://' + file_uri
+        file_req = self._make_get_request(file_uri)
+        return file_req.content
+
+    def _download_item(self, item):
+        """
+        Download http file
+        :param item: http file to be downloaded (tuple: (uri, dst_dir))
+        """
+        log.info('Downloading: {}'.format(item[0]))
+        dst_path = '{}/{}'.format(item[1], item[0].rsplit('//')[-1])
+        try:
+            f = http_file.HttpFile(item[0], self._get_file(item[0]), dst_path)
+            f.save_to_file()
+        except Exception as err:
+            log.exception('Error downloading: {}: {}'.format(item[0], err))
+            if os.path.isfile(dst_path):
+                os.remove(dst_path)
+            raise err
+        log.info('Downloaded: {}'.format(f.name))
+
+
+def run_cli():
+    """
+    Run as cli tool
+    """
+    parser = argparse.ArgumentParser(description='Download http files from list')
+    parser.add_argument('file_list', metavar='file-list',
+                        help='File with list of http files to download')
+    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
+                        help='Destination directory for saving')
+    parser.add_argument('--check', '-c', action='store_true', default=False,
+                        help='Check mode')
+    parser.add_argument('--debug', action='store_true', default=False,
+                        help='Turn on debug output')
+    parser.add_argument('--workers', type=int, default=None,
+                        help='Set maximum workers for parallel download (default: cores * 5)')
+
+    args = parser.parse_args()
+
+    if args.debug:
+        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
+    else:
+        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
+
+    downloader = HttpDownloader([args.file_list, args.output_dir], workers=args.workers)
+
+    if args.check:
+        log.info('Check mode. No download will be executed.')
+        log.info(downloader.check_table)
+        sys.exit(0)
+
+    timer_start = timeit.default_timer()
+    try:
+        downloader.download()
+    except RuntimeError:
+        sys.exit(1)
+    finally:
+        log.info('Downloading finished in {}'.format(
+            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))
+
+
+# Start the command line interface only when the file is executed as a
+# script, not when it is imported
+if __name__ == '__main__':
+    run_cli()
commit	2e1328a8867190f203043fb5758dc8117ba3d673	[log] [tgz]
author	Milan Verespej <m.verespej@partner.samsung.com>	Tue Jun 18 13:40:08 2019 +0200
committer	Milan Verespej <m.verespej@partner.samsung.com>	Tue Jun 18 18:41:41 2019 +0200
tree	485de0595f1dcd275f8c7d9d14a1137bbe3e04f6
parent	f2f06700b7b9ad99c9c182fc01ee5cc0782ead78 [diff] [blame]