Source code for renga_deployer.engines

# -*- coding: utf-8 -*-
#
# Copyright 2017 - Swiss Data Science Center (SDSC)
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Engine sub-module."""

import logging
import os
import re
import shlex
import time
from enum import Enum
from functools import wraps

from flask import abort, current_app
from werkzeug.exceptions import NotFound
from werkzeug.utils import cached_property

from renga_deployer.serializers import ContextSchema, ExecutionSchema

from .models import Context, Execution, ExecutionStates
from .utils import decode_bytes, resource_available

context_schema = ContextSchema()
execution_schema = ExecutionSchema()


[docs]class Engine(object):
    """Base engine class."""

[docs]    def launch(self, context, **kwargs):
        """Create new execution environment for a given context."""
        raise NotImplemented

[docs]    def stop(self, execution, remove=False):
        """Stop an execution."""
        raise NotImplemented

[docs]    def get_logs(self, execution):
        """Extract logs for a container."""
        raise NotImplemented

[docs]    def get_host_port(self, execution):
        """Retrieve the host/port where the application can be reached."""
        raise NotImplemented

[docs]    def get_execution_environment(self, execution) -> dict:
        """Retrieve the environment specified for an execution container."""
        raise NotImplemented

[docs]    def get_state(self, execution):
        """Check the state of an execution."""
        raise NotImplemented


[docs]class DockerEngine(Engine):
    """Class for deploying contexts on docker."""

[docs]    class EXECUTION_STATE_MAPPING(Enum):
        """State mappings for the Docker engine."""

        running = ExecutionStates.RUNNING
        exited = ExecutionStates.EXITED
        restarting = ExecutionStates.UNAVAILABLE
        paused = ExecutionStates.UNAVAILABLE

    def __init__(self):
        """Initialize the docker engine."""
        import docker
        self._docker = docker

[docs]    @cached_property
    def logger(self):
        """Create a logger instance."""
        return logging.getLogger('renga.deployer.engines.docker')

[docs]    @cached_property
    def client(self):
        """Create a docker client from local environment."""
        return self._docker.from_env()

[docs]    def launch(self, execution, **kwargs):
        """Launch a docker container with the context image."""
        context = execution.context

        execution.environment.setdefault(
            'DEPLOYER_BASE_URL',
            current_app.config.get('DEPLOYER_DEFAULT_BASE_URL')
        )

        if context.spec.get('ports'):
            ports = {port: None for port in context.spec.get('ports')}
        else:
            ports = None

        # Fix an unexpected behaviour of the python docker client which
        # leads to all images being downloaded when no tag is specified.
        image = context.spec['image']
        if ':' not in image:
            image += ':latest'

        container = self.client.containers.run(
            image=image,
            ports=ports,
            command=context.spec.get('command'),
            detach=True,
            environment=execution.environment or None)

        self.logger.info(
            'Launched container for execution {1} of context {0}'.format(
                execution.id, context.id),
            extra={
                'container_attrs': container.attrs,
                'execution': execution_schema.dump(execution).data,
                'context': context_schema.dump(execution.context).data
            })

        execution.engine_id = container.id

        return execution

[docs]    def stop(self, execution, remove=False):
        """Stop a running container, optionally removing it."""
        from docker.errors import NotFound

        try:
            container = self.client.containers.get(execution.engine_id)
        except NotFound:
            return execution

        container.stop()
        if remove:
            container.remove()

        self.logger.info(
            'Stopped execution {0} of context {1}'.format(
                execution.id, execution.context.id),
            extra={
                'container_attrs': container.attrs,
                'execution': execution_schema.dump(execution).data,
                'context': context_schema.dump(execution.context).data
            })

        return execution

[docs]    def get_logs(self, execution):
        """Extract logs for a container."""
        try:
            return decode_bytes(
                self.client.containers.get(execution.engine_id).logs)()
        except self._docker.errors.NotFound:
            # FIXME: implement proper exception handling and propagation
            raise NotFound('Execution container not found.')

[docs]    def get_host_ports(self, execution):
        """Return host ip and port bindings for the running execution."""
        if not execution.check_state(ExecutionStates.RUNNING, self):
            return {'ports': []}

        container = self.client.containers.get(execution.engine_id)
        port_bindings = container.attrs['NetworkSettings'].get('Ports', {})
        return {
            'ports': [{
                'specified':
                container_port.split('/')[0],
                'protocol':
                container_port.split('/')[1].upper(),
                'host':
                current_app.config[
                    'DEPLOYER_DOCKER_CONTAINER_IP'] or host_spec['HostIp'],
                'exposed':
                host_spec['HostPort'],
            }
                      for container_port, host_specs in port_bindings.items()
                      for host_spec in host_specs],
        }

[docs]    def get_execution_environment(self, execution) -> dict:
        """Retrieve the environment specified for an execution container."""
        container = self.client.containers.get(execution.engine_id)
        return {
            k: v
            for (
                k,
                v) in [e.split('=') for e in container.attrs['Config']['Env']]
        }

[docs]    def get_state(self, execution):
        """Return the status of an execution."""
        try:
            return getattr(
                self.__class__.EXECUTION_STATE_MAPPING,
                self.client.containers.get(execution.engine_id).status).value
        except self._docker.errors.NotFound:
            return ExecutionStates.UNAVAILABLE


[docs]class K8SEngine(Engine):
    """Class for deploying contexts on Kubernetes."""

[docs]    class EXECUTION_STATE_MAPPING(Enum):
        """State mappings for the K8S engine."""

        running = ExecutionStates.RUNNING
        terminated = ExecutionStates.EXITED
        waiting = ExecutionStates.UNAVAILABLE

    def __init__(self, config=None, timeout=10):
        """Create a K8SNode instance."""
        # FIXME add super
        import kubernetes
        self._kubernetes = kubernetes
        self.timeout = timeout
        self.config = config

        if self.config is None:
            kubernetes.config.load_kube_config()
            self.config = kubernetes.config.kube_config.Configuration()
            self.logger.debug(
                'Loaded k8s configuration.', extra=self.config.__dict__)

[docs]    @cached_property
    def logger(self):
        """Create a logger instance."""
        return logging.getLogger('renga.deployer.engines.k8s')

[docs]    def launch(self, execution, engine=None, **kwargs):
        """Launch a Kubernetes Job with the context spec."""
        context = execution.context

        execution.environment.setdefault(
            'DEPLOYER_BASE_URL',
            '/interactive/{0}'.format(execution.id)
            if current_app.config.get('DEPLOYER_K8S_INGRESS')
            else current_app.config.get('DEPLOYER_DEFAULT_BASE_URL')
        )

        batch = self._kubernetes.client.BatchV1Api()
        namespace = kwargs.pop('namespace', 'default')
        job_spec = self._k8s_job_template(namespace, execution)
        self.logger.debug('Context spec: {}'.format(context.spec))
        self.logger.debug('Job spec created: {}'.format(job_spec))
        job = batch.create_namespaced_job(namespace, job_spec)
        uid = job.metadata.labels['controller-uid']

        self.logger.info(
            'Created job for execution {0} of context {1}'.format(
                execution.id, execution.context.id),
            extra={
                'job': job.to_dict(),
                'execution': execution_schema.dump(execution).data,
                'context': context_schema.dump(execution.context).data
            })

        # assume that if the user specified a port to open, they want
        # it available from the outside
        if context.spec.get('ports'):
            # To expose an interactive job, we need to start a service.
            # We use the job controller-uid to link the service.
            api = self._kubernetes.client.CoreV1Api()
            service_spec = self._k8s_service_template(namespace, context, uid)
            service = api.create_namespaced_service(namespace, service_spec)

            self.logger.info(
                'Created service for namespaced job {}'.format(uid),
                extra={'service': service.to_dict()})

            # if using an ingress, need to make an additional object
            if current_app.config.get(
                    'DEPLOYER_K8S_INGRESS'):
                beta_api = self._kubernetes.client.ExtensionsV1beta1Api()
                ingress = beta_api.create_namespaced_ingress(
                    namespace,
                    self._k8s_ingress_template(uid, service, execution))
                self.logger.info(
                    'Created ingress for service {}'.format(
                        service.metadata.name),
                    extra={'ingress': ingress.to_dict()})

        execution.engine_id = uid
        execution.namespace = namespace
        return execution

[docs]    def stop(self, execution, remove=False):
        """Stop a running job."""
        if not execution.check_state(
                {ExecutionStates.RUNNING, ExecutionStates.EXITED}, self):
            return execution

        api = self._kubernetes.client.CoreV1Api()
        batch = self._kubernetes.client.BatchV1Api()

        if execution.context.spec.get('ports'):
            service = api.list_namespaced_service(
                execution.namespace,
                label_selector='job-uid={0}'.format(execution.engine_id))

            if service.items:
                service = service.items[0]
                api.delete_namespaced_service(
                    service.metadata.name,
                    execution.namespace, )

                self.logger.info(
                    'Deleted namespaced service {}'.format(
                        service.metadata.name),
                    extra={'service': service.to_dict()})

                if current_app.config.get(
                        'DEPLOYER_K8S_INGRESS'):
                    beta_api = self._kubernetes.client.ExtensionsV1beta1Api()
                    ingress = beta_api.list_namespaced_ingress(
                        execution.namespace,
                        label_selector='job-uid={0}'.format(
                            execution.engine_id)).items[0]
                    beta_api.delete_namespaced_ingress(
                        ingress.metadata.name, execution.namespace,
                        self._kubernetes.client.V1DeleteOptions())

                    self.logger.info(
                        'Deleted namespaced ingress {0} for service {1}'.
                        format(ingress.metadata.uid, service.metadata.name),
                        extra={'ingress': ingress.to_dict()})

        batch.delete_collection_namespaced_job(
            execution.namespace,
            label_selector='controller-uid={0}'.format(execution.engine_id))

        self.logger.info(
            'Deleted namespaced job for execution {}'.format(
                execution.engine_id),
            extra={
                'execution': execution_schema.dump(execution).data,
                'context': context_schema.dump(execution.context).data
            })

        if remove:
            api.delete_collection_namespaced_pod(
                execution.namespace,
                label_selector='controller-uid={0}'.format(
                    execution.engine_id))

            self.logger.info('Deleted namespaced pod for execution {}'.format(
                execution.engine_id))

        return execution

[docs]    def get_state(self, execution):
        """Get status of a running job."""
        api = self._kubernetes.client.CoreV1Api()
        pod = api.list_namespaced_pod(
            execution.namespace,
            label_selector='controller-uid={}'.format(execution.engine_id))

        if not pod.items:
            return ExecutionStates.UNAVAILABLE

        status = list(
            filter(lambda c: c.name == str(execution.context.id), pod.items[0]
                   .status.container_statuses))[0]

        return getattr(
            self.__class__.EXECUTION_STATE_MAPPING,
            list(filter(lambda x: x[1], status.state.to_dict().items()))[0][
                0]).value

    @staticmethod
    def _k8s_job_template(namespace, execution):
        """Return simple kubernetes job JSON."""
        # required spec
        context = execution.context
        context_spec = context.spec.copy()

        spec = {
            "containers": [{
                "name": "{0}".format(context.id),
                "image": "{0}".format(context_spec.pop('image'))
            }],
            "restartPolicy":
            "Never"
        }

        # optional spec
        if context_spec.get('ports'):
            spec['containers'][0]['ports'] = [{
                'containerPort': int(port)
            } for port in context_spec.pop('ports')]

        if context_spec.get('command'):
            command = shlex.split(context_spec.pop('command'))
            spec['containers'][0]['command'] = [command[0]]
            if len(command) > 1:
                spec['containers'][0]['args'] = command[1:]

        spec['containers'][0]['env'] = [{
            'name': k,
            'value': str(v)
        } for k, v in execution.environment.items()]

        spec['containers'][0]['env'] += context_spec.pop('env', [])
        spec['volumes'] = context_spec.pop('volumes', [])

        # add all other stuff to spec
        spec['containers'][0].update(context_spec)

        # finalize job template
        template = {
            "kind": "Job",
            "metadata": {
                "namespace": "{0}".format(namespace),
                "generateName": "{0}-".format(context.id)
            },
            "spec": {
                "template": {
                    "spec": spec
                }
            }
        }

        return template

    @staticmethod
    def _k8s_service_template(namespace, context, uid):
        """Return simple kubernetes service JSON."""
        return {
            'apiVersion': 'v1',
            'kind': 'Service',
            'metadata': {
                'generateName': re.sub('[^\w]+', '-', context.spec['image']),
                'namespace': namespace,
                'labels': {
                    'job-uid': "{0}".format(uid)
                }
            },
            'spec': {
                'hostNetwork': 'true',
                'ports': [{
                    'port': int(port)
                } for port in context.spec['ports']],
                'selector': {
                    'controller-uid': "{0}".format(uid)
                },
                'type': 'NodePort'
            }
        }

    @staticmethod
    def _k8s_ingress_template(uid, service, execution):
        """Return kubernetes ingress JSON."""
        service_name = service.metadata.name
        service_port = execution.context.spec['ports'][0]
        return {
            'apiVersion': 'extensions/v1beta1',
            'kind': 'Ingress',
            'metadata': {
                'generateName': 'interactive',
                'annotations': {
                    'kubernetes.io/ingress.class': current_app.config.get(
                        'DEPLOYER_K8S_INGRESS_CLASS'),
                },
                'labels': {
                    'job-uid': "{0}".format(uid)
                }
            },
            'spec': {
                'rules': [
                    {
                        'http': {
                            'paths': [
                                {
                                    'path': '/interactive/{0}'.format(
                                        execution.id),
                                    'backend': {
                                        'serviceName': service_name,
                                        'servicePort': int(service_port)
                                    }
                                }
                            ]
                        }
                    }
                ]
            }
        }

[docs]    def get_logs(self, execution, timeout=None, **kwargs):
        """Extract logs for the Job from the Pod."""
        api = self._kubernetes.client.CoreV1Api()
        namespace = execution.namespace

        pod = api.list_namespaced_pod(
            namespace,
            label_selector='controller-uid={0}'.format(execution.engine_id))

        if not pod.items:
            # FIXME: implement proper exception handling and propagation
            raise NotFound('Execution container not found.')

        timein = time.time()
        while not resource_available(api.read_namespaced_pod_log)(
                pod.items[0].metadata.name, namespace):
            if time.time() - timein > (timeout or self.timeout):
                raise RuntimeError("Timeout while fetching logs.")

        return api.read_namespaced_pod_log(pod.items[0].metadata.name,
                                           namespace)

[docs]    def get_host_ports(self, execution):
        """Return host ip and port bindings for the running execution."""
        api = self._kubernetes.client.CoreV1Api()
        service = api.list_namespaced_service(
            execution.namespace,
            label_selector='job-uid={0}'.format(execution.engine_id))

        pod = api.list_namespaced_pod(
            execution.namespace,
            label_selector='controller-uid={0}'.format(execution.engine_id))

        if not service.items or not execution.check_state(
                ExecutionStates.RUNNING, self):
            # this service doesn't exist or job isn't running yet
            return {'ports': []}

        else:
            if current_app.config.get(
                    'DEPLOYER_K8S_INGRESS'):

                beta_api = self._kubernetes.client.ExtensionsV1beta1Api()
                ingress = beta_api.list_namespaced_ingress(
                    execution.namespace,
                    label_selector='job-uid={0}'.format(
                        execution.engine_id)).items[0]

                if not ingress.status.load_balancer.ingress or \
                        not ingress.status.load_balancer.ingress[0].ip:
                    host = None

                else:
                    host = ingress.status.load_balancer.ingress[0].ip

                return {
                    'ports': [{
                        'specified':
                        port.port,
                        'host':
                        current_app.config[
                            'DEPLOYER_K8S_CONTAINER_IP'] or host,
                        'path':
                        ingress.spec.rules[0].http.paths[0].path,
                        'exposed':
                        '443',
                        'protocol':
                        port.protocol,
                    } for port in service.items[0].spec.ports]
                }

            return {
                'ports': [{
                    'specified':
                    port.port,
                    'host':
                    current_app.config[
                        'DEPLOYER_K8S_CONTAINER_IP'] or pod.items[
                            0].status.host_ip,
                    'exposed':
                    port.node_port,
                    'protocol':
                    port.protocol,
                } for port in service.items[0].spec.ports]
            }

[docs]    def get_execution_environment(self, execution) -> dict:
        """Retrieve the environment specified for an execution container."""
        client = self._kubernetes.client.BatchV1Api()
        job = client.list_namespaced_job(
            execution.namespace,
            label_selector='controller-uid={0}'.format(execution.engine_id))
        if not job.items:
            # FIXME: implement proper exception handling and propagation
            raise NotFound('Execution container not found.')

        return {
            e.name: e.value
            for e in job.items[0].spec.template.spec.containers[0].env
        }
Source code for renga_deployer.engines

Renga-Deployer

Navigation

Related Topics