"Fossies" - the Fresh Open Source Software Archive

Member "ironic-12.1.1/ironic/drivers/modules/agent.py" (6 Jun 2019, 43387 Bytes) of package /linux/misc/openstack/ironic-12.1.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively you can here view or download the uninterpreted source code file. For more information about "agent.py" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 11.1.2_vs_12.1.0.

# Copyright 2014 Rackspace, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ironic_lib import metrics_utils
from ironic_lib import utils as il_utils
from oslo_log import log
from oslo_utils import excutils
from oslo_utils import units
import six.moves.urllib_parse as urlparse

from ironic.common import dhcp_factory
from ironic.common import exception
from ironic.common.glance_service import service_utils
from ironic.common.i18n import _
from ironic.common import images
from ironic.common import raid
from ironic.common import states
from ironic.common import utils
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.conf import CONF
from ironic.drivers import base
from ironic.drivers.modules import agent_base_vendor
from ironic.drivers.modules import boot_mode_utils
from ironic.drivers.modules import deploy_utils


LOG = log.getLogger(__name__)

METRICS = metrics_utils.get_metrics_logger(__name__)

REQUIRED_PROPERTIES = {
    'deploy_kernel': _('UUID (from Glance) of the deployment kernel. '
                       'Required.'),
    'deploy_ramdisk': _('UUID (from Glance) of the ramdisk with agent that is '
                        'used at deploy time. Required.'),
}

OPTIONAL_PROPERTIES = {
    'image_http_proxy': _('URL of a proxy server for HTTP connections. '
                          'Optional.'),
    'image_https_proxy': _('URL of a proxy server for HTTPS connections. '
                           'Optional.'),
    'image_no_proxy': _('A comma-separated list of host names, IP addresses '
                        'and domain names (with optional :port) that will be '
                        'excluded from proxying. To denote a domain name, use '
                        'a dot to prefix the domain name. This value will be '
                        'ignored if ``image_http_proxy`` and '
                        '``image_https_proxy`` are not specified. Optional.'),
}

COMMON_PROPERTIES = REQUIRED_PROPERTIES.copy()
COMMON_PROPERTIES.update(OPTIONAL_PROPERTIES)
COMMON_PROPERTIES.update(agent_base_vendor.VENDOR_PROPERTIES)

PARTITION_IMAGE_LABELS = ('kernel', 'ramdisk', 'root_gb', 'root_mb', 'swap_mb',
                          'ephemeral_mb', 'ephemeral_format', 'configdrive',
                          'preserve_ephemeral', 'image_type',
                          'deploy_boot_mode')


@METRICS.timer('check_image_size')
def check_image_size(task, image_source):
    """Check if the requested image is larger than the ram size.

    :param task: a TaskManager instance containing the node to act on.
    :param image_source: href of the image.
    :raises: InvalidParameterValue if size of the image is greater than
        the available ram size.
    """
    node = task.node
    properties = node.properties
    # skip check if 'memory_mb' is not defined
    if 'memory_mb' not in properties:
        LOG.warning('Skip the image size check as memory_mb is not '
                    'defined in properties on node %s.', node.uuid)
        return

    image_show = images.image_show(task.context, image_source)
    if CONF.agent.stream_raw_images and image_show.get('disk_format') == 'raw':
        LOG.debug('Skip the image size check since the image is going to be '
                  'streamed directly onto the disk for node %s', node.uuid)
        return

    memory_size = int(properties.get('memory_mb'))
    image_size = int(image_show['size'])
    reserved_size = CONF.agent.memory_consumed_by_agent
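    # Illustration with assumed values: a 1024 MiB image with a configured
    # [agent]memory_consumed_by_agent of 256 MiB requires memory_mb >= 1280.
    # Note that image_size is in bytes here, while the other two values are
    # in MiB, hence the units.Mi conversions below.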
    if (image_size + (reserved_size * units.Mi)) > (memory_size * units.Mi):
        msg = (_('Memory size is too small for the requested image: if it '
                 'is less than (image size + reserved RAM size), the IPA '
                 'deployment will fail. Image size: %(image_size)d MiB, '
                 'Memory size: %(memory_size)d MiB, Reserved size: '
                 '%(reserved_size)d MiB.')
               % {'image_size': image_size / units.Mi,
                  'memory_size': memory_size,
                  'reserved_size': reserved_size})
        raise exception.InvalidParameterValue(msg)


@METRICS.timer('validate_image_proxies')
def validate_image_proxies(node):
    """Check that the provided proxy parameters are valid.

    :param node: an Ironic node.
    :raises: InvalidParameterValue if any of the provided proxy parameters are
        incorrect.
    """
    invalid_proxies = {}
    for scheme in ('http', 'https'):
        proxy_param = 'image_%s_proxy' % scheme
        proxy = node.driver_info.get(proxy_param)
        if proxy:
            chunks = urlparse.urlparse(proxy)
            # NOTE(vdrok) If no scheme specified, this is still a valid
            # proxy address. It is also possible for a proxy to have a
            # scheme different from the one specified in the image URL,
            # e.g. it is possible to use https:// proxy for downloading
            # http:// image.
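            # For illustration (hypothetical values), both
            # 'https://proxy.example.com:3128' and 'proxy.example.com:3128'
            # pass the check below, while 'ftp://proxy.example.com' fails.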
            if chunks.scheme not in ('', 'http', 'https'):
                invalid_proxies[proxy_param] = proxy
    msg = ''
    if invalid_proxies:
        msg += _("Proxy URL should either have HTTP(S) scheme "
                 "or no scheme at all, the following URLs are "
                 "invalid: %s.") % invalid_proxies
    no_proxy = node.driver_info.get('image_no_proxy')
    if no_proxy is not None and not utils.is_valid_no_proxy(no_proxy):
        msg += _(
            "image_no_proxy should be a list of host names, IP addresses "
            "or domain names to exclude from proxying, the specified list "
            "%s is incorrect. To denote a domain name, prefix it with a dot "
            "(instead of e.g. '.*').") % no_proxy
    if msg:
        raise exception.InvalidParameterValue(msg)


def validate_http_provisioning_configuration(node):
    """Validate configuration options required to perform HTTP provisioning.

    :param node: an ironic node object
    :raises: MissingParameterValue if required option(s) is not set.
    """
    image_source = node.instance_info.get('image_source')
    if (not service_utils.is_glance_image(image_source) or
            CONF.agent.image_download_source != 'http'):
        return

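    # The options checked below live in the [deploy] section of ironic.conf,
    # e.g. (illustrative values):
    #   [deploy]
    #   http_url = http://192.0.2.1:8080
    #   http_root = /httpboot
    #   http_image_subdir = agent_images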
    params = {
        '[deploy]http_url': CONF.deploy.http_url,
        '[deploy]http_root': CONF.deploy.http_root,
        '[deploy]http_image_subdir': CONF.deploy.http_image_subdir
    }
    error_msg = _('Node %s failed to validate http provisioning. Some '
                  'configuration options were missing') % node.uuid
    deploy_utils.check_for_missing_params(params, error_msg)


class AgentDeployMixin(agent_base_vendor.AgentDeployMixin):

    @METRICS.timer('AgentDeployMixin.deploy_has_started')
    def deploy_has_started(self, task):
        commands = self._client.get_commands_status(task.node)

        for command in commands:
            if command['command_name'] == 'prepare_image':
                # deploy did start at some point
                return True
        return False

    @METRICS.timer('AgentDeployMixin.deploy_is_done')
    def deploy_is_done(self, task):
        commands = self._client.get_commands_status(task.node)
        if not commands:
            return False

        last_command = commands[-1]

        if last_command['command_name'] != 'prepare_image':
            # catches race condition where prepare_image is still processing
            # so deploy hasn't started yet
            return False

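        # Any status other than RUNNING (typically SUCCEEDED or FAILED in
        # the agent's reporting) means prepare_image has finished; failures
        # are surfaced later via check_deploy_success().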
        if last_command['command_status'] != 'RUNNING':
            return True

        return False

    @METRICS.timer('AgentDeployMixin.continue_deploy')
    @task_manager.require_exclusive_lock
    def continue_deploy(self, task):
        task.process_event('resume')
        node = task.node
        image_source = node.instance_info.get('image_source')
        LOG.debug('Continuing deploy for node %(node)s with image %(img)s',
                  {'node': node.uuid, 'img': image_source})

        image_info = {
            'id': image_source.split('/')[-1],
            'urls': [node.instance_info['image_url']],
            'checksum': node.instance_info['image_checksum'],
            # NOTE(comstud): Older versions of ironic do not set
            # 'disk_format' nor 'container_format', so we use .get()
            # to maintain backwards compatibility in case code was
            # upgraded in the middle of a build request.
            'disk_format': node.instance_info.get('image_disk_format'),
            'container_format': node.instance_info.get(
                'image_container_format'),
            'stream_raw_images': CONF.agent.stream_raw_images,
        }
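        # At this point image_info is shaped roughly like (illustrative
        # values): {'id': '<image UUID>', 'urls': ['http://...'],
        # 'checksum': '<checksum>', 'disk_format': 'qcow2',
        # 'container_format': 'bare', 'stream_raw_images': False}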

        if (node.instance_info.get('image_os_hash_algo') and
                node.instance_info.get('image_os_hash_value')):
            image_info['os_hash_algo'] = node.instance_info[
                'image_os_hash_algo']
            image_info['os_hash_value'] = node.instance_info[
                'image_os_hash_value']

        proxies = {}
        for scheme in ('http', 'https'):
            proxy_param = 'image_%s_proxy' % scheme
            proxy = node.driver_info.get(proxy_param)
            if proxy:
                proxies[scheme] = proxy
        if proxies:
            image_info['proxies'] = proxies
            no_proxy = node.driver_info.get('image_no_proxy')
            if no_proxy is not None:
                image_info['no_proxy'] = no_proxy

        image_info['node_uuid'] = node.uuid
        iwdi = node.driver_internal_info.get('is_whole_disk_image')
        if not iwdi:
            for label in PARTITION_IMAGE_LABELS:
                image_info[label] = node.instance_info.get(label)
            boot_option = deploy_utils.get_boot_option(node)
            image_info['deploy_boot_mode'] = (
                boot_mode_utils.get_boot_mode(node))
            image_info['boot_option'] = boot_option
            disk_label = deploy_utils.get_disk_label(node)
            if disk_label is not None:
                image_info['disk_label'] = disk_label

        # Tell the client to download and write the image with the given args
        self._client.prepare_image(node, image_info)

        task.process_event('wait')

    def _get_uuid_from_result(self, task, type_uuid):
        command = self._client.get_commands_status(task.node)[-1]

        if command['command_result'] is not None:
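            # The agent reports the UUIDs as space-separated key=value
            # tokens in the result string, e.g. (illustrative):
            #   'root_uuid=<uuid> efi_system_partition_uuid=<uuid>'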
            words = command['command_result']['result'].split()
            for word in words:
                if type_uuid in word:
                    result = word.split('=')[1]
                    if not result:
                        msg = (_('Command result did not return %(type_uuid)s '
                                 'for node %(node)s. The version of the IPA '
                                 'ramdisk used in the deployment might not '
                                 'have support for provisioning of '
                                 'partition images.') %
                               {'type_uuid': type_uuid,
                                'node': task.node.uuid})
                        LOG.error(msg)
                        deploy_utils.set_failed_state(task, msg)
                        return
                    return result

    @METRICS.timer('AgentDeployMixin.check_deploy_success')
    def check_deploy_success(self, node):
        # should only ever be called after we've validated that
        # the prepare_image command is complete
        command = self._client.get_commands_status(node)[-1]
        if command['command_status'] == 'FAILED':
            return command['command_error']

    @METRICS.timer('AgentDeployMixin.reboot_to_instance')
    def reboot_to_instance(self, task):
        task.process_event('resume')
        node = task.node
        iwdi = task.node.driver_internal_info.get('is_whole_disk_image')
        cpu_arch = task.node.properties.get('cpu_arch')
        error = self.check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = (_('node %(node)s command status errored: %(error)s') %
                   {'node': node.uuid, 'error': error})
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return

        # If `boot_option` is set to `netboot`, PXEBoot.prepare_instance()
        # would need root_uuid of the whole disk image to add it into the
        # pxe config to perform chain boot.
        # IPA would have returned us the 'root_uuid_or_disk_id' if image
        # being provisioned is a whole disk image. IPA would also provide us
        # 'efi_system_partition_uuid' if the image being provisioned is a
        # partition image.
        # In case of local boot using partition image, we need both
        # 'root_uuid_or_disk_id' and 'efi_system_partition_uuid' to configure
        # bootloader for local boot.
        # NOTE(mjturek): In the case of local boot using a partition image on
        # ppc64* hardware we need to provide the 'PReP_Boot_partition_uuid' to
        # direct where the bootloader should be installed.
        driver_internal_info = task.node.driver_internal_info
        root_uuid = self._get_uuid_from_result(task, 'root_uuid')
        if root_uuid:
            driver_internal_info['root_uuid_or_disk_id'] = root_uuid
            task.node.driver_internal_info = driver_internal_info
            task.node.save()
        elif iwdi and CONF.agent.manage_agent_boot:
            # IPA versions below 3.1.0 do not return root_uuid for whole
            # disk images. Newer IPA versions also require the hexdump
            # utility, which may not always be available; fall back to the
            # older behavior in that case.
            LOG.warning("With a deploy ramdisk based on Ironic Python Agent "
                        "version 3.1.0 and beyond, drivers using the "
                        "`direct` deploy interface perform `netboot` or "
                        "`local` boot for a whole disk image based on the "
                        "value of the boot option setting. When you upgrade "
                        "Ironic Python Agent in your deploy ramdisk, ensure "
                        "that the boot option is set appropriately for the "
                        "node %s. The boot option can be set using the "
                        "configuration `[deploy]/default_boot_option` or as "
                        "a `boot_option` capability in the node's "
                        "`properties['capabilities']`. Also note that this "
                        "functionality requires the `hexdump` command in "
                        "the ramdisk.", node.uuid)

        efi_sys_uuid = None
        if not iwdi:
            if boot_mode_utils.get_boot_mode(node) == 'uefi':
                efi_sys_uuid = (self._get_uuid_from_result(task,
                                'efi_system_partition_uuid'))

        prep_boot_part_uuid = None
        if cpu_arch is not None and cpu_arch.startswith('ppc64'):
            prep_boot_part_uuid = (self._get_uuid_from_result(task,
                                   'PReP_Boot_partition_uuid'))

        LOG.info('Image successfully written to node %s', node.uuid)

        if CONF.agent.manage_agent_boot:
            # It is necessary to invoke prepare_instance() of the node's
            # boot interface, so that any necessary configuration, such as
            # setting the boot mode (e.g. UEFI secure boot), which cannot
            # be done during the deploy stage, can be performed.
            LOG.debug('Executing driver specific tasks before booting up the '
                      'instance for node %s', node.uuid)
            self.prepare_instance_to_boot(task, root_uuid,
                                          efi_sys_uuid, prep_boot_part_uuid)
        else:
            manager_utils.node_set_boot_device(task, 'disk', persistent=True)

        # Remove symbolic link when deploy is done.
        if CONF.agent.image_download_source == 'http':
            deploy_utils.remove_http_instance_symlink(task.node.uuid)

        LOG.debug('Rebooting node %s to instance', node.uuid)
        self.reboot_and_finish_deploy(task)


class AgentDeploy(AgentDeployMixin, base.DeployInterface):
    """Interface for deploy-related actions."""

    def get_properties(self):
        """Return the properties of the interface.

        :returns: dictionary of <property name>:<property description> entries.
        """
        return COMMON_PROPERTIES

    @METRICS.timer('AgentDeploy.validate')
    def validate(self, task):
        """Validate the driver-specific Node deployment info.

        This method validates whether the properties of the supplied node
        contain the required information for this driver to deploy images to
        the node.

        :param task: a TaskManager instance
        :raises: MissingParameterValue, if any of the required parameters are
            missing.
        :raises: InvalidParameterValue, if any of the parameters have invalid
            value.
        """
        if CONF.agent.manage_agent_boot:
            task.driver.boot.validate(task)

        node = task.node

        # Validate node capabilities
        deploy_utils.validate_capabilities(node)

        if not task.driver.storage.should_write_image(task):
            # NOTE(TheJulia): There is no reason to validate
            # image properties if we will not be writing an image
            # in a boot from volume case. As such, return to the caller.
            LOG.debug('Skipping complete deployment interface validation '
                      'for node %s as it is set to boot from a remote '
                      'volume.', node.uuid)
            return

        params = {}
        image_source = node.instance_info.get('image_source')
        params['instance_info.image_source'] = image_source
        error_msg = _('Node %s failed to validate deploy image info. Some '
                      'parameters were missing') % node.uuid

        deploy_utils.check_for_missing_params(params, error_msg)

        if not service_utils.is_glance_image(image_source):
            if not node.instance_info.get('image_checksum'):
                raise exception.MissingParameterValue(_(
                    "image_source's image_checksum must be provided in "
                    "instance_info for node %s") % node.uuid)

        validate_http_provisioning_configuration(node)

        check_image_size(task, image_source)
        # Validate the root device hints
        try:
            root_device = node.properties.get('root_device')
            il_utils.parse_root_device_hints(root_device)
        except ValueError as e:
            raise exception.InvalidParameterValue(
                _('Failed to validate the root device hints for node '
                  '%(node)s. Error: %(error)s') % {'node': node.uuid,
                                                   'error': e})

        validate_image_proxies(node)

    @METRICS.timer('AgentDeploy.deploy')
    @base.deploy_step(priority=100)
    @task_manager.require_exclusive_lock
    def deploy(self, task):
        """Perform a deployment to a node.

        Perform the necessary work to deploy an image onto the specified node.
        This method will be called after prepare(), which may have already
        performed any preparatory steps, such as pre-caching some data for the
        node.

        :param task: a TaskManager instance.
        :returns: status of the deploy. One of ironic.common.states.
        """
        if manager_utils.is_fast_track(task):
            LOG.debug('Performing a fast track deployment for %(node)s.',
                      {'node': task.node.uuid})
            # Update the database for the API and the task tracking resumes
            # the state machine state going from DEPLOYWAIT -> DEPLOYING
            task.process_event('wait')
            self.continue_deploy(task)
        elif task.driver.storage.should_write_image(task):
            manager_utils.node_power_action(task, states.REBOOT)
            return states.DEPLOYWAIT
        else:
            # TODO(TheJulia): At some point, we should de-dupe this code
            # as it is nearly identical to the iscsi deploy interface.
            # This is not being done now as it is expected to be
            # refactored in the near future.
            manager_utils.node_power_action(task, states.POWER_OFF)
            power_state_to_restore = (
                manager_utils.power_on_node_if_needed(task))
            task.driver.network.remove_provisioning_network(task)
            task.driver.network.configure_tenant_networks(task)
            manager_utils.restore_power_state_if_needed(
                task, power_state_to_restore)
            task.driver.boot.prepare_instance(task)
            manager_utils.node_power_action(task, states.POWER_ON)
            LOG.info('Deployment to node %s done', task.node.uuid)
            return None

    @METRICS.timer('AgentDeploy.tear_down')
    @task_manager.require_exclusive_lock
    def tear_down(self, task):
        """Tear down a previous deployment on the task's node.

        :param task: a TaskManager instance.
        :returns: status of the deploy. One of ironic.common.states.
        :raises: NetworkError if the cleaning ports cannot be removed.
        :raises: InvalidParameterValue when the wrong power state is specified
             or the wrong driver info is specified for power management.
        :raises: StorageError when the storage interface attached volumes fail
             to detach.
        :raises: other exceptions by the node's power driver if something
             wrong occurred during the power action.
        """
        manager_utils.node_power_action(task, states.POWER_OFF)
        task.driver.storage.detach_volumes(task)
        deploy_utils.tear_down_storage_configuration(task)
        power_state_to_restore = manager_utils.power_on_node_if_needed(task)
        task.driver.network.unconfigure_tenant_networks(task)
        # NOTE(mgoddard): If the deployment was unsuccessful the node may have
        # ports on the provisioning network which were not deleted.
        task.driver.network.remove_provisioning_network(task)
        manager_utils.restore_power_state_if_needed(
            task, power_state_to_restore)
        return states.DELETED

    @METRICS.timer('AgentDeploy.prepare')
    @task_manager.require_exclusive_lock
    def prepare(self, task):
        """Prepare the deployment environment for this node.

        :param task: a TaskManager instance.
        :raises: NetworkError: if the previous cleaning ports cannot be removed
            or if new cleaning ports cannot be created.
        :raises: InvalidParameterValue when the wrong power state is specified
            or the wrong driver info is specified for power management.
        :raises: StorageError If the storage driver is unable to attach the
            configured volumes.
        :raises: other exceptions by the node's power driver if something
            wrong occurred during the power action.
        :raises: exception.ImageRefValidationFailed if image_source is not
            Glance href and is not HTTP(S) URL.
        :raises: exception.InvalidParameterValue if network validation fails.
        :raises: any boot interface's prepare_ramdisk exceptions.
        """

        def _update_instance_info():
            node.instance_info = (
                deploy_utils.build_instance_info_for_deploy(task))
            node.save()

        node = task.node
        deploy_utils.populate_storage_driver_internal_info(task)
        if node.provision_state == states.DEPLOYING:
            # Validate network interface to ensure that it supports boot
            # options configured on the node.
            try:
                task.driver.network.validate(task)
            except exception.InvalidParameterValue:
                # For the 'neutron' network interface, validation will fail
                # if the node is using the 'netboot' boot option while
                # provisioning a whole disk image. Update 'boot_option' in
                # the node's 'instance_info' to 'local' for backward
                # compatibility.
                # TODO(stendulker): Fail here once the default boot
                # option is local.
                with excutils.save_and_reraise_exception(reraise=False) as ctx:
                    instance_info = node.instance_info
                    capabilities = utils.parse_instance_info_capabilities(node)
                    if 'boot_option' not in capabilities:
                        capabilities['boot_option'] = 'local'
                        instance_info['capabilities'] = capabilities
                        node.instance_info = instance_info
                        node.save()
                        # Re-validate the network interface
                        task.driver.network.validate(task)
                    else:
                        ctx.reraise = True
            # Determine if this is a fast track sequence
            fast_track_deploy = manager_utils.is_fast_track(task)
            if fast_track_deploy:
                # The agent has already recently checked in and we are
                # configured to take that as an indicator that we can
                # skip ahead.
                LOG.debug('The agent for node %(node)s has recently checked '
                          'in, and the node power will remain unmodified.',
                          {'node': task.node.uuid})
            else:
                # Power off the node to set up networking for the ports and
                # to ensure that the state is reset if it is inadvertently
                # on for any unknown reason.
                manager_utils.node_power_action(task, states.POWER_OFF)
            if task.driver.storage.should_write_image(task):
                # NOTE(vdrok): in case of rebuild, we have tenant network
                # already configured, unbind tenant ports if present
                if not fast_track_deploy:
                    power_state_to_restore = (
                        manager_utils.power_on_node_if_needed(task))

                task.driver.network.unconfigure_tenant_networks(task)
                task.driver.network.add_provisioning_network(task)
                if not fast_track_deploy:
                    manager_utils.restore_power_state_if_needed(
                        task, power_state_to_restore)
                else:
                    # Fast track sequence in progress
                    _update_instance_info()
            # Signal to storage driver to attach volumes
            task.driver.storage.attach_volumes(task)
            if (not task.driver.storage.should_write_image(task)
                or fast_track_deploy):
                # We have nothing else to do as this is handled in the
                # backend storage system, and we can return to the caller
                # as we do not need to boot the agent to deploy.
                # Alternatively, we could be in a fast track deployment
                # and again, we should have nothing to do here.
                return
        if node.provision_state in (states.ACTIVE, states.UNRESCUING):
            # Call is due to conductor takeover
            task.driver.boot.prepare_instance(task)
        elif node.provision_state != states.ADOPTING:
            if node.provision_state not in (states.RESCUING, states.RESCUEWAIT,
                                            states.RESCUE, states.RESCUEFAIL):
                _update_instance_info()
            if CONF.agent.manage_agent_boot:
                deploy_opts = deploy_utils.build_agent_options(node)
                task.driver.boot.prepare_ramdisk(task, deploy_opts)

    @METRICS.timer('AgentDeploy.clean_up')
    @task_manager.require_exclusive_lock
    def clean_up(self, task):
        """Clean up the deployment environment for this node.

        If preparation of the deployment environment ahead of time is possible,
        this method should be implemented by the driver. It should erase
        anything cached by the `prepare` method.

        If implemented, this method must be idempotent. It may be called
        multiple times for the same node on the same conductor, and it may be
        called by multiple conductors in parallel. Therefore, it must not
        require an exclusive lock.

        This method is called before `tear_down`.

        :param task: a TaskManager instance.
        """
        if CONF.agent.manage_agent_boot:
            task.driver.boot.clean_up_ramdisk(task)
        task.driver.boot.clean_up_instance(task)
        provider = dhcp_factory.DHCPFactory()
        provider.clean_dhcp(task)
        if CONF.agent.image_download_source == 'http':
            deploy_utils.destroy_http_instance_images(task.node)

    def take_over(self, task):
        """Take over management of this node from a dead conductor.

        :param task: a TaskManager instance.
        """
        pass

    @METRICS.timer('AgentDeploy.get_clean_steps')
    def get_clean_steps(self, task):
        """Get the list of clean steps from the agent.

        :param task: a TaskManager object containing the node
        :raises NodeCleaningFailure: if the clean steps are not yet
            available (cached), for example, when a node has just been
            enrolled and has not been cleaned yet.
        :returns: A list of clean step dictionaries
        """
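        # These priorities are operator-tunable via the
        # [deploy]erase_devices_priority and
        # [deploy]erase_devices_metadata_priority options; a priority of 0
        # disables the corresponding clean step.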
        new_priorities = {
            'erase_devices': CONF.deploy.erase_devices_priority,
            'erase_devices_metadata':
                CONF.deploy.erase_devices_metadata_priority,
        }
        return deploy_utils.agent_get_clean_steps(
            task, interface='deploy',
            override_priorities=new_priorities)

    @METRICS.timer('AgentDeploy.execute_clean_step')
    def execute_clean_step(self, task, step):
        """Execute a clean step asynchronously on the agent.

        :param task: a TaskManager object containing the node
        :param step: a clean step dictionary to execute
        :raises: NodeCleaningFailure if the agent does not return a command
            status
        :returns: states.CLEANWAIT to signify the step will be completed async
        """
        return deploy_utils.agent_execute_clean_step(task, step)

    @METRICS.timer('AgentDeploy.prepare_cleaning')
    def prepare_cleaning(self, task):
        """Boot into the agent to prepare for cleaning.

        :param task: a TaskManager object containing the node
        :raises: NodeCleaningFailure, NetworkError if the previous cleaning
            ports cannot be removed or if new cleaning ports cannot be created.
        :raises: InvalidParameterValue if cleaning network UUID config option
            has an invalid value.
        :returns: states.CLEANWAIT to signify an asynchronous prepare
        """
        return deploy_utils.prepare_inband_cleaning(
            task, manage_boot=CONF.agent.manage_agent_boot)

    @METRICS.timer('AgentDeploy.tear_down_cleaning')
    def tear_down_cleaning(self, task):
        """Clean up the PXE and DHCP files after cleaning.

        :param task: a TaskManager object containing the node
        :raises: NodeCleaningFailure, NetworkError if the cleaning ports cannot
            be removed
        """
        deploy_utils.tear_down_inband_cleaning(
            task, manage_boot=CONF.agent.manage_agent_boot)


class AgentRAID(base.RAIDInterface):
    """Implementation of RAIDInterface which uses agent ramdisk."""

    def get_properties(self):
        """Return the properties of the interface."""
        return {}

    @METRICS.timer('AgentRAID.create_configuration')
    @base.clean_step(priority=0)
    def create_configuration(self, task,
                             create_root_volume=True,
                             create_nonroot_volumes=True):
        """Create a RAID configuration on a bare metal using agent ramdisk.

        This method creates a RAID configuration on the given node.

        :param task: a TaskManager instance.
        :param create_root_volume: If True, a root volume is created
            during RAID configuration. Otherwise, no root volume is
            created. Default is True.
        :param create_nonroot_volumes: If True, non-root volumes are
            created. If False, no non-root volumes are created. Default
            is True.
        :returns: states.CLEANWAIT if operation was successfully invoked.
        :raises: MissingParameterValue, if node.target_raid_config is missing
            or was found to be empty after skipping root volume and/or non-root
            volumes.
        """
        node = task.node
        LOG.debug("Agent RAID create_configuration invoked for node %(node)s "
                  "with create_root_volume=%(create_root_volume)s and "
                  "create_nonroot_volumes=%(create_nonroot_volumes)s with the "
                  "following target_raid_config: %(target_raid_config)s.",
                  {'node': node.uuid,
                   'create_root_volume': create_root_volume,
                   'create_nonroot_volumes': create_nonroot_volumes,
                   'target_raid_config': node.target_raid_config})

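        # For reference, target_raid_config follows the standard Ironic
        # RAID schema, e.g. (illustrative):
        #   {'logical_disks': [{'size_gb': 100, 'raid_level': '1',
        #                       'is_root_volume': True}]}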
        target_raid_config = raid.filter_target_raid_config(
            node,
            create_root_volume=create_root_volume,
            create_nonroot_volumes=create_nonroot_volumes)
        # Rewrite it back to the node object, but there is no need to save
        # it: we only need to send this to the agent ramdisk.
        node.driver_internal_info['target_raid_config'] = target_raid_config

        LOG.debug("Calling agent RAID create_configuration for node %(node)s "
                  "with the following target RAID configuration: %(target)s",
                  {'node': node.uuid, 'target': target_raid_config})
        step = node.clean_step
        return deploy_utils.agent_execute_clean_step(task, step)

    @staticmethod
    @agent_base_vendor.post_clean_step_hook(
        interface='raid', step='create_configuration')
    def _create_configuration_final(task, command):
        """Clean step hook after a RAID configuration was created.

        This method is invoked as a post clean step hook by the Ironic
        conductor once a create raid configuration is completed successfully.
        The node (properties, capabilities, RAID information) will be updated
        to reflect the actual RAID configuration that was created.

        :param task: a TaskManager instance.
        :param command: A command result structure of the RAID operation
            returned from agent ramdisk on query of the status of command(s).
        :raises: InvalidParameterValue, if 'current_raid_config' has more than
            one root volume or if node.properties['capabilities'] is malformed.
        :raises: IronicException, if clean_result couldn't be found within
            the 'command' argument passed.
        """
        try:
            clean_result = command['command_result']['clean_result']
        except KeyError:
            raise exception.IronicException(
                _("Agent ramdisk didn't return a proper command result while "
                  "cleaning %(node)s. It returned '%(result)s' after command "
                  "execution.") % {'node': task.node.uuid,
                                   'result': command})

        raid.update_raid_info(task.node, clean_result)

    @METRICS.timer('AgentRAID.delete_configuration')
    @base.clean_step(priority=0)
    def delete_configuration(self, task):
        """Deletes RAID configuration on the given node.

        :param task: a TaskManager instance.
        :returns: states.CLEANWAIT if operation was successfully invoked
        """
        LOG.debug("Agent RAID delete_configuration invoked for node %s.",
                  task.node.uuid)
        step = task.node.clean_step
        return deploy_utils.agent_execute_clean_step(task, step)

    @staticmethod
    @agent_base_vendor.post_clean_step_hook(
        interface='raid', step='delete_configuration')
    def _delete_configuration_final(task, command):
        """Clean step hook after RAID configuration was deleted.

        This method is invoked as a post clean step hook by the Ironic
        conductor once a delete raid configuration is completed successfully.
        It sets node.raid_config to empty dictionary.

        :param task: a TaskManager instance.
        :param command: A command result structure of the RAID operation
            returned from agent ramdisk on query of the status of command(s).
        :returns: None
        """
        task.node.raid_config = {}
        task.node.save()


class AgentRescue(base.RescueInterface):
    """Implementation of RescueInterface which uses agent ramdisk."""

    def get_properties(self):
        """Return the properties of the interface. """
        return {}

    @METRICS.timer('AgentRescue.rescue')
    @task_manager.require_exclusive_lock
    def rescue(self, task):
        """Boot a rescue ramdisk on the node.

        :param task: a TaskManager instance.
        :raises: NetworkError if the tenant ports cannot be removed.
        :raises: InvalidParameterValue when the wrong power state is specified
             or the wrong driver info is specified for power management.
        :raises: other exceptions by the node's power driver if something
             wrong occurred during the power action.
        :raises: any boot interface's prepare_ramdisk exceptions.
        :returns: Returns states.RESCUEWAIT
        """
        manager_utils.node_power_action(task, states.POWER_OFF)
        # NOTE(TheJulia): Revealing that the power is off at any time can
        # cause external power sync to decide that the node must be off.
        # This may result in a post-rescued instance being turned off
        # unexpectedly after rescue has started.
        # TODO(TheJulia): Once we have power/state callbacks to nova,
        # the reset of the power_state can be removed.
        task.node.power_state = states.POWER_ON
        task.node.save()

        task.driver.boot.clean_up_instance(task)
        power_state_to_restore = manager_utils.power_on_node_if_needed(task)
        task.driver.network.unconfigure_tenant_networks(task)
        task.driver.network.add_rescuing_network(task)
        manager_utils.restore_power_state_if_needed(
            task, power_state_to_restore)
        if CONF.agent.manage_agent_boot:
            ramdisk_opts = deploy_utils.build_agent_options(task.node)
            # prepare_ramdisk will set the boot device
            task.driver.boot.prepare_ramdisk(task, ramdisk_opts)
        manager_utils.node_power_action(task, states.POWER_ON)

        return states.RESCUEWAIT

    @METRICS.timer('AgentRescue.unrescue')
    @task_manager.require_exclusive_lock
    def unrescue(self, task):
        """Attempt to move a rescued node back to active state.

        :param task: a TaskManager instance.
        :raises: NetworkError if the rescue ports cannot be removed.
        :raises: InvalidParameterValue when the wrong power state is specified
             or the wrong driver info is specified for power management.
        :raises: other exceptions by the node's power driver if something
             wrong occurred during the power action.
        :raises: any boot interface's prepare_instance exceptions.
        :returns: Returns states.ACTIVE
        """
        manager_utils.node_power_action(task, states.POWER_OFF)

        # NOTE(TheJulia): Revealing that the power is off at any time can
        # cause external power sync to decide that the node must be off.
        # This may result in a post-rescued instance being turned off
        # unexpectedly after unrescue.
        # TODO(TheJulia): Once we have power/state callbacks to nova,
        # the reset of the power_state can be removed.
        task.node.power_state = states.POWER_ON
        task.node.save()

        self.clean_up(task)
        power_state_to_restore = manager_utils.power_on_node_if_needed(task)
        task.driver.network.configure_tenant_networks(task)
        manager_utils.restore_power_state_if_needed(
            task, power_state_to_restore)
        task.driver.boot.prepare_instance(task)
        manager_utils.node_power_action(task, states.POWER_ON)

        return states.ACTIVE

    @METRICS.timer('AgentRescue.validate')
    def validate(self, task):
        """Validate that the node has required properties for agent rescue.

        :param task: a TaskManager instance with the node being checked
        :raises: InvalidParameterValue if 'instance_info/rescue_password' has
            empty password or rescuing network UUID config option
            has an invalid value.
        :raises: MissingParameterValue if node is missing one or more required
            parameters
        """
        # Validate rescuing network
        task.driver.network.validate_rescue(task)
        if CONF.agent.manage_agent_boot:
            # Validate boot properties
            task.driver.boot.validate(task)
            # Validate boot properties related to rescue
            task.driver.boot.validate_rescue(task)

        node = task.node
        rescue_pass = node.instance_info.get('rescue_password')
        if rescue_pass is None:
            msg = _("Node %(node)s is missing "
                    "'instance_info/rescue_password'. "
                    "It is required for rescuing the node.")
            raise exception.MissingParameterValue(msg % {'node': node.uuid})

        if not rescue_pass.strip():
            msg = (_("The 'instance_info/rescue_password' is an empty string "
                     "for node %s. The 'rescue_password' must be a non-empty "
                     "string value.") % node.uuid)
            raise exception.InvalidParameterValue(msg)

    @METRICS.timer('AgentRescue.clean_up')
    def clean_up(self, task):
        """Clean up after RESCUEWAIT timeout/failure or finishing rescue.

        Rescue password should be removed from the node and ramdisk boot
        environment should be cleaned if Ironic is managing the ramdisk boot.

        :param task: a TaskManager instance with the node.
        :raises: NetworkError if the rescue ports cannot be removed.
        """
        manager_utils.remove_node_rescue_password(task.node, save=True)
        if CONF.agent.manage_agent_boot:
            task.driver.boot.clean_up_ramdisk(task)
        power_state_to_restore = manager_utils.power_on_node_if_needed(task)
        task.driver.network.remove_rescuing_network(task)
        manager_utils.restore_power_state_if_needed(
            task, power_state_to_restore)