"Fossies" - the Fresh Open Source Software Archive 
Member "nova-22.0.1/nova/virt/libvirt/driver.py" (19 Nov 2020, 520928 Bytes) of package /linux/misc/openstack/nova-22.0.1.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style:
standard) with prefixed line numbers.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "driver.py" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
22.0.0_vs_22.0.1.
1 # Copyright 2010 United States Government as represented by the
2 # Administrator of the National Aeronautics and Space Administration.
3 # All Rights Reserved.
4 # Copyright (c) 2010 Citrix Systems, Inc.
5 # Copyright (c) 2011 Piston Cloud Computing, Inc
6 # Copyright (c) 2012 University Of Minho
7 # (c) Copyright 2013 Hewlett-Packard Development Company, L.P.
8 #
9 # Licensed under the Apache License, Version 2.0 (the "License"); you may
10 # not use this file except in compliance with the License. You may obtain
11 # a copy of the License at
12 #
13 # http://www.apache.org/licenses/LICENSE-2.0
14 #
15 # Unless required by applicable law or agreed to in writing, software
16 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
17 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
18 # License for the specific language governing permissions and limitations
19 # under the License.
20
21 """
22 A connection to a hypervisor through libvirt.
23
24 Supports KVM, LXC, QEMU, UML, XEN and Parallels.
25
26 """
27
28 import binascii
29 import collections
30 from collections import deque
31 import contextlib
32 import copy
33 import errno
34 import functools
35 import glob
36 import grp
37 import itertools
38 import operator
39 import os
40 import pwd
41 import random
42 import shutil
43 import sys
44 import tempfile
45 import time
46 import typing as ty
47 import uuid
48
49 from castellan import key_manager
50 from copy import deepcopy
51 import eventlet
52 from eventlet import greenthread
53 from eventlet import tpool
54 from lxml import etree
55 from os_brick import encryptors
56 from os_brick.encryptors import luks as luks_encryptor
57 from os_brick import exception as brick_exception
58 from os_brick.initiator import connector
59 import os_resource_classes as orc
60 import os_traits as ot
61 from oslo_concurrency import processutils
62 from oslo_log import log as logging
63 from oslo_serialization import base64
64 from oslo_serialization import jsonutils
65 from oslo_service import loopingcall
66 from oslo_utils import encodeutils
67 from oslo_utils import excutils
68 from oslo_utils import fileutils
69 from oslo_utils import importutils
70 from oslo_utils import netutils as oslo_netutils
71 from oslo_utils import strutils
72 from oslo_utils import timeutils
73 from oslo_utils import units
74 from oslo_utils import uuidutils
75 import six
76 from six.moves import range
77
78 from nova.api.metadata import base as instance_metadata
79 from nova.api.metadata import password
80 from nova import block_device
81 from nova.compute import power_state
82 from nova.compute import provider_tree
83 from nova.compute import task_states
84 from nova.compute import utils as compute_utils
85 from nova.compute import vm_states
86 import nova.conf
87 from nova.console import serial as serial_console
88 from nova.console import type as ctype
89 from nova import context as nova_context
90 from nova import crypto
91 from nova.db import constants as db_const
92 from nova import exception
93 from nova.i18n import _
94 from nova.image import glance
95 from nova.network import model as network_model
96 from nova import objects
97 from nova.objects import diagnostics as diagnostics_obj
98 from nova.objects import fields
99 from nova.pci import manager as pci_manager
100 from nova.pci import utils as pci_utils
101 import nova.privsep.libvirt
102 import nova.privsep.path
103 import nova.privsep.utils
104 from nova.storage import rbd_utils
105 from nova import utils
106 from nova import version
107 from nova.virt import arch
108 from nova.virt import block_device as driver_block_device
109 from nova.virt import configdrive
110 from nova.virt.disk import api as disk_api
111 from nova.virt.disk.vfs import guestfs
112 from nova.virt import driver
113 from nova.virt import hardware
114 from nova.virt.image import model as imgmodel
115 from nova.virt import images
116 from nova.virt.libvirt import blockinfo
117 from nova.virt.libvirt import config as vconfig
118 from nova.virt.libvirt import designer
119 from nova.virt.libvirt import guest as libvirt_guest
120 from nova.virt.libvirt import host
121 from nova.virt.libvirt import imagebackend
122 from nova.virt.libvirt import imagecache
123 from nova.virt.libvirt import instancejobtracker
124 from nova.virt.libvirt import migration as libvirt_migrate
125 from nova.virt.libvirt.storage import dmcrypt
126 from nova.virt.libvirt.storage import lvm
127 from nova.virt.libvirt import utils as libvirt_utils
128 from nova.virt.libvirt import vif as libvirt_vif
129 from nova.virt.libvirt.volume import fs
130 from nova.virt.libvirt.volume import mount
131 from nova.virt.libvirt.volume import remotefs
132 from nova.virt import netutils
133 from nova.volume import cinder
134
135 libvirt: ty.Any = None
136
137 uefi_logged = False
138
139 LOG = logging.getLogger(__name__)
140
141 CONF = nova.conf.CONF
142
143 DEFAULT_UEFI_LOADER_PATH = {
144 "x86_64": ['/usr/share/OVMF/OVMF_CODE.fd',
145 '/usr/share/OVMF/OVMF_CODE.secboot.fd',
146 '/usr/share/qemu/ovmf-x86_64-code.bin'],
147 "aarch64": ['/usr/share/AAVMF/AAVMF_CODE.fd',
148 '/usr/share/qemu/aavmf-aarch64-code.bin']
149 }
150
151 MAX_CONSOLE_BYTES = 100 * units.Ki
152 VALID_DISK_CACHEMODES = [
153 "default", "none", "writethrough", "writeback", "directsync", "unsafe",
154 ]
155
156 # The libvirt driver will prefix any disable reason codes with this string.
157 DISABLE_PREFIX = 'AUTO: '
158 # Disable reason for the service which was enabled or disabled without reason
159 DISABLE_REASON_UNDEFINED = None
160
161 # Guest config console string
162 CONSOLE = "console=tty0 console=ttyS0 console=hvc0"
163
164 GuestNumaConfig = collections.namedtuple(
165 'GuestNumaConfig', ['cpuset', 'cputune', 'numaconfig', 'numatune'])
166
167
168 class InjectionInfo(collections.namedtuple(
169 'InjectionInfo', ['network_info', 'files', 'admin_pass'])):
170 __slots__ = ()
171
172 def __repr__(self):
173 return ('InjectionInfo(network_info=%r, files=%r, '
174 'admin_pass=<SANITIZED>)') % (self.network_info, self.files)
175
176
177 libvirt_volume_drivers = [
178 'iscsi=nova.virt.libvirt.volume.iscsi.LibvirtISCSIVolumeDriver',
179 'iser=nova.virt.libvirt.volume.iser.LibvirtISERVolumeDriver',
180 'local=nova.virt.libvirt.volume.volume.LibvirtVolumeDriver',
181 'fake=nova.virt.libvirt.volume.volume.LibvirtFakeVolumeDriver',
182 'rbd=nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver',
183 'nfs=nova.virt.libvirt.volume.nfs.LibvirtNFSVolumeDriver',
184 'smbfs=nova.virt.libvirt.volume.smbfs.LibvirtSMBFSVolumeDriver',
185 'fibre_channel='
186 'nova.virt.libvirt.volume.fibrechannel.'
187 'LibvirtFibreChannelVolumeDriver',
188 'gpfs=nova.virt.libvirt.volume.gpfs.LibvirtGPFSVolumeDriver',
189 'quobyte=nova.virt.libvirt.volume.quobyte.LibvirtQuobyteVolumeDriver',
190 'scaleio=nova.virt.libvirt.volume.scaleio.LibvirtScaleIOVolumeDriver',
191 'vzstorage='
192 'nova.virt.libvirt.volume.vzstorage.LibvirtVZStorageVolumeDriver',
193 'storpool=nova.virt.libvirt.volume.storpool.LibvirtStorPoolVolumeDriver',
194 'nvmeof=nova.virt.libvirt.volume.nvme.LibvirtNVMEVolumeDriver',
195 ]
196
197
198 def patch_tpool_proxy():
199 """eventlet.tpool.Proxy doesn't work with old-style class in __str__()
200 or __repr__() calls. See bug #962840 for details.
201 We perform a monkey patch to replace those two instance methods.
202 """
203 def str_method(self):
204 return str(self._obj)
205
206 def repr_method(self):
207 return repr(self._obj)
208
209 tpool.Proxy.__str__ = str_method
210 tpool.Proxy.__repr__ = repr_method
211
212
213 patch_tpool_proxy()
214
215 # For information about when MIN_LIBVIRT_VERSION and
216 # NEXT_MIN_LIBVIRT_VERSION can be changed, consult
217 #
218 # https://wiki.openstack.org/wiki/LibvirtDistroSupportMatrix
219 #
220 # Currently this is effectively the min version for i686/x86_64
221 # + KVM/QEMU, as other architectures/hypervisors require newer
222 # versions. Over time, this will become a common min version
223 # for all architectures/hypervisors, as this value rises to
224 # meet them.
225 MIN_LIBVIRT_VERSION = (5, 0, 0)
226 MIN_QEMU_VERSION = (4, 0, 0)
227 # TODO(berrange): Re-evaluate this at start of each release cycle
228 # to decide if we want to plan a future min version bump.
229 # MIN_LIBVIRT_VERSION can be updated to match this after
230 # NEXT_MIN_LIBVIRT_VERSION has been at a higher value for
231 # one cycle
232 NEXT_MIN_LIBVIRT_VERSION = (6, 0, 0)
233 NEXT_MIN_QEMU_VERSION = (4, 2, 0)
234
235 # Virtuozzo driver support
236 MIN_VIRTUOZZO_VERSION = (7, 0, 0)
237
238
239 # Names of the types that do not get compressed during migration
240 NO_COMPRESSION_TYPES = ('qcow2',)
241
242
243 # number of serial console limit
244 QEMU_MAX_SERIAL_PORTS = 4
245 # Qemu supports 4 serial consoles, we remove 1 because of the PTY one defined
246 ALLOWED_QEMU_SERIAL_PORTS = QEMU_MAX_SERIAL_PORTS - 1
247
248 VGPU_RESOURCE_SEMAPHORE = 'vgpu_resources'
249
250 LIBVIRT_PERF_EVENT_PREFIX = 'VIR_PERF_PARAM_'
251
252 MIN_LIBVIRT_FILE_BACKED_DISCARD_VERSION = (4, 4, 0)
253
254 MIN_LIBVIRT_NATIVE_TLS_VERSION = (4, 4, 0)
255 MIN_QEMU_NATIVE_TLS_VERSION = (2, 11, 0)
256
257 # If the host has this libvirt version, then we skip the retry loop of
258 # instance destroy() call, as libvirt itself increased the wait time
259 # before the SIGKILL signal takes effect.
260 MIN_LIBVIRT_BETTER_SIGKILL_HANDLING = (4, 7, 0)
261
262 # Persistent Memory (PMEM/NVDIMM) Device Support
263 MIN_LIBVIRT_PMEM_SUPPORT = (5, 0, 0)
264 MIN_QEMU_PMEM_SUPPORT = (3, 1, 0)
265
266 # -blockdev support (replacing -drive)
267 MIN_LIBVIRT_BLOCKDEV = (6, 0, 0)
268 MIN_QEMU_BLOCKDEV = (4, 2, 0)
269
270 MIN_LIBVIRT_VIR_ERR_DEVICE_MISSING = (4, 1, 0)
271
272 # Virtual TPM (vTPM) support
273 MIN_LIBVIRT_VTPM = (5, 6, 0)
274
275 MIN_LIBVIRT_S390X_CPU_COMPARE = (5, 9, 0)
276
277 # see https://libvirt.org/formatdomain.html#elementsVideo
278 MIN_LIBVIRT_VIDEO_MODEL_VERSIONS = {
279 fields.VideoModel.NONE: (4, 6, 0),
280 }
281
282
283 class LibvirtDriver(driver.ComputeDriver):
284 def __init__(self, virtapi, read_only=False):
285 # NOTE(aspiers) Some of these are dynamic, so putting
286 # capabilities on the instance rather than on the class.
287 # This prevents the risk of one test setting a capability
288 # which bleeds over into other tests.
289
290 # LVM and RBD require raw images. If we are not configured to
291 # force convert images into raw format, then we _require_ raw
292 # images only.
293 raw_only = ('rbd', 'lvm')
294 requires_raw_image = (CONF.libvirt.images_type in raw_only and
295 not CONF.force_raw_images)
296 requires_ploop_image = CONF.libvirt.virt_type == 'parallels'
297
298 self.capabilities = {
299 "has_imagecache": True,
300 "supports_evacuate": True,
301 "supports_migrate_to_same_host": False,
302 "supports_attach_interface": True,
303 "supports_device_tagging": True,
304 "supports_tagged_attach_interface": True,
305 "supports_tagged_attach_volume": True,
306 "supports_extend_volume": True,
307 "supports_multiattach": True,
308 "supports_trusted_certs": True,
309 # Supported image types
310 "supports_image_type_aki": True,
311 "supports_image_type_ari": True,
312 "supports_image_type_ami": True,
313 # FIXME(danms): I can see a future where people might want to
314 # configure certain compute nodes to not allow giant raw images
315 # to be booted (like nodes that are across a WAN). Thus, at some
316 # point we may want to be able to _not_ expose "supports raw" on
317 # some nodes by policy. Until then, raw is always supported.
318 "supports_image_type_raw": True,
319 "supports_image_type_iso": True,
320 # NOTE(danms): Certain backends do not work with complex image
321 # formats. If we are configured for those backends, then we
322 # should not expose the corresponding support traits.
323 "supports_image_type_qcow2": not requires_raw_image,
324 "supports_image_type_ploop": requires_ploop_image,
325 "supports_pcpus": True,
326 "supports_accelerators": True,
327 "supports_bfv_rescue": True,
328 "supports_vtpm": CONF.libvirt.swtpm_enabled,
329 }
330 super(LibvirtDriver, self).__init__(virtapi)
331
332 if not sys.platform.startswith('linux'):
333 raise exception.InternalError(
334 _('The libvirt driver only works on Linux'))
335
336 global libvirt
337 if libvirt is None:
338 libvirt = importutils.import_module('libvirt')
339 libvirt_migrate.libvirt = libvirt
340
341 self._host = host.Host(self._uri(), read_only,
342 lifecycle_event_handler=self.emit_event,
343 conn_event_handler=self._handle_conn_event)
344 self._supported_perf_events = []
345
346 self.vif_driver = libvirt_vif.LibvirtGenericVIFDriver()
347
348 # TODO(mriedem): Long-term we should load up the volume drivers on
349 # demand as needed rather than doing this on startup, as there might
350 # be unsupported volume drivers in this list based on the underlying
351 # platform.
352 self.volume_drivers = self._get_volume_drivers()
353
354 self._disk_cachemode = None
355 self.image_cache_manager = imagecache.ImageCacheManager()
356 self.image_backend = imagebackend.Backend(CONF.use_cow_images)
357
358 self.disk_cachemodes = {}
359
360 for mode_str in CONF.libvirt.disk_cachemodes:
361 disk_type, sep, cache_mode = mode_str.partition('=')
362 if cache_mode not in VALID_DISK_CACHEMODES:
363 LOG.warning('Invalid cachemode %(cache_mode)s specified '
364 'for disk type %(disk_type)s.',
365 {'cache_mode': cache_mode, 'disk_type': disk_type})
366 continue
367 self.disk_cachemodes[disk_type] = cache_mode
368
369 self._volume_api = cinder.API()
370 self._image_api = glance.API()
371
372 # The default choice for the sysinfo_serial config option is "unique"
373 # which does not have a special function since the value is just the
374 # instance.uuid.
375 sysinfo_serial_funcs = {
376 'none': lambda: None,
377 'hardware': self._get_host_sysinfo_serial_hardware,
378 'os': self._get_host_sysinfo_serial_os,
379 'auto': self._get_host_sysinfo_serial_auto,
380 }
381
382 self._sysinfo_serial_func = sysinfo_serial_funcs.get(
383 CONF.libvirt.sysinfo_serial, lambda: None)
384
385 self.job_tracker = instancejobtracker.InstanceJobTracker()
386 self._remotefs = remotefs.RemoteFilesystem()
387
388 self._live_migration_flags = self._block_migration_flags = 0
389 self.active_migrations = {}
390
391 # Compute reserved hugepages from conf file at the very
392 # beginning to ensure any syntax error will be reported and
393 # avoid any re-calculation when computing resources.
394 self._reserved_hugepages = hardware.numa_get_reserved_huge_pages()
395
396 # Copy of the compute service ProviderTree object that is updated
397 # every time update_provider_tree() is called.
398 # NOTE(sbauza): We only want a read-only cache, this attribute is not
399 # intended to be updatable directly
400 self.provider_tree: provider_tree.ProviderTree = None
401
402 # driver traits will not change during the runtime of the agent
403 # so calcuate them once and save them
404 self._static_traits = None
405
406 # The CPU models in the configuration are case-insensitive, but the CPU
407 # model in the libvirt is case-sensitive, therefore create a mapping to
408 # map the lower case CPU model name to normal CPU model name.
409 self.cpu_models_mapping = {}
410 self.cpu_model_flag_mapping = {}
411
412 self._vpmems_by_name, self._vpmems_by_rc = self._discover_vpmems(
413 vpmem_conf=CONF.libvirt.pmem_namespaces)
414
415 # We default to not support vGPUs unless the configuration is set.
416 self.pgpu_type_mapping = collections.defaultdict(str)
417 self.supported_vgpu_types = self._get_supported_vgpu_types()
418
419 def _discover_vpmems(self, vpmem_conf=None):
420 """Discover vpmems on host and configuration.
421
422 :param vpmem_conf: pmem namespaces configuration from CONF
423 :returns: a dict of vpmem keyed by name, and
424 a dict of vpmem list keyed by resource class
425 :raises: exception.InvalidConfiguration if Libvirt or QEMU version
426 does not meet requirement.
427 """
428 if not vpmem_conf:
429 return {}, {}
430
431 if not self._host.has_min_version(lv_ver=MIN_LIBVIRT_PMEM_SUPPORT,
432 hv_ver=MIN_QEMU_PMEM_SUPPORT):
433 raise exception.InvalidConfiguration(
434 _('Nova requires QEMU version %(qemu)s or greater '
435 'and Libvirt version %(libvirt)s or greater '
436 'for NVDIMM (Persistent Memory) support.') % {
437 'qemu': libvirt_utils.version_to_string(
438 MIN_QEMU_PMEM_SUPPORT),
439 'libvirt': libvirt_utils.version_to_string(
440 MIN_LIBVIRT_PMEM_SUPPORT)})
441
442 # vpmem keyed by name {name: objects.LibvirtVPMEMDevice,...}
443 vpmems_by_name: ty.Dict[str, 'objects.LibvirtVPMEMDevice'] = {}
444 # vpmem list keyed by resource class
445 # {'RC_0': [objects.LibvirtVPMEMDevice, ...], 'RC_1': [...]}
446 vpmems_by_rc: ty.Dict[str, ty.List['objects.LibvirtVPMEMDevice']] = (
447 collections.defaultdict(list)
448 )
449
450 vpmems_host = self._get_vpmems_on_host()
451 for ns_conf in vpmem_conf:
452 try:
453 ns_label, ns_names = ns_conf.split(":", 1)
454 except ValueError:
455 reason = _("The configuration doesn't follow the format")
456 raise exception.PMEMNamespaceConfigInvalid(
457 reason=reason)
458 ns_names = ns_names.split("|")
459 for ns_name in ns_names:
460 if ns_name not in vpmems_host:
461 reason = _("The PMEM namespace %s isn't on host") % ns_name
462 raise exception.PMEMNamespaceConfigInvalid(
463 reason=reason)
464 if ns_name in vpmems_by_name:
465 reason = (_("Duplicated PMEM namespace %s configured") %
466 ns_name)
467 raise exception.PMEMNamespaceConfigInvalid(
468 reason=reason)
469 pmem_ns_updated = vpmems_host[ns_name]
470 pmem_ns_updated.label = ns_label
471 vpmems_by_name[ns_name] = pmem_ns_updated
472 rc = orc.normalize_name(
473 "PMEM_NAMESPACE_%s" % ns_label)
474 vpmems_by_rc[rc].append(pmem_ns_updated)
475
476 return vpmems_by_name, vpmems_by_rc
477
478 def _get_vpmems_on_host(self):
479 """Get PMEM namespaces on host using ndctl utility."""
480 try:
481 output = nova.privsep.libvirt.get_pmem_namespaces()
482 except Exception as e:
483 reason = _("Get PMEM namespaces by ndctl utility, "
484 "please ensure ndctl is installed: %s") % e
485 raise exception.GetPMEMNamespacesFailed(reason=reason)
486
487 if not output:
488 return {}
489 namespaces = jsonutils.loads(output)
490 vpmems_host = {} # keyed by namespace name
491 for ns in namespaces:
492 # store namespace info parsed from ndctl utility return
493 if not ns.get('name'):
494 # The name is used to identify namespaces, it's optional
495 # config when creating namespace. If an namespace don't have
496 # name, it can not be used by Nova, we will skip it.
497 continue
498 vpmems_host[ns['name']] = objects.LibvirtVPMEMDevice(
499 name=ns['name'],
500 devpath= '/dev/' + ns['daxregion']['devices'][0]['chardev'],
501 size=ns['size'],
502 align=ns['daxregion']['align'])
503 return vpmems_host
504
505 def _get_volume_drivers(self):
506 driver_registry = dict()
507
508 for driver_str in libvirt_volume_drivers:
509 driver_type, _sep, driver = driver_str.partition('=')
510 driver_class = importutils.import_class(driver)
511 try:
512 driver_registry[driver_type] = driver_class(self._host)
513 except brick_exception.InvalidConnectorProtocol:
514 LOG.debug('Unable to load volume driver %s. It is not '
515 'supported on this host.', driver)
516
517 return driver_registry
518
519 @property
520 def disk_cachemode(self):
521 # It can be confusing to understand the QEMU cache mode
522 # behaviour, because each cache=$MODE is a convenient shorthand
523 # to toggle _three_ cache.* booleans. Consult the below table
524 # (quoting from the QEMU man page):
525 #
526 # | cache.writeback | cache.direct | cache.no-flush
527 # --------------------------------------------------------------
528 # writeback | on | off | off
529 # none | on | on | off
530 # writethrough | off | off | off
531 # directsync | off | on | off
532 # unsafe | on | off | on
533 #
534 # Where:
535 #
536 # - 'cache.writeback=off' means: QEMU adds an automatic fsync()
537 # after each write request.
538 #
539 # - 'cache.direct=on' means: Use Linux's O_DIRECT, i.e. bypass
540 # the kernel page cache. Caches in any other layer (disk
541 # cache, QEMU metadata caches, etc.) can still be present.
542 #
543 # - 'cache.no-flush=on' means: Ignore flush requests, i.e.
544 # never call fsync(), even if the guest explicitly requested
545 # it.
546 #
547 # Use cache mode "none" (cache.writeback=on, cache.direct=on,
548 # cache.no-flush=off) for consistent performance and
549 # migration correctness. Some filesystems don't support
550 # O_DIRECT, though. For those we fallback to the next
551 # reasonable option that is "writeback" (cache.writeback=on,
552 # cache.direct=off, cache.no-flush=off).
553
554 if self._disk_cachemode is None:
555 self._disk_cachemode = "none"
556 if not nova.privsep.utils.supports_direct_io(CONF.instances_path):
557 self._disk_cachemode = "writeback"
558 return self._disk_cachemode
559
560 def _set_cache_mode(self, conf):
561 """Set cache mode on LibvirtConfigGuestDisk object."""
562 try:
563 source_type = conf.source_type
564 driver_cache = conf.driver_cache
565 except AttributeError:
566 return
567
568 # Shareable disks like for a multi-attach volume need to have the
569 # driver cache disabled.
570 if getattr(conf, 'shareable', False):
571 conf.driver_cache = 'none'
572 else:
573 cache_mode = self.disk_cachemodes.get(source_type,
574 driver_cache)
575 conf.driver_cache = cache_mode
576
577 # NOTE(acewit): If the [libvirt]disk_cachemodes is set as
578 # `block=writeback` or `block=writethrough` or `block=unsafe`,
579 # whose correponding Linux's IO semantic is not O_DIRECT in
580 # file nova.conf, then it will result in an attachment failure
581 # because of the libvirt bug
582 # (https://bugzilla.redhat.com/show_bug.cgi?id=1086704)
583 if ((getattr(conf, 'driver_io', None) == "native") and
584 conf.driver_cache not in [None, 'none', 'directsync']):
585 conf.driver_io = "threads"
586 LOG.warning("The guest disk driver io mode has fallen back "
587 "from 'native' to 'threads' because the "
588 "disk cache mode is set as %(cachemode)s, which does "
589 "not use O_DIRECT. See the following bug report "
590 "for more details: https://launchpad.net/bugs/1841363",
591 {'cachemode': conf.driver_cache})
592
593 def _do_quality_warnings(self):
594 """Warn about potential configuration issues.
595
596 This will log a warning message for things such as untested driver or
597 host arch configurations in order to indicate potential issues to
598 administrators.
599 """
600 if CONF.libvirt.virt_type not in ('qemu', 'kvm'):
601 LOG.warning(
602 "Support for the '%(type)s' libvirt backend has been "
603 "deprecated and will be removed in a future release.",
604 {'type': CONF.libvirt.virt_type},
605 )
606
607 caps = self._host.get_capabilities()
608 hostarch = caps.host.cpu.arch
609 if hostarch not in (
610 fields.Architecture.I686, fields.Architecture.X86_64,
611 ):
612 LOG.warning(
613 'The libvirt driver is not tested on %(arch)s by the '
614 'OpenStack project and thus its quality can not be ensured. '
615 'For more information, see: https://docs.openstack.org/'
616 'nova/latest/user/support-matrix.html',
617 {'arch': hostarch},
618 )
619
620 def _handle_conn_event(self, enabled, reason):
621 LOG.info("Connection event '%(enabled)d' reason '%(reason)s'",
622 {'enabled': enabled, 'reason': reason})
623 self._set_host_enabled(enabled, reason)
624
625 def init_host(self, host):
626 self._host.initialize()
627
628 self._check_cpu_set_configuration()
629
630 self._do_quality_warnings()
631
632 self._parse_migration_flags()
633
634 self._supported_perf_events = self._get_supported_perf_events()
635
636 self._check_file_backed_memory_support()
637
638 self._check_my_ip()
639
640 if (CONF.libvirt.virt_type == 'lxc' and
641 not (CONF.libvirt.uid_maps and CONF.libvirt.gid_maps)):
642 LOG.warning("Running libvirt-lxc without user namespaces is "
643 "dangerous. Containers spawned by Nova will be run "
644 "as the host's root user. It is highly suggested "
645 "that user namespaces be used in a public or "
646 "multi-tenant environment.")
647
648 # Stop libguestfs using KVM unless we're also configured
649 # to use this. This solves problem where people need to
650 # stop Nova use of KVM because nested-virt is broken
651 if CONF.libvirt.virt_type != "kvm":
652 guestfs.force_tcg()
653
654 if not self._host.has_min_version(MIN_LIBVIRT_VERSION):
655 raise exception.InternalError(
656 _('Nova requires libvirt version %s or greater.') %
657 libvirt_utils.version_to_string(MIN_LIBVIRT_VERSION))
658
659 if CONF.libvirt.virt_type in ("qemu", "kvm"):
660 if not self._host.has_min_version(hv_ver=MIN_QEMU_VERSION):
661 raise exception.InternalError(
662 _('Nova requires QEMU version %s or greater.') %
663 libvirt_utils.version_to_string(MIN_QEMU_VERSION))
664
665 if CONF.libvirt.virt_type == 'parallels':
666 if not self._host.has_min_version(hv_ver=MIN_VIRTUOZZO_VERSION):
667 raise exception.InternalError(
668 _('Nova requires Virtuozzo version %s or greater.') %
669 libvirt_utils.version_to_string(MIN_VIRTUOZZO_VERSION))
670
671 # Give the cloud admin a heads up if we are intending to
672 # change the MIN_LIBVIRT_VERSION in the next release.
673 if not self._host.has_min_version(NEXT_MIN_LIBVIRT_VERSION):
674 LOG.warning('Running Nova with a libvirt version less than '
675 '%(version)s is deprecated. The required minimum '
676 'version of libvirt will be raised to %(version)s '
677 'in the next release.',
678 {'version': libvirt_utils.version_to_string(
679 NEXT_MIN_LIBVIRT_VERSION)})
680 if (CONF.libvirt.virt_type in ("qemu", "kvm") and
681 not self._host.has_min_version(hv_ver=NEXT_MIN_QEMU_VERSION)):
682 LOG.warning('Running Nova with a QEMU version less than '
683 '%(version)s is deprecated. The required minimum '
684 'version of QEMU will be raised to %(version)s '
685 'in the next release.',
686 {'version': libvirt_utils.version_to_string(
687 NEXT_MIN_QEMU_VERSION)})
688
689 # Allowing both "tunnelling via libvirtd" (which will be
690 # deprecated once the MIN_{LIBVIRT,QEMU}_VERSION is sufficiently
691 # new enough) and "native TLS" options at the same time is
692 # nonsensical.
693 if (CONF.libvirt.live_migration_tunnelled and
694 CONF.libvirt.live_migration_with_native_tls):
695 msg = _("Setting both 'live_migration_tunnelled' and "
696 "'live_migration_with_native_tls' at the same "
697 "time is invalid. If you have the relevant "
698 "libvirt and QEMU versions, and TLS configured "
699 "in your environment, pick "
700 "'live_migration_with_native_tls'.")
701 raise exception.Invalid(msg)
702
703 # Some imagebackends are only able to import raw disk images,
704 # and will fail if given any other format. See the bug
705 # https://bugs.launchpad.net/nova/+bug/1816686 for more details.
706 if CONF.libvirt.images_type in ('rbd',):
707 if not CONF.force_raw_images:
708 msg = _("'[DEFAULT]/force_raw_images = False' is not "
709 "allowed with '[libvirt]/images_type = rbd'. "
710 "Please check the two configs and if you really "
711 "do want to use rbd as images_type, set "
712 "force_raw_images to True.")
713 raise exception.InvalidConfiguration(msg)
714
715 # TODO(sbauza): Remove this code once mediated devices are persisted
716 # across reboots.
717 self._recreate_assigned_mediated_devices()
718
719 self._check_cpu_compatibility()
720
721 self._check_vtpm_support()
722
723 def _check_cpu_compatibility(self):
724 mode = CONF.libvirt.cpu_mode
725 models = CONF.libvirt.cpu_models
726
727 if (CONF.libvirt.virt_type not in ("kvm", "qemu") and
728 mode not in (None, 'none')):
729 msg = _("Config requested an explicit CPU model, but "
730 "the current libvirt hypervisor '%s' does not "
731 "support selecting CPU models") % CONF.libvirt.virt_type
732 raise exception.Invalid(msg)
733
734 if mode != "custom":
735 if not models:
736 return
737 msg = _("The cpu_models option is not required when "
738 "cpu_mode!=custom")
739 raise exception.Invalid(msg)
740
741 if not models:
742 msg = _("The cpu_models option is required when cpu_mode=custom")
743 raise exception.Invalid(msg)
744
745 cpu = vconfig.LibvirtConfigGuestCPU()
746 for model in models:
747 cpu.model = self._get_cpu_model_mapping(model)
748 try:
749 self._compare_cpu(cpu, self._get_cpu_info(), None)
750 except exception.InvalidCPUInfo as e:
751 msg = (_("Configured CPU model: %(model)s is not "
752 "compatible with host CPU. Please correct your "
753 "config and try again. %(e)s") % {
754 'model': model, 'e': e})
755 raise exception.InvalidCPUInfo(msg)
756
757 # Use guest CPU model to check the compatibility between guest CPU and
758 # configured extra_flags
759 cpu = vconfig.LibvirtConfigGuestCPU()
760 cpu.model = self._host.get_capabilities().host.cpu.model
761 for flag in set(x.lower() for x in CONF.libvirt.cpu_model_extra_flags):
762 cpu.add_feature(vconfig.LibvirtConfigCPUFeature(flag))
763 try:
764 self._compare_cpu(cpu, self._get_cpu_info(), None)
765 except exception.InvalidCPUInfo as e:
766 msg = (_("Configured extra flag: %(flag)s it not correct, or "
767 "the host CPU does not support this flag. Please "
768 "correct the config and try again. %(e)s") % {
769 'flag': flag, 'e': e})
770 raise exception.InvalidCPUInfo(msg)
771
772 def _check_vtpm_support(self) -> None:
773 # TODO(efried): A key manager must be configured to create/retrieve
774 # secrets. Is there a way to check that one is set up correctly?
775 # CONF.key_manager.backend is optional :(
776 if not CONF.libvirt.swtpm_enabled:
777 return
778
779 if CONF.libvirt.virt_type not in ('qemu', 'kvm'):
780 msg = _(
781 "vTPM support requires '[libvirt] virt_type' of 'qemu' or "
782 "'kvm'; found '%s'.")
783 raise exception.InvalidConfiguration(msg % CONF.libvirt.virt_type)
784
785 if not self._host.has_min_version(lv_ver=MIN_LIBVIRT_VTPM):
786 msg = _(
787 'vTPM support requires Libvirt version %(libvirt)s or '
788 'greater.')
789 raise exception.InvalidConfiguration(msg % {
790 'libvirt': libvirt_utils.version_to_string(MIN_LIBVIRT_VTPM),
791 })
792
793 # These executables need to be installed for libvirt to make use of
794 # emulated TPM.
795 # NOTE(stephenfin): This checks using the PATH of the user running
796 # nova-compute rather than the libvirtd service, meaning it's an
797 # imperfect check but the best we can do
798 if not any(shutil.which(cmd) for cmd in ('swtpm_setup', 'swtpm')):
799 msg = _(
800 "vTPM support is configured but the 'swtpm' and "
801 "'swtpm_setup' binaries could not be found on PATH.")
802 raise exception.InvalidConfiguration(msg)
803
804 # The user and group must be valid on this host for cold migration and
805 # resize to function.
806 try:
807 pwd.getpwnam(CONF.libvirt.swtpm_user)
808 except KeyError:
809 msg = _(
810 "The user configured in '[libvirt] swtpm_user' does not exist "
811 "on this host; expected '%s'.")
812 raise exception.InvalidConfiguration(msg % CONF.libvirt.swtpm_user)
813
814 try:
815 grp.getgrnam(CONF.libvirt.swtpm_group)
816 except KeyError:
817 msg = _(
818 "The group configured in '[libvirt] swtpm_group' does not "
819 "exist on this host; expected '%s'.")
820 raise exception.InvalidConfiguration(
821 msg % CONF.libvirt.swtpm_group)
822
823 LOG.debug('Enabling emulated TPM support')
824
825 @staticmethod
826 def _is_existing_mdev(uuid):
827 # FIXME(sbauza): Some kernel can have a uevent race meaning that the
828 # libvirt daemon won't know when a mediated device is created unless
829 # you restart that daemon. Until all kernels we support are not having
830 # that possible race, check the sysfs directly instead of asking the
831 # libvirt API.
832 # See https://bugzilla.redhat.com/show_bug.cgi?id=1376907 for ref.
833 return os.path.exists('/sys/bus/mdev/devices/{0}'.format(uuid))
834
835 def _recreate_assigned_mediated_devices(self):
836 """Recreate assigned mdevs that could have disappeared if we reboot
837 the host.
838 """
839 # NOTE(sbauza): This method just calls sysfs to recreate mediated
840 # devices by looking up existing guest XMLs and doesn't use
841 # the Placement API so it works with or without a vGPU reshape.
842 mdevs = self._get_all_assigned_mediated_devices()
843 for (mdev_uuid, instance_uuid) in six.iteritems(mdevs):
844 if not self._is_existing_mdev(mdev_uuid):
845 dev_name = libvirt_utils.mdev_uuid2name(mdev_uuid)
846 dev_info = self._get_mediated_device_information(dev_name)
847 parent = dev_info['parent']
848 parent_type = self._get_vgpu_type_per_pgpu(parent)
849 if dev_info['type'] != parent_type:
850 # NOTE(sbauza): The mdev was created by using a different
851 # vGPU type. We can't recreate the mdev until the operator
852 # modifies the configuration.
853 parent = "{}:{}:{}.{}".format(*parent[4:].split('_'))
854 msg = ("The instance UUID %(inst)s uses a VGPU that "
855 "its parent pGPU %(parent)s no longer "
856 "supports as the instance vGPU type %(type)s "
857 "is not accepted for the pGPU. Please correct "
858 "the configuration accordingly." %
859 {'inst': instance_uuid,
860 'parent': parent,
861 'type': dev_info['type']})
862 raise exception.InvalidLibvirtGPUConfig(reason=msg)
863 self._create_new_mediated_device(parent, uuid=mdev_uuid)
864
865 def _check_file_backed_memory_support(self):
866 if not CONF.libvirt.file_backed_memory:
867 return
868
869 # file_backed_memory is only compatible with qemu/kvm virts
870 if CONF.libvirt.virt_type not in ("qemu", "kvm"):
871 raise exception.InternalError(
872 _('Running Nova with file_backed_memory and virt_type '
873 '%(type)s is not supported. file_backed_memory is only '
874 'supported with qemu and kvm types.') %
875 {'type': CONF.libvirt.virt_type})
876
877 # file-backed memory doesn't work with memory overcommit.
878 # Block service startup if file-backed memory is enabled and
879 # ram_allocation_ratio is not 1.0
880 if CONF.ram_allocation_ratio != 1.0:
881 raise exception.InternalError(
882 'Running Nova with file_backed_memory requires '
883 'ram_allocation_ratio configured to 1.0')
884
885 if CONF.reserved_host_memory_mb:
886 # this is a hard failure as placement won't allow total < reserved
887 if CONF.reserved_host_memory_mb >= CONF.libvirt.file_backed_memory:
888 msg = _(
889 "'[libvirt] file_backed_memory', which represents total "
890 "memory reported to placement, must be greater than "
891 "reserved memory configured via '[DEFAULT] "
892 "reserved_host_memory_mb'"
893 )
894 raise exception.InternalError(msg)
895
896 # TODO(stephenfin): Change this to an exception in W or later
897 LOG.warning(
898 "Reserving memory via '[DEFAULT] reserved_host_memory_mb' "
899 "is not compatible with file-backed memory. Consider "
900 "setting '[DEFAULT] reserved_host_memory_mb' to 0. This will "
901 "be an error in a future release."
902 )
903
904 def _check_my_ip(self):
905 ips = compute_utils.get_machine_ips()
906 if CONF.my_ip not in ips:
907 LOG.warning('my_ip address (%(my_ip)s) was not found on '
908 'any of the interfaces: %(ifaces)s',
909 {'my_ip': CONF.my_ip, 'ifaces': ", ".join(ips)})
910
911 def _check_cpu_set_configuration(self):
912 # evaluate these now to force a quick fail if they're invalid
913 vcpu_pin_set = hardware.get_vcpu_pin_set() or set()
914 cpu_shared_set = hardware.get_cpu_shared_set() or set()
915 cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set()
916
917 # TODO(stephenfin): Remove this in U once we remove the 'vcpu_pin_set'
918 # option
919 if not vcpu_pin_set:
920 if not (cpu_shared_set or cpu_dedicated_set):
921 return
922
923 if not cpu_dedicated_set.isdisjoint(cpu_shared_set):
924 msg = _(
925 "The '[compute] cpu_dedicated_set' and '[compute] "
926 "cpu_shared_set' configuration options must be "
927 "disjoint.")
928 raise exception.InvalidConfiguration(msg)
929
930 if CONF.reserved_host_cpus:
931 msg = _(
932 "The 'reserved_host_cpus' config option cannot be defined "
933 "alongside the '[compute] cpu_shared_set' or '[compute] "
934 "cpu_dedicated_set' options. Unset 'reserved_host_cpus'.")
935 raise exception.InvalidConfiguration(msg)
936
937 return
938
939 if cpu_dedicated_set:
940 # NOTE(stephenfin): This is a new option in Train so it can be
941 # an error
942 msg = _(
943 "The 'vcpu_pin_set' config option has been deprecated and "
944 "cannot be defined alongside '[compute] cpu_dedicated_set'. "
945 "Unset 'vcpu_pin_set'.")
946 raise exception.InvalidConfiguration(msg)
947
948 if cpu_shared_set:
949 LOG.warning(
950 "The '[compute] cpu_shared_set' and 'vcpu_pin_set' config "
951 "options have both been defined. While 'vcpu_pin_set' is "
952 "defined, it will continue to be used to configure the "
953 "specific host CPUs used for 'VCPU' inventory, while "
954 "'[compute] cpu_shared_set' will only be used for guest "
955 "emulator threads when 'hw:emulator_threads_policy=shared' "
956 "is defined in the flavor. This is legacy behavior and will "
957 "not be supported in a future release. "
958 "If you wish to define specific host CPUs to be used for "
959 "'VCPU' or 'PCPU' inventory, you must migrate the "
960 "'vcpu_pin_set' config option value to '[compute] "
961 "cpu_shared_set' and '[compute] cpu_dedicated_set', "
962 "respectively, and undefine 'vcpu_pin_set'.")
963 else:
964 LOG.warning(
965 "The 'vcpu_pin_set' config option has been deprecated and "
966 "will be removed in a future release. When defined, "
967 "'vcpu_pin_set' will be used to calculate 'VCPU' inventory "
968 "and schedule instances that have 'VCPU' allocations. "
969 "If you wish to define specific host CPUs to be used for "
970 "'VCPU' or 'PCPU' inventory, you must migrate the "
971 "'vcpu_pin_set' config option value to '[compute] "
972 "cpu_shared_set' and '[compute] cpu_dedicated_set', "
973 "respectively, and undefine 'vcpu_pin_set'.")
974
975 def _prepare_migration_flags(self):
976 migration_flags = 0
977
978 migration_flags |= libvirt.VIR_MIGRATE_LIVE
979
980 # Adding p2p flag only if xen is not in use, because xen does not
981 # support p2p migrations
982 if CONF.libvirt.virt_type != 'xen':
983 migration_flags |= libvirt.VIR_MIGRATE_PEER2PEER
984
985 # Adding VIR_MIGRATE_UNDEFINE_SOURCE because, without it, migrated
986 # instance will remain defined on the source host
987 migration_flags |= libvirt.VIR_MIGRATE_UNDEFINE_SOURCE
988
989 # Adding VIR_MIGRATE_PERSIST_DEST to persist the VM on the
990 # destination host
991 migration_flags |= libvirt.VIR_MIGRATE_PERSIST_DEST
992
993 live_migration_flags = block_migration_flags = migration_flags
994
995 # Adding VIR_MIGRATE_NON_SHARED_INC, otherwise all block-migrations
996 # will be live-migrations instead
997 block_migration_flags |= libvirt.VIR_MIGRATE_NON_SHARED_INC
998
999 return (live_migration_flags, block_migration_flags)
1000
1001 # TODO(kchamart) Once the MIN_LIBVIRT_VERSION and MIN_QEMU_VERSION
1002 # reach 4.4.0 and 2.11.0, which provide "native TLS" support by
1003 # default, deprecate and remove the support for "tunnelled live
1004 # migration" (and related config attribute), because:
1005 #
1006 # (a) it cannot handle live migration of disks in a non-shared
1007 # storage setup (a.k.a. "block migration");
1008 #
1009 # (b) has a huge performance overhead and latency, because it burns
1010 # more CPU and memory bandwidth due to increased number of data
1011 # copies on both source and destination hosts.
1012 #
1013 # Both the above limitations are addressed by the QEMU-native TLS
1014 # support (`live_migration_with_native_tls`).
1015 def _handle_live_migration_tunnelled(self, migration_flags):
1016 if CONF.libvirt.live_migration_tunnelled:
1017 migration_flags |= libvirt.VIR_MIGRATE_TUNNELLED
1018 return migration_flags
1019
1020 def _is_native_tls_available(self):
1021 return self._host.has_min_version(MIN_LIBVIRT_NATIVE_TLS_VERSION,
1022 MIN_QEMU_NATIVE_TLS_VERSION)
1023
1024 def _handle_native_tls(self, migration_flags):
1025 if (CONF.libvirt.live_migration_with_native_tls and
1026 self._is_native_tls_available()):
1027 migration_flags |= libvirt.VIR_MIGRATE_TLS
1028 return migration_flags
1029
1030 def _handle_live_migration_post_copy(self, migration_flags):
1031 if CONF.libvirt.live_migration_permit_post_copy:
1032 migration_flags |= libvirt.VIR_MIGRATE_POSTCOPY
1033 return migration_flags
1034
1035 def _handle_live_migration_auto_converge(self, migration_flags):
1036 if self._is_post_copy_enabled(migration_flags):
1037 LOG.info('The live_migration_permit_post_copy is set to '
1038 'True and post copy live migration is available '
1039 'so auto-converge will not be in use.')
1040 elif CONF.libvirt.live_migration_permit_auto_converge:
1041 migration_flags |= libvirt.VIR_MIGRATE_AUTO_CONVERGE
1042 return migration_flags
1043
1044 def _parse_migration_flags(self):
1045 (live_migration_flags,
1046 block_migration_flags) = self._prepare_migration_flags()
1047
1048 live_migration_flags = self._handle_live_migration_tunnelled(
1049 live_migration_flags)
1050 block_migration_flags = self._handle_live_migration_tunnelled(
1051 block_migration_flags)
1052
1053 live_migration_flags = self._handle_native_tls(
1054 live_migration_flags)
1055 block_migration_flags = self._handle_native_tls(
1056 block_migration_flags)
1057
1058 live_migration_flags = self._handle_live_migration_post_copy(
1059 live_migration_flags)
1060 block_migration_flags = self._handle_live_migration_post_copy(
1061 block_migration_flags)
1062
1063 live_migration_flags = self._handle_live_migration_auto_converge(
1064 live_migration_flags)
1065 block_migration_flags = self._handle_live_migration_auto_converge(
1066 block_migration_flags)
1067
1068 self._live_migration_flags = live_migration_flags
1069 self._block_migration_flags = block_migration_flags
1070
1071 # TODO(sahid): This method is targeted for removal when the tests
1072 # have been updated to avoid its use
1073 #
1074 # All libvirt API calls on the libvirt.Connect object should be
1075 # encapsulated by methods on the nova.virt.libvirt.host.Host
1076 # object, rather than directly invoking the libvirt APIs. The goal
1077 # is to avoid a direct dependency on the libvirt API from the
1078 # driver.py file.
1079 def _get_connection(self):
1080 return self._host.get_connection()
1081
1082 _conn = property(_get_connection)
1083
1084 @staticmethod
1085 def _uri():
1086 if CONF.libvirt.virt_type == 'uml':
1087 uri = CONF.libvirt.connection_uri or 'uml:///system'
1088 elif CONF.libvirt.virt_type == 'xen':
1089 uri = CONF.libvirt.connection_uri or 'xen:///'
1090 elif CONF.libvirt.virt_type == 'lxc':
1091 uri = CONF.libvirt.connection_uri or 'lxc:///'
1092 elif CONF.libvirt.virt_type == 'parallels':
1093 uri = CONF.libvirt.connection_uri or 'parallels:///system'
1094 else:
1095 uri = CONF.libvirt.connection_uri or 'qemu:///system'
1096 return uri
1097
1098 @staticmethod
1099 def _live_migration_uri(dest):
1100 uris = {
1101 'kvm': 'qemu+%(scheme)s://%(dest)s/system',
1102 'qemu': 'qemu+%(scheme)s://%(dest)s/system',
1103 'xen': 'xenmigr://%(dest)s/system',
1104 'parallels': 'parallels+tcp://%(dest)s/system',
1105 }
1106 dest = oslo_netutils.escape_ipv6(dest)
1107
1108 virt_type = CONF.libvirt.virt_type
1109 # TODO(pkoniszewski): Remove fetching live_migration_uri in Pike
1110 uri = CONF.libvirt.live_migration_uri
1111 if uri:
1112 return uri % dest
1113
1114 uri = uris.get(virt_type)
1115 if uri is None:
1116 raise exception.LiveMigrationURINotAvailable(virt_type=virt_type)
1117
1118 str_format = {
1119 'dest': dest,
1120 'scheme': CONF.libvirt.live_migration_scheme or 'tcp',
1121 }
1122 return uri % str_format
1123
1124 @staticmethod
1125 def _migrate_uri(dest):
1126 uri = None
1127 dest = oslo_netutils.escape_ipv6(dest)
1128
1129 # Only QEMU live migrations supports migrate-uri parameter
1130 virt_type = CONF.libvirt.virt_type
1131 if virt_type in ('qemu', 'kvm'):
1132 # QEMU accept two schemes: tcp and rdma. By default
1133 # libvirt build the URI using the remote hostname and the
1134 # tcp schema.
1135 uri = 'tcp://%s' % dest
1136 # Because dest might be of type unicode, here we might return value of
1137 # type unicode as well which is not acceptable by libvirt python
1138 # binding when Python 2.7 is in use, so let's convert it explicitly
1139 # back to string. When Python 3.x is in use, libvirt python binding
1140 # accepts unicode type so it is completely fine to do a no-op str(uri)
1141 # conversion which will return value of type unicode.
1142 return uri and str(uri)
1143
1144 def instance_exists(self, instance):
1145 """Efficient override of base instance_exists method."""
1146 try:
1147 self._host.get_guest(instance)
1148 return True
1149 except (exception.InternalError, exception.InstanceNotFound):
1150 return False
1151
1152 def list_instances(self):
1153 names = []
1154 for guest in self._host.list_guests(only_running=False):
1155 names.append(guest.name)
1156
1157 return names
1158
1159 def list_instance_uuids(self):
1160 uuids = []
1161 for guest in self._host.list_guests(only_running=False):
1162 uuids.append(guest.uuid)
1163
1164 return uuids
1165
1166 def plug_vifs(self, instance, network_info):
1167 """Plug VIFs into networks."""
1168 for vif in network_info:
1169 self.vif_driver.plug(instance, vif)
1170
1171 def _unplug_vifs(self, instance, network_info, ignore_errors):
1172 """Unplug VIFs from networks."""
1173 for vif in network_info:
1174 try:
1175 self.vif_driver.unplug(instance, vif)
1176 except exception.NovaException:
1177 if not ignore_errors:
1178 raise
1179
1180 def unplug_vifs(self, instance, network_info):
1181 self._unplug_vifs(instance, network_info, False)
1182
1183 def _teardown_container(self, instance):
1184 inst_path = libvirt_utils.get_instance_path(instance)
1185 container_dir = os.path.join(inst_path, 'rootfs')
1186 rootfs_dev = instance.system_metadata.get('rootfs_device_name')
1187 LOG.debug('Attempting to teardown container at path %(dir)s with '
1188 'root device: %(rootfs_dev)s',
1189 {'dir': container_dir, 'rootfs_dev': rootfs_dev},
1190 instance=instance)
1191 disk_api.teardown_container(container_dir, rootfs_dev)
1192
1193 def _destroy(self, instance, attempt=1):
1194 try:
1195 guest = self._host.get_guest(instance)
1196 if CONF.serial_console.enabled:
1197 # This method is called for several events: destroy,
1198 # rebuild, hard-reboot, power-off - For all of these
1199 # events we want to release the serial ports acquired
1200 # for the guest before destroying it.
1201 serials = self._get_serial_ports_from_guest(guest)
1202 for hostname, port in serials:
1203 serial_console.release_port(host=hostname, port=port)
1204 except exception.InstanceNotFound:
1205 guest = None
1206
1207 # If the instance is already terminated, we're still happy
1208 # Otherwise, destroy it
1209 old_domid = -1
1210 if guest is not None:
1211 try:
1212 old_domid = guest.id
1213 guest.poweroff()
1214
1215 except libvirt.libvirtError as e:
1216 is_okay = False
1217 errcode = e.get_error_code()
1218 if errcode == libvirt.VIR_ERR_NO_DOMAIN:
1219 # Domain already gone. This can safely be ignored.
1220 is_okay = True
1221 elif errcode == libvirt.VIR_ERR_OPERATION_INVALID:
1222 # If the instance is already shut off, we get this:
1223 # Code=55 Error=Requested operation is not valid:
1224 # domain is not running
1225
1226 state = guest.get_power_state(self._host)
1227 if state == power_state.SHUTDOWN:
1228 is_okay = True
1229 elif errcode == libvirt.VIR_ERR_INTERNAL_ERROR:
1230 errmsg = e.get_error_message()
1231 if (CONF.libvirt.virt_type == 'lxc' and
1232 errmsg == 'internal error: '
1233 'Some processes refused to die'):
1234 # Some processes in the container didn't die
1235 # fast enough for libvirt. The container will
1236 # eventually die. For now, move on and let
1237 # the wait_for_destroy logic take over.
1238 is_okay = True
1239 elif errcode == libvirt.VIR_ERR_OPERATION_TIMEOUT:
1240 LOG.warning("Cannot destroy instance, operation time out",
1241 instance=instance)
1242 reason = _("operation time out")
1243 raise exception.InstancePowerOffFailure(reason=reason)
1244 elif errcode == libvirt.VIR_ERR_SYSTEM_ERROR:
1245 if e.get_int1() == errno.EBUSY:
1246 # NOTE(danpb): When libvirt kills a process it sends it
1247 # SIGTERM first and waits 10 seconds. If it hasn't gone
1248 # it sends SIGKILL and waits another 5 seconds. If it
1249 # still hasn't gone then you get this EBUSY error.
1250 # Usually when a QEMU process fails to go away upon
1251 # SIGKILL it is because it is stuck in an
1252 # uninterruptible kernel sleep waiting on I/O from
1253 # some non-responsive server.
1254 # Given the CPU load of the gate tests though, it is
1255 # conceivable that the 15 second timeout is too short,
1256 # particularly if the VM running tempest has a high
1257 # steal time from the cloud host. ie 15 wallclock
1258 # seconds may have passed, but the VM might have only
1259 # have a few seconds of scheduled run time.
1260 #
1261 # TODO(kchamart): Once MIN_LIBVIRT_VERSION
1262 # reaches v4.7.0, (a) rewrite the above note,
1263 # and (b) remove the following code that retries
1264 # _destroy() API call (which gives SIGKILL 30
1265 # seconds to take effect) -- because from v4.7.0
1266 # onwards, libvirt _automatically_ increases the
1267 # timeout to 30 seconds. This was added in the
1268 # following libvirt commits:
1269 #
1270 # - 9a4e4b942 (process: wait longer 5->30s on
1271 # hard shutdown)
1272 #
1273 # - be2ca0444 (process: wait longer on kill
1274 # per assigned Hostdev)
1275 with excutils.save_and_reraise_exception() as ctxt:
1276 if not self._host.has_min_version(
1277 MIN_LIBVIRT_BETTER_SIGKILL_HANDLING):
1278 LOG.warning('Error from libvirt during '
1279 'destroy. Code=%(errcode)s '
1280 'Error=%(e)s; attempt '
1281 '%(attempt)d of 6 ',
1282 {'errcode': errcode, 'e': e,
1283 'attempt': attempt},
1284 instance=instance)
1285 # Try up to 6 times before giving up.
1286 if attempt < 6:
1287 ctxt.reraise = False
1288 self._destroy(instance, attempt + 1)
1289 return
1290
1291 if not is_okay:
1292 with excutils.save_and_reraise_exception():
1293 LOG.error('Error from libvirt during destroy. '
1294 'Code=%(errcode)s Error=%(e)s',
1295 {'errcode': errcode, 'e': e},
1296 instance=instance)
1297
1298 def _wait_for_destroy(expected_domid):
1299 """Called at an interval until the VM is gone."""
1300 # NOTE(vish): If the instance disappears during the destroy
1301 # we ignore it so the cleanup can still be
1302 # attempted because we would prefer destroy to
1303 # never fail.
1304 try:
1305 dom_info = self.get_info(instance)
1306 state = dom_info.state
1307 new_domid = dom_info.internal_id
1308 except exception.InstanceNotFound:
1309 LOG.debug("During wait destroy, instance disappeared.",
1310 instance=instance)
1311 state = power_state.SHUTDOWN
1312
1313 if state == power_state.SHUTDOWN:
1314 LOG.info("Instance destroyed successfully.", instance=instance)
1315 raise loopingcall.LoopingCallDone()
1316
1317 # NOTE(wangpan): If the instance was booted again after destroy,
1318 # this may be an endless loop, so check the id of
1319 # domain here, if it changed and the instance is
1320 # still running, we should destroy it again.
1321 # see https://bugs.launchpad.net/nova/+bug/1111213 for more details
1322 if new_domid != expected_domid:
1323 LOG.info("Instance may be started again.", instance=instance)
1324 kwargs['is_running'] = True
1325 raise loopingcall.LoopingCallDone()
1326
1327 kwargs = {'is_running': False}
1328 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_destroy,
1329 old_domid)
1330 timer.start(interval=0.5).wait()
1331 if kwargs['is_running']:
1332 LOG.info("Going to destroy instance again.", instance=instance)
1333 self._destroy(instance)
1334 else:
1335 # NOTE(GuanQiang): teardown container to avoid resource leak
1336 if CONF.libvirt.virt_type == 'lxc':
1337 self._teardown_container(instance)
1338
1339 def destroy(self, context, instance, network_info, block_device_info=None,
1340 destroy_disks=True):
1341 self._destroy(instance)
1342 self.cleanup(context, instance, network_info, block_device_info,
1343 destroy_disks)
1344
1345 def _undefine_domain(self, instance):
1346 try:
1347 guest = self._host.get_guest(instance)
1348 try:
1349 hw_firmware_type = instance.image_meta.properties.get(
1350 'hw_firmware_type')
1351 support_uefi = self._check_uefi_support(hw_firmware_type)
1352 guest.delete_configuration(support_uefi)
1353 except libvirt.libvirtError as e:
1354 with excutils.save_and_reraise_exception() as ctxt:
1355 errcode = e.get_error_code()
1356 if errcode == libvirt.VIR_ERR_NO_DOMAIN:
1357 LOG.debug("Called undefine, but domain already gone.",
1358 instance=instance)
1359 ctxt.reraise = False
1360 else:
1361 LOG.error('Error from libvirt during undefine. '
1362 'Code=%(errcode)s Error=%(e)s',
1363 {'errcode': errcode,
1364 'e': encodeutils.exception_to_unicode(e)},
1365 instance=instance)
1366 except exception.InstanceNotFound:
1367 pass
1368
1369 def cleanup(self, context, instance, network_info, block_device_info=None,
1370 destroy_disks=True, migrate_data=None, destroy_vifs=True):
1371 """Cleanup the instance from the host.
1372
1373 Identify if the instance disks and instance path should be removed
1374 from the host before calling down into the _cleanup method for the
1375 actual removal of resources from the host.
1376
1377 :param context: security context
1378 :param instance: instance object for the instance being cleaned up
1379 :param network_info: instance network information
1380 :param block_device_info: optional instance block device information
1381 :param destroy_disks: if local ephemeral disks should be destroyed
1382 :param migrate_data: optional migrate_data object
1383 :param destroy_vifs: if plugged vifs should be unplugged
1384 """
1385 cleanup_instance_dir = False
1386 cleanup_instance_disks = False
1387 # We assume destroy_disks means destroy instance directory and disks
1388 if destroy_disks:
1389 cleanup_instance_dir = True
1390 cleanup_instance_disks = True
1391 else:
1392 # NOTE(mdbooth): I think the theory here was that if this is a
1393 # migration with shared block storage then we need to delete the
1394 # instance directory because that's not shared. I'm pretty sure
1395 # this is wrong.
1396 if migrate_data and 'is_shared_block_storage' in migrate_data:
1397 cleanup_instance_dir = migrate_data.is_shared_block_storage
1398
1399 # NOTE(lyarwood): The following workaround allows operators to
1400 # ensure that non-shared instance directories are removed after an
1401 # evacuation or revert resize when using the shared RBD
1402 # imagebackend. This workaround is not required when cleaning up
1403 # migrations that provide migrate_data to this method as the
1404 # existing is_shared_block_storage conditional will cause the
1405 # instance directory to be removed.
1406 if not cleanup_instance_dir:
1407 if CONF.workarounds.ensure_libvirt_rbd_instance_dir_cleanup:
1408 cleanup_instance_dir = CONF.libvirt.images_type == 'rbd'
1409
1410 return self._cleanup(
1411 context, instance, network_info,
1412 block_device_info=block_device_info,
1413 destroy_vifs=destroy_vifs,
1414 cleanup_instance_dir=cleanup_instance_dir,
1415 cleanup_instance_disks=cleanup_instance_disks)
1416
1417 def _cleanup(self, context, instance, network_info, block_device_info=None,
1418 destroy_vifs=True, cleanup_instance_dir=False,
1419 cleanup_instance_disks=False):
1420 """Cleanup the domain and any attached resources from the host.
1421
1422 This method cleans up any pmem devices, unplugs VIFs, disconnects
1423 attached volumes and undefines the instance domain within libvirt.
1424 It also optionally removes the ephemeral disks and the instance
1425 directory from the host depending on the cleanup_instance_dir|disks
1426 kwargs provided.
1427
1428 :param context: security context
1429 :param instance: instance object for the instance being cleaned up
1430 :param network_info: instance network information
1431 :param block_device_info: optional instance block device information
1432 :param destroy_vifs: if plugged vifs should be unplugged
1433 :param cleanup_instance_dir: If the instance dir should be removed
1434 :param cleanup_instance_disks: If the instance disks should be removed
1435 """
1436 # zero the data on backend pmem device
1437 vpmems = self._get_vpmems(instance)
1438 if vpmems:
1439 self._cleanup_vpmems(vpmems)
1440
1441 if destroy_vifs:
1442 self._unplug_vifs(instance, network_info, True)
1443
1444 # FIXME(wangpan): if the instance is booted again here, such as the
1445 # soft reboot operation boot it here, it will become
1446 # "running deleted", should we check and destroy it
1447 # at the end of this method?
1448
1449 # NOTE(vish): we disconnect from volumes regardless
1450 block_device_mapping = driver.block_device_info_get_mapping(
1451 block_device_info)
1452 for vol in block_device_mapping:
1453 connection_info = vol['connection_info']
1454 if not connection_info:
1455 # if booting from a volume, creation could have failed meaning
1456 # this would be unset
1457 continue
1458
1459 disk_dev = vol['mount_device']
1460 if disk_dev is not None:
1461 disk_dev = disk_dev.rpartition("/")[2]
1462 try:
1463 self._disconnect_volume(context, connection_info, instance)
1464 except Exception as exc:
1465 with excutils.save_and_reraise_exception() as ctxt:
1466 if cleanup_instance_disks:
1467 # Don't block on Volume errors if we're trying to
1468 # delete the instance as we may be partially created
1469 # or deleted
1470 ctxt.reraise = False
1471 LOG.warning(
1472 "Ignoring Volume Error on vol %(vol_id)s "
1473 "during delete %(exc)s",
1474 {'vol_id': vol.get('volume_id'),
1475 'exc': encodeutils.exception_to_unicode(exc)},
1476 instance=instance)
1477
1478 if cleanup_instance_disks:
1479 # NOTE(haomai): destroy volumes if needed
1480 if CONF.libvirt.images_type == 'lvm':
1481 self._cleanup_lvm(instance, block_device_info)
1482 if CONF.libvirt.images_type == 'rbd':
1483 self._cleanup_rbd(instance)
1484
1485 if cleanup_instance_dir:
1486 attempts = int(instance.system_metadata.get('clean_attempts',
1487 '0'))
1488 success = self.delete_instance_files(instance)
1489 # NOTE(mriedem): This is used in the _run_pending_deletes periodic
1490 # task in the compute manager. The tight coupling is not great...
1491 instance.system_metadata['clean_attempts'] = str(attempts + 1)
1492 if success:
1493 instance.cleaned = True
1494 instance.save()
1495
1496 if cleanup_instance_disks:
1497 crypto.delete_vtpm_secret(context, instance)
1498
1499 self._undefine_domain(instance)
1500
1501 def cleanup_lingering_instance_resources(self, instance):
1502 # zero the data on backend pmem device, if fails
1503 # it will raise an exception
1504 vpmems = self._get_vpmems(instance)
1505 if vpmems:
1506 self._cleanup_vpmems(vpmems)
1507
1508 def _cleanup_vpmems(self, vpmems):
1509 for vpmem in vpmems:
1510 try:
1511 nova.privsep.libvirt.cleanup_vpmem(vpmem.devpath)
1512 except Exception as e:
1513 raise exception.VPMEMCleanupFailed(dev=vpmem.devpath,
1514 error=e)
1515
1516 def _get_serial_ports_from_guest(self, guest, mode=None):
1517 """Returns an iterator over serial port(s) configured on guest.
1518
1519 :param mode: Should be a value in (None, bind, connect)
1520 """
1521 xml = guest.get_xml_desc()
1522 tree = etree.fromstring(xml)
1523
1524 # The 'serial' device is the base for x86 platforms. Other platforms
1525 # (e.g. kvm on system z = S390X) can only use 'console' devices.
1526 xpath_mode = "[@mode='%s']" % mode if mode else ""
1527 serial_tcp = "./devices/serial[@type='tcp']/source" + xpath_mode
1528 console_tcp = "./devices/console[@type='tcp']/source" + xpath_mode
1529
1530 tcp_devices = tree.findall(serial_tcp)
1531 if len(tcp_devices) == 0:
1532 tcp_devices = tree.findall(console_tcp)
1533 for source in tcp_devices:
1534 yield (source.get("host"), int(source.get("service")))
1535
1536 def _get_scsi_controller_next_unit(self, guest):
1537 """Returns the max disk unit used by scsi controller"""
1538 xml = guest.get_xml_desc()
1539 tree = etree.fromstring(xml)
1540 addrs = "./devices/disk[target/@bus='scsi']/address[@type='drive']"
1541
1542 ret = []
1543 for obj in tree.xpath(addrs):
1544 ret.append(int(obj.get('unit', 0)))
1545 return max(ret) + 1 if ret else 0
1546
1547 def _cleanup_rbd(self, instance):
1548 # NOTE(nic): On revert_resize, the cleanup steps for the root
1549 # volume are handled with an "rbd snap rollback" command,
1550 # and none of this is needed (and is, in fact, harmful) so
1551 # filter out non-ephemerals from the list
1552 if instance.task_state == task_states.RESIZE_REVERTING:
1553 filter_fn = lambda disk: (disk.startswith(instance.uuid) and
1554 disk.endswith('disk.local'))
1555 else:
1556 filter_fn = lambda disk: disk.startswith(instance.uuid)
1557 rbd_utils.RBDDriver().cleanup_volumes(filter_fn)
1558
1559 def _cleanup_lvm(self, instance, block_device_info):
1560 """Delete all LVM disks for given instance object."""
1561 if instance.get('ephemeral_key_uuid') is not None:
1562 # detach encrypted volumes
1563 disks = self._get_instance_disk_info(instance, block_device_info)
1564 for disk in disks:
1565 if dmcrypt.is_encrypted(disk['path']):
1566 dmcrypt.delete_volume(disk['path'])
1567
1568 disks = self._lvm_disks(instance)
1569 if disks:
1570 lvm.remove_volumes(disks)
1571
1572 def _lvm_disks(self, instance):
1573 """Returns all LVM disks for given instance object."""
1574 if CONF.libvirt.images_volume_group:
1575 vg = os.path.join('/dev', CONF.libvirt.images_volume_group)
1576 if not os.path.exists(vg):
1577 return []
1578 pattern = '%s_' % instance.uuid
1579
1580 def belongs_to_instance(disk):
1581 return disk.startswith(pattern)
1582
1583 def fullpath(name):
1584 return os.path.join(vg, name)
1585
1586 logical_volumes = lvm.list_volumes(vg)
1587
1588 disks = [fullpath(disk) for disk in logical_volumes
1589 if belongs_to_instance(disk)]
1590 return disks
1591 return []
1592
1593 def get_volume_connector(self, instance):
1594 root_helper = utils.get_root_helper()
1595 return connector.get_connector_properties(
1596 root_helper, CONF.my_block_storage_ip,
1597 CONF.libvirt.volume_use_multipath,
1598 enforce_multipath=True,
1599 host=CONF.host)
1600
1601 def _cleanup_resize_vtpm(
1602 self,
1603 context: nova_context.RequestContext,
1604 instance: 'objects.Instance',
1605 ) -> None:
1606 """Handle vTPM when confirming a migration or resize.
1607
1608 If the old flavor have vTPM and the new one doesn't, there are keys to
1609 be deleted.
1610 """
1611 old_vtpm_config = hardware.get_vtpm_constraint(
1612 instance.old_flavor, instance.image_meta)
1613 new_vtpm_config = hardware.get_vtpm_constraint(
1614 instance.new_flavor, instance.image_meta)
1615
1616 if old_vtpm_config and not new_vtpm_config:
1617 # the instance no longer cares for its vTPM so delete the related
1618 # secret; the deletion of the instance directory and undefining of
1619 # the domain will take care of the TPM files themselves
1620 LOG.info('New flavor no longer requests vTPM; deleting secret.')
1621 crypto.delete_vtpm_secret(context, instance)
1622
1623 # TODO(stephenfin): Fold this back into its only caller, cleanup_resize
1624 def _cleanup_resize(self, context, instance, network_info):
1625 inst_base = libvirt_utils.get_instance_path(instance)
1626 target = inst_base + '_resize'
1627
1628 # zero the data on backend old pmem device
1629 vpmems = self._get_vpmems(instance, prefix='old')
1630 if vpmems:
1631 self._cleanup_vpmems(vpmems)
1632
1633 # Remove any old vTPM data, if necessary
1634 self._cleanup_resize_vtpm(context, instance)
1635
1636 # Deletion can fail over NFS, so retry the deletion as required.
1637 # Set maximum attempt as 5, most test can remove the directory
1638 # for the second time.
1639 attempts = 0
1640 while(os.path.exists(target) and attempts < 5):
1641 shutil.rmtree(target, ignore_errors=True)
1642 if os.path.exists(target):
1643 time.sleep(random.randint(20, 200) / 100.0)
1644 attempts += 1
1645
1646 # NOTE(mriedem): Some image backends will recreate the instance path
1647 # and disk.info during init, and all we need the root disk for
1648 # here is removing cloned snapshots which is backend-specific, so
1649 # check that first before initializing the image backend object. If
1650 # there is ever an image type that supports clone *and* re-creates
1651 # the instance directory and disk.info on init, this condition will
1652 # need to be re-visited to make sure that backend doesn't re-create
1653 # the disk. Refer to bugs: 1666831 1728603 1769131
1654 if self.image_backend.backend(CONF.libvirt.images_type).SUPPORTS_CLONE:
1655 root_disk = self.image_backend.by_name(instance, 'disk')
1656 if root_disk.exists():
1657 root_disk.remove_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
1658
1659 if instance.host != CONF.host:
1660 self._undefine_domain(instance)
1661 self.unplug_vifs(instance, network_info)
1662
1663 def _get_volume_driver(self, connection_info):
1664 driver_type = connection_info.get('driver_volume_type')
1665 if driver_type not in self.volume_drivers:
1666 raise exception.VolumeDriverNotFound(driver_type=driver_type)
1667 return self.volume_drivers[driver_type]
1668
1669 def _connect_volume(self, context, connection_info, instance,
1670 encryption=None):
1671 vol_driver = self._get_volume_driver(connection_info)
1672 vol_driver.connect_volume(connection_info, instance)
1673 try:
1674 self._attach_encryptor(context, connection_info, encryption)
1675 except Exception:
1676 # Encryption failed so rollback the volume connection.
1677 with excutils.save_and_reraise_exception(logger=LOG):
1678 LOG.exception("Failure attaching encryptor; rolling back "
1679 "volume connection", instance=instance)
1680 vol_driver.disconnect_volume(connection_info, instance)
1681
1682 def _should_disconnect_target(self, context, instance, multiattach,
1683 vol_driver, volume_id):
1684 # NOTE(jdg): Multiattach is a special case (not to be confused
1685 # with shared_targets). With multiattach we may have a single volume
1686 # attached multiple times to *this* compute node (ie Server-1 and
1687 # Server-2). So, if we receive a call to delete the attachment for
1688 # Server-1 we need to take special care to make sure that the Volume
1689 # isn't also attached to another Server on this Node. Otherwise we
1690 # will indiscriminantly delete the connection for all Server and that's
1691 # no good. So check if it's attached multiple times on this node
1692 # if it is we skip the call to brick to delete the connection.
1693 if not multiattach:
1694 return True
1695
1696 # NOTE(deiter): Volume drivers using _HostMountStateManager are another
1697 # special case. _HostMountStateManager ensures that the compute node
1698 # only attempts to mount a single mountpoint in use by multiple
1699 # attachments once, and that it is not unmounted until it is no longer
1700 # in use by any attachments. So we can skip the multiattach check for
1701 # volume drivers that based on LibvirtMountedFileSystemVolumeDriver.
1702 if isinstance(vol_driver, fs.LibvirtMountedFileSystemVolumeDriver):
1703 return True
1704
1705 connection_count = 0
1706 volume = self._volume_api.get(context, volume_id)
1707 attachments = volume.get('attachments', {})
1708 if len(attachments) > 1:
1709 # First we get a list of all Server UUID's associated with
1710 # this Host (Compute Node). We're going to use this to
1711 # determine if the Volume being detached is also in-use by
1712 # another Server on this Host, ie just check to see if more
1713 # than one attachment.server_id for this volume is in our
1714 # list of Server UUID's for this Host
1715 servers_this_host = objects.InstanceList.get_uuids_by_host(
1716 context, instance.host)
1717
1718 # NOTE(jdg): nova.volume.cinder translates the
1719 # volume['attachments'] response into a dict which includes
1720 # the Server UUID as the key, so we're using that
1721 # here to check against our server_this_host list
1722 for server_id, data in attachments.items():
1723 if server_id in servers_this_host:
1724 connection_count += 1
1725 return (False if connection_count > 1 else True)
1726
1727 def _disconnect_volume(self, context, connection_info, instance,
1728 encryption=None):
1729 self._detach_encryptor(context, connection_info, encryption=encryption)
1730 vol_driver = self._get_volume_driver(connection_info)
1731 volume_id = driver_block_device.get_volume_id(connection_info)
1732 multiattach = connection_info.get('multiattach', False)
1733 if self._should_disconnect_target(
1734 context, instance, multiattach, vol_driver, volume_id):
1735 vol_driver.disconnect_volume(connection_info, instance)
1736 else:
1737 LOG.info('Detected multiple connections on this host for '
1738 'volume: %(volume)s, skipping target disconnect.',
1739 {'volume': volume_id})
1740
1741 def _extend_volume(self, connection_info, instance, requested_size):
1742 vol_driver = self._get_volume_driver(connection_info)
1743 return vol_driver.extend_volume(connection_info, instance,
1744 requested_size)
1745
1746 def _allow_native_luksv1(self, encryption=None):
1747 """Check if QEMU's native LUKSv1 decryption should be used.
1748 """
1749 # NOTE(lyarwood): Native LUKSv1 decryption can be disabled via a
1750 # workarounds configurable in order to aviod known performance issues
1751 # with the libgcrypt lib.
1752 if CONF.workarounds.disable_native_luksv1:
1753 return False
1754
1755 # NOTE(lyarwood): Ensure the LUKSv1 provider is used.
1756 provider = None
1757 if encryption:
1758 provider = encryption.get('provider', None)
1759 if provider in encryptors.LEGACY_PROVIDER_CLASS_TO_FORMAT_MAP:
1760 provider = encryptors.LEGACY_PROVIDER_CLASS_TO_FORMAT_MAP[provider]
1761 return provider == encryptors.LUKS
1762
1763 def _get_volume_config(self, connection_info, disk_info):
1764 vol_driver = self._get_volume_driver(connection_info)
1765 conf = vol_driver.get_config(connection_info, disk_info)
1766 self._set_cache_mode(conf)
1767 return conf
1768
1769 def _get_volume_encryptor(self, connection_info, encryption):
1770 root_helper = utils.get_root_helper()
1771 return encryptors.get_volume_encryptor(root_helper=root_helper,
1772 keymgr=key_manager.API(CONF),
1773 connection_info=connection_info,
1774 **encryption)
1775
1776 def _get_volume_encryption(self, context, connection_info):
1777 """Get the encryption metadata dict if it is not provided
1778 """
1779 encryption = {}
1780 volume_id = driver_block_device.get_volume_id(connection_info)
1781 if volume_id:
1782 encryption = encryptors.get_encryption_metadata(context,
1783 self._volume_api, volume_id, connection_info)
1784 return encryption
1785
1786 def _attach_encryptor(self, context, connection_info, encryption):
1787 """Attach the frontend encryptor if one is required by the volume.
1788
1789 The request context is only used when an encryption metadata dict is
1790 not provided. The encryption metadata dict being populated is then used
1791 to determine if an attempt to attach the encryptor should be made.
1792
1793 """
1794 if encryption is None:
1795 encryption = self._get_volume_encryption(context, connection_info)
1796
1797 if encryption and self._allow_native_luksv1(encryption=encryption):
1798 # NOTE(lyarwood): Fetch the associated key for the volume and
1799 # decode the passphrase from the key.
1800 # FIXME(lyarwood): c-vol currently creates symmetric keys for use
1801 # with volumes, leading to the binary to hex to string conversion
1802 # below.
1803 keymgr = key_manager.API(CONF)
1804 key = keymgr.get(context, encryption['encryption_key_id'])
1805 key_encoded = key.get_encoded()
1806 passphrase = binascii.hexlify(key_encoded).decode('utf-8')
1807
1808 # NOTE(lyarwood): Retain the behaviour of the original os-brick
1809 # encryptors and format any volume that does not identify as
1810 # encrypted with LUKS.
1811 # FIXME(lyarwood): Remove this once c-vol correctly formats
1812 # encrypted volumes during their initial creation:
1813 # https://bugs.launchpad.net/cinder/+bug/1739442
1814 device_path = connection_info.get('data').get('device_path')
1815 if device_path:
1816 root_helper = utils.get_root_helper()
1817 if not luks_encryptor.is_luks(root_helper, device_path):
1818 encryptor = self._get_volume_encryptor(connection_info,
1819 encryption)
1820 encryptor._format_volume(passphrase, **encryption)
1821
1822 # NOTE(lyarwood): Store the passphrase as a libvirt secret locally
1823 # on the compute node. This secret is used later when generating
1824 # the volume config.
1825 volume_id = driver_block_device.get_volume_id(connection_info)
1826 self._host.create_secret('volume', volume_id, password=passphrase)
1827 elif encryption:
1828 encryptor = self._get_volume_encryptor(connection_info,
1829 encryption)
1830 encryptor.attach_volume(context, **encryption)
1831
1832 def _detach_encryptor(self, context, connection_info, encryption):
1833 """Detach the frontend encryptor if one is required by the volume.
1834
1835 The request context is only used when an encryption metadata dict is
1836 not provided. The encryption metadata dict being populated is then used
1837 to determine if an attempt to detach the encryptor should be made.
1838
1839 If native LUKS decryption is enabled then delete previously created
1840 Libvirt volume secret from the host.
1841 """
1842 volume_id = driver_block_device.get_volume_id(connection_info)
1843 if volume_id and self._host.find_secret('volume', volume_id):
1844 return self._host.delete_secret('volume', volume_id)
1845 if encryption is None:
1846 encryption = self._get_volume_encryption(context, connection_info)
1847 # NOTE(lyarwood): Handle bug #1821696 where volume secrets have been
1848 # removed manually by returning if a LUKS provider is being used
1849 # and device_path is not present in the connection_info. This avoids
1850 # VolumeEncryptionNotSupported being thrown when we incorrectly build
1851 # the encryptor below due to the secrets not being present above.
1852 if (encryption and self._allow_native_luksv1(encryption=encryption) and
1853 not connection_info['data'].get('device_path')):
1854 return
1855 if encryption:
1856 encryptor = self._get_volume_encryptor(connection_info,
1857 encryption)
1858 encryptor.detach_volume(**encryption)
1859
1860 def _check_discard_for_attach_volume(self, conf, instance):
1861 """Perform some checks for volumes configured for discard support.
1862
1863 If discard is configured for the volume, and the guest is using a
1864 configuration known to not work, we will log a message explaining
1865 the reason why.
1866 """
1867 if conf.driver_discard == 'unmap' and conf.target_bus == 'virtio':
1868 LOG.debug('Attempting to attach volume %(id)s with discard '
1869 'support enabled to an instance using an '
1870 'unsupported configuration. target_bus = '
1871 '%(bus)s. Trim commands will not be issued to '
1872 'the storage device.',
1873 {'bus': conf.target_bus,
1874 'id': conf.serial},
1875 instance=instance)
1876
1877 def attach_volume(self, context, connection_info, instance, mountpoint,
1878 disk_bus=None, device_type=None, encryption=None):
1879 guest = self._host.get_guest(instance)
1880
1881 disk_dev = mountpoint.rpartition("/")[2]
1882 bdm = {
1883 'device_name': disk_dev,
1884 'disk_bus': disk_bus,
1885 'device_type': device_type}
1886
1887 # Note(cfb): If the volume has a custom block size, check that
1888 # that we are using QEMU/KVM and libvirt >= 0.10.2. The
1889 # presence of a block size is considered mandatory by
1890 # cinder so we fail if we can't honor the request.
1891 data = {}
1892 if ('data' in connection_info):
1893 data = connection_info['data']
1894 if ('logical_block_size' in data or 'physical_block_size' in data):
1895 if ((CONF.libvirt.virt_type != "kvm" and
1896 CONF.libvirt.virt_type != "qemu")):
1897 msg = _("Volume sets block size, but the current "
1898 "libvirt hypervisor '%s' does not support custom "
1899 "block size") % CONF.libvirt.virt_type
1900 raise exception.InvalidHypervisorType(msg)
1901
1902 self._connect_volume(context, connection_info, instance,
1903 encryption=encryption)
1904 disk_info = blockinfo.get_info_from_bdm(
1905 instance, CONF.libvirt.virt_type, instance.image_meta, bdm)
1906 if disk_info['bus'] == 'scsi':
1907 disk_info['unit'] = self._get_scsi_controller_next_unit(guest)
1908
1909 conf = self._get_volume_config(connection_info, disk_info)
1910
1911 self._check_discard_for_attach_volume(conf, instance)
1912
1913 try:
1914 state = guest.get_power_state(self._host)
1915 live = state in (power_state.RUNNING, power_state.PAUSED)
1916
1917 guest.attach_device(conf, persistent=True, live=live)
1918 # NOTE(artom) If we're attaching with a device role tag, we need to
1919 # rebuild device_metadata. If we're attaching without a role
1920 # tag, we're rebuilding it here needlessly anyways. This isn't a
1921 # massive deal, and it helps reduce code complexity by not having
1922 # to indicate to the virt driver that the attach is tagged. The
1923 # really important optimization of not calling the database unless
1924 # device_metadata has actually changed is done for us by
1925 # instance.save().
1926 instance.device_metadata = self._build_device_metadata(
1927 context, instance)
1928 instance.save()
1929 except Exception:
1930 LOG.exception('Failed to attach volume at mountpoint: %s',
1931 mountpoint, instance=instance)
1932 with excutils.save_and_reraise_exception():
1933 self._disconnect_volume(context, connection_info, instance,
1934 encryption=encryption)
1935
1936 def _swap_volume(self, guest, disk_dev, conf, resize_to, hw_firmware_type):
1937 """Swap existing disk with a new block device.
1938
1939 Call virDomainBlockRebase or virDomainBlockCopy with Libvirt >= 6.0.0
1940 to copy and then pivot to a new volume.
1941
1942 :param: guest: Guest object representing the guest domain
1943 :param: disk_dev: Device within the domain that is being swapped
1944 :param: conf: LibvirtConfigGuestDisk object representing the new volume
1945 :param: resize_to: Size of the dst volume, 0 if the same as the src
1946 :param: hw_firmware_type: fields.FirmwareType if set in the imagemeta
1947 """
1948 dev = guest.get_block_device(disk_dev)
1949
1950 # Save a copy of the domain's persistent XML file. We'll use this
1951 # to redefine the domain if anything fails during the volume swap.
1952 xml = guest.get_xml_desc(dump_inactive=True, dump_sensitive=True)
1953
1954 # Abort is an idempotent operation, so make sure any block
1955 # jobs which may have failed are ended.
1956 try:
1957 dev.abort_job()
1958 except Exception:
1959 pass
1960
1961 try:
1962 # NOTE (rmk): virDomainBlockRebase and virDomainBlockCopy cannot be
1963 # executed on persistent domains, so we need to temporarily
1964 # undefine it. If any part of this block fails, the domain is
1965 # re-defined regardless.
1966 if guest.has_persistent_configuration():
1967 support_uefi = self._check_uefi_support(hw_firmware_type)
1968 guest.delete_configuration(support_uefi)
1969
1970 try:
1971 # NOTE(lyarwood): Use virDomainBlockCopy from libvirt >= 6.0.0
1972 # and QEMU >= 4.2.0 with -blockdev domains allowing QEMU to
1973 # copy to remote disks.
1974 if self._host.has_min_version(lv_ver=MIN_LIBVIRT_BLOCKDEV,
1975 hv_ver=MIN_QEMU_BLOCKDEV):
1976 dev.copy(conf.to_xml(), reuse_ext=True)
1977 else:
1978 # TODO(lyarwood): Remove the following use of
1979 # virDomainBlockRebase once MIN_LIBVIRT_VERSION hits >=
1980 # 6.0.0 and MIN_QEMU_VERSION hits >= 4.2.0.
1981 # Start copy with VIR_DOMAIN_BLOCK_REBASE_REUSE_EXT flag to
1982 # allow writing to existing external volume file. Use
1983 # VIR_DOMAIN_BLOCK_REBASE_COPY_DEV if it's a block device
1984 # to make sure XML is generated correctly (bug 1691195)
1985 copy_dev = conf.source_type == 'block'
1986 dev.rebase(conf.source_path, copy=True, reuse_ext=True,
1987 copy_dev=copy_dev)
1988 while not dev.is_job_complete():
1989 time.sleep(0.5)
1990
1991 dev.abort_job(pivot=True)
1992
1993 except Exception as exc:
1994 # NOTE(lyarwood): conf.source_path is not set for RBD disks so
1995 # fallback to conf.target_dev when None.
1996 new_path = conf.source_path or conf.target_dev
1997 old_path = disk_dev
1998 LOG.exception("Failure rebasing volume %(new_path)s on "
1999 "%(old_path)s.", {'new_path': new_path,
2000 'old_path': old_path})
2001 raise exception.VolumeRebaseFailed(reason=six.text_type(exc))
2002
2003 if resize_to:
2004 dev.resize(resize_to * units.Gi)
2005
2006 # Make sure we will redefine the domain using the updated
2007 # configuration after the volume was swapped. The dump_inactive
2008 # keyword arg controls whether we pull the inactive (persistent)
2009 # or active (live) config from the domain. We want to pull the
2010 # live config after the volume was updated to use when we redefine
2011 # the domain.
2012 xml = guest.get_xml_desc(dump_inactive=False, dump_sensitive=True)
2013 finally:
2014 self._host.write_instance_config(xml)
2015
2016 def swap_volume(self, context, old_connection_info,
2017 new_connection_info, instance, mountpoint, resize_to):
2018
2019 # NOTE(lyarwood): https://bugzilla.redhat.com/show_bug.cgi?id=760547
2020 old_encrypt = self._get_volume_encryption(context, old_connection_info)
2021 new_encrypt = self._get_volume_encryption(context, new_connection_info)
2022 if ((old_encrypt and self._allow_native_luksv1(old_encrypt)) or
2023 (new_encrypt and self._allow_native_luksv1(new_encrypt))):
2024 raise NotImplementedError(_("Swap volume is not supported for "
2025 "encrypted volumes when native LUKS decryption is enabled."))
2026
2027 guest = self._host.get_guest(instance)
2028
2029 disk_dev = mountpoint.rpartition("/")[2]
2030 if not guest.get_disk(disk_dev):
2031 raise exception.DiskNotFound(location=disk_dev)
2032 disk_info = {
2033 'dev': disk_dev,
2034 'bus': blockinfo.get_disk_bus_for_disk_dev(
2035 CONF.libvirt.virt_type, disk_dev),
2036 'type': 'disk',
2037 }
2038 # NOTE (lyarwood): new_connection_info will be modified by the
2039 # following _connect_volume call down into the volume drivers. The
2040 # majority of the volume drivers will add a device_path that is in turn
2041 # used by _get_volume_config to set the source_path of the
2042 # LibvirtConfigGuestDisk object it returns. We do not explicitly save
2043 # this to the BDM here as the upper compute swap_volume method will
2044 # eventually do this for us.
2045 self._connect_volume(context, new_connection_info, instance)
2046 conf = self._get_volume_config(new_connection_info, disk_info)
2047 if (not conf.source_path and not
2048 self._host.has_min_version(lv_ver=MIN_LIBVIRT_BLOCKDEV,
2049 hv_ver=MIN_QEMU_BLOCKDEV)):
2050 self._disconnect_volume(context, new_connection_info, instance)
2051 raise NotImplementedError(_("Swap only supports host devices and "
2052 "files with Libvirt < 6.0.0 or QEMU "
2053 "< 4.2.0"))
2054
2055 hw_firmware_type = instance.image_meta.properties.get(
2056 'hw_firmware_type')
2057
2058 try:
2059 self._swap_volume(guest, disk_dev, conf,
2060 resize_to, hw_firmware_type)
2061 except exception.VolumeRebaseFailed:
2062 with excutils.save_and_reraise_exception():
2063 self._disconnect_volume(context, new_connection_info, instance)
2064
2065 self._disconnect_volume(context, old_connection_info, instance)
2066
2067 def _get_existing_domain_xml(self, instance, network_info,
2068 block_device_info=None):
2069 try:
2070 guest = self._host.get_guest(instance)
2071 xml = guest.get_xml_desc()
2072 except exception.InstanceNotFound:
2073 disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
2074 instance,
2075 instance.image_meta,
2076 block_device_info)
2077 xml = self._get_guest_xml(nova_context.get_admin_context(),
2078 instance, network_info, disk_info,
2079 instance.image_meta,
2080 block_device_info=block_device_info)
2081 return xml
2082
2083 def detach_volume(self, context, connection_info, instance, mountpoint,
2084 encryption=None):
2085 disk_dev = mountpoint.rpartition("/")[2]
2086 try:
2087 guest = self._host.get_guest(instance)
2088
2089 state = guest.get_power_state(self._host)
2090 live = state in (power_state.RUNNING, power_state.PAUSED)
2091 # NOTE(lyarwood): The volume must be detached from the VM before
2092 # detaching any attached encryptors or disconnecting the underlying
2093 # volume in _disconnect_volume. Otherwise, the encryptor or volume
2094 # driver may report that the volume is still in use.
2095 supports_device_missing = self._host.has_min_version(
2096 MIN_LIBVIRT_VIR_ERR_DEVICE_MISSING)
2097 wait_for_detach = guest.detach_device_with_retry(
2098 guest.get_disk, disk_dev, live=live,
2099 supports_device_missing_error_code=supports_device_missing)
2100 wait_for_detach()
2101
2102 except exception.InstanceNotFound:
2103 # NOTE(zhaoqin): If the instance does not exist, _lookup_by_name()
2104 # will throw InstanceNotFound exception. Need to
2105 # disconnect volume under this circumstance.
2106 LOG.warning("During detach_volume, instance disappeared.",
2107 instance=instance)
2108 except exception.DeviceNotFound:
2109 # We should still try to disconnect logical device from
2110 # host, an error might have happened during a previous
2111 # call.
2112 LOG.info("Device %s not found in instance.",
2113 disk_dev, instance=instance)
2114 except libvirt.libvirtError as ex:
2115 # NOTE(vish): This is called to cleanup volumes after live
2116 # migration, so we should still disconnect even if
2117 # the instance doesn't exist here anymore.
2118 error_code = ex.get_error_code()
2119 if error_code == libvirt.VIR_ERR_NO_DOMAIN:
2120 # NOTE(vish):
2121 LOG.warning("During detach_volume, instance disappeared.",
2122 instance=instance)
2123 else:
2124 raise
2125
2126 self._disconnect_volume(context, connection_info, instance,
2127 encryption=encryption)
2128
2129 def _resize_attached_volume(self, new_size, block_device, instance):
2130 LOG.debug('Resizing target device %(dev)s to %(size)u',
2131 {'dev': block_device._disk, 'size': new_size},
2132 instance=instance)
2133 block_device.resize(new_size)
2134
2135 def _resize_attached_encrypted_volume(self, original_new_size,
2136 block_device, instance,
2137 connection_info, encryption):
2138 # TODO(lyarwood): Also handle the dm-crpyt encryption providers of
2139 # plain and LUKSv2, for now just use the original_new_size.
2140 decrypted_device_new_size = original_new_size
2141
2142 # NOTE(lyarwood): original_new_size currently refers to the total size
2143 # of the extended volume in bytes. With natively decrypted LUKSv1
2144 # volumes we need to ensure this now takes the LUKSv1 header and key
2145 # material into account. Otherwise QEMU will attempt and fail to grow
2146 # host block devices and remote RBD volumes.
2147 if self._allow_native_luksv1(encryption):
2148 try:
2149 # NOTE(lyarwood): Find the path to provide to qemu-img
2150 if 'device_path' in connection_info['data']:
2151 path = connection_info['data']['device_path']
2152 elif connection_info['driver_volume_type'] == 'rbd':
2153 path = 'rbd:%s' % (connection_info['data']['name'])
2154 else:
2155 path = 'unknown'
2156 raise exception.DiskNotFound(location='unknown')
2157
2158 info = images.privileged_qemu_img_info(path)
2159 format_specific_data = info.format_specific['data']
2160 payload_offset = format_specific_data['payload-offset']
2161
2162 # NOTE(lyarwood): Ensure the underlying device is not resized
2163 # by subtracting the LUKSv1 payload_offset (where the users
2164 # encrypted data starts) from the original_new_size (the total
2165 # size of the underlying volume). Both are reported in bytes.
2166 decrypted_device_new_size = original_new_size - payload_offset
2167
2168 except exception.DiskNotFound:
2169 with excutils.save_and_reraise_exception():
2170 LOG.exception('Unable to access the encrypted disk %s.',
2171 path, instance=instance)
2172 except Exception:
2173 with excutils.save_and_reraise_exception():
2174 LOG.exception('Unknown error when attempting to find the '
2175 'payload_offset for LUKSv1 encrypted disk '
2176 '%s.', path, instance=instance)
2177 # NOTE(lyarwood): Resize the decrypted device within the instance to
2178 # the calculated size as with normal volumes.
2179 self._resize_attached_volume(
2180 decrypted_device_new_size, block_device, instance)
2181
2182 def extend_volume(self, context, connection_info, instance,
2183 requested_size):
2184 try:
2185 new_size = self._extend_volume(connection_info, instance,
2186 requested_size)
2187 except NotImplementedError:
2188 raise exception.ExtendVolumeNotSupported()
2189
2190 # Resize the device in QEMU so its size is updated and
2191 # detected by the instance without rebooting.
2192 try:
2193 guest = self._host.get_guest(instance)
2194 state = guest.get_power_state(self._host)
2195 volume_id = driver_block_device.get_volume_id(connection_info)
2196 active_state = state in (power_state.RUNNING, power_state.PAUSED)
2197 if active_state:
2198 if 'device_path' in connection_info['data']:
2199 disk_path = connection_info['data']['device_path']
2200 else:
2201 # Some drivers (eg. net) don't put the device_path
2202 # into the connection_info. Match disks by their serial
2203 # number instead
2204 disk = next(iter([
2205 d for d in guest.get_all_disks()
2206 if d.serial == volume_id
2207 ]), None)
2208 if not disk:
2209 raise exception.VolumeNotFound(volume_id=volume_id)
2210 disk_path = disk.target_dev
2211 dev = guest.get_block_device(disk_path)
2212 encryption = encryptors.get_encryption_metadata(
2213 context, self._volume_api, volume_id, connection_info)
2214 if encryption:
2215 self._resize_attached_encrypted_volume(
2216 new_size, dev, instance,
2217 connection_info, encryption)
2218 else:
2219 self._resize_attached_volume(
2220 new_size, dev, instance)
2221 else:
2222 LOG.debug('Skipping block device resize, guest is not running',
2223 instance=instance)
2224 except exception.InstanceNotFound:
2225 with excutils.save_and_reraise_exception():
2226 LOG.warning('During extend_volume, instance disappeared.',
2227 instance=instance)
2228 except libvirt.libvirtError:
2229 with excutils.save_and_reraise_exception():
2230 LOG.exception('resizing block device failed.',
2231 instance=instance)
2232
2233 def attach_interface(self, context, instance, image_meta, vif):
2234 guest = self._host.get_guest(instance)
2235
2236 self.vif_driver.plug(instance, vif)
2237 cfg = self.vif_driver.get_config(instance, vif, image_meta,
2238 instance.flavor,
2239 CONF.libvirt.virt_type)
2240 try:
2241 state = guest.get_power_state(self._host)
2242 live = state in (power_state.RUNNING, power_state.PAUSED)
2243 guest.attach_device(cfg, persistent=True, live=live)
2244 except libvirt.libvirtError:
2245 LOG.error('attaching network adapter failed.',
2246 instance=instance, exc_info=True)
2247 self.vif_driver.unplug(instance, vif)
2248 raise exception.InterfaceAttachFailed(
2249 instance_uuid=instance.uuid)
2250 try:
2251 # NOTE(artom) If we're attaching with a device role tag, we need to
2252 # rebuild device_metadata. If we're attaching without a role
2253 # tag, we're rebuilding it here needlessly anyways. This isn't a
2254 # massive deal, and it helps reduce code complexity by not having
2255 # to indicate to the virt driver that the attach is tagged. The
2256 # really important optimization of not calling the database unless
2257 # device_metadata has actually changed is done for us by
2258 # instance.save().
2259 instance.device_metadata = self._build_device_metadata(
2260 context, instance)
2261 instance.save()
2262 except Exception:
2263 # NOTE(artom) If we fail here it means the interface attached
2264 # successfully but building and/or saving the device metadata
2265 # failed. Just unplugging the vif is therefore not enough cleanup,
2266 # we need to detach the interface.
2267 with excutils.save_and_reraise_exception(reraise=False):
2268 LOG.error('Interface attached successfully but building '
2269 'and/or saving device metadata failed.',
2270 instance=instance, exc_info=True)
2271 self.detach_interface(context, instance, vif)
2272 raise exception.InterfaceAttachFailed(
2273 instance_uuid=instance.uuid)
2274
2275 def detach_interface(self, context, instance, vif):
2276 guest = self._host.get_guest(instance)
2277 cfg = self.vif_driver.get_config(instance, vif,
2278 instance.image_meta,
2279 instance.flavor,
2280 CONF.libvirt.virt_type)
2281 interface = guest.get_interface_by_cfg(cfg)
2282 try:
2283 # NOTE(mriedem): When deleting an instance and using Neutron,
2284 # we can be racing against Neutron deleting the port and
2285 # sending the vif-deleted event which then triggers a call to
2286 # detach the interface, so if the interface is not found then
2287 # we can just log it as a warning.
2288 if not interface:
2289 mac = vif.get('address')
2290 # The interface is gone so just log it as a warning.
2291 LOG.warning('Detaching interface %(mac)s failed because '
2292 'the device is no longer found on the guest.',
2293 {'mac': mac}, instance=instance)
2294 return
2295
2296 state = guest.get_power_state(self._host)
2297 live = state in (power_state.RUNNING, power_state.PAUSED)
2298 # Now we are going to loop until the interface is detached or we
2299 # timeout.
2300 supports_device_missing = self._host.has_min_version(
2301 MIN_LIBVIRT_VIR_ERR_DEVICE_MISSING)
2302 wait_for_detach = guest.detach_device_with_retry(
2303 guest.get_interface_by_cfg, cfg, live=live,
2304 alternative_device_name=self.vif_driver.get_vif_devname(vif),
2305 supports_device_missing_error_code=supports_device_missing)
2306 wait_for_detach()
2307 except exception.DeviceDetachFailed:
2308 # We failed to detach the device even with the retry loop, so let's
2309 # dump some debug information to the logs before raising back up.
2310 with excutils.save_and_reraise_exception():
2311 devname = self.vif_driver.get_vif_devname(vif)
2312 interface = guest.get_interface_by_cfg(cfg)
2313 if interface:
2314 LOG.warning(
2315 'Failed to detach interface %(devname)s after '
2316 'repeated attempts. Final interface xml:\n'
2317 '%(interface_xml)s\nFinal guest xml:\n%(guest_xml)s',
2318 {'devname': devname,
2319 'interface_xml': interface.to_xml(),
2320 'guest_xml': guest.get_xml_desc()},
2321 instance=instance)
2322 except exception.DeviceNotFound:
2323 # The interface is gone so just log it as a warning.
2324 LOG.warning('Detaching interface %(mac)s failed because '
2325 'the device is no longer found on the guest.',
2326 {'mac': vif.get('address')}, instance=instance)
2327 except libvirt.libvirtError as ex:
2328 error_code = ex.get_error_code()
2329 if error_code == libvirt.VIR_ERR_NO_DOMAIN:
2330 LOG.warning("During detach_interface, instance disappeared.",
2331 instance=instance)
2332 else:
2333 # NOTE(mriedem): When deleting an instance and using Neutron,
2334 # we can be racing against Neutron deleting the port and
2335 # sending the vif-deleted event which then triggers a call to
2336 # detach the interface, so we might have failed because the
2337 # network device no longer exists. Libvirt will fail with
2338 # "operation failed: no matching network device was found"
2339 # which unfortunately does not have a unique error code so we
2340 # need to look up the interface by config and if it's not found
2341 # then we can just log it as a warning rather than tracing an
2342 # error.
2343 mac = vif.get('address')
2344 # Get a fresh instance of the guest in case it is gone.
2345 try:
2346 guest = self._host.get_guest(instance)
2347 except exception.InstanceNotFound:
2348 LOG.info("Instance disappeared while detaching interface "
2349 "%s", vif['id'], instance=instance)
2350 return
2351 interface = guest.get_interface_by_cfg(cfg)
2352 if interface:
2353 LOG.error('detaching network adapter failed.',
2354 instance=instance, exc_info=True)
2355 raise exception.InterfaceDetachFailed(
2356 instance_uuid=instance.uuid)
2357
2358 # The interface is gone so just log it as a warning.
2359 LOG.warning('Detaching interface %(mac)s failed because '
2360 'the device is no longer found on the guest.',
2361 {'mac': mac}, instance=instance)
2362 finally:
2363 # NOTE(gibi): we need to unplug the vif _after_ the detach is done
2364 # on the libvirt side as otherwise libvirt will still manage the
2365 # device that our unplug code trying to reset. This can cause a
2366 # race and leave the detached device configured. Also even if we
2367 # are failed to detach due to race conditions the unplug is
2368 # necessary for the same reason
2369 self.vif_driver.unplug(instance, vif)
2370
2371 def _create_snapshot_metadata(self, image_meta, instance,
2372 img_fmt, snp_name):
2373 metadata = {'status': 'active',
2374 'name': snp_name,
2375 'properties': {
2376 'kernel_id': instance.kernel_id,
2377 'image_location': 'snapshot',
2378 'image_state': 'available',
2379 'owner_id': instance.project_id,
2380 'ramdisk_id': instance.ramdisk_id,
2381 }
2382 }
2383 if instance.os_type:
2384 metadata['properties']['os_type'] = instance.os_type
2385
2386 # NOTE(vish): glance forces ami disk format to be ami
2387 if image_meta.disk_format == 'ami':
2388 metadata['disk_format'] = 'ami'
2389 else:
2390 metadata['disk_format'] = img_fmt
2391
2392 if image_meta.obj_attr_is_set("container_format"):
2393 metadata['container_format'] = image_meta.container_format
2394 else:
2395 metadata['container_format'] = "bare"
2396
2397 return metadata
2398
2399 def snapshot(self, context, instance, image_id, update_task_state):
2400 """Create snapshot from a running VM instance.
2401
2402 This command only works with qemu 0.14+
2403 """
2404 try:
2405 guest = self._host.get_guest(instance)
2406 except exception.InstanceNotFound:
2407 raise exception.InstanceNotRunning(instance_id=instance.uuid)
2408
2409 snapshot = self._image_api.get(context, image_id)
2410
2411 # source_format is an on-disk format
2412 # source_type is a backend type
2413 disk_path, source_format = libvirt_utils.find_disk(guest)
2414 source_type = libvirt_utils.get_disk_type_from_path(disk_path)
2415
2416 # We won't have source_type for raw or qcow2 disks, because we can't
2417 # determine that from the path. We should have it from the libvirt
2418 # xml, though.
2419 if source_type is None:
2420 source_type = source_format
2421 # For lxc instances we won't have it either from libvirt xml
2422 # (because we just gave libvirt the mounted filesystem), or the path,
2423 # so source_type is still going to be None. In this case,
2424 # root_disk is going to default to CONF.libvirt.images_type
2425 # below, which is still safe.
2426
2427 image_format = CONF.libvirt.snapshot_image_format or source_type
2428
2429 # NOTE(bfilippov): save lvm and rbd as raw
2430 if image_format == 'lvm' or image_format == 'rbd':
2431 image_format = 'raw'
2432
2433 metadata = self._create_snapshot_metadata(instance.image_meta,
2434 instance,
2435 image_format,
2436 snapshot['name'])
2437
2438 snapshot_name = uuidutils.generate_uuid(dashed=False)
2439
2440 # store current state so we know what to resume back to if we suspend
2441 original_power_state = guest.get_power_state(self._host)
2442
2443 # NOTE(dgenin): Instances with LVM encrypted ephemeral storage require
2444 # cold snapshots. Currently, checking for encryption is
2445 # redundant because LVM supports only cold snapshots.
2446 # It is necessary in case this situation changes in the
2447 # future.
2448 if (
2449 self._host.has_min_version(hv_type=host.HV_DRIVER_QEMU) and
2450 source_type != 'lvm' and
2451 not CONF.ephemeral_storage_encryption.enabled and
2452 not CONF.workarounds.disable_libvirt_livesnapshot and
2453 # NOTE(stephenfin): Live snapshotting doesn't make sense for
2454 # shutdown instances
2455 original_power_state != power_state.SHUTDOWN
2456 ):
2457 live_snapshot = True
2458 else:
2459 live_snapshot = False
2460
2461 self._suspend_guest_for_snapshot(
2462 context, live_snapshot, original_power_state, instance)
2463
2464 root_disk = self.image_backend.by_libvirt_path(
2465 instance, disk_path, image_type=source_type)
2466
2467 if live_snapshot:
2468 LOG.info("Beginning live snapshot process", instance=instance)
2469 else:
2470 LOG.info("Beginning cold snapshot process", instance=instance)
2471
2472 update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD)
2473
2474 update_task_state(task_state=task_states.IMAGE_UPLOADING,
2475 expected_state=task_states.IMAGE_PENDING_UPLOAD)
2476
2477 try:
2478 metadata['location'] = root_disk.direct_snapshot(
2479 context, snapshot_name, image_format, image_id,
2480 instance.image_ref)
2481 self._resume_guest_after_snapshot(
2482 context, live_snapshot, original_power_state, instance, guest)
2483 self._image_api.update(context, image_id, metadata,
2484 purge_props=False)
2485 except (NotImplementedError, exception.ImageUnacceptable,
2486 exception.Forbidden) as e:
2487 if type(e) != NotImplementedError:
2488 LOG.warning('Performing standard snapshot because direct '
2489 'snapshot failed: %(error)s',
2490 {'error': encodeutils.exception_to_unicode(e)})
2491 failed_snap = metadata.pop('location', None)
2492 if failed_snap:
2493 failed_snap = {'url': str(failed_snap)}
2494 root_disk.cleanup_direct_snapshot(failed_snap,
2495 also_destroy_volume=True,
2496 ignore_errors=True)
2497 update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD,
2498 expected_state=task_states.IMAGE_UPLOADING)
2499
2500 # TODO(nic): possibly abstract this out to the root_disk
2501 if source_type == 'rbd' and live_snapshot:
2502 # Standard snapshot uses qemu-img convert from RBD which is
2503 # not safe to run with live_snapshot.
2504 live_snapshot = False
2505 # Suspend the guest, so this is no longer a live snapshot
2506 self._suspend_guest_for_snapshot(
2507 context, live_snapshot, original_power_state, instance)
2508
2509 snapshot_directory = CONF.libvirt.snapshots_directory
2510 fileutils.ensure_tree(snapshot_directory)
2511 with utils.tempdir(dir=snapshot_directory) as tmpdir:
2512 try:
2513 out_path = os.path.join(tmpdir, snapshot_name)
2514 if live_snapshot:
2515 # NOTE(xqueralt): libvirt needs o+x in the tempdir
2516 os.chmod(tmpdir, 0o701)
2517 self._live_snapshot(context, instance, guest,
2518 disk_path, out_path, source_format,
2519 image_format, instance.image_meta)
2520 else:
2521 root_disk.snapshot_extract(out_path, image_format)
2522 LOG.info("Snapshot extracted, beginning image upload",
2523 instance=instance)
2524 except libvirt.libvirtError as ex:
2525 error_code = ex.get_error_code()
2526 if error_code == libvirt.VIR_ERR_NO_DOMAIN:
2527 LOG.info('Instance %(instance_name)s disappeared '
2528 'while taking snapshot of it: [Error Code '
2529 '%(error_code)s] %(ex)s',
2530 {'instance_name': instance.name,
2531 'error_code': error_code,
2532 'ex': ex},
2533 instance=instance)
2534 raise exception.InstanceNotFound(
2535 instance_id=instance.uuid)
2536 else:
2537 raise
2538 finally:
2539 self._resume_guest_after_snapshot(
2540 context, live_snapshot, original_power_state, instance,
2541 guest)
2542
2543 # Upload that image to the image service
2544 update_task_state(task_state=task_states.IMAGE_UPLOADING,
2545 expected_state=task_states.IMAGE_PENDING_UPLOAD)
2546 with libvirt_utils.file_open(out_path, 'rb') as image_file:
2547 # execute operation with disk concurrency semaphore
2548 with compute_utils.disk_ops_semaphore:
2549 self._image_api.update(context,
2550 image_id,
2551 metadata,
2552 image_file)
2553 except Exception:
2554 with excutils.save_and_reraise_exception():
2555 LOG.exception("Failed to snapshot image")
2556 failed_snap = metadata.pop('location', None)
2557 if failed_snap:
2558 failed_snap = {'url': str(failed_snap)}
2559 root_disk.cleanup_direct_snapshot(
2560 failed_snap, also_destroy_volume=True,
2561 ignore_errors=True)
2562
2563 LOG.info("Snapshot image upload complete", instance=instance)
2564
2565 def _needs_suspend_resume_for_snapshot(
2566 self,
2567 live_snapshot: bool,
2568 current_power_state: int,
2569 ):
2570 # NOTE(dkang): managedSave does not work for LXC
2571 if CONF.libvirt.virt_type == 'lxc':
2572 return False
2573
2574 # Live snapshots do not necessitate suspending the domain
2575 if live_snapshot:
2576 return False
2577
2578 # ...and neither does a non-running domain
2579 return current_power_state in (power_state.RUNNING, power_state.PAUSED)
2580
2581 def _suspend_guest_for_snapshot(
2582 self,
2583 context: nova_context.RequestContext,
2584 live_snapshot: bool,
2585 current_power_state: int,
2586 instance: 'objects.Instance',
2587 ):
2588 if self._needs_suspend_resume_for_snapshot(
2589 live_snapshot, current_power_state,
2590 ):
2591 self.suspend(context, instance)
2592
2593 def _resume_guest_after_snapshot(
2594 self,
2595 context: nova_context.RequestContext,
2596 live_snapshot: bool,
2597 original_power_state: int,
2598 instance: 'objects.Instance',
2599 guest: libvirt_guest.Guest,
2600 ):
2601 if not self._needs_suspend_resume_for_snapshot(
2602 live_snapshot, original_power_state,
2603 ):
2604 return
2605
2606 current_power_state = guest.get_power_state(self._host)
2607
2608 # TODO(stephenfin): Any reason we couldn't use 'self.resume' here?
2609 guest.launch(pause=current_power_state == power_state.PAUSED)
2610
2611 self._attach_pci_devices(
2612 guest, pci_manager.get_instance_pci_devs(instance))
2613 self._attach_direct_passthrough_ports(context, instance, guest)
2614
2615 def _can_set_admin_password(self, image_meta):
2616
2617 if CONF.libvirt.virt_type in ('kvm', 'qemu'):
2618 if not image_meta.properties.get('hw_qemu_guest_agent', False):
2619 raise exception.QemuGuestAgentNotEnabled()
2620 elif not CONF.libvirt.virt_type == 'parallels':
2621 raise exception.SetAdminPasswdNotSupported()
2622
2623 def _save_instance_password_if_sshkey_present(self, instance, new_pass):
2624 sshkey = instance.key_data if 'key_data' in instance else None
2625 if sshkey and sshkey.startswith("ssh-rsa"):
2626 enc = crypto.ssh_encrypt_text(sshkey, new_pass)
2627 # NOTE(melwitt): The convert_password method doesn't actually do
2628 # anything with the context argument, so we can pass None.
2629 instance.system_metadata.update(
2630 password.convert_password(None, base64.encode_as_text(enc)))
2631 instance.save()
2632
2633 def set_admin_password(self, instance, new_pass):
2634 self._can_set_admin_password(instance.image_meta)
2635
2636 guest = self._host.get_guest(instance)
2637 user = instance.image_meta.properties.get("os_admin_user")
2638 if not user:
2639 if instance.os_type == "windows":
2640 user = "Administrator"
2641 else:
2642 user = "root"
2643 try:
2644 guest.set_user_password(user, new_pass)
2645 except libvirt.libvirtError as ex:
2646 error_code = ex.get_error_code()
2647 if error_code == libvirt.VIR_ERR_AGENT_UNRESPONSIVE:
2648 LOG.debug('Failed to set password: QEMU agent unresponsive',
2649 instance_uuid=instance.uuid)
2650 raise NotImplementedError()
2651
2652 err_msg = encodeutils.exception_to_unicode(ex)
2653 msg = (_('Error from libvirt while set password for username '
2654 '"%(user)s": [Error Code %(error_code)s] %(ex)s')
2655 % {'user': user, 'error_code': error_code, 'ex': err_msg})
2656 raise exception.InternalError(msg)
2657 else:
2658 # Save the password in sysmeta so it may be retrieved from the
2659 # metadata service.
2660 self._save_instance_password_if_sshkey_present(instance, new_pass)
2661
2662 def _can_quiesce(self, instance, image_meta):
2663 if CONF.libvirt.virt_type not in ('kvm', 'qemu'):
2664 raise exception.InstanceQuiesceNotSupported(
2665 instance_id=instance.uuid)
2666
2667 if not image_meta.properties.get('hw_qemu_guest_agent', False):
2668 raise exception.QemuGuestAgentNotEnabled()
2669
2670 def _requires_quiesce(self, image_meta):
2671 return image_meta.properties.get('os_require_quiesce', False)
2672
2673 def _set_quiesced(self, context, instance, image_meta, quiesced):
2674 self._can_quiesce(instance, image_meta)
2675 try:
2676 guest = self._host.get_guest(instance)
2677 if quiesced:
2678 guest.freeze_filesystems()
2679 else:
2680 guest.thaw_filesystems()
2681 except libvirt.libvirtError as ex:
2682 error_code = ex.get_error_code()
2683 err_msg = encodeutils.exception_to_unicode(ex)
2684 msg = (_('Error from libvirt while quiescing %(instance_name)s: '
2685 '[Error Code %(error_code)s] %(ex)s')
2686 % {'instance_name': instance.name,
2687 'error_code': error_code, 'ex': err_msg})
2688 raise exception.InternalError(msg)
2689
2690 def quiesce(self, context, instance, image_meta):
2691 """Freeze the guest filesystems to prepare for snapshot.
2692
2693 The qemu-guest-agent must be setup to execute fsfreeze.
2694 """
2695 self._set_quiesced(context, instance, image_meta, True)
2696
2697 def unquiesce(self, context, instance, image_meta):
2698 """Thaw the guest filesystems after snapshot."""
2699 self._set_quiesced(context, instance, image_meta, False)
2700
2701 def _live_snapshot(self, context, instance, guest, disk_path, out_path,
2702 source_format, image_format, image_meta):
2703 """Snapshot an instance without downtime."""
2704 dev = guest.get_block_device(disk_path)
2705
2706 # Save a copy of the domain's persistent XML file
2707 xml = guest.get_xml_desc(dump_inactive=True, dump_sensitive=True)
2708
2709 # Abort is an idempotent operation, so make sure any block
2710 # jobs which may have failed are ended.
2711 try:
2712 dev.abort_job()
2713 except Exception:
2714 pass
2715
2716 # NOTE (rmk): We are using shallow rebases as a workaround to a bug
2717 # in QEMU 1.3. In order to do this, we need to create
2718 # a destination image with the original backing file
2719 # and matching size of the instance root disk.
2720 src_disk_size = libvirt_utils.get_disk_size(disk_path,
2721 format=source_format)
2722 src_back_path = libvirt_utils.get_disk_backing_file(disk_path,
2723 format=source_format,
2724 basename=False)
2725 disk_delta = out_path + '.delta'
2726 libvirt_utils.create_cow_image(src_back_path, disk_delta,
2727 src_disk_size)
2728
2729 quiesced = False
2730 try:
2731 self._set_quiesced(context, instance, image_meta, True)
2732 quiesced = True
2733 except exception.NovaException as err:
2734 if self._requires_quiesce(image_meta):
2735 raise
2736 LOG.info('Skipping quiescing instance: %(reason)s.',
2737 {'reason': err}, instance=instance)
2738
2739 try:
2740 # NOTE (rmk): blockRebase cannot be executed on persistent
2741 # domains, so we need to temporarily undefine it.
2742 # If any part of this block fails, the domain is
2743 # re-defined regardless.
2744 if guest.has_persistent_configuration():
2745 hw_firmware_type = image_meta.properties.get(
2746 'hw_firmware_type')
2747 support_uefi = self._check_uefi_support(hw_firmware_type)
2748 guest.delete_configuration(support_uefi)
2749
2750 # NOTE (rmk): Establish a temporary mirror of our root disk and
2751 # issue an abort once we have a complete copy.
2752 dev.rebase(disk_delta, copy=True, reuse_ext=True, shallow=True)
2753
2754 while not dev.is_job_complete():
2755 time.sleep(0.5)
2756
2757 dev.abort_job()
2758 nova.privsep.path.chown(disk_delta, uid=os.getuid())
2759 finally:
2760 self._host.write_instance_config(xml)
2761 if quiesced:
2762 self._set_quiesced(context, instance, image_meta, False)
2763
2764 # Convert the delta (CoW) image with a backing file to a flat
2765 # image with no backing file.
2766 libvirt_utils.extract_snapshot(disk_delta, 'qcow2',
2767 out_path, image_format)
2768
2769 # Remove the disk_delta file once the snapshot extracted, so that
2770 # it doesn't hang around till the snapshot gets uploaded
2771 fileutils.delete_if_exists(disk_delta)
2772
2773 def _volume_snapshot_update_status(self, context, snapshot_id, status):
2774 """Send a snapshot status update to Cinder.
2775
2776 This method captures and logs exceptions that occur
2777 since callers cannot do anything useful with these exceptions.
2778
2779 Operations on the Cinder side waiting for this will time out if
2780 a failure occurs sending the update.
2781
2782 :param context: security context
2783 :param snapshot_id: id of snapshot being updated
2784 :param status: new status value
2785
2786 """
2787
2788 try:
2789 self._volume_api.update_snapshot_status(context,
2790 snapshot_id,
2791 status)
2792 except Exception:
2793 LOG.exception('Failed to send updated snapshot status '
2794 'to volume service.')
2795
2796 def _volume_snapshot_create(self, context, instance, guest,
2797 volume_id, new_file):
2798 """Perform volume snapshot.
2799
2800 :param guest: VM that volume is attached to
2801 :param volume_id: volume UUID to snapshot
2802 :param new_file: relative path to new qcow2 file present on share
2803
2804 """
2805 xml = guest.get_xml_desc()
2806 xml_doc = etree.fromstring(xml)
2807
2808 device_info = vconfig.LibvirtConfigGuest()
2809 device_info.parse_dom(xml_doc)
2810
2811 disks_to_snap = [] # to be snapshotted by libvirt
2812 network_disks_to_snap = [] # network disks (netfs, etc.)
2813 disks_to_skip = [] # local disks not snapshotted
2814
2815 for guest_disk in device_info.devices:
2816 if (guest_disk.root_name != 'disk'):
2817 continue
2818
2819 if (guest_disk.target_dev is None):
2820 continue
2821
2822 if (guest_disk.serial is None or guest_disk.serial != volume_id):
2823 disks_to_skip.append(guest_disk.target_dev)
2824 continue
2825
2826 # disk is a Cinder volume with the correct volume_id
2827
2828 disk_info = {
2829 'dev': guest_disk.target_dev,
2830 'serial': guest_disk.serial,
2831 'current_file': guest_disk.source_path,
2832 'source_protocol': guest_disk.source_protocol,
2833 'source_name': guest_disk.source_name,
2834 'source_hosts': guest_disk.source_hosts,
2835 'source_ports': guest_disk.source_ports
2836 }
2837
2838 # Determine path for new_file based on current path
2839 if disk_info['current_file'] is not None:
2840 current_file = disk_info['current_file']
2841 new_file_path = os.path.join(os.path.dirname(current_file),
2842 new_file)
2843 disks_to_snap.append((current_file, new_file_path))
2844 # NOTE(mriedem): This used to include a check for gluster in
2845 # addition to netfs since they were added together. Support for
2846 # gluster was removed in the 16.0.0 Pike release. It is unclear,
2847 # however, if other volume drivers rely on the netfs disk source
2848 # protocol.
2849 elif disk_info['source_protocol'] == 'netfs':
2850 network_disks_to_snap.append((disk_info, new_file))
2851
2852 if not disks_to_snap and not network_disks_to_snap:
2853 msg = _('Found no disk to snapshot.')
2854 raise exception.InternalError(msg)
2855
2856 snapshot = vconfig.LibvirtConfigGuestSnapshot()
2857
2858 for current_name, new_filename in disks_to_snap:
2859 snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk()
2860 snap_disk.name = current_name
2861 snap_disk.source_path = new_filename
2862 snap_disk.source_type = 'file'
2863 snap_disk.snapshot = 'external'
2864 snap_disk.driver_name = 'qcow2'
2865
2866 snapshot.add_disk(snap_disk)
2867
2868 for disk_info, new_filename in network_disks_to_snap:
2869 snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk()
2870 snap_disk.name = disk_info['dev']
2871 snap_disk.source_type = 'network'
2872 snap_disk.source_protocol = disk_info['source_protocol']
2873 snap_disk.snapshot = 'external'
2874 snap_disk.source_path = new_filename
2875 old_dir = disk_info['source_name'].split('/')[0]
2876 snap_disk.source_name = '%s/%s' % (old_dir, new_filename)
2877 snap_disk.source_hosts = disk_info['source_hosts']
2878 snap_disk.source_ports = disk_info['source_ports']
2879
2880 snapshot.add_disk(snap_disk)
2881
2882 for dev in disks_to_skip:
2883 snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk()
2884 snap_disk.name = dev
2885 snap_disk.snapshot = 'no'
2886
2887 snapshot.add_disk(snap_disk)
2888
2889 snapshot_xml = snapshot.to_xml()
2890 LOG.debug("snap xml: %s", snapshot_xml, instance=instance)
2891
2892 image_meta = instance.image_meta
2893 try:
2894 # Check to see if we can quiesce the guest before taking the
2895 # snapshot.
2896 self._can_quiesce(instance, image_meta)
2897 try:
2898 guest.snapshot(snapshot, no_metadata=True, disk_only=True,
2899 reuse_ext=True, quiesce=True)
2900 return
2901 except libvirt.libvirtError:
2902 # If the image says that quiesce is required then we fail.
2903 if self._requires_quiesce(image_meta):
2904 raise
2905 LOG.exception('Unable to create quiesced VM snapshot, '
2906 'attempting again with quiescing disabled.',
2907 instance=instance)
2908 except (exception.InstanceQuiesceNotSupported,
2909 exception.QemuGuestAgentNotEnabled) as err:
2910 # If the image says that quiesce is required then we need to fail.
2911 if self._requires_quiesce(image_meta):
2912 raise
2913 LOG.info('Skipping quiescing instance: %(reason)s.',
2914 {'reason': err}, instance=instance)
2915
2916 try:
2917 guest.snapshot(snapshot, no_metadata=True, disk_only=True,
2918 reuse_ext=True, quiesce=False)
2919 except libvirt.libvirtError:
2920 LOG.exception('Unable to create VM snapshot, '
2921 'failing volume_snapshot operation.',
2922 instance=instance)
2923
2924 raise
2925
2926 def _volume_refresh_connection_info(self, context, instance, volume_id):
2927 bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
2928 context, volume_id, instance.uuid)
2929
2930 driver_bdm = driver_block_device.convert_volume(bdm)
2931 if driver_bdm:
2932 driver_bdm.refresh_connection_info(context, instance,
2933 self._volume_api, self)
2934
2935 def volume_snapshot_create(self, context, instance, volume_id,
2936 create_info):
2937 """Create snapshots of a Cinder volume via libvirt.
2938
2939 :param instance: VM instance object reference
2940 :param volume_id: id of volume being snapshotted
2941 :param create_info: dict of information used to create snapshots
2942 - snapshot_id : ID of snapshot
2943 - type : qcow2 / <other>
2944 - new_file : qcow2 file created by Cinder which
2945 becomes the VM's active image after
2946 the snapshot is complete
2947 """
2948
2949 LOG.debug("volume_snapshot_create: create_info: %(c_info)s",
2950 {'c_info': create_info}, instance=instance)
2951
2952 try:
2953 guest = self._host.get_guest(instance)
2954 except exception.InstanceNotFound:
2955 raise exception.InstanceNotRunning(instance_id=instance.uuid)
2956
2957 if create_info['type'] != 'qcow2':
2958 msg = _('Unknown type: %s') % create_info['type']
2959 raise exception.InternalError(msg)
2960
2961 snapshot_id = create_info.get('snapshot_id', None)
2962 if snapshot_id is None:
2963 msg = _('snapshot_id required in create_info')
2964 raise exception.InternalError(msg)
2965
2966 try:
2967 self._volume_snapshot_create(context, instance, guest,
2968 volume_id, create_info['new_file'])
2969 except Exception:
2970 with excutils.save_and_reraise_exception():
2971 LOG.exception('Error occurred during volume_snapshot_create, '
2972 'sending error status to Cinder.',
2973 instance=instance)
2974 self._volume_snapshot_update_status(
2975 context, snapshot_id, 'error')
2976
2977 self._volume_snapshot_update_status(
2978 context, snapshot_id, 'creating')
2979
2980 def _wait_for_snapshot():
2981 snapshot = self._volume_api.get_snapshot(context, snapshot_id)
2982
2983 if snapshot.get('status') != 'creating':
2984 self._volume_refresh_connection_info(context, instance,
2985 volume_id)
2986 raise loopingcall.LoopingCallDone()
2987
2988 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_snapshot)
2989 timer.start(interval=0.5).wait()
2990
2991 @staticmethod
2992 def _rebase_with_qemu_img(source_path, rebase_base):
2993 """Rebase a disk using qemu-img.
2994
2995 :param source_path: the disk source path to rebase
2996 :type source_path: string
2997 :param rebase_base: the new parent in the backing chain
2998 :type rebase_base: None or string
2999 """
3000
3001 if rebase_base is None:
3002 # If backing_file is specified as "" (the empty string), then
3003 # the image is rebased onto no backing file (i.e. it will exist
3004 # independently of any backing file).
3005 backing_file = ""
3006 qemu_img_extra_arg = []
3007 else:
3008 # If the rebased image is going to have a backing file then
3009 # explicitly set the backing file format to avoid any security
3010 # concerns related to file format auto detection.
3011 backing_file = rebase_base
3012 b_file_fmt = images.qemu_img_info(backing_file).file_format
3013 qemu_img_extra_arg = ['-F', b_file_fmt]
3014
3015 qemu_img_extra_arg.append(source_path)
3016 # execute operation with disk concurrency semaphore
3017 with compute_utils.disk_ops_semaphore:
3018 processutils.execute("qemu-img", "rebase", "-b", backing_file,
3019 *qemu_img_extra_arg)
3020
3021 def _volume_snapshot_delete(self, context, instance, volume_id,
3022 snapshot_id, delete_info=None):
3023 """Note:
3024 if file being merged into == active image:
3025 do a blockRebase (pull) operation
3026 else:
3027 do a blockCommit operation
3028 Files must be adjacent in snap chain.
3029
3030 :param instance: instance object reference
3031 :param volume_id: volume UUID
3032 :param snapshot_id: snapshot UUID (unused currently)
3033 :param delete_info: {
3034 'type': 'qcow2',
3035 'file_to_merge': 'a.img',
3036 'merge_target_file': 'b.img' or None (if merging file_to_merge into
3037 active image)
3038 }
3039 """
3040
3041 LOG.debug('volume_snapshot_delete: delete_info: %s', delete_info,
3042 instance=instance)
3043
3044 if delete_info['type'] != 'qcow2':
3045 msg = _('Unknown delete_info type %s') % delete_info['type']
3046 raise exception.InternalError(msg)
3047
3048 try:
3049 guest = self._host.get_guest(instance)
3050 except exception.InstanceNotFound:
3051 raise exception.InstanceNotRunning(instance_id=instance.uuid)
3052
3053 # Find dev name
3054 xml = guest.get_xml_desc()
3055 xml_doc = etree.fromstring(xml)
3056
3057 device_info = vconfig.LibvirtConfigGuest()
3058 device_info.parse_dom(xml_doc)
3059
3060 for guest_disk in device_info.devices:
3061 if (guest_disk.root_name != 'disk'):
3062 continue
3063
3064 if (guest_disk.target_dev is None or guest_disk.serial is None):
3065 continue
3066
3067 if (
3068 guest_disk.source_path is None and
3069 guest_disk.source_protocol is None
3070 ):
3071 continue
3072
3073 if guest_disk.serial == volume_id:
3074 my_dev = guest_disk.target_dev
3075
3076 active_protocol = guest_disk.source_protocol
3077 active_disk_object = guest_disk
3078 break
3079 else:
3080 LOG.debug('Domain XML: %s', xml, instance=instance)
3081 msg = (_("Disk with id '%s' not found attached to instance.")
3082 % volume_id)
3083 raise exception.InternalError(msg)
3084
3085 LOG.debug("found device at %s", my_dev, instance=instance)
3086
3087 def _get_snap_dev(filename, backing_store):
3088 if filename is None:
3089 msg = _('filename cannot be None')
3090 raise exception.InternalError(msg)
3091
3092 # libgfapi delete
3093 LOG.debug("XML: %s", xml)
3094
3095 LOG.debug("active disk object: %s", active_disk_object)
3096
3097 # determine reference within backing store for desired image
3098 filename_to_merge = filename
3099 matched_name = None
3100 b = backing_store
3101 index = None
3102
3103 current_filename = active_disk_object.source_name.split('/')[1]
3104 if current_filename == filename_to_merge:
3105 return my_dev + '[0]'
3106
3107 while b is not None:
3108 source_filename = b.source_name.split('/')[1]
3109 if source_filename == filename_to_merge:
3110 LOG.debug('found match: %s', b.source_name)
3111 matched_name = b.source_name
3112 index = b.index
3113 break
3114
3115 b = b.backing_store
3116
3117 if matched_name is None:
3118 msg = _('no match found for %s') % (filename_to_merge)
3119 raise exception.InternalError(msg)
3120
3121 LOG.debug('index of match (%s) is %s', b.source_name, index)
3122
3123 my_snap_dev = '%s[%s]' % (my_dev, index)
3124 return my_snap_dev
3125
3126 if delete_info['merge_target_file'] is None:
3127 # pull via blockRebase()
3128
3129 # Merge the most recent snapshot into the active image
3130
3131 rebase_disk = my_dev
3132 rebase_base = delete_info['file_to_merge'] # often None
3133 if (active_protocol is not None) and (rebase_base is not None):
3134 rebase_base = _get_snap_dev(rebase_base,
3135 active_disk_object.backing_store)
3136
3137 relative = rebase_base is not None
3138 LOG.debug(
3139 'disk: %(disk)s, base: %(base)s, '
3140 'bw: %(bw)s, relative: %(relative)s',
3141 {'disk': rebase_disk,
3142 'base': rebase_base,
3143 'bw': libvirt_guest.BlockDevice.REBASE_DEFAULT_BANDWIDTH,
3144 'relative': str(relative)}, instance=instance)
3145
3146 dev = guest.get_block_device(rebase_disk)
3147 if guest.is_active():
3148 result = dev.rebase(rebase_base, relative=relative)
3149 if result == 0:
3150 LOG.debug('blockRebase started successfully',
3151 instance=instance)
3152
3153 while not dev.is_job_complete():
3154 LOG.debug('waiting for blockRebase job completion',
3155 instance=instance)
3156 time.sleep(0.5)
3157
3158 # If the guest is not running libvirt won't do a blockRebase.
3159 # In that case, let's ask qemu-img to rebase the disk.
3160 else:
3161 LOG.debug('Guest is not running so doing a block rebase '
3162 'using "qemu-img rebase"', instance=instance)
3163
3164 # It's unsure how well qemu-img handles network disks for
3165 # every protocol. So let's be safe.
3166 active_protocol = active_disk_object.source_protocol
3167 if active_protocol is not None:
3168 msg = _("Something went wrong when deleting a volume "
3169 "snapshot: rebasing a %(protocol)s network disk "
3170 "using qemu-img has not been fully tested"
3171 ) % {'protocol': active_protocol}
3172 LOG.error(msg)
3173 raise exception.InternalError(msg)
3174 self._rebase_with_qemu_img(active_disk_object.source_path,
3175 rebase_base)
3176
3177 else:
3178 # commit with blockCommit()
3179 my_snap_base = None
3180 my_snap_top = None
3181 commit_disk = my_dev
3182
3183 if active_protocol is not None:
3184 my_snap_base = _get_snap_dev(delete_info['merge_target_file'],
3185 active_disk_object.backing_store)
3186 my_snap_top = _get_snap_dev(delete_info['file_to_merge'],
3187 active_disk_object.backing_store)
3188
3189 commit_base = my_snap_base or delete_info['merge_target_file']
3190 commit_top = my_snap_top or delete_info['file_to_merge']
3191
3192 LOG.debug('will call blockCommit with commit_disk=%(commit_disk)s '
3193 'commit_base=%(commit_base)s '
3194 'commit_top=%(commit_top)s ',
3195 {'commit_disk': commit_disk,
3196 'commit_base': commit_base,
3197 'commit_top': commit_top}, instance=instance)
3198
3199 dev = guest.get_block_device(commit_disk)
3200 result = dev.commit(commit_base, commit_top, relative=True)
3201
3202 if result == 0:
3203 LOG.debug('blockCommit started successfully',
3204 instance=instance)
3205
3206 while not dev.is_job_complete():
3207 LOG.debug('waiting for blockCommit job completion',
3208 instance=instance)
3209 time.sleep(0.5)
3210
3211 def volume_snapshot_delete(self, context, instance, volume_id, snapshot_id,
3212 delete_info):
3213 try:
3214 self._volume_snapshot_delete(context, instance, volume_id,
3215 snapshot_id, delete_info=delete_info)
3216 except Exception:
3217 with excutils.save_and_reraise_exception():
3218 LOG.exception('Error occurred during volume_snapshot_delete, '
3219 'sending error status to Cinder.',
3220 instance=instance)
3221 self._volume_snapshot_update_status(
3222 context, snapshot_id, 'error_deleting')
3223
3224 self._volume_snapshot_update_status(context, snapshot_id, 'deleting')
3225 self._volume_refresh_connection_info(context, instance, volume_id)
3226
3227 def reboot(self, context, instance, network_info, reboot_type,
3228 block_device_info=None, bad_volumes_callback=None,
3229 accel_info=None):
3230 """Reboot a virtual machine, given an instance reference."""
3231 if reboot_type == 'SOFT':
3232 # NOTE(vish): This will attempt to do a graceful shutdown/restart.
3233 try:
3234 soft_reboot_success = self._soft_reboot(instance)
3235 except libvirt.libvirtError as e:
3236 LOG.debug("Instance soft reboot failed: %s",
3237 encodeutils.exception_to_unicode(e),
3238 instance=instance)
3239 soft_reboot_success = False
3240
3241 if soft_reboot_success:
3242 LOG.info("Instance soft rebooted successfully.",
3243 instance=instance)
3244 return
3245 else:
3246 LOG.warning("Failed to soft reboot instance. "
3247 "Trying hard reboot.",
3248 instance=instance)
3249 return self._hard_reboot(context, instance, network_info,
3250 block_device_info, accel_info)
3251
3252 def _soft_reboot(self, instance):
3253 """Attempt to shutdown and restart the instance gracefully.
3254
3255 We use shutdown and create here so we can return if the guest
3256 responded and actually rebooted. Note that this method only
3257 succeeds if the guest responds to acpi. Therefore we return
3258 success or failure so we can fall back to a hard reboot if
3259 necessary.
3260
3261 :returns: True if the reboot succeeded
3262 """
3263 guest = self._host.get_guest(instance)
3264
3265 state = guest.get_power_state(self._host)
3266 old_domid = guest.id
3267 # NOTE(vish): This check allows us to reboot an instance that
3268 # is already shutdown.
3269 if state == power_state.RUNNING:
3270 guest.shutdown()
3271 # NOTE(vish): This actually could take slightly longer than the
3272 # FLAG defines depending on how long the get_info
3273 # call takes to return.
3274 self._prepare_pci_devices_for_use(
3275 pci_manager.get_instance_pci_devs(instance, 'all'))
3276 for x in range(CONF.libvirt.wait_soft_reboot_seconds):
3277 guest = self._host.get_guest(instance)
3278
3279 state = guest.get_power_state(self._host)
3280 new_domid = guest.id
3281
3282 # NOTE(ivoks): By checking domain IDs, we make sure we are
3283 # not recreating domain that's already running.
3284 if old_domid != new_domid:
3285 if state in (power_state.SHUTDOWN, power_state.CRASHED):
3286 LOG.info("Instance shutdown successfully.",
3287 instance=instance)
3288 guest.launch()
3289 timer = loopingcall.FixedIntervalLoopingCall(
3290 self._wait_for_running, instance)
3291 timer.start(interval=0.5).wait()
3292 return True
3293 else:
3294 LOG.info("Instance may have been rebooted during soft "
3295 "reboot, so return now.", instance=instance)
3296 return True
3297 greenthread.sleep(1)
3298 return False
3299
3300 def _hard_reboot(self, context, instance, network_info,
3301 block_device_info=None, accel_info=None):
3302 """Reboot a virtual machine, given an instance reference.
3303
3304 Performs a Libvirt reset (if supported) on the domain.
3305
3306 If Libvirt reset is unavailable this method actually destroys and
3307 re-creates the domain to ensure the reboot happens, as the guest
3308 OS cannot ignore this action.
3309 """
3310 # NOTE(sbauza): Since we undefine the guest XML when destroying, we
3311 # need to remember the existing mdevs for reusing them.
3312 mdevs = self._get_all_assigned_mediated_devices(instance)
3313 mdevs = list(mdevs.keys())
3314 # NOTE(mdbooth): In addition to performing a hard reboot of the domain,
3315 # the hard reboot operation is relied upon by operators to be an
3316 # automated attempt to fix as many things as possible about a
3317 # non-functioning instance before resorting to manual intervention.
3318 # With this goal in mind, we tear down all the aspects of an instance
3319 # we can here without losing data. This allows us to re-initialise from
3320 # scratch, and hopefully fix, most aspects of a non-functioning guest.
3321 self.destroy(context, instance, network_info, destroy_disks=False,
3322 block_device_info=block_device_info)
3323
3324 # Convert the system metadata to image metadata
3325 # NOTE(mdbooth): This is a workaround for stateless Nova compute
3326 # https://bugs.launchpad.net/nova/+bug/1349978
3327 instance_dir = libvirt_utils.get_instance_path(instance)
3328 fileutils.ensure_tree(instance_dir)
3329
3330 disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
3331 instance,
3332 instance.image_meta,
3333 block_device_info)
3334 # NOTE(vish): This could generate the wrong device_format if we are
3335 # using the raw backend and the images don't exist yet.
3336 # The create_images_and_backing below doesn't properly
3337 # regenerate raw backend images, however, so when it
3338 # does we need to (re)generate the xml after the images
3339 # are in place.
3340 xml = self._get_guest_xml(context, instance, network_info, disk_info,
3341 instance.image_meta,
3342 block_device_info=block_device_info,
3343 mdevs=mdevs, accel_info=accel_info)
3344
3345 # NOTE(mdbooth): context.auth_token will not be set when we call
3346 # _hard_reboot from resume_state_on_host_boot()
3347 if context.auth_token is not None:
3348 # NOTE (rmk): Re-populate any missing backing files.
3349 config = vconfig.LibvirtConfigGuest()
3350 config.parse_str(xml)
3351 backing_disk_info = self._get_instance_disk_info_from_config(
3352 config, block_device_info)
3353 self._create_images_and_backing(context, instance, instance_dir,
3354 backing_disk_info)
3355
3356 # Initialize all the necessary networking, block devices and
3357 # start the instance.
3358 # NOTE(melwitt): Pass vifs_already_plugged=True here even though we've
3359 # unplugged vifs earlier. The behavior of neutron plug events depends
3360 # on which vif type we're using and we are working with a stale network
3361 # info cache here, so won't rely on waiting for neutron plug events.
3362 # vifs_already_plugged=True means "do not wait for neutron plug events"
3363 # NOTE(efried): The instance should already have a vtpm_secret_uuid
3364 # registered if appropriate.
3365 self._create_guest_with_network(
3366 context, xml, instance, network_info, block_device_info,
3367 vifs_already_plugged=True)
3368 self._prepare_pci_devices_for_use(
3369 pci_manager.get_instance_pci_devs(instance, 'all'))
3370
3371 def _wait_for_reboot():
3372 """Called at an interval until the VM is running again."""
3373 state = self.get_info(instance).state
3374
3375 if state == power_state.RUNNING:
3376 LOG.info("Instance rebooted successfully.",
3377 instance=instance)
3378 raise loopingcall.LoopingCallDone()
3379
3380 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_reboot)
3381 timer.start(interval=0.5).wait()
3382
3383 def pause(self, instance):
3384 """Pause VM instance."""
3385 self._host.get_guest(instance).pause()
3386
3387 def unpause(self, instance):
3388 """Unpause paused VM instance."""
3389 guest = self._host.get_guest(instance)
3390 guest.resume()
3391 guest.sync_guest_time()
3392
3393 def _clean_shutdown(self, instance, timeout, retry_interval):
3394 """Attempt to shutdown the instance gracefully.
3395
3396 :param instance: The instance to be shutdown
3397 :param timeout: How long to wait in seconds for the instance to
3398 shutdown
3399 :param retry_interval: How often in seconds to signal the instance
3400 to shutdown while waiting
3401
3402 :returns: True if the shutdown succeeded
3403 """
3404
3405 # List of states that represent a shutdown instance
3406 SHUTDOWN_STATES = [power_state.SHUTDOWN,
3407 power_state.CRASHED]
3408
3409 try:
3410 guest = self._host.get_guest(instance)
3411 except exception.InstanceNotFound:
3412 # If the instance has gone then we don't need to
3413 # wait for it to shutdown
3414 return True
3415
3416 state = guest.get_power_state(self._host)
3417 if state in SHUTDOWN_STATES:
3418 LOG.info("Instance already shutdown.", instance=instance)
3419 return True
3420
3421 LOG.debug("Shutting down instance from state %s", state,
3422 instance=instance)
3423 guest.shutdown()
3424 retry_countdown = retry_interval
3425
3426 for sec in range(timeout):
3427
3428 guest = self._host.get_guest(instance)
3429 state = guest.get_power_state(self._host)
3430
3431 if state in SHUTDOWN_STATES:
3432 LOG.info("Instance shutdown successfully after %d seconds.",
3433 sec, instance=instance)
3434 return True
3435
3436 # Note(PhilD): We can't assume that the Guest was able to process
3437 # any previous shutdown signal (for example it may
3438 # have still been startingup, so within the overall
3439 # timeout we re-trigger the shutdown every
3440 # retry_interval
3441 if retry_countdown == 0:
3442 retry_countdown = retry_interval
3443 # Instance could shutdown at any time, in which case we
3444 # will get an exception when we call shutdown
3445 try:
3446 LOG.debug("Instance in state %s after %d seconds - "
3447 "resending shutdown", state, sec,
3448 instance=instance)
3449 guest.shutdown()
3450 except libvirt.libvirtError:
3451 # Assume this is because its now shutdown, so loop
3452 # one more time to clean up.
3453 LOG.debug("Ignoring libvirt exception from shutdown "
3454 "request.", instance=instance)
3455 continue
3456 else:
3457 retry_countdown -= 1
3458
3459 time.sleep(1)
3460
3461 LOG.info("Instance failed to shutdown in %d seconds.",
3462 timeout, instance=instance)
3463 return False
3464
3465 def power_off(self, instance, timeout=0, retry_interval=0):
3466 """Power off the specified instance."""
3467 if timeout:
3468 self._clean_shutdown(instance, timeout, retry_interval)
3469 self._destroy(instance)
3470
3471 def power_on(self, context, instance, network_info,
3472 block_device_info=None, accel_info=None):
3473 """Power on the specified instance."""
3474 # We use _hard_reboot here to ensure that all backing files,
3475 # network, and block device connections, etc. are established
3476 # and available before we attempt to start the instance.
3477 self._hard_reboot(context, instance, network_info, block_device_info,
3478 accel_info)
3479
3480 def trigger_crash_dump(self, instance):
3481
3482 """Trigger crash dump by injecting an NMI to the specified instance."""
3483 try:
3484 self._host.get_guest(instance).inject_nmi()
3485 except libvirt.libvirtError as ex:
3486 error_code = ex.get_error_code()
3487
3488 if error_code == libvirt.VIR_ERR_NO_SUPPORT:
3489 raise exception.TriggerCrashDumpNotSupported()
3490 elif error_code == libvirt.VIR_ERR_OPERATION_INVALID:
3491 raise exception.InstanceNotRunning(instance_id=instance.uuid)
3492
3493 LOG.exception(
3494 'Error from libvirt while injecting an NMI to '
3495 '%(instance_uuid)s: [Error Code %(error_code)s] %(ex)s',
3496 {'instance_uuid': instance.uuid,
3497 'error_code': error_code, 'ex': ex})
3498 raise
3499
3500 def suspend(self, context, instance):
3501 """Suspend the specified instance."""
3502 guest = self._host.get_guest(instance)
3503
3504 self._detach_pci_devices(guest,
3505 pci_manager.get_instance_pci_devs(instance))
3506 self._detach_direct_passthrough_ports(context, instance, guest)
3507 self._detach_mediated_devices(guest)
3508 guest.save_memory_state()
3509
3510 def resume(self, context, instance, network_info, block_device_info=None):
3511 """resume the specified instance."""
3512 xml = self._get_existing_domain_xml(instance, network_info,
3513 block_device_info)
3514 # NOTE(efried): The instance should already have a vtpm_secret_uuid
3515 # registered if appropriate.
3516 guest = self._create_guest_with_network(
3517 context, xml, instance, network_info, block_device_info,
3518 vifs_already_plugged=True)
3519 self._attach_pci_devices(guest,
3520 pci_manager.get_instance_pci_devs(instance))
3521 self._attach_direct_passthrough_ports(
3522 context, instance, guest, network_info)
3523 timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_running,
3524 instance)
3525 timer.start(interval=0.5).wait()
3526 guest.sync_guest_time()
3527
3528 def resume_state_on_host_boot(self, context, instance, network_info,
3529 block_device_info=None):
3530 """resume guest state when a host is booted."""
3531 # Check if the instance is running already and avoid doing
3532 # anything if it is.
3533 try:
3534 guest = self._host.get_guest(instance)
3535 state = guest.get_power_state(self._host)
3536
3537 ignored_states = (power_state.RUNNING,
3538 power_state.SUSPENDED,
3539 power_state.NOSTATE,
3540 power_state.PAUSED)
3541
3542 if state in ignored_states:
3543 return
3544 except (exception.InternalError, exception.InstanceNotFound):
3545 pass
3546
3547 # Instance is not up and could be in an unknown state.
3548 # Be as absolute as possible about getting it back into
3549 # a known and running state.
3550 self._hard_reboot(context, instance, network_info, block_device_info)
3551
3552 def rescue(self, context, instance, network_info, image_meta,
3553 rescue_password, block_device_info):
3554 """Loads a VM using rescue images.
3555
3556 A rescue is normally performed when something goes wrong with the
3557 primary images and data needs to be corrected/recovered. Rescuing
3558 should not edit or over-ride the original image, only allow for
3559 data recovery.
3560
3561 Two modes are provided when rescuing an instance with this driver.
3562
3563 The original and default rescue mode, where the rescue boot disk,
3564 original root disk and optional regenerated config drive are attached
3565 to the instance.
3566
3567 A second stable device rescue mode is also provided where all of the
3568 original devices are attached to the instance during the rescue attempt
3569 with the addition of the rescue boot disk. This second mode is
3570 controlled by the hw_rescue_device and hw_rescue_bus image properties
3571 on the rescue image provided to this method via image_meta.
3572
3573 :param nova.context.RequestContext context:
3574 The context for the rescue.
3575 :param nova.objects.instance.Instance instance:
3576 The instance being rescued.
3577 :param nova.network.model.NetworkInfo network_info:
3578 Necessary network information for the resume.
3579 :param nova.objects.ImageMeta image_meta:
3580 The metadata of the image of the instance.
3581 :param rescue_password: new root password to set for rescue.
3582 :param dict block_device_info:
3583 The block device mapping of the instance.
3584 """
3585 instance_dir = libvirt_utils.get_instance_path(instance)
3586 unrescue_xml = self._get_existing_domain_xml(instance, network_info)
3587 unrescue_xml_path = os.path.join(instance_dir, 'unrescue.xml')
3588 with open(unrescue_xml_path, 'w') as f:
3589 f.write(unrescue_xml)
3590
3591 rescue_image_id = None
3592 rescue_image_meta = None
3593 if image_meta.obj_attr_is_set("id"):
3594 rescue_image_id = image_meta.id
3595
3596 rescue_images = {
3597 'image_id': (rescue_image_id or
3598 CONF.libvirt.rescue_image_id or instance.image_ref),
3599 'kernel_id': (CONF.libvirt.rescue_kernel_id or
3600 instance.kernel_id),
3601 'ramdisk_id': (CONF.libvirt.rescue_ramdisk_id or
3602 instance.ramdisk_id),
3603 }
3604
3605 virt_type = CONF.libvirt.virt_type
3606 if hardware.check_hw_rescue_props(image_meta):
3607 LOG.info("Attempting a stable device rescue", instance=instance)
3608 # NOTE(lyarwood): Stable device rescue is not supported when using
3609 # the LXC and Xen virt_types as they do not support the required
3610 # <boot order=''> definitions allowing an instance to boot from the
3611 # rescue device added as a final device to the domain.
3612 if virt_type in ('lxc', 'xen'):
3613 reason = ("Stable device rescue is not supported by virt_type "
3614 "%s", virt_type)
3615 raise exception.InstanceNotRescuable(instance_id=instance.uuid,
3616 reason=reason)
3617 # NOTE(lyarwood): Stable device rescue provides the original disk
3618 # mapping of the instance with the rescue device appened to the
3619 # end. As a result we need to provide the original image_meta, the
3620 # new rescue_image_meta and block_device_info when calling
3621 # get_disk_info.
3622 rescue_image_meta = image_meta
3623 if instance.image_ref:
3624 image_meta = objects.ImageMeta.from_image_ref(
3625 context, self._image_api, instance.image_ref)
3626 else:
3627 # NOTE(lyarwood): If instance.image_ref isn't set attempt to
3628 # lookup the original image_meta from the bdms. This will
3629 # return an empty dict if no valid image_meta is found.
3630 image_meta_dict = block_device.get_bdm_image_metadata(
3631 context, self._image_api, self._volume_api,
3632 block_device_info['block_device_mapping'],
3633 legacy_bdm=False)
3634 image_meta = objects.ImageMeta.from_dict(image_meta_dict)
3635
3636 else:
3637 LOG.info("Attempting rescue", instance=instance)
3638 # NOTE(lyarwood): A legacy rescue only provides the rescue device
3639 # and the original root device so we don't need to provide
3640 # block_device_info to the get_disk_info call.
3641 block_device_info = None
3642
3643 disk_info = blockinfo.get_disk_info(virt_type, instance, image_meta,
3644 rescue=True, block_device_info=block_device_info,
3645 rescue_image_meta=rescue_image_meta)
3646 LOG.debug("rescue generated disk_info: %s", disk_info)
3647
3648 injection_info = InjectionInfo(network_info=network_info,
3649 admin_pass=rescue_password,
3650 files=None)
3651 gen_confdrive = functools.partial(self._create_configdrive,
3652 context, instance, injection_info,
3653 rescue=True)
3654 # NOTE(sbauza): Since rescue recreates the guest XML, we need to
3655 # remember the existing mdevs for reusing them.
3656 mdevs = self._get_all_assigned_mediated_devices(instance)
3657 mdevs = list(mdevs.keys())
3658 self._create_image(context, instance, disk_info['mapping'],
3659 injection_info=injection_info, suffix='.rescue',
3660 disk_images=rescue_images)
3661 # NOTE(efried): The instance should already have a vtpm_secret_uuid
3662 # registered if appropriate.
3663 xml = self._get_guest_xml(context, instance, network_info, disk_info,
3664 image_meta, rescue=rescue_images,
3665 mdevs=mdevs,
3666 block_device_info=block_device_info)
3667 self._destroy(instance)
3668 self._create_guest(
3669 context, xml, instance, post_xml_callback=gen_confdrive,
3670 )
3671
3672 def unrescue(
3673 self,
3674 context: nova_context.RequestContext,
3675 instance: 'objects.Instance',
3676 ):
3677 """Reboot the VM which is being rescued back into primary images."""
3678 instance_dir = libvirt_utils.get_instance_path(instance)
3679 unrescue_xml_path = os.path.join(instance_dir, 'unrescue.xml')
3680 # The xml should already contain the secret_uuid if relevant.
3681 xml = libvirt_utils.load_file(unrescue_xml_path)
3682
3683 self._destroy(instance)
3684 self._create_guest(context, xml, instance)
3685 os.unlink(unrescue_xml_path)
3686 rescue_files = os.path.join(instance_dir, "*.rescue")
3687 for rescue_file in glob.iglob(rescue_files):
3688 if os.path.isdir(rescue_file):
3689 shutil.rmtree(rescue_file)
3690 else:
3691 os.unlink(rescue_file)
3692 # cleanup rescue volume
3693 lvm.remove_volumes([lvmdisk for lvmdisk in self._lvm_disks(instance)
3694 if lvmdisk.endswith('.rescue')])
3695 if CONF.libvirt.images_type == 'rbd':
3696 filter_fn = lambda disk: (disk.startswith(instance.uuid) and
3697 disk.endswith('.rescue'))
3698 rbd_utils.RBDDriver().cleanup_volumes(filter_fn)
3699
3700 def poll_rebooting_instances(self, timeout, instances):
3701 pass
3702
3703 def spawn(self, context, instance, image_meta, injected_files,
3704 admin_password, allocations, network_info=None,
3705 block_device_info=None, power_on=True, accel_info=None):
3706 disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
3707 instance,
3708 image_meta,
3709 block_device_info)
3710 injection_info = InjectionInfo(network_info=network_info,
3711 files=injected_files,
3712 admin_pass=admin_password)
3713 gen_confdrive = functools.partial(self._create_configdrive,
3714 context, instance,
3715 injection_info)
3716 created_instance_dir, created_disks = self._create_image(
3717 context, instance, disk_info['mapping'],
3718 injection_info=injection_info,
3719 block_device_info=block_device_info)
3720
3721 # Required by Quobyte CI
3722 self._ensure_console_log_for_instance(instance)
3723
3724 # Does the guest need to be assigned some vGPU mediated devices ?
3725 mdevs = self._allocate_mdevs(allocations)
3726
3727 # If the guest needs a vTPM, _get_guest_xml needs its secret to exist
3728 # and its uuid to be registered in the instance prior to _get_guest_xml
3729 if CONF.libvirt.swtpm_enabled and hardware.get_vtpm_constraint(
3730 instance.flavor, image_meta
3731 ):
3732 if not instance.system_metadata.get('vtpm_secret_uuid'):
3733 # Create the secret via the key manager service so that we have
3734 # it to hand when generating the XML. This is slightly wasteful
3735 # as we'll perform a redundant key manager API call later when
3736 # we create the domain but the alternative is an ugly mess
3737 crypto.ensure_vtpm_secret(context, instance)
3738
3739 xml = self._get_guest_xml(context, instance, network_info,
3740 disk_info, image_meta,
3741 block_device_info=block_device_info,
3742 mdevs=mdevs, accel_info=accel_info)
3743 self._create_guest_with_network(
3744 context, xml, instance, network_info, block_device_info,
3745 post_xml_callback=gen_confdrive,
3746 power_on=power_on,
3747 cleanup_instance_dir=created_instance_dir,
3748 cleanup_instance_disks=created_disks)
3749 LOG.debug("Guest created on hypervisor", instance=instance)
3750
3751 def _wait_for_boot():
3752 """Called at an interval until the VM is running."""
3753 state = self.get_info(instance).state
3754
3755 if state == power_state.RUNNING:
3756 LOG.info("Instance spawned successfully.", instance=instance)
3757 raise loopingcall.LoopingCallDone()
3758
3759 if power_on:
3760 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot)
3761 timer.start(interval=0.5).wait()
3762 else:
3763 LOG.info("Instance spawned successfully.", instance=instance)
3764
3765 def _get_console_output_file(self, instance, console_log):
3766 bytes_to_read = MAX_CONSOLE_BYTES
3767 log_data = b"" # The last N read bytes
3768 i = 0 # in case there is a log rotation (like "virtlogd")
3769 path = console_log
3770
3771 while bytes_to_read > 0 and os.path.exists(path):
3772 read_log_data, remaining = nova.privsep.path.last_bytes(
3773 path, bytes_to_read)
3774 # We need the log file content in chronological order,
3775 # that's why we *prepend* the log data.
3776 log_data = read_log_data + log_data
3777
3778 # Prep to read the next file in the chain
3779 bytes_to_read -= len(read_log_data)
3780 path = console_log + "." + str(i)
3781 i += 1
3782
3783 if remaining > 0:
3784 LOG.info('Truncated console log returned, '
3785 '%d bytes ignored', remaining, instance=instance)
3786 return log_data
3787
3788 def get_console_output(self, context, instance):
3789 guest = self._host.get_guest(instance)
3790
3791 xml = guest.get_xml_desc()
3792 tree = etree.fromstring(xml)
3793
3794 # check for different types of consoles
3795 path_sources = [
3796 ('file', "./devices/console[@type='file']/source[@path]", 'path'),
3797 ('tcp', "./devices/console[@type='tcp']/log[@file]", 'file'),
3798 ('pty', "./devices/console[@type='pty']/source[@path]", 'path')]
3799 console_type = ""
3800 console_path = ""
3801 for c_type, epath, attrib in path_sources:
3802 node = tree.find(epath)
3803 if (node is not None) and node.get(attrib):
3804 console_type = c_type
3805 console_path = node.get(attrib)
3806 break
3807
3808 # instance has no console at all
3809 if not console_path:
3810 raise exception.ConsoleNotAvailable()
3811
3812 # instance has a console, but file doesn't exist (yet?)
3813 if not os.path.exists(console_path):
3814 LOG.info('console logfile for instance does not exist',
3815 instance=instance)
3816 return ""
3817
3818 # pty consoles need special handling
3819 if console_type == 'pty':
3820 console_log = self._get_console_log_path(instance)
3821 data = nova.privsep.libvirt.readpty(console_path)
3822
3823 # NOTE(markus_z): The virt_types kvm and qemu are the only ones
3824 # which create a dedicated file device for the console logging.
3825 # Other virt_types like xen, lxc, uml, parallels depend on the
3826 # flush of that pty device into the "console.log" file to ensure
3827 # that a series of "get_console_output" calls return the complete
3828 # content even after rebooting a guest.
3829 nova.privsep.path.writefile(console_log, 'a+', data)
3830
3831 # set console path to logfile, not to pty device
3832 console_path = console_log
3833
3834 # return logfile content
3835 return self._get_console_output_file(instance, console_path)
3836
3837 def get_host_ip_addr(self):
3838 return CONF.my_ip
3839
3840 def get_vnc_console(self, context, instance):
3841 def get_vnc_port_for_instance(instance_name):
3842 guest = self._host.get_guest(instance)
3843
3844 xml = guest.get_xml_desc()
3845 xml_dom = etree.fromstring(xml)
3846
3847 graphic = xml_dom.find("./devices/graphics[@type='vnc']")
3848 if graphic is not None:
3849 return graphic.get('port')
3850 # NOTE(rmk): We had VNC consoles enabled but the instance in
3851 # question is not actually listening for connections.
3852 raise exception.ConsoleTypeUnavailable(console_type='vnc')
3853
3854 port = get_vnc_port_for_instance(instance.name)
3855 host = CONF.vnc.server_proxyclient_address
3856
3857 return ctype.ConsoleVNC(host=host, port=port)
3858
3859 def get_spice_console(self, context, instance):
3860 def get_spice_ports_for_instance(instance_name):
3861 guest = self._host.get_guest(instance)
3862
3863 xml = guest.get_xml_desc()
3864 xml_dom = etree.fromstring(xml)
3865
3866 graphic = xml_dom.find("./devices/graphics[@type='spice']")
3867 if graphic is not None:
3868 return (graphic.get('port'), graphic.get('tlsPort'))
3869 # NOTE(rmk): We had Spice consoles enabled but the instance in
3870 # question is not actually listening for connections.
3871 raise exception.ConsoleTypeUnavailable(console_type='spice')
3872
3873 ports = get_spice_ports_for_instance(instance.name)
3874 host = CONF.spice.server_proxyclient_address
3875
3876 return ctype.ConsoleSpice(host=host, port=ports[0], tlsPort=ports[1])
3877
3878 def get_serial_console(self, context, instance):
3879 guest = self._host.get_guest(instance)
3880 for hostname, port in self._get_serial_ports_from_guest(
3881 guest, mode='bind'):
3882 return ctype.ConsoleSerial(host=hostname, port=port)
3883 raise exception.ConsoleTypeUnavailable(console_type='serial')
3884
3885 @staticmethod
3886 def _create_ephemeral(target, ephemeral_size,
3887 fs_label, os_type, is_block_dev=False,
3888 context=None, specified_fs=None,
3889 vm_mode=None):
3890 if not is_block_dev:
3891 if (CONF.libvirt.virt_type == "parallels" and
3892 vm_mode == fields.VMMode.EXE):
3893
3894 libvirt_utils.create_ploop_image('expanded', target,
3895 '%dG' % ephemeral_size,
3896 specified_fs)
3897 return
3898 libvirt_utils.create_image('raw', target, '%dG' % ephemeral_size)
3899
3900 # Run as root only for block devices.
3901 disk_api.mkfs(os_type, fs_label, target, run_as_root=is_block_dev,
3902 specified_fs=specified_fs)
3903
3904 @staticmethod
3905 def _create_swap(target, swap_mb, context=None):
3906 """Create a swap file of specified size."""
3907 libvirt_utils.create_image('raw', target, '%dM' % swap_mb)
3908 nova.privsep.fs.unprivileged_mkfs('swap', target)
3909
3910 @staticmethod
3911 def _get_console_log_path(instance):
3912 return os.path.join(libvirt_utils.get_instance_path(instance),
3913 'console.log')
3914
3915 def _ensure_console_log_for_instance(self, instance):
3916 # NOTE(mdbooth): Although libvirt will create this file for us
3917 # automatically when it starts, it will initially create it with
3918 # root ownership and then chown it depending on the configuration of
3919 # the domain it is launching. Quobyte CI explicitly disables the
3920 # chown by setting dynamic_ownership=0 in libvirt's config.
3921 # Consequently when the domain starts it is unable to write to its
3922 # console.log. See bug https://bugs.launchpad.net/nova/+bug/1597644
3923 #
3924 # To work around this, we create the file manually before starting
3925 # the domain so it has the same ownership as Nova. This works
3926 # for Quobyte CI because it is also configured to run qemu as the same
3927 # user as the Nova service. Installations which don't set
3928 # dynamic_ownership=0 are not affected because libvirt will always
3929 # correctly configure permissions regardless of initial ownership.
3930 #
3931 # Setting dynamic_ownership=0 is dubious and potentially broken in
3932 # more ways than console.log (see comment #22 on the above bug), so
3933 # Future Maintainer who finds this code problematic should check to see
3934 # if we still support it.
3935 console_file = self._get_console_log_path(instance)
3936 LOG.debug('Ensure instance console log exists: %s', console_file,
3937 instance=instance)
3938 try:
3939 libvirt_utils.file_open(console_file, 'a').close()
3940 # NOTE(sfinucan): We can safely ignore permission issues here and
3941 # assume that it is libvirt that has taken ownership of this file.
3942 except IOError as ex:
3943 if ex.errno != errno.EACCES:
3944 raise
3945 LOG.debug('Console file already exists: %s.', console_file)
3946
3947 @staticmethod
3948 def _get_disk_config_image_type():
3949 # TODO(mikal): there is a bug here if images_type has
3950 # changed since creation of the instance, but I am pretty
3951 # sure that this bug already exists.
3952 return 'rbd' if CONF.libvirt.images_type == 'rbd' else 'raw'
3953
3954 @staticmethod
3955 def _is_booted_from_volume(block_device_info):
3956 """Determines whether the VM is booting from volume
3957
3958 Determines whether the block device info indicates that the VM
3959 is booting from a volume.
3960 """
3961 block_device_mapping = driver.block_device_info_get_mapping(
3962 block_device_info)
3963 return bool(block_device.get_root_bdm(block_device_mapping))
3964
3965 def _inject_data(self, disk, instance, injection_info):
3966 """Injects data in a disk image
3967
3968 Helper used for injecting data in a disk image file system.
3969
3970 :param disk: The disk we're injecting into (an Image object)
3971 :param instance: The instance we're injecting into
3972 :param injection_info: Injection info
3973 """
3974 # Handles the partition need to be used.
3975 LOG.debug('Checking root disk injection %s',
3976 str(injection_info), instance=instance)
3977 target_partition = None
3978 if not instance.kernel_id:
3979 target_partition = CONF.libvirt.inject_partition
3980 if target_partition == 0:
3981 target_partition = None
3982 if CONF.libvirt.virt_type == 'lxc':
3983 target_partition = None
3984
3985 # Handles the key injection.
3986 key = None
3987 if CONF.libvirt.inject_key and instance.get('key_data'):
3988 key = str(instance.key_data)
3989
3990 # Handles the admin password injection.
3991 admin_pass = None
3992 if CONF.libvirt.inject_password:
3993 admin_pass = injection_info.admin_pass
3994
3995 # Handles the network injection.
3996 net = netutils.get_injected_network_template(
3997 injection_info.network_info,
3998 libvirt_virt_type=CONF.libvirt.virt_type)
3999
4000 # Handles the metadata injection
4001 metadata = instance.get('metadata')
4002
4003 if any((key, net, metadata, admin_pass, injection_info.files)):
4004 LOG.debug('Injecting %s', str(injection_info),
4005 instance=instance)
4006 img_id = instance.image_ref
4007 try:
4008 disk_api.inject_data(disk.get_model(self._conn),
4009 key, net, metadata, admin_pass,
4010 injection_info.files,
4011 partition=target_partition,
4012 mandatory=('files',))
4013 except Exception as e:
4014 with excutils.save_and_reraise_exception():
4015 LOG.error('Error injecting data into image '
4016 '%(img_id)s (%(e)s)',
4017 {'img_id': img_id, 'e': e},
4018 instance=instance)
4019
4020 # NOTE(sileht): many callers of this method assume that this
4021 # method doesn't fail if an image already exists but instead
4022 # think that it will be reused (ie: (live)-migration/resize)
4023 def _create_image(self, context, instance,
4024 disk_mapping, injection_info=None, suffix='',
4025 disk_images=None, block_device_info=None,
4026 fallback_from_host=None,
4027 ignore_bdi_for_swap=False):
4028 booted_from_volume = self._is_booted_from_volume(block_device_info)
4029
4030 def image(fname, image_type=CONF.libvirt.images_type):
4031 return self.image_backend.by_name(instance,
4032 fname + suffix, image_type)
4033
4034 def raw(fname):
4035 return image(fname, image_type='raw')
4036
4037 created_instance_dir = True
4038
4039 # ensure directories exist and are writable
4040 instance_dir = libvirt_utils.get_instance_path(instance)
4041 if os.path.exists(instance_dir):
4042 LOG.debug("Instance directory exists: not creating",
4043 instance=instance)
4044 created_instance_dir = False
4045 else:
4046 LOG.debug("Creating instance directory", instance=instance)
4047 fileutils.ensure_tree(libvirt_utils.get_instance_path(instance))
4048
4049 LOG.info('Creating image', instance=instance)
4050
4051 inst_type = instance.get_flavor()
4052 swap_mb = 0
4053 if 'disk.swap' in disk_mapping:
4054 mapping = disk_mapping['disk.swap']
4055
4056 if ignore_bdi_for_swap:
4057 # This is a workaround to support legacy swap resizing,
4058 # which does not touch swap size specified in bdm,
4059 # but works with flavor specified size only.
4060 # In this case we follow the legacy logic and ignore block
4061 # device info completely.
4062 # NOTE(ft): This workaround must be removed when a correct
4063 # implementation of resize operation changing sizes in bdms is
4064 # developed. Also at that stage we probably may get rid of
4065 # the direct usage of flavor swap size here,
4066 # leaving the work with bdm only.
4067 swap_mb = inst_type['swap']
4068 else:
4069 swap = driver.block_device_info_get_swap(block_device_info)
4070 if driver.swap_is_usable(swap):
4071 swap_mb = swap['swap_size']
4072 elif (inst_type['swap'] > 0 and
4073 not block_device.volume_in_mapping(
4074 mapping['dev'], block_device_info)):
4075 swap_mb = inst_type['swap']
4076
4077 if swap_mb > 0:
4078 if (CONF.libvirt.virt_type == "parallels" and
4079 instance.vm_mode == fields.VMMode.EXE):
4080 msg = _("Swap disk is not supported "
4081 "for Virtuozzo container")
4082 raise exception.Invalid(msg)
4083
4084 if not disk_images:
4085 disk_images = {'image_id': instance.image_ref,
4086 'kernel_id': instance.kernel_id,
4087 'ramdisk_id': instance.ramdisk_id}
4088
4089 # NOTE(mdbooth): kernel and ramdisk, if they are defined, are hardcoded
4090 # to use raw, which means they will always be cleaned up with the
4091 # instance directory. We must not consider them for created_disks,
4092 # which may not be using the instance directory.
4093 if disk_images['kernel_id']:
4094 fname = imagecache.get_cache_fname(disk_images['kernel_id'])
4095 raw('kernel').cache(fetch_func=libvirt_utils.fetch_raw_image,
4096 context=context,
4097 filename=fname,
4098 image_id=disk_images['kernel_id'])
4099 if disk_images['ramdisk_id']:
4100 fname = imagecache.get_cache_fname(disk_images['ramdisk_id'])
4101 raw('ramdisk').cache(fetch_func=libvirt_utils.fetch_raw_image,
4102 context=context,
4103 filename=fname,
4104 image_id=disk_images['ramdisk_id'])
4105
4106 if CONF.libvirt.virt_type == 'uml':
4107 # PONDERING(mikal): can I assume that root is UID zero in every
4108 # OS? Probably not.
4109 uid = pwd.getpwnam('root').pw_uid
4110 nova.privsep.path.chown(image('disk').path, uid=uid)
4111
4112 created_disks = self._create_and_inject_local_root(
4113 context, instance, booted_from_volume, suffix, disk_images,
4114 injection_info, fallback_from_host)
4115
4116 # Lookup the filesystem type if required
4117 os_type_with_default = nova.privsep.fs.get_fs_type_for_os_type(
4118 instance.os_type)
4119 # Generate a file extension based on the file system
4120 # type and the mkfs commands configured if any
4121 file_extension = nova.privsep.fs.get_file_extension_for_os_type(
4122 os_type_with_default, CONF.default_ephemeral_format)
4123
4124 vm_mode = fields.VMMode.get_from_instance(instance)
4125 ephemeral_gb = instance.flavor.ephemeral_gb
4126 if 'disk.local' in disk_mapping:
4127 disk_image = image('disk.local')
4128 # Short circuit the exists() tests if we already created a disk
4129 created_disks = created_disks or not disk_image.exists()
4130
4131 fn = functools.partial(self._create_ephemeral,
4132 fs_label='ephemeral0',
4133 os_type=instance.os_type,
4134 is_block_dev=disk_image.is_block_dev,
4135 vm_mode=vm_mode)
4136 fname = "ephemeral_%s_%s" % (ephemeral_gb, file_extension)
4137 size = ephemeral_gb * units.Gi
4138 disk_image.cache(fetch_func=fn,
4139 context=context,
4140 filename=fname,
4141 size=size,
4142 ephemeral_size=ephemeral_gb)
4143
4144 for idx, eph in enumerate(driver.block_device_info_get_ephemerals(
4145 block_device_info)):
4146 disk_image = image(blockinfo.get_eph_disk(idx))
4147 # Short circuit the exists() tests if we already created a disk
4148 created_disks = created_disks or not disk_image.exists()
4149
4150 specified_fs = eph.get('guest_format')
4151 if specified_fs and not self.is_supported_fs_format(specified_fs):
4152 msg = _("%s format is not supported") % specified_fs
4153 raise exception.InvalidBDMFormat(details=msg)
4154
4155 fn = functools.partial(self._create_ephemeral,
4156 fs_label='ephemeral%d' % idx,
4157 os_type=instance.os_type,
4158 is_block_dev=disk_image.is_block_dev,
4159 vm_mode=vm_mode)
4160 size = eph['size'] * units.Gi
4161 fname = "ephemeral_%s_%s" % (eph['size'], file_extension)
4162 disk_image.cache(fetch_func=fn,
4163 context=context,
4164 filename=fname,
4165 size=size,
4166 ephemeral_size=eph['size'],
4167 specified_fs=specified_fs)
4168
4169 if swap_mb > 0:
4170 size = swap_mb * units.Mi
4171 swap = image('disk.swap')
4172 # Short circuit the exists() tests if we already created a disk
4173 created_disks = created_disks or not swap.exists()
4174 swap.cache(fetch_func=self._create_swap, context=context,
4175 filename="swap_%s" % swap_mb,
4176 size=size, swap_mb=swap_mb)
4177
4178 if created_disks:
4179 LOG.debug('Created local disks', instance=instance)
4180 else:
4181 LOG.debug('Did not create local disks', instance=instance)
4182
4183 return (created_instance_dir, created_disks)
4184
4185 def _create_and_inject_local_root(self, context, instance,
4186 booted_from_volume, suffix, disk_images,
4187 injection_info, fallback_from_host):
4188 created_disks = False
4189
4190 # File injection only if needed
4191 need_inject = (not configdrive.required_by(instance) and
4192 injection_info is not None and
4193 CONF.libvirt.inject_partition != -2)
4194
4195 # NOTE(ndipanov): Even if disk_mapping was passed in, which
4196 # currently happens only on rescue - we still don't want to
4197 # create a base image.
4198 if not booted_from_volume:
4199 root_fname = imagecache.get_cache_fname(disk_images['image_id'])
4200 size = instance.flavor.root_gb * units.Gi
4201
4202 if size == 0 or suffix == '.rescue':
4203 size = None
4204
4205 backend = self.image_backend.by_name(instance, 'disk' + suffix,
4206 CONF.libvirt.images_type)
4207 created_disks = not backend.exists()
4208
4209 if instance.task_state == task_states.RESIZE_FINISH:
4210 backend.create_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
4211 if backend.SUPPORTS_CLONE:
4212 def clone_fallback_to_fetch(
4213 context, target, image_id, trusted_certs=None,
4214 ):
4215 refuse_fetch = (
4216 CONF.libvirt.images_type == 'rbd' and
4217 CONF.workarounds.never_download_image_if_on_rbd)
4218 try:
4219 backend.clone(context, disk_images['image_id'])
4220 except exception.ImageUnacceptable:
4221 if refuse_fetch:
4222 # Re-raise the exception from the failed
4223 # ceph clone. The compute manager expects
4224 # ImageUnacceptable as a possible result
4225 # of spawn(), from which this is called.
4226 with excutils.save_and_reraise_exception():
4227 LOG.warning(
4228 'Image %s is not on my ceph and '
4229 '[workarounds]/'
4230 'never_download_image_if_on_rbd=True;'
4231 ' refusing to fetch and upload.',
4232 disk_images['image_id'])
4233 libvirt_utils.fetch_image(
4234 context, target, image_id, trusted_certs,
4235 )
4236 fetch_func = clone_fallback_to_fetch
4237 else:
4238 fetch_func = libvirt_utils.fetch_image
4239
4240 self._try_fetch_image_cache(backend, fetch_func, context,
4241 root_fname, disk_images['image_id'],
4242 instance, size, fallback_from_host)
4243
4244 # During unshelve on Qcow2 backend, we spawn() using snapshot image
4245 # created during shelve. Extra work is needed in order to rebase
4246 # disk image to its original image_ref. Disk backing file will
4247 # then represent back image_ref instead of shelved image.
4248 if (instance.vm_state == vm_states.SHELVED_OFFLOADED and
4249 isinstance(backend, imagebackend.Qcow2)):
4250 self._finalize_unshelve_qcow2_image(context, instance, backend)
4251
4252 if need_inject:
4253 self._inject_data(backend, instance, injection_info)
4254
4255 elif need_inject:
4256 LOG.warning('File injection into a boot from volume '
4257 'instance is not supported', instance=instance)
4258
4259 return created_disks
4260
4261 def _finalize_unshelve_qcow2_image(self, context, instance, backend):
4262 # NOTE(aarents): During qcow2 instance unshelve, backing file
4263 # represents shelved image, not original instance.image_ref.
4264 # We rebase here instance disk to original image.
4265 # This second fetch call does nothing except downloading original
4266 # backing file if missing, as image disk have already been
4267 # created/resized by first fetch call.
4268 base_dir = self.image_cache_manager.cache_dir
4269 base_image_ref = instance.system_metadata.get('image_base_image_ref')
4270 root_fname = imagecache.get_cache_fname(base_image_ref)
4271 base_backing_fname = os.path.join(base_dir, root_fname)
4272
4273 try:
4274 self._try_fetch_image_cache(backend, libvirt_utils.fetch_image,
4275 context, root_fname, base_image_ref,
4276 instance, None)
4277 except exception.ImageNotFound:
4278 # We must flatten here in order to remove dependency with an orphan
4279 # backing file (as shelved image will be dropped once unshelve
4280 # is successfull).
4281 LOG.warning('Current disk image is created on top of shelved '
4282 'image and cannot be rebased to original image '
4283 'because it is no longer available in the image '
4284 'service, disk will be consequently flattened.',
4285 instance=instance)
4286 base_backing_fname = None
4287
4288 LOG.info('Rebasing disk image.', instance=instance)
4289 self._rebase_with_qemu_img(backend.path, base_backing_fname)
4290
4291 def _create_configdrive(self, context, instance, injection_info,
4292 rescue=False):
4293 # As this method being called right after the definition of a
4294 # domain, but before its actual launch, device metadata will be built
4295 # and saved in the instance for it to be used by the config drive and
4296 # the metadata service.
4297 instance.device_metadata = self._build_device_metadata(context,
4298 instance)
4299 if configdrive.required_by(instance):
4300 LOG.info('Using config drive', instance=instance)
4301
4302 name = 'disk.config'
4303 if rescue:
4304 name += '.rescue'
4305
4306 config_disk = self.image_backend.by_name(
4307 instance, name, self._get_disk_config_image_type())
4308
4309 # Don't overwrite an existing config drive
4310 if not config_disk.exists():
4311 extra_md = {}
4312 if injection_info.admin_pass:
4313 extra_md['admin_pass'] = injection_info.admin_pass
4314
4315 inst_md = instance_metadata.InstanceMetadata(
4316 instance, content=injection_info.files, extra_md=extra_md,
4317 network_info=injection_info.network_info,
4318 request_context=context)
4319
4320 cdb = configdrive.ConfigDriveBuilder(instance_md=inst_md)
4321 with cdb:
4322 # NOTE(mdbooth): We're hardcoding here the path of the
4323 # config disk when using the flat backend. This isn't
4324 # good, but it's required because we need a local path we
4325 # know we can write to in case we're subsequently
4326 # importing into rbd. This will be cleaned up when we
4327 # replace this with a call to create_from_func, but that
4328 # can't happen until we've updated the backends and we
4329 # teach them not to cache config disks. This isn't
4330 # possible while we're still using cache() under the hood.
4331 config_disk_local_path = os.path.join(
4332 libvirt_utils.get_instance_path(instance), name)
4333 LOG.info('Creating config drive at %(path)s',
4334 {'path': config_disk_local_path},
4335 instance=instance)
4336
4337 try:
4338 cdb.make_drive(config_disk_local_path)
4339 except processutils.ProcessExecutionError as e:
4340 with excutils.save_and_reraise_exception():
4341 LOG.error('Creating config drive failed with '
4342 'error: %s', e, instance=instance)
4343
4344 try:
4345 config_disk.import_file(
4346 instance, config_disk_local_path, name)
4347 finally:
4348 # NOTE(mikal): if the config drive was imported into RBD,
4349 # then we no longer need the local copy
4350 if CONF.libvirt.images_type == 'rbd':
4351 LOG.info('Deleting local config drive %(path)s '
4352 'because it was imported into RBD.',
4353 {'path': config_disk_local_path},
4354 instance=instance)
4355 os.unlink(config_disk_local_path)
4356
4357 def _prepare_pci_devices_for_use(self, pci_devices):
4358 # kvm , qemu support managed mode
4359 # In managed mode, the configured device will be automatically
4360 # detached from the host OS drivers when the guest is started,
4361 # and then re-attached when the guest shuts down.
4362 if CONF.libvirt.virt_type != 'xen':
4363 # we do manual detach only for xen
4364 return
4365 try:
4366 for dev in pci_devices:
4367 libvirt_dev_addr = dev['hypervisor_name']
4368 libvirt_dev = \
4369 self._host.device_lookup_by_name(libvirt_dev_addr)
4370 # Note(yjiang5) Spelling for 'dettach' is correct, see
4371 # http://libvirt.org/html/libvirt-libvirt.html.
4372 libvirt_dev.dettach()
4373
4374 # Note(yjiang5): A reset of one PCI device may impact other
4375 # devices on the same bus, thus we need two separated loops
4376 # to detach and then reset it.
4377 for dev in pci_devices:
4378 libvirt_dev_addr = dev['hypervisor_name']
4379 libvirt_dev = \
4380 self._host.device_lookup_by_name(libvirt_dev_addr)
4381 libvirt_dev.reset()
4382
4383 except libvirt.libvirtError as exc:
4384 raise exception.PciDevicePrepareFailed(id=dev['id'],
4385 instance_uuid=
4386 dev['instance_uuid'],
4387 reason=six.text_type(exc))
4388
4389 def _detach_pci_devices(self, guest, pci_devs):
4390 try:
4391 for dev in pci_devs:
4392 guest.detach_device(self._get_guest_pci_device(dev), live=True)
4393 # after detachDeviceFlags returned, we should check the dom to
4394 # ensure the detaching is finished
4395 xml = guest.get_xml_desc()
4396 xml_doc = etree.fromstring(xml)
4397 guest_config = vconfig.LibvirtConfigGuest()
4398 guest_config.parse_dom(xml_doc)
4399
4400 for hdev in [d for d in guest_config.devices
4401 if isinstance(d, vconfig.LibvirtConfigGuestHostdevPCI)]:
4402 hdbsf = [hdev.domain, hdev.bus, hdev.slot, hdev.function]
4403 dbsf = pci_utils.parse_address(dev.address)
4404 if [int(x, 16) for x in hdbsf] ==\
4405 [int(x, 16) for x in dbsf]:
4406 raise exception.PciDeviceDetachFailed(reason=
4407 "timeout",
4408 dev=dev)
4409
4410 except libvirt.libvirtError as ex:
4411 error_code = ex.get_error_code()
4412 if error_code == libvirt.VIR_ERR_NO_DOMAIN:
4413 LOG.warning("Instance disappeared while detaching "
4414 "a PCI device from it.")
4415 else:
4416 raise
4417
4418 def _attach_pci_devices(self, guest, pci_devs):
4419 try:
4420 for dev in pci_devs:
4421 guest.attach_device(self._get_guest_pci_device(dev))
4422
4423 except libvirt.libvirtError:
4424 LOG.error('Attaching PCI devices %(dev)s to %(dom)s failed.',
4425 {'dev': pci_devs, 'dom': guest.id})
4426 raise
4427
4428 @staticmethod
4429 def _has_direct_passthrough_port(network_info):
4430 for vif in network_info:
4431 if (vif['vnic_type'] in
4432 network_model.VNIC_TYPES_DIRECT_PASSTHROUGH):
4433 return True
4434 return False
4435
4436 def _attach_direct_passthrough_ports(
4437 self, context, instance, guest, network_info=None):
4438 if network_info is None:
4439 network_info = instance.info_cache.network_info
4440 if network_info is None:
4441 return
4442
4443 if self._has_direct_passthrough_port(network_info):
4444 for vif in network_info:
4445 if (vif['vnic_type'] in
4446 network_model.VNIC_TYPES_DIRECT_PASSTHROUGH):
4447 cfg = self.vif_driver.get_config(instance,
4448 vif,
4449 instance.image_meta,
4450 instance.flavor,
4451 CONF.libvirt.virt_type)
4452 LOG.debug('Attaching direct passthrough port %(port)s '
4453 'to %(dom)s', {'port': vif, 'dom': guest.id},
4454 instance=instance)
4455 guest.attach_device(cfg)
4456
4457 def _detach_direct_passthrough_ports(self, context, instance, guest):
4458 network_info = instance.info_cache.network_info
4459 if network_info is None:
4460 return
4461
4462 if self._has_direct_passthrough_port(network_info):
4463 # In case of VNIC_TYPES_DIRECT_PASSTHROUGH ports we create
4464 # pci request per direct passthrough port. Therefore we can trust
4465 # that pci_slot value in the vif is correct.
4466 direct_passthrough_pci_addresses = [
4467 vif['profile']['pci_slot']
4468 for vif in network_info
4469 if (vif['vnic_type'] in
4470 network_model.VNIC_TYPES_DIRECT_PASSTHROUGH and
4471 vif['profile'].get('pci_slot') is not None)
4472 ]
4473
4474 # use detach_pci_devices to avoid failure in case of
4475 # multiple guest direct passthrough ports with the same MAC
4476 # (protection use-case, ports are on different physical
4477 # interfaces)
4478 pci_devs = pci_manager.get_instance_pci_devs(instance, 'all')
4479 direct_passthrough_pci_addresses = (
4480 [pci_dev for pci_dev in pci_devs
4481 if pci_dev.address in direct_passthrough_pci_addresses])
4482 self._detach_pci_devices(guest, direct_passthrough_pci_addresses)
4483
4484 def _update_compute_provider_status(self, context, service):
4485 """Calls the ComputeVirtAPI.update_compute_provider_status method
4486
4487 :param context: nova auth RequestContext
4488 :param service: nova.objects.Service record for this host which is
4489 expected to only manage a single ComputeNode
4490 """
4491 rp_uuid = None
4492 try:
4493 rp_uuid = service.compute_node.uuid
4494 self.virtapi.update_compute_provider_status(
4495 context, rp_uuid, enabled=not service.disabled)
4496 except Exception:
4497 # This is best effort so just log the exception but don't fail.
4498 # The update_available_resource periodic task will sync the trait.
4499 LOG.warning(
4500 'An error occurred while updating compute node '
4501 'resource provider status to "%s" for provider: %s',
4502 'disabled' if service.disabled else 'enabled',
4503 rp_uuid or service.host, exc_info=True)
4504
4505 def _set_host_enabled(self, enabled,
4506 disable_reason=DISABLE_REASON_UNDEFINED):
4507 """Enables / Disables the compute service on this host.
4508
4509 This doesn't override non-automatic disablement with an automatic
4510 setting; thereby permitting operators to keep otherwise
4511 healthy hosts out of rotation.
4512 """
4513
4514 status_name = {True: 'disabled',
4515 False: 'enabled'}
4516
4517 disable_service = not enabled
4518
4519 ctx = nova_context.get_admin_context()
4520 try:
4521 service = objects.Service.get_by_compute_host(ctx, CONF.host)
4522
4523 if service.disabled != disable_service:
4524 # Note(jang): this is a quick fix to stop operator-
4525 # disabled compute hosts from re-enabling themselves
4526 # automatically. We prefix any automatic reason code
4527 # with a fixed string. We only re-enable a host
4528 # automatically if we find that string in place.
4529 # This should probably be replaced with a separate flag.
4530 if not service.disabled or (
4531 service.disabled_reason and
4532 service.disabled_reason.startswith(DISABLE_PREFIX)):
4533 service.disabled = disable_service
4534 service.disabled_reason = (
4535 DISABLE_PREFIX + disable_reason
4536 if disable_service and disable_reason else
4537 DISABLE_REASON_UNDEFINED)
4538 service.save()
4539 LOG.debug('Updating compute service status to %s',
4540 status_name[disable_service])
4541 # Update the disabled trait status on the corresponding
4542 # compute node resource provider in placement.
4543 self._update_compute_provider_status(ctx, service)
4544 else:
4545 LOG.debug('Not overriding manual compute service '
4546 'status with: %s',
4547 status_name[disable_service])
4548 except exception.ComputeHostNotFound:
4549 LOG.warning('Cannot update service status on host "%s" '
4550 'since it is not registered.', CONF.host)
4551 except Exception:
4552 LOG.warning('Cannot update service status on host "%s" '
4553 'due to an unexpected exception.', CONF.host,
4554 exc_info=True)
4555
4556 if enabled:
4557 mount.get_manager().host_up(self._host)
4558 else:
4559 mount.get_manager().host_down()
4560
4561 def _get_cpu_model_mapping(self, model):
4562 """Get the CPU model mapping
4563
4564 The CPU models which admin configured are case-insensitive, libvirt is
4565 case-sensitive, therefore build a mapping to get the correct CPU model
4566 name.
4567
4568 :param model: Case-insensitive CPU model name.
4569 :return: It will validate and return the case-sensitive CPU model name
4570 if on a supported platform, otherwise it will just return
4571 what was provided
4572 :raises: exception.InvalidCPUInfo if the CPU model is not supported.
4573 """
4574 cpu_info = self._get_cpu_info()
4575 if cpu_info['arch'] not in (fields.Architecture.I686,
4576 fields.Architecture.X86_64,
4577 fields.Architecture.PPC64,
4578 fields.Architecture.PPC64LE,
4579 fields.Architecture.PPC):
4580 return model
4581
4582 if not self.cpu_models_mapping:
4583 cpu_models = self._host.get_cpu_model_names()
4584 for cpu_model in cpu_models:
4585 self.cpu_models_mapping[cpu_model.lower()] = cpu_model
4586
4587 if model.lower() not in self.cpu_models_mapping:
4588 msg = (_("Configured CPU model: %(model)s is not correct, "
4589 "or your host CPU arch does not support this "
4590 "model. Please correct your config and try "
4591 "again.") % {'model': model})
4592 raise exception.InvalidCPUInfo(msg)
4593
4594 return self.cpu_models_mapping.get(model.lower())
4595
4596 def _get_guest_cpu_model_config(self, flavor=None):
4597 mode = CONF.libvirt.cpu_mode
4598 models = [self._get_cpu_model_mapping(model)
4599 for model in CONF.libvirt.cpu_models]
4600 extra_flags = set([flag.lower() for flag in
4601 CONF.libvirt.cpu_model_extra_flags])
4602
4603 if (CONF.libvirt.virt_type == "kvm" or
4604 CONF.libvirt.virt_type == "qemu"):
4605 caps = self._host.get_capabilities()
4606 if mode is None:
4607 # AArch64 lacks 'host-model' support because neither libvirt
4608 # nor QEMU are able to tell what the host CPU model exactly is.
4609 # And there is no CPU description code for ARM(64) at this
4610 # point.
4611
4612 # Also worth noting: 'host-passthrough' mode will completely
4613 # break live migration, *unless* all the Compute nodes (running
4614 # libvirtd) have *identical* CPUs.
4615 if caps.host.cpu.arch == fields.Architecture.AARCH64:
4616 mode = "host-passthrough"
4617 LOG.info('CPU mode "host-passthrough" was chosen. Live '
4618 'migration can break unless all compute nodes '
4619 'have identical cpus. AArch64 does not support '
4620 'other modes.')
4621 else:
4622 mode = "host-model"
4623 if mode == "none":
4624 return vconfig.LibvirtConfigGuestCPU()
4625 # On AArch64 platform the return of _get_cpu_model_mapping will not
4626 # return the default CPU model.
4627 if mode == "custom":
4628 if caps.host.cpu.arch == fields.Architecture.AARCH64:
4629 if not models:
4630 models = ['max']
4631
4632 else:
4633 if mode is None or mode == "none":
4634 return None
4635
4636 cpu = vconfig.LibvirtConfigGuestCPU()
4637 cpu.mode = mode
4638 cpu.model = models[0] if models else None
4639
4640 # compare flavor trait and cpu models, select the first mathched model
4641 if flavor and mode == "custom":
4642 flags = libvirt_utils.get_flags_by_flavor_specs(flavor)
4643 if flags:
4644 cpu.model = self._match_cpu_model_by_flags(models, flags)
4645
4646 LOG.debug("CPU mode '%(mode)s' models '%(models)s' was chosen, "
4647 "with extra flags: '%(extra_flags)s'",
4648 {'mode': mode,
4649 'models': (cpu.model or ""),
4650 'extra_flags': (extra_flags or "")})
4651
4652 # NOTE (kchamart): Currently there's no existing way to ask if a
4653 # given CPU model + CPU flags combination is supported by KVM &
4654 # a specific QEMU binary. However, libvirt runs the 'CPUID'
4655 # command upfront -- before even a Nova instance (a QEMU
4656 # process) is launched -- to construct CPU models and check
4657 # their validity; so we are good there. In the long-term,
4658 # upstream libvirt intends to add an additional new API that can
4659 # do fine-grained validation of a certain CPU model + CPU flags
4660 # against a specific QEMU binary (the libvirt RFE bug for that:
4661 # https://bugzilla.redhat.com/show_bug.cgi?id=1559832).
4662 for flag in extra_flags:
4663 cpu.add_feature(vconfig.LibvirtConfigGuestCPUFeature(flag))
4664
4665 return cpu
4666
4667 def _match_cpu_model_by_flags(self, models, flags):
4668 for model in models:
4669 if flags.issubset(self.cpu_model_flag_mapping.get(model, set([]))):
4670 return model
4671 cpu = vconfig.LibvirtConfigCPU()
4672 cpu.arch = self._host.get_capabilities().host.cpu.arch
4673 cpu.model = model
4674 features_xml = self._get_guest_baseline_cpu_features(cpu.to_xml())
4675 if features_xml:
4676 cpu.parse_str(features_xml)
4677 feature_names = [f.name for f in cpu.features]
4678 self.cpu_model_flag_mapping[model] = feature_names
4679 if flags.issubset(feature_names):
4680 return model
4681
4682 msg = ('No CPU model match traits, models: {models}, required '
4683 'flags: {flags}'.format(models=models, flags=flags))
4684 raise exception.InvalidCPUInfo(msg)
4685
4686 def _get_guest_cpu_config(self, flavor, image_meta,
4687 guest_cpu_numa_config, instance_numa_topology):
4688 cpu = self._get_guest_cpu_model_config(flavor)
4689
4690 if cpu is None:
4691 return None
4692
4693 topology = hardware.get_best_cpu_topology(
4694 flavor, image_meta, numa_topology=instance_numa_topology)
4695
4696 cpu.sockets = topology.sockets
4697 cpu.cores = topology.cores
4698 cpu.threads = topology.threads
4699 cpu.numa = guest_cpu_numa_config
4700
4701 return cpu
4702
4703 def _get_guest_disk_config(self, instance, name, disk_mapping, inst_type,
4704 image_type=None, boot_order=None):
4705 disk_unit = None
4706 disk = self.image_backend.by_name(instance, name, image_type)
4707 if (name == 'disk.config' and image_type == 'rbd' and
4708 not disk.exists()):
4709 # This is likely an older config drive that has not been migrated
4710 # to rbd yet. Try to fall back on 'flat' image type.
4711 # TODO(melwitt): Add online migration of some sort so we can
4712 # remove this fall back once we know all config drives are in rbd.
4713 # NOTE(vladikr): make sure that the flat image exist, otherwise
4714 # the image will be created after the domain definition.
4715 flat_disk = self.image_backend.by_name(instance, name, 'flat')
4716 if flat_disk.exists():
4717 disk = flat_disk
4718 LOG.debug('Config drive not found in RBD, falling back to the '
4719 'instance directory', instance=instance)
4720 disk_info = disk_mapping[name]
4721 if 'unit' in disk_mapping and disk_info['bus'] == 'scsi':
4722 disk_unit = disk_mapping['unit']
4723 disk_mapping['unit'] += 1 # Increments for the next disk added
4724 conf = disk.libvirt_info(disk_info, self.disk_cachemode,
4725 inst_type['extra_specs'],
4726 self._host.get_version(),
4727 disk_unit=disk_unit,
4728 boot_order=boot_order)
4729 return conf
4730
4731 def _get_guest_fs_config(self, instance, name, image_type=None):
4732 disk = self.image_backend.by_name(instance, name, image_type)
4733 return disk.libvirt_fs_info("/", "ploop")
4734
4735 def _get_guest_storage_config(self, context, instance, image_meta,
4736 disk_info,
4737 rescue, block_device_info,
4738 inst_type, os_type):
4739 devices = []
4740 disk_mapping = disk_info['mapping']
4741
4742 block_device_mapping = driver.block_device_info_get_mapping(
4743 block_device_info)
4744 mount_rootfs = CONF.libvirt.virt_type == "lxc"
4745 scsi_controller = self._get_scsi_controller(image_meta)
4746
4747 if scsi_controller and scsi_controller.model == 'virtio-scsi':
4748 # The virtio-scsi can handle up to 256 devices but the
4749 # optional element "address" must be defined to describe
4750 # where the device is placed on the controller (see:
4751 # LibvirtConfigGuestDeviceAddressDrive).
4752 #
4753 # Note about why it's added in disk_mapping: It's not
4754 # possible to pass an 'int' by reference in Python, so we
4755 # use disk_mapping as container to keep reference of the
4756 # unit added and be able to increment it for each disk
4757 # added.
4758 #
4759 # NOTE(jaypipes,melwitt): If this is a boot-from-volume instance,
4760 # we need to start the disk mapping unit at 1 since we set the
4761 # bootable volume's unit to 0 for the bootable volume.
4762 disk_mapping['unit'] = 0
4763 if self._is_booted_from_volume(block_device_info):
4764 disk_mapping['unit'] = 1
4765
4766 def _get_ephemeral_devices():
4767 eph_devices = []
4768 for idx, eph in enumerate(
4769 driver.block_device_info_get_ephemerals(
4770 block_device_info)):
4771 diskeph = self._get_guest_disk_config(
4772 instance,
4773 blockinfo.get_eph_disk(idx),
4774 disk_mapping, inst_type)
4775 eph_devices.append(diskeph)
4776 return eph_devices
4777
4778 if mount_rootfs:
4779 fs = vconfig.LibvirtConfigGuestFilesys()
4780 fs.source_type = "mount"
4781 fs.source_dir = os.path.join(
4782 libvirt_utils.get_instance_path(instance), 'rootfs')
4783 devices.append(fs)
4784 elif (os_type == fields.VMMode.EXE and
4785 CONF.libvirt.virt_type == "parallels"):
4786 if rescue:
4787 fsrescue = self._get_guest_fs_config(instance, "disk.rescue")
4788 devices.append(fsrescue)
4789
4790 fsos = self._get_guest_fs_config(instance, "disk")
4791 fsos.target_dir = "/mnt/rescue"
4792 devices.append(fsos)
4793 else:
4794 if 'disk' in disk_mapping:
4795 fs = self._get_guest_fs_config(instance, "disk")
4796 devices.append(fs)
4797 devices = devices + _get_ephemeral_devices()
4798 else:
4799
4800 if rescue and disk_mapping['disk.rescue'] == disk_mapping['root']:
4801 diskrescue = self._get_guest_disk_config(instance,
4802 'disk.rescue',
4803 disk_mapping,
4804 inst_type)
4805 devices.append(diskrescue)
4806
4807 diskos = self._get_guest_disk_config(instance,
4808 'disk',
4809 disk_mapping,
4810 inst_type)
4811 devices.append(diskos)
4812 else:
4813 if 'disk' in disk_mapping:
4814 diskos = self._get_guest_disk_config(instance,
4815 'disk',
4816 disk_mapping,
4817 inst_type)
4818 devices.append(diskos)
4819
4820 if 'disk.local' in disk_mapping:
4821 disklocal = self._get_guest_disk_config(instance,
4822 'disk.local',
4823 disk_mapping,
4824 inst_type)
4825 devices.append(disklocal)
4826 instance.default_ephemeral_device = (
4827 block_device.prepend_dev(disklocal.target_dev))
4828
4829 devices = devices + _get_ephemeral_devices()
4830
4831 if 'disk.swap' in disk_mapping:
4832 diskswap = self._get_guest_disk_config(instance,
4833 'disk.swap',
4834 disk_mapping,
4835 inst_type)
4836 devices.append(diskswap)
4837 instance.default_swap_device = (
4838 block_device.prepend_dev(diskswap.target_dev))
4839
4840 config_name = 'disk.config'
4841 if rescue and disk_mapping['disk.rescue'] == disk_mapping['root']:
4842 config_name = 'disk.config.rescue'
4843
4844 if config_name in disk_mapping:
4845 diskconfig = self._get_guest_disk_config(
4846 instance, config_name, disk_mapping, inst_type,
4847 self._get_disk_config_image_type())
4848 devices.append(diskconfig)
4849
4850 for vol in block_device.get_bdms_to_connect(block_device_mapping,
4851 mount_rootfs):
4852 connection_info = vol['connection_info']
4853 vol_dev = block_device.prepend_dev(vol['mount_device'])
4854 info = disk_mapping[vol_dev]
4855 self._connect_volume(context, connection_info, instance)
4856 if scsi_controller and scsi_controller.model == 'virtio-scsi':
4857 # Check if this is the bootable volume when in a
4858 # boot-from-volume instance, and if so, ensure the unit
4859 # attribute is 0.
4860 if vol.get('boot_index') == 0:
4861 info['unit'] = 0
4862 else:
4863 info['unit'] = disk_mapping['unit']
4864 disk_mapping['unit'] += 1
4865 cfg = self._get_volume_config(connection_info, info)
4866 devices.append(cfg)
4867 vol['connection_info'] = connection_info
4868 vol.save()
4869
4870 for d in devices:
4871 self._set_cache_mode(d)
4872
4873 if scsi_controller:
4874 devices.append(scsi_controller)
4875
4876 if rescue and disk_mapping['disk.rescue'] != disk_mapping['root']:
4877 diskrescue = self._get_guest_disk_config(instance, 'disk.rescue',
4878 disk_mapping, inst_type,
4879 boot_order='1')
4880 devices.append(diskrescue)
4881
4882 return devices
4883
4884 @staticmethod
4885 def _get_scsi_controller(image_meta):
4886 """Return scsi controller or None based on image meta"""
4887 if image_meta.properties.get('hw_scsi_model'):
4888 hw_scsi_model = image_meta.properties.hw_scsi_model
4889 scsi_controller = vconfig.LibvirtConfigGuestController()
4890 scsi_controller.type = 'scsi'
4891 scsi_controller.model = hw_scsi_model
4892 scsi_controller.index = 0
4893 return scsi_controller
4894
4895 def _get_host_sysinfo_serial_hardware(self):
4896 """Get a UUID from the host hardware
4897
4898 Get a UUID for the host hardware reported by libvirt.
4899 This is typically from the SMBIOS data, unless it has
4900 been overridden in /etc/libvirt/libvirtd.conf
4901 """
4902 caps = self._host.get_capabilities()
4903 return caps.host.uuid
4904
4905 def _get_host_sysinfo_serial_os(self):
4906 """Get a UUID from the host operating system
4907
4908 Get a UUID for the host operating system. Modern Linux
4909 distros based on systemd provide a /etc/machine-id
4910 file containing a UUID. This is also provided inside
4911 systemd based containers and can be provided by other
4912 init systems too, since it is just a plain text file.
4913 """
4914 if not os.path.exists("/etc/machine-id"):
4915 msg = _("Unable to get host UUID: /etc/machine-id does not exist")
4916 raise exception.InternalError(msg)
4917
4918 with open("/etc/machine-id") as f:
4919 # We want to have '-' in the right place
4920 # so we parse & reformat the value
4921 lines = f.read().split()
4922 if not lines:
4923 msg = _("Unable to get host UUID: /etc/machine-id is empty")
4924 raise exception.InternalError(msg)
4925
4926 return str(uuid.UUID(lines[0]))
4927
4928 def _get_host_sysinfo_serial_auto(self):
4929 if os.path.exists("/etc/machine-id"):
4930 return self._get_host_sysinfo_serial_os()
4931 else:
4932 return self._get_host_sysinfo_serial_hardware()
4933
4934 def _get_guest_config_sysinfo(self, instance):
4935 sysinfo = vconfig.LibvirtConfigGuestSysinfo()
4936
4937 sysinfo.system_manufacturer = version.vendor_string()
4938 sysinfo.system_product = version.product_string()
4939 sysinfo.system_version = version.version_string_with_package()
4940
4941 if CONF.libvirt.sysinfo_serial == 'unique':
4942 sysinfo.system_serial = instance.uuid
4943 else:
4944 sysinfo.system_serial = self._sysinfo_serial_func()
4945 sysinfo.system_uuid = instance.uuid
4946
4947 sysinfo.system_family = "Virtual Machine"
4948
4949 return sysinfo
4950
4951 def _set_managed_mode(self, pcidev):
4952 # only kvm support managed mode
4953 if CONF.libvirt.virt_type in ('xen', 'parallels',):
4954 pcidev.managed = 'no'
4955 if CONF.libvirt.virt_type in ('kvm', 'qemu'):
4956 pcidev.managed = 'yes'
4957
4958 def _get_guest_pci_device(self, pci_device):
4959
4960 dbsf = pci_utils.parse_address(pci_device.address)
4961 dev = vconfig.LibvirtConfigGuestHostdevPCI()
4962 dev.domain, dev.bus, dev.slot, dev.function = dbsf
4963 self._set_managed_mode(dev)
4964
4965 return dev
4966
4967 def _get_guest_config_meta(self, instance):
4968 """Get metadata config for guest."""
4969
4970 meta = vconfig.LibvirtConfigGuestMetaNovaInstance()
4971 meta.package = version.version_string_with_package()
4972 meta.name = instance.display_name
4973 meta.creationTime = time.time()
4974
4975 if instance.image_ref not in ("", None):
4976 meta.roottype = "image"
4977 meta.rootid = instance.image_ref
4978
4979 system_meta = instance.system_metadata
4980 ometa = vconfig.LibvirtConfigGuestMetaNovaOwner()
4981 ometa.userid = instance.user_id
4982 ometa.username = system_meta.get('owner_user_name', 'N/A')
4983 ometa.projectid = instance.project_id
4984 ometa.projectname = system_meta.get('owner_project_name', 'N/A')
4985 meta.owner = ometa
4986
4987 fmeta = vconfig.LibvirtConfigGuestMetaNovaFlavor()
4988 flavor = instance.flavor
4989 fmeta.name = flavor.name
4990 fmeta.memory = flavor.memory_mb
4991 fmeta.vcpus = flavor.vcpus
4992 fmeta.ephemeral = flavor.ephemeral_gb
4993 fmeta.disk = flavor.root_gb
4994 fmeta.swap = flavor.swap
4995
4996 meta.flavor = fmeta
4997
4998 return meta
4999
5000 @staticmethod
5001 def _create_idmaps(klass, map_strings):
5002 idmaps = []
5003 if len(map_strings) > 5:
5004 map_strings = map_strings[0:5]
5005 LOG.warning("Too many id maps, only included first five.")
5006 for map_string in map_strings:
5007 try:
5008 idmap = klass()
5009 values = [int(i) for i in map_string.split(":")]
5010 idmap.start = values[0]
5011 idmap.target = values[1]
5012 idmap.count = values[2]
5013 idmaps.append(idmap)
5014 except (ValueError, IndexError):
5015 LOG.warning("Invalid value for id mapping %s", map_string)
5016 return idmaps
5017
5018 def _get_guest_idmaps(self):
5019 id_maps: ty.List[vconfig.LibvirtConfigGuestIDMap] = []
5020 if CONF.libvirt.virt_type == 'lxc' and CONF.libvirt.uid_maps:
5021 uid_maps = self._create_idmaps(vconfig.LibvirtConfigGuestUIDMap,
5022 CONF.libvirt.uid_maps)
5023 id_maps.extend(uid_maps)
5024 if CONF.libvirt.virt_type == 'lxc' and CONF.libvirt.gid_maps:
5025 gid_maps = self._create_idmaps(vconfig.LibvirtConfigGuestGIDMap,
5026 CONF.libvirt.gid_maps)
5027 id_maps.extend(gid_maps)
5028 return id_maps
5029
5030 def _update_guest_cputune(self, guest, flavor, virt_type):
5031 is_able = self._host.is_cpu_control_policy_capable()
5032
5033 cputuning = ['shares', 'period', 'quota']
5034 wants_cputune = any([k for k in cputuning
5035 if "quota:cpu_" + k in flavor.extra_specs.keys()])
5036
5037 if wants_cputune and not is_able:
5038 raise exception.UnsupportedHostCPUControlPolicy()
5039
5040 if not is_able or virt_type not in ('lxc', 'kvm', 'qemu'):
5041 return
5042
5043 if guest.cputune is None:
5044 guest.cputune = vconfig.LibvirtConfigGuestCPUTune()
5045 # Setting the default cpu.shares value to be a value
5046 # dependent on the number of vcpus
5047 guest.cputune.shares = 1024 * guest.vcpus
5048
5049 for name in cputuning:
5050 key = "quota:cpu_" + name
5051 if key in flavor.extra_specs:
5052 setattr(guest.cputune, name,
5053 int(flavor.extra_specs[key]))
5054
5055 def _get_cpu_numa_config_from_instance(self, instance_numa_topology,
5056 wants_hugepages):
5057 if instance_numa_topology:
5058 guest_cpu_numa = vconfig.LibvirtConfigGuestCPUNUMA()
5059 for instance_cell in instance_numa_topology.cells:
5060 guest_cell = vconfig.LibvirtConfigGuestCPUNUMACell()
5061 guest_cell.id = instance_cell.id
5062 guest_cell.cpus = instance_cell.total_cpus
5063 guest_cell.memory = instance_cell.memory * units.Ki
5064
5065 # The vhost-user network backend requires file backed
5066 # guest memory (ie huge pages) to be marked as shared
5067 # access, not private, so an external process can read
5068 # and write the pages.
5069 #
5070 # You can't change the shared vs private flag for an
5071 # already running guest, and since we can't predict what
5072 # types of NIC may be hotplugged, we have no choice but
5073 # to unconditionally turn on the shared flag. This has
5074 # no real negative functional effect on the guest, so
5075 # is a reasonable approach to take
5076 if wants_hugepages:
5077 guest_cell.memAccess = "shared"
5078 guest_cpu_numa.cells.append(guest_cell)
5079 return guest_cpu_numa
5080
5081 def _wants_hugepages(self, host_topology, instance_topology):
5082 """Determine if the guest / host topology implies the
5083 use of huge pages for guest RAM backing
5084 """
5085
5086 if host_topology is None or instance_topology is None:
5087 return False
5088
5089 avail_pagesize = [page.size_kb
5090 for page in host_topology.cells[0].mempages]
5091 avail_pagesize.sort()
5092 # Remove smallest page size as that's not classed as a largepage
5093 avail_pagesize = avail_pagesize[1:]
5094
5095 # See if we have page size set
5096 for cell in instance_topology.cells:
5097 if (cell.pagesize is not None and
5098 cell.pagesize in avail_pagesize):
5099 return True
5100
5101 return False
5102
5103 def _get_cell_pairs(self, guest_cpu_numa_config, host_topology):
5104 """Returns the lists of pairs(tuple) of an instance cell and
5105 corresponding host cell:
5106 [(LibvirtConfigGuestCPUNUMACell, NUMACell), ...]
5107 """
5108 cell_pairs = []
5109 for guest_config_cell in guest_cpu_numa_config.cells:
5110 for host_cell in host_topology.cells:
5111 if guest_config_cell.id == host_cell.id:
5112 cell_pairs.append((guest_config_cell, host_cell))
5113 return cell_pairs
5114
5115 def _get_pin_cpuset(self, vcpu, inst_cell, host_cell):
5116 """Returns the config object of LibvirtConfigGuestCPUTuneVCPUPin.
5117
5118 Prepares vcpupin config for the guest with the following caveats:
5119
5120 a) If the specified instance vCPU is intended to be pinned, we pin
5121 it to the previously selected host CPU.
5122 b) Otherwise we float over the whole host NUMA node
5123 """
5124 pin_cpuset = vconfig.LibvirtConfigGuestCPUTuneVCPUPin()
5125 pin_cpuset.id = vcpu
5126
5127 # 'InstanceNUMACell.cpu_pinning' tracks the CPU pinning pair for guest
5128 # CPU and host CPU