"Fossies" - the Fresh Open Source Software Archive

Member "nova-22.0.1/nova/pci/stats.py" (19 Nov 2020, 19204 Bytes) of package /linux/misc/openstack/nova-22.0.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively you can here view or download the uninterpreted source code file. For more information about "stats.py" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 22.0.0_vs_22.0.1.

# Copyright (c) 2013 Intel, Inc.
# Copyright (c) 2013 OpenStack Foundation
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import copy

from oslo_config import cfg
from oslo_log import log as logging

from nova import exception
from nova.objects import fields
from nova.objects import pci_device_pool
from nova.pci import utils
from nova.pci import whitelist


CONF = cfg.CONF
LOG = logging.getLogger(__name__)


class PciDeviceStats(object):

    """PCI devices summary information.

    According to the PCI SR-IOV spec, a PCI physical function can have up to
    256 PCI virtual functions, thus the number of assignable PCI functions in
    a cloud can be large. The scheduler needs to know all device availability
    information in order to determine which compute hosts can support a PCI
    request. Passing individual virtual device information to the scheduler
    does not scale, so we provide summary information.

    Usually the virtual functions provided by a host PCI device have the same
    value for most properties, like vendor_id, product_id and class type.
    The PCI stats class summarizes this information for the scheduler.

    The PCI stats information is maintained exclusively by the compute node
    resource tracker and updated to the database. The scheduler fetches the
    information and selects the compute node accordingly. If a compute
    node is selected, the resource tracker allocates the devices to the
    instance and updates the PCI stats information.

    This summary information is also helpful for cloud management.
    """

    pool_keys = ['product_id', 'vendor_id', 'numa_node', 'dev_type']
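
    # Illustrative example (values made up): once devices have been added,
    # each entry in self.pools is a dict of the keys above plus any whitelist
    # tags, together with a running 'count' and the 'devices' themselves,
    # e.g.:
    #     {'product_id': '10ed', 'vendor_id': '8086', 'numa_node': 0,
    #      'dev_type': 'type-VF', 'count': 4, 'devices': [<PciDevice>, ...]}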

    def __init__(self, stats=None, dev_filter=None):
        super(PciDeviceStats, self).__init__()
        # NOTE(sbauza): Stats are a PCIDevicePoolList object
        self.pools = [pci_pool.to_dict()
                      for pci_pool in stats] if stats else []
        self.pools.sort(key=lambda item: len(item))
        self.dev_filter = dev_filter or whitelist.Whitelist(
            CONF.pci.passthrough_whitelist)

    def _equal_properties(self, dev, entry, matching_keys):
        return all(dev.get(prop) == entry.get(prop)
                   for prop in matching_keys)

    def _find_pool(self, dev_pool):
        """Return the first pool that matches dev."""
        for pool in self.pools:
            pool_keys = pool.copy()
            del pool_keys['count']
            del pool_keys['devices']
            if (len(pool_keys.keys()) == len(dev_pool.keys()) and
                self._equal_properties(dev_pool, pool_keys, dev_pool.keys())):
                return pool

    def _create_pool_keys_from_dev(self, dev):
        """Create a stats pool dict that this dev is supposed to be part of.

        Note that this pool dict contains the stats pool's keys and their
        values. 'count' and 'devices' are not included.
        """
        # Don't add a device that doesn't have a matching device spec.
        # This can happen during initial sync up with the controller.
        devspec = self.dev_filter.get_devspec(dev)
        if not devspec:
            return
        tags = devspec.get_tags()
        pool = {k: getattr(dev, k) for k in self.pool_keys}
        if tags:
            pool.update(tags)
        # NOTE(gibi): parent_ifname acts like a tag during pci claim but is
        # not provided as part of the whitelist spec, as it is auto-detected
        # by the virt driver.
        # This key is used to match InstancePciRequests backed by neutron
        # ports that have a resource_request and therefore already have a
        # resource allocation in placement.
        if dev.extra_info.get('parent_ifname'):
            pool['parent_ifname'] = dev.extra_info['parent_ifname']
        return pool
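
    # Illustrative example (values made up): for a whitelisted SR-IOV VF
    # whose passthrough_whitelist entry carries a physical_network tag, the
    # returned dict might look like:
    #     {'product_id': '10ed', 'vendor_id': '8086', 'numa_node': 0,
    #      'dev_type': 'type-VF', 'physical_network': 'physnet1'}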

    def _get_pool_with_device_type_mismatch(self, dev):
        """Check for device type mismatch in the pools for a given device.

        Return (pool, device) if device type does not match or a single None
        if the device type matches.
        """
        for pool in self.pools:
            for device in pool['devices']:
                if device.address == dev.address:
                    if dev.dev_type != pool["dev_type"]:
                        return pool, device
                    return None

        return None

    def update_device(self, dev):
        """Update a device to its matching pool."""
        pool_device_info = self._get_pool_with_device_type_mismatch(dev)
        if pool_device_info is None:
            return

        pool, device = pool_device_info
        pool['devices'].remove(device)
        self._decrease_pool_count(self.pools, pool)
        self.add_device(dev)

    def add_device(self, dev):
        """Add a device to its matching pool."""
        dev_pool = self._create_pool_keys_from_dev(dev)
        if dev_pool:
            pool = self._find_pool(dev_pool)
            if not pool:
                dev_pool['count'] = 0
                dev_pool['devices'] = []
                self.pools.append(dev_pool)
                self.pools.sort(key=lambda item: len(item))
                pool = dev_pool
            pool['count'] += 1
            pool['devices'].append(dev)

    @staticmethod
    def _decrease_pool_count(pool_list, pool, count=1):
        """Decrement pool's size by count.

        If pool becomes empty, remove pool from pool_list.
        """
        if pool['count'] > count:
            pool['count'] -= count
            count = 0
        else:
            count -= pool['count']
            pool_list.remove(pool)
        return count
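
    # Worked example (for illustration): with pool['count'] == 3, calling
    # _decrease_pool_count(pool_list, pool, count=5) removes the pool from
    # pool_list and returns 2, the remainder still to be taken from other
    # pools; a return value of 0 means the count was fully satisfied here.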

    def remove_device(self, dev):
        """Remove one device from the first pool that it matches."""
        dev_pool = self._create_pool_keys_from_dev(dev)
        if dev_pool:
            pool = self._find_pool(dev_pool)
            if not pool:
                raise exception.PciDevicePoolEmpty(
                    compute_node_id=dev.compute_node_id, address=dev.address)
            pool['devices'].remove(dev)
            self._decrease_pool_count(self.pools, pool)

    def get_free_devs(self):
        free_devs = []
        for pool in self.pools:
            free_devs.extend(pool['devices'])
        return free_devs

    def consume_requests(self, pci_requests, numa_cells=None):
        alloc_devices = []
        for request in pci_requests:
            count = request.count
            spec = request.spec
            # For now, keep the same algorithm as during scheduling:
            # a spec may be able to match multiple pools.
            pools = self._filter_pools_for_spec(self.pools, spec)
            if numa_cells:
                numa_policy = None
                if 'numa_policy' in request:
                    numa_policy = request.numa_policy
                pools = self._filter_pools_for_numa_cells(
                    pools, numa_cells, numa_policy, count)
            pools = self._filter_non_requested_pfs(pools, request)
            # Failed to allocate the required number of devices
            # Return the devices already allocated back to their pools
            if sum([pool['count'] for pool in pools]) < count:
                LOG.error("Failed to allocate PCI devices for instance. "
                          "Unassigning devices back to pools. "
                          "This should not happen, since the scheduler "
                          "should have accurate information, and allocation "
                          "during claims is controlled via a hold "
                          "on the compute node semaphore.")
                for d in range(len(alloc_devices)):
                    self.add_device(alloc_devices.pop())
                return None
            for pool in pools:
                if pool['count'] >= count:
                    num_alloc = count
                else:
                    num_alloc = pool['count']
                count -= num_alloc
                pool['count'] -= num_alloc
                for d in range(num_alloc):
                    pci_dev = pool['devices'].pop()
                    self._handle_device_dependents(pci_dev)
                    pci_dev.request_id = request.request_id
                    alloc_devices.append(pci_dev)
                if count == 0:
                    break
        return alloc_devices
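
    # For illustration: consume_requests returns the list of PciDevice
    # objects that were claimed, with ``request_id`` set on each, or None if
    # any request could not be satisfied, in which case the devices claimed
    # so far are first returned to their pools.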

    def _handle_device_dependents(self, pci_dev):
        """Remove device dependents or a parent from pools.

        In case the device is a PF, all of its dependent VFs should
        be removed from the pool counts, if they are present.
        When the device is a VF, its parent PF's pool count should be
        decreased, unless it is no longer in a pool.
        """
        if pci_dev.dev_type == fields.PciDeviceType.SRIOV_PF:
            vfs_list = pci_dev.child_devices
            if vfs_list:
                for vf in vfs_list:
                    self.remove_device(vf)
        elif pci_dev.dev_type == fields.PciDeviceType.SRIOV_VF:
            try:
                parent = pci_dev.parent_device
                # Make sure not to decrease the PF pool count if this parent
                # has already been removed from the pools
                if parent in self.get_free_devs():
                    self.remove_device(parent)
            except exception.PciDeviceNotFound:
                return

    @staticmethod
    def _filter_pools_for_spec(pools, request_specs):
        return [pool for pool in pools
                if utils.pci_device_prop_match(pool, request_specs)]

    @classmethod
    def _filter_pools_for_numa_cells(cls, pools, numa_cells, numa_policy,
            requested_count):
        """Filter out pools with the wrong NUMA affinity, if required.

        Exclude pools that do not have *suitable* PCI NUMA affinity.
        ``numa_policy`` determines what *suitable* means, being one of
        PREFERRED (nice-to-have), LEGACY (must-have-if-available) and REQUIRED
        (must-have). We iterate through the various policies in order of
        strictness. This means that even if we only *prefer* PCI-NUMA affinity,
        we will still attempt to provide it if possible.

        :param pools: A list of PCI device pool dicts
        :param numa_cells: A list of InstanceNUMACell objects whose ``id``
            corresponds to the ``id`` of host NUMACells.
        :param numa_policy: The PCI NUMA affinity policy to apply.
        :param requested_count: The number of PCI devices requested.
        :returns: A list of pools that can, together, provide at least
            ``requested_count`` PCI devices with the level of NUMA affinity
            required by ``numa_policy``, else all pools that can satisfy this
            policy even if it's not enough.
        """
        # NOTE(stephenfin): We may wish to change the default policy at a later
        # date
        requested_policy = numa_policy or fields.PCINUMAAffinityPolicy.LEGACY
        numa_cell_ids = [cell.id for cell in numa_cells]

        # filter out pools whose numa_node is not included in numa_cell_ids
        filtered_pools = [
            pool for pool in pools if any(utils.pci_device_prop_match(
                pool, [{'numa_node': cell}]) for cell in numa_cell_ids)]

        # we can't apply a less strict policy than the one requested, so we
        # need to return if we've demanded a NUMA affinity of REQUIRED.
        # However, NUMA affinity is a good thing. If we can get enough devices
        # with the stricter policy then we will use them.
        if requested_policy == fields.PCINUMAAffinityPolicy.REQUIRED or sum(
                pool['count'] for pool in filtered_pools) >= requested_count:
            return filtered_pools

        # some systems don't report NUMA node info for PCI devices, in which
        # case None is reported in 'pci_device.numa_node'. The LEGACY policy
        # allows us to use these devices so we include None in the list of
        # suitable NUMA cells.
        numa_cell_ids.append(None)

        # filter out pools whose numa_node is not included in numa_cell_ids
        filtered_pools = [
            pool for pool in pools if any(utils.pci_device_prop_match(
                pool, [{'numa_node': cell}]) for cell in numa_cell_ids)]

        # once again, we can't apply a less strict policy than the one
        # requested, so we need to return if we've demanded a NUMA affinity of
        # LEGACY. Similarly, we will also return if we have enough devices to
        # satisfy this somewhat strict policy.
        if requested_policy == fields.PCINUMAAffinityPolicy.LEGACY or sum(
                pool['count'] for pool in filtered_pools) >= requested_count:
            return filtered_pools

        # if we've got here, we're using the PREFERRED policy and weren't able
        # to provide anything with stricter affinity. Use whatever devices you
        # can, folks.
        return sorted(
            pools, key=lambda pool: pool.get('numa_node') not in numa_cell_ids)
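
    # Illustrative example: if the instance is pinned to host NUMA cell 0,
    # pools with numa_node 0 are always eligible; pools with numa_node None
    # (no NUMA info reported) become eligible under LEGACY or PREFERRED once
    # the stricter pass cannot satisfy the request; and pools on other NUMA
    # nodes are only returned under PREFERRED, sorted after the NUMA-affine
    # ones.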

    @classmethod
    def _filter_non_requested_pfs(cls, pools, request):
        # Remove SRIOV_PFs from pools, unless they have been explicitly
        # requested. This is especially needed in cases where PFs and VFs
        # have the same product_id.
        if all(spec.get('dev_type') != fields.PciDeviceType.SRIOV_PF for
               spec in request.spec):
            pools = cls._filter_pools_for_pfs(pools)
        return pools

    @staticmethod
    def _filter_pools_for_pfs(pools):
        return [pool for pool in pools
                if not pool.get('dev_type') == fields.PciDeviceType.SRIOV_PF]

    def _apply_request(self, pools, request, numa_cells=None):
        """Apply a PCI request.

        Apply a PCI request against a given set of PCI device pools, which are
        collections of devices with similar traits.

        If ``numa_cells`` is provided then NUMA locality may be taken into
        account, depending on the value of ``request.numa_policy``.

        :param pools: A list of PCI device pool dicts
        :param request: An InstancePCIRequest object describing the type,
            quantity and required NUMA affinity of device(s) we want.
        :param numa_cells: A list of InstanceNUMACell objects whose ``id``
            corresponds to the ``id`` of host NUMACells.
        :returns: True if the request was applied against the provided pools
            successfully, else False.
        """
        # NOTE(vladikr): This code may be open to race conditions.
        # Two concurrent requests may both succeed when calling
        # support_requests because this method does not remove the related
        # devices from the pools.
        count = request.count

        # Firstly, let's exclude all devices that don't match our spec (e.g.
        # they've got different PCI IDs or something)
        matching_pools = self._filter_pools_for_spec(pools, request.spec)

        # Next, let's exclude all devices that aren't on the correct NUMA node
        # *assuming* we have devices and care about that, as determined by
        # policy
        if numa_cells:
            numa_policy = None
            if 'numa_policy' in request:
                numa_policy = request.numa_policy

            matching_pools = self._filter_pools_for_numa_cells(matching_pools,
                numa_cells, numa_policy, count)

        # Finally, if we're not requesting PFs then we should not use these.
        # Exclude them.
        matching_pools = self._filter_non_requested_pfs(matching_pools,
                                                        request)

        # Do we still have any devices left?
        if sum([pool['count'] for pool in matching_pools]) < count:
            return False
        else:
            for pool in matching_pools:
                count = self._decrease_pool_count(pools, pool, count)
                if not count:
                    break
        return True
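
    # Worked example (for illustration): given a request with count == 2 and
    # a matching pool with count == 3, _apply_request decrements that pool to
    # count == 1 in the passed-in ``pools`` list and returns True; if fewer
    # than 2 matching devices remain across all pools, it returns False and
    # leaves ``pools`` untouched.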

    def support_requests(self, requests, numa_cells=None):
        """Determine if the PCI requests can be met.

        Determine, based on a compute node's PCI stats, if an instance can be
        scheduled on the node. **Support does not mean real allocation**.

        If ``numa_cells`` is provided then NUMA locality may be taken into
        account, depending on the value of ``numa_policy``.

        :param requests: A list of InstancePCIRequest objects describing the
            types, quantities and required NUMA affinities of devices we want.
        :type requests: nova.objects.InstancePCIRequests
        :param numa_cells: A list of InstanceNUMACell objects whose ``id``
            corresponds to the ``id`` of host NUMACells, or None.
        :returns: Whether this compute node can satisfy the given request.
        """
        # NOTE(yjiang5): this function has a high possibility of failing, so
        # no exception should be raised for performance reasons.
        pools = copy.deepcopy(self.pools)
        return all(self._apply_request(pools, r, numa_cells) for r in requests)

    def apply_requests(self, requests, numa_cells=None):
        """Apply PCI requests to the PCI stats.

        This is used in multiple instance creation, when the scheduler has to
        track how the resources are consumed by the instances.

        If ``numa_cells`` is provided then NUMA locality may be taken into
        account, depending on the value of ``numa_policy``.

        :param requests: A list of InstancePCIRequest objects describing the
            types, quantities and required NUMA affinities of devices we want.
        :type requests: nova.objects.InstancePCIRequests
        :param numa_cells: A list of InstanceNUMACell objects whose ``id``
            corresponds to the ``id`` of host NUMACells, or None.
        :raises: exception.PciDeviceRequestFailed if this compute node cannot
            satisfy the given request.
        """
        if not all(self._apply_request(self.pools, r, numa_cells)
                   for r in requests):
            raise exception.PciDeviceRequestFailed(requests=requests)

    def __iter__(self):
        # 'devices' shouldn't be part of stats
        pools = []
        for pool in self.pools:
            tmp = {k: v for k, v in pool.items() if k != 'devices'}
            pools.append(tmp)
        return iter(pools)

    def clear(self):
        """Clear all the stats maintained."""
        self.pools = []

    def __eq__(self, other):
        return self.pools == other.pools

    def to_device_pools_obj(self):
        """Return the contents of the pools as a PciDevicePoolList object."""
        stats = [x for x in self]
        return pci_device_pool.from_pci_stats(stats)
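

# Illustrative usage sketch (assumptions: ``devices`` is a list of PciDevice
# objects belonging to this compute node and ``requests`` a list of
# InstancePCIRequest objects; the variable names are for illustration only).
# The compute node resource tracker is the usual consumer of this class,
# roughly along these lines:
#
#     stats = PciDeviceStats()
#     for dev in devices:
#         stats.add_device(dev)
#     if stats.support_requests(requests):       # non-destructive check
#         claimed = stats.consume_requests(requests)  # assigns real devices
#     pools = stats.to_device_pools_obj()        # persisted for the scheduler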