"Fossies" - the Fresh Open Source Software Archive

Member "snapcraft-3.8/snapcraft/internal/elf.py" (9 Sep 2019, 23643 Bytes) of package /linux/misc/snapcraft-3.8.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "elf.py", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.7.2_vs_3.8.

    1 # -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
    2 #
    3 # Copyright (C) 2016-2018 Canonical Ltd
    4 #
    5 # This program is free software: you can redistribute it and/or modify
    6 # it under the terms of the GNU General Public License version 3 as
    7 # published by the Free Software Foundation.
    8 #
    9 # This program is distributed in the hope that it will be useful,
   10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
   11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12 # GNU General Public License for more details.
   13 #
   14 # You should have received a copy of the GNU General Public License
   15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
   16 import contextlib
   17 import glob
   18 import logging
   19 import os
   20 import re
   21 import shutil
   22 import subprocess
   23 import tempfile
   24 from typing import Dict, FrozenSet, List, Set, Sequence, Tuple, Union  # noqa
   25 
   26 import elftools.elf.elffile
   27 import elftools.common.exceptions
   28 from pkg_resources import parse_version
   29 
   30 from snapcraft import file_utils
   31 from snapcraft.internal import common, errors, repo
   32 
   33 
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
   35 
   36 
   37 class NeededLibrary:
   38     """Represents an ELF library version."""
   39 
   40     def __init__(self, *, name: str) -> None:
   41         self.name = name
   42         self.versions = set()  # type: Set[str]
   43 
   44     def add_version(self, version: str) -> None:
   45         self.versions.add(version)
   46 
   47 
# Identifies an ELF architecture as (EI_CLASS, EI_DATA, e_machine), e.g.
# ('ELFCLASS64', 'ELFDATA2LSB', 'EM_X86_64') for amd64 binaries.
ElfArchitectureTuple = Tuple[str, str, str]
# Shape of the value returned by ElfFile._extract():
# (arch, interp, soname, needed libraries keyed by name, execstack_set,
# is_dynamic).
ElfDataTuple = Tuple[
    ElfArchitectureTuple, str, str, Dict[str, NeededLibrary], bool, bool
]  # noqa: E501
# Backing store of SonameCache: maps an (arch, soname) key to the path where
# that soname was found.
SonameCacheDict = Dict[Tuple[ElfArchitectureTuple, str], str]
   53 
   54 
# Old pyelftools uses byte strings for section names.  Some data is
# also returned as bytes, which is handled below.
#
# The section-name constants below are therefore defined as str when the
# installed pyelftools is 0.24 or newer and as bytes otherwise, so that they
# can be passed unchanged to ELFFile.get_section_by_name().
if parse_version(elftools.__version__) >= parse_version("0.24"):
    _DYNAMIC = ".dynamic"  # type: Union[str, bytes]
    _GNU_VERSION_R = ".gnu.version_r"  # type: Union[str, bytes]
    _INTERP = ".interp"  # type: Union[str, bytes]
else:
    _DYNAMIC = b".dynamic"
    _GNU_VERSION_R = b".gnu.version_r"
    _INTERP = b".interp"
   65 
   66 
   67 class SonameCache:
   68     """A cache for sonames."""
   69 
   70     def __getitem__(self, key):
   71         return self._soname_paths[key]
   72 
   73     def __setitem__(self, key, item):
   74         # Initial API error checks
   75         if not isinstance(key, tuple):
   76             raise EnvironmentError(
   77                 "The key for SonameCache has to be a (arch, soname) tuple."
   78             )
   79         if not isinstance(key[0], tuple) or len(key[0]) != 3:
   80             raise EnvironmentError(
   81                 "The first element of the key needs to of type ElfArchitectureTuple."
   82             )
   83         if not isinstance(key[1], str):
   84             raise EnvironmentError(
   85                 "The second element of the key needs to be "
   86                 "of type str representing the soname."
   87             )
   88         self._soname_paths[key] = item
   89 
   90     def __contains__(self, key):
   91         return key in self._soname_paths
   92 
   93     def __init__(self):
   94         """Initialize a cache for sonames"""
   95         self._soname_paths = dict()  # type: SonameCacheDict
   96 
   97     def reset_except_root(self, root):
   98         """Reset the cache values that aren't contained within root."""
   99         new_soname_paths = dict()  # type: SonameCacheDict
  100         for key, value in self._soname_paths.items():
  101             if value is not None and value.startswith(root):
  102                 new_soname_paths[key] = value
  103 
  104         self._soname_paths = new_soname_paths
  105 
  106 
  107 class Library:
  108     """Represents the SONAME and path to the library."""
  109 
  110     def __init__(
  111         self,
  112         *,
  113         soname: str,
  114         path: str,
  115         root_path: str,
  116         core_base_path: str,
  117         arch: ElfArchitectureTuple,
  118         soname_cache: SonameCache
  119     ) -> None:
  120         self.soname = soname
  121 
  122         # We need to always look for the soname inside root first,
  123         # and after exhausting all options look in core_base_path.
  124         if path.startswith(root_path):
  125             self.path = path
  126         else:
  127             self.path = _crawl_for_path(
  128                 soname=soname,
  129                 root_path=root_path,
  130                 core_base_path=core_base_path,
  131                 arch=arch,
  132                 soname_cache=soname_cache,
  133             )
  134 
  135         if not self.path and path.startswith(core_base_path):
  136             self.path = path
  137 
  138         # Required for libraries on the host and the fetching mechanism
  139         if not self.path:
  140             self.path = path
  141 
  142         # self.path has the correct resulting path.
  143         if self.path.startswith(core_base_path):
  144             self.in_base_snap = True
  145         else:
  146             self.in_base_snap = False
  147 
  148         logger.debug(
  149             "{soname} with original path {original_path} found on {path} in base: {in_base}".format(
  150                 soname=soname,
  151                 original_path=path,
  152                 path=self.path,
  153                 in_base=self.in_base_snap,
  154             )
  155         )
  156 
  157 
  158 def _crawl_for_path(
  159     *,
  160     soname: str,
  161     root_path: str,
  162     core_base_path: str,
  163     arch: ElfArchitectureTuple,
  164     soname_cache: SonameCache
  165 ) -> str:
  166     # Speed things up and return what was already found once.
  167     if (arch, soname) in soname_cache:
  168         return soname_cache[arch, soname]
  169 
  170     logger.debug("Crawling to find soname {!r}".format(soname))
  171     for path in (root_path, core_base_path):
  172         if not os.path.exists(path):
  173             continue
  174         for root, directories, files in os.walk(path):
  175             for file_name in files:
  176                 if file_name == soname:
  177                     file_path = os.path.join(root, file_name)
  178                     if ElfFile.is_elf(file_path):
  179                         # We found a match by name, anyway. Let's verify that
  180                         # the architecture is the one we want.
  181                         elf_file = ElfFile(path=file_path)
  182                         if elf_file.arch == arch:
  183                             soname_cache[arch, soname] = file_path
  184                             return file_path
  185 
  186     # If not found we cache it too
  187     soname_cache[arch, soname] = None
  188     return None
  189 
  190 
  191 # Old versions of pyelftools return bytes rather than strings for
  192 # certain APIs.  So we pass those values through this function to get
  193 # a consistent result.
  194 def _ensure_str(s):
  195     if isinstance(s, bytes):
  196         return s.decode("ascii")
  197     assert isinstance(s, str)
  198     return s
  199 
  200 
  201 class ElfFile:
  202     """ElfFile represents and elf file on a path and its attributes."""
  203 
  204     @classmethod
  205     def is_elf(cls, path: str) -> bool:
  206         if not os.path.isfile(path):
  207             # ELF binaries are regular files
  208             return False
  209         with open(path, "rb") as bin_file:
  210             return bin_file.read(4) == b"\x7fELF"
  211 
  212     def __init__(self, *, path: str) -> None:
  213         """Initialize an ElfFile instance.
  214 
  215         :param str path: path to an elf_file within a snapcraft project.
  216         """
  217         self.path = path
  218         self.dependencies = set()  # type: Set[Library]
  219         elf_data = self._extract(path)
  220         self.arch = elf_data[0]
  221         self.interp = elf_data[1]
  222         self.soname = elf_data[2]
  223         self.needed = elf_data[3]
  224         self.execstack_set = elf_data[4]
  225         self.is_dynamic = elf_data[5]
  226 
  227     def _extract(self, path: str) -> ElfDataTuple:  # noqa: C901
  228         arch = None  # type: ElfArchitectureTuple
  229         interp = str()
  230         soname = str()
  231         libs = dict()
  232         execstack_set = False
  233 
  234         with open(path, "rb") as fp:
  235             elf = elftools.elf.elffile.ELFFile(fp)
  236 
  237             # A set of fields to identify the architecture of the ELF file:
  238             #  EI_CLASS: 32/64 bit (e.g. amd64 vs. x32)
  239             #  EI_DATA: byte orer (e.g. ppc64 vs. ppc64le)
  240             #  e_machine: instruction set (e.g. x86-64 vs. arm64)
  241             #
  242             # For amd64 binaries, this will evaluate to:
  243             #   ('ELFCLASS64', 'ELFDATA2LSB', 'EM_X86_64')
  244             arch = (
  245                 elf.header.e_ident.EI_CLASS,
  246                 elf.header.e_ident.EI_DATA,
  247                 elf.header.e_machine,
  248             )
  249 
  250             # If we are processing a detached debug info file, these
  251             # sections will be present but empty.
  252             interp_section = elf.get_section_by_name(_INTERP)
  253             if (
  254                 interp_section is not None
  255                 and interp_section.header.sh_type != "SHT_NOBITS"
  256             ):
  257                 interp = interp_section.data().rstrip(b"\x00").decode("ascii")
  258 
  259             dynamic_section = elf.get_section_by_name(_DYNAMIC)
  260             is_dynamic = dynamic_section is not None
  261 
  262             if is_dynamic and dynamic_section.header.sh_type != "SHT_NOBITS":
  263                 for tag in dynamic_section.iter_tags("DT_NEEDED"):
  264                     needed = _ensure_str(tag.needed)
  265                     libs[needed] = NeededLibrary(name=needed)
  266                 for tag in dynamic_section.iter_tags("DT_SONAME"):
  267                     soname = _ensure_str(tag.soname)
  268 
  269             verneed_section = elf.get_section_by_name(_GNU_VERSION_R)
  270             if (
  271                 verneed_section is not None
  272                 and verneed_section.header.sh_type != "SHT_NOBITS"
  273             ):
  274                 for library, versions in verneed_section.iter_versions():
  275                     library_name = _ensure_str(library.name)
  276                     # If the ELF file only references weak symbols
  277                     # from a library, it may be absent from DT_NEEDED
  278                     # but still have an entry in .gnu.version_r for
  279                     # symbol versions.
  280                     if library_name not in libs:
  281                         continue
  282                     lib = libs[library_name]
  283                     for version in versions:
  284                         lib.add_version(_ensure_str(version.name))
  285 
  286             for segment in elf.iter_segments():
  287                 if segment["p_type"] == "PT_GNU_STACK":
  288                     # p_flags holds the bit mask for this segment.
  289                     # See `man 5 elf`.
  290                     mode = segment["p_flags"]
  291                     if mode & elftools.elf.constants.P_FLAGS.PF_X:
  292                         execstack_set = True
  293 
  294         return arch, interp, soname, libs, execstack_set, is_dynamic
  295 
  296     def is_linker_compatible(self, *, linker_version: str) -> bool:
  297         """Determines if linker will work given the required glibc version."""
  298         version_required = self.get_required_glibc()
  299         r = parse_version(version_required) <= parse_version(linker_version)
  300         logger.debug(
  301             "Checking if linker {!r} will work with "
  302             "GLIBC_{} required by {!r}: {!r}".format(
  303                 linker_version, version_required, self.path, r
  304             )
  305         )
  306         return r
  307 
  308     def get_required_glibc(self) -> str:
  309         """Returns the required glibc version for this ELF file."""
  310         with contextlib.suppress(AttributeError):
  311             return self._required_glibc  # type: ignore
  312 
  313         version_required = ""
  314         for lib in self.needed.values():
  315             for version in lib.versions:
  316                 if not version.startswith("GLIBC_"):
  317                     continue
  318                 version = version[6:]
  319                 if parse_version(version) > parse_version(version_required):
  320                     version_required = version
  321 
  322         self._required_glibc = version_required
  323         return version_required
  324 
  325     def load_dependencies(
  326         self, root_path: str, core_base_path: str, soname_cache: SonameCache = None
  327     ) -> Set[str]:
  328         """Load the set of libraries that are needed to satisfy elf's runtime.
  329 
  330         This may include libraries contained within the project.
  331         The object's .dependencies attribute is set after loading.
  332 
  333         :param str root_path: the root path to search for missing dependencies.
  334         :param str core_base_path: the core base path to search for missing
  335                                    dependencies.
  336         :param SonameCache soname_cache: a cache of previously search
  337                                          dependencies.
  338         :returns: a set of string with paths to the library dependencies of
  339                   elf.
  340         """
  341         if soname_cache is None:
  342             soname_cache = SonameCache()
  343 
  344         logger.debug("Getting dependencies for {!r}".format(self.path))
  345         ldd_out = []  # type: List[str]
  346         try:
  347             # ldd output sample:
  348             # /lib64/ld-linux-x86-64.so.2 (0x00007fb3c5298000)
  349             # libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007fb3bef03000)
  350             ldd_out = common.run_output(["ldd", self.path]).split("\n")
  351         except subprocess.CalledProcessError:
  352             logger.warning(
  353                 "Unable to determine library dependencies for {!r}".format(self.path)
  354             )
  355             return set()
  356         ldd_out_split = [l.split() for l in ldd_out]
  357         libs = set()
  358         for ldd_line in ldd_out_split:
  359             if len(ldd_line) > 2:
  360                 libs.add(
  361                     Library(
  362                         soname=ldd_line[0],
  363                         path=ldd_line[2],
  364                         root_path=root_path,
  365                         core_base_path=core_base_path,
  366                         arch=self.arch,
  367                         soname_cache=soname_cache,
  368                     )
  369                 )
  370 
  371         self.dependencies = libs
  372 
  373         # Return a set useful only for fetching libraries from the host
  374         library_paths = set()  # type: Set[str]
  375         for l in libs:
  376             if os.path.exists(l.path) and not l.in_base_snap:
  377                 library_paths.add(l.path)
  378         return library_paths
  379 
  380 
  381 class Patcher:
  382     """Patcher holds the necessary logic to patch elf files."""
  383 
  384     def __init__(
  385         self, *, dynamic_linker: str, root_path: str, preferred_patchelf_path=None
  386     ) -> None:
  387         """Create a Patcher instance.
  388 
  389         :param str dynamic_linker: the path to the dynamic linker to set the
  390                                    elf file to.
  391         :param str root_path: the base path for the snap to determine
  392                               if use of $ORIGIN is possible.
  393         :param str preferred_patchelf_path: patch the necessary elf_files with
  394                                         this patchelf.
  395         """
  396         self._dynamic_linker = dynamic_linker
  397         self._root_path = root_path
  398 
  399         if preferred_patchelf_path:
  400             self._patchelf_cmd = preferred_patchelf_path
  401         else:
  402             self._patchelf_cmd = file_utils.get_tool_path("patchelf")
  403 
  404         self._strip_cmd = file_utils.get_tool_path("strip")
  405 
  406     def patch(self, *, elf_file: ElfFile) -> None:
  407         """Patch elf_file with the Patcher instance configuration.
  408 
  409         If the ELF is executable, patch it to use the configured linker.
  410         If the ELF has dependencies (DT_NEEDED), set an rpath to them.
  411 
  412         :param ElfFile elf: a data object representing an elf file and its
  413                             relevant attributes.
  414         :raises snapcraft.internal.errors.PatcherError:
  415             raised when the elf_file cannot be patched.
  416         """
  417         patchelf_args = []
  418         if elf_file.interp:
  419             patchelf_args.extend(["--set-interpreter", self._dynamic_linker])
  420         if elf_file.dependencies:
  421             rpath = self._get_rpath(elf_file)
  422             # Due to https://github.com/NixOS/patchelf/issues/94 we need
  423             # to first clear the current rpath
  424             self._run_patchelf(
  425                 patchelf_args=["--remove-rpath"], elf_file_path=elf_file.path
  426             )
  427             # Parameters:
  428             # --force-rpath: use RPATH instead of RUNPATH.
  429             # --shrink-rpath: will remove unneeded entries, with the
  430             #                 side effect of preferring host libraries
  431             #                 so we simply do not use it.
  432             # --set-rpath: set the RPATH to the colon separated argument.
  433             patchelf_args.extend(["--force-rpath", "--set-rpath", rpath])
  434 
  435         # no patchelf_args means there is nothing to do.
  436         if not patchelf_args:
  437             return
  438 
  439         self._run_patchelf(patchelf_args=patchelf_args, elf_file_path=elf_file.path)
  440 
  441     def _run_patchelf(self, *, patchelf_args: List[str], elf_file_path: str) -> None:
  442         # Run patchelf on a copy of the primed file and replace it
  443         # after it is successful. This allows us to break the potential
  444         # hard link created when migrating the file across the steps of
  445         # the part.
  446         with tempfile.NamedTemporaryFile() as temp_file:
  447             shutil.copy2(elf_file_path, temp_file.name)
  448 
  449             cmd = [self._patchelf_cmd] + patchelf_args + [temp_file.name]
  450             try:
  451                 subprocess.check_call(cmd)
  452             # There is no need to catch FileNotFoundError as patchelf should be
  453             # bundled with snapcraft which means its lack of existence is a
  454             # "packager" error.
  455             except subprocess.CalledProcessError as call_error:
  456                 raise errors.PatcherGenericError(
  457                     elf_file=elf_file_path, process_exception=call_error
  458                 )
  459 
  460             # We unlink to break the potential hard link
  461             os.unlink(elf_file_path)
  462             shutil.copy2(temp_file.name, elf_file_path)
  463 
  464     def _get_existing_rpath(self, elf_file_path):
  465         output = subprocess.check_output(
  466             [self._patchelf_cmd, "--print-rpath", elf_file_path]
  467         )
  468         return output.decode().strip().split(":")
  469 
  470     def _get_rpath(self, elf_file) -> str:
  471         origin_rpaths = list()  # type: List[str]
  472         base_rpaths = set()  # type: Set[str]
  473         existing_rpaths = self._get_existing_rpath(elf_file.path)
  474 
  475         for dependency in elf_file.dependencies:
  476             if dependency.path:
  477                 if dependency.in_base_snap:
  478                     base_rpaths.add(os.path.dirname(dependency.path))
  479                 elif dependency.path.startswith(self._root_path):
  480                     rel_library_path = os.path.relpath(dependency.path, elf_file.path)
  481                     rel_library_path_dir = os.path.dirname(rel_library_path)
  482                     # return the dirname, with the first .. replace
  483                     # with $ORIGIN
  484                     origin_rpath = rel_library_path_dir.replace("..", "$ORIGIN", 1)
  485                     if origin_rpath not in origin_rpaths:
  486                         origin_rpaths.append(origin_rpath)
  487 
  488         if existing_rpaths:
  489             # Only keep those that mention origin and are not already in our
  490             # bundle.
  491             existing_rpaths = [
  492                 r for r in existing_rpaths if "$ORIGIN" in r and r not in origin_rpaths
  493             ]
  494             origin_rpaths = existing_rpaths + origin_rpaths
  495 
  496         origin_paths = ":".join((r for r in origin_rpaths if r))
  497         core_base_rpaths = ":".join(base_rpaths)
  498 
  499         if origin_paths and core_base_rpaths:
  500             return "{}:{}".format(origin_paths, core_base_rpaths)
  501         elif origin_paths and not core_base_rpaths:
  502             return origin_paths
  503         else:
  504             return core_base_rpaths
  505 
  506 
  507 def determine_ld_library_path(root: str) -> List[str]:
  508     """Determine additional library paths needed for the linker loader.
  509 
  510     This is a workaround until full library searching is implemented which
  511     works by searching for ld.so.conf in specific hard coded locations
  512     within root.
  513 
  514     :param root str: the root directory to search for specific ld.so.conf
  515                      entries.
  516     :returns: a list of strings of library paths where relevant libraries
  517               can be found within root.
  518     """
  519     # If more ld.so.conf files need to be supported, add them here.
  520     ld_config_globs = {"{}/usr/lib/*/mesa*/ld.so.conf".format(root)}
  521 
  522     ld_library_paths = []
  523     for this_glob in ld_config_globs:
  524         for ld_conf_file in glob.glob(this_glob):
  525             ld_library_paths.extend(_extract_ld_library_paths(ld_conf_file))
  526 
  527     return [root + path for path in ld_library_paths]
  528 
  529 
  530 def _extract_ld_library_paths(ld_conf_file: str) -> List[str]:
  531     # From the ldconfig manpage, paths can be colon-, space-, tab-, newline-,
  532     # or comma-separated.
  533     path_delimiters = re.compile(r"[:\s,]")
  534     comments = re.compile(r"#.*$")
  535 
  536     paths = []
  537     with open(ld_conf_file, "r") as f:
  538         for line in f:
  539             # Remove comments from line
  540             line = comments.sub("", line).strip()
  541 
  542             if line:
  543                 paths.extend(path_delimiters.split(line))
  544 
  545     return paths
  546 
  547 
# NOTE(review): nothing in the visible part of this module reads or writes
# this name -- presumably a leftover module-level cache; confirm that no
# other module relies on it before removing.
_libraries = None
  549 
  550 
  551 def get_elf_files(root: str, file_list: Sequence[str]) -> FrozenSet[ElfFile]:
  552     """Return a frozenset of elf files from file_list prepended with root.
  553 
  554     :param str root: the root directory from where the file_list is generated.
  555     :param file_list: a list of file in root.
  556     :returns: a frozentset of ElfFile objects.
  557     """
  558     elf_files = set()  # type: Set[ElfFile]
  559 
  560     for part_file in file_list:
  561         # Filter out object (*.o) files-- we only care about binaries.
  562         if part_file.endswith(".o"):
  563             continue
  564 
  565         # No need to crawl links-- the original should be here, too.
  566         path = os.path.join(root, part_file)  # type: str
  567         if os.path.islink(path):
  568             logger.debug("Skipped link {!r} while finding dependencies".format(path))
  569             continue
  570 
  571         # Ignore if file does not have ELF header.
  572         if not ElfFile.is_elf(path):
  573             continue
  574 
  575         try:
  576             elf_file = ElfFile(path=path)
  577         except elftools.common.exceptions.ELFError:
  578             # Ignore invalid ELF files.
  579             continue
  580 
  581         # If ELF has dynamic symbols, add it.
  582         if elf_file.needed:
  583             elf_files.add(elf_file)
  584 
  585     return frozenset(elf_files)
  586 
  587 
  588 def _get_dynamic_linker(library_list: List[str]) -> str:
  589     """Return the dynamic linker from library_list."""
  590     regex = re.compile(r"(?P<dynamic_linker>ld-[\d.]+.so)$")
  591 
  592     for library in library_list:
  593         m = regex.search(os.path.basename(library))
  594         if m:
  595             return library
  596 
  597     raise RuntimeError(
  598         "The format for the linker should be of the form "
  599         "<root>/ld-<X>.<Y>.so. There are no matches for the "
  600         "current libc6 package"
  601     )
  602 
  603 
  604 def find_linker(*, root_path: str, snap_base_path: str) -> str:
  605     """Find and return the dynamic linker that would be seen at runtime.
  606 
  607     :param str root_path: the root path of a snap tree.
  608     :param str snap_base_path: absolute path to the snap once installed to
  609                                setup proper rpaths.
  610     :returns: the path to the dynamic linker to use
  611     """
  612     # We assume the current system will satisfy the GLIBC requirement,
  613     # get the current libc6 libraries (which includes the linker)
  614     libc6_libraries_list = repo.Repo.get_package_libraries("libc6")
  615 
  616     # For security reasons, we do not want to automatically pull in
  617     # libraries but expect them to be consciously brought in by stage-packages
  618     # instead.
  619     libc6_libraries_paths = [
  620         os.path.join(root_path, l[1:]) for l in libc6_libraries_list
  621     ]
  622 
  623     dynamic_linker = _get_dynamic_linker(libc6_libraries_paths)
  624 
  625     # Get the path to the "would be" dynamic linker when this snap is
  626     # installed. Strip the root_path from the retrieved dynamic_linker
  627     # variables + the leading `/` so that os.path.join can perform the
  628     # proper join with snap_base_path.
  629     dynamic_linker_path = os.path.join(
  630         snap_base_path, dynamic_linker[len(root_path) + 1 :]
  631     )
  632 
  633     return dynamic_linker_path