"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/amd/common/ac_rtld.c" (16 Sep 2020, 22818 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ac_rtld.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /*
    2  * Copyright 2014-2019 Advanced Micro Devices, Inc.
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   21  * SOFTWARE.
   22  */
   23 
   24 #include "ac_rtld.h"
   25 
   26 #include <gelf.h>
   27 #include <libelf.h>
   28 #include <stdarg.h>
   29 #include <stdio.h>
   30 #include <stdlib.h>
   31 #include <string.h>
   32 
   33 #include "ac_binary.h"
   34 #include "ac_gpu_info.h"
   35 #include "util/u_dynarray.h"
   36 #include "util/u_math.h"
   37 
// Old distributions may not have this enum constant
// (it is the value compared against the ELF header's e_machine field when a
// shader part is opened).
#define MY_EM_AMDGPU 224

/* The #ifndef-guarded definitions below are fallbacks: they only take effect
 * when the included headers do not already provide the name. */

/* Symbol type used by old-style LDS symbols. */
#ifndef STT_AMDGPU_LDS
#define STT_AMDGPU_LDS 13 // this is deprecated -- remove
#endif

/* Section index that marks a symbol as an LDS symbol. */
#ifndef SHN_AMDGPU_LDS
#define SHN_AMDGPU_LDS 0xff00
#endif

/* AMDGPU relocation type codes. Guarded as a group: if R_AMDGPU_NONE is
 * already defined, none of these fallbacks are used. */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE 0
#define R_AMDGPU_ABS32_LO 1
#define R_AMDGPU_ABS32_HI 2
#define R_AMDGPU_ABS64 3
#define R_AMDGPU_REL32 4
#define R_AMDGPU_REL64 5
#define R_AMDGPU_ABS32 6
#define R_AMDGPU_GOTPCREL 7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
#define R_AMDGPU_REL32_LO 10
#define R_AMDGPU_REL32_HI 11
#define R_AMDGPU_RELATIVE64 13
#endif

/* For the UMR disassembler. */
/* DEBUGGER_NUM_MARKERS copies of the marker dword are written at the
 * rx_end_markers offset during upload. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS        5
   68 
/* Per-section bookkeeping for one ELF section of a shader part. */
struct ac_rtld_section {
    /* Set for SHF_ALLOC sections that are not SHT_NOTE; only these sections
     * are copied into the uploaded image and may be relocation targets. */
    bool is_rx : 1;
    /* Set for executable sections named ".text"; these are placed back to
     * back at the start of the image. */
    bool is_pasted_text : 1;
    /* Offset of the section within the uploaded image (added to rx_ptr /
     * rx_va when the section is written or addressed). */
    uint64_t offset;
    /* Section name as returned by elf_strptr(). */
    const char *name;
};
   75 
/* State kept for one shader part (one ELF image) of a binary. */
struct ac_rtld_part {
    /* libelf handle obtained from elf_memory(); released with elf_end(). */
    Elf *elf;
    /* Array of num_sections entries, indexed by ELF section index. */
    struct ac_rtld_section *sections;
    /* Section header count reported by elf_getshdrnum(). */
    unsigned num_sections;
};
   81 
   82 static void report_erroraf(const char *fmt, va_list va)
   83 {
   84     char *msg;
   85     int ret = asprintf(&msg, fmt, va);
   86     if (ret < 0)
   87         msg = "(asprintf failed)";
   88 
   89     fprintf(stderr, "ac_rtld error: %s\n", msg);
   90 
   91     if (ret >= 0)
   92         free(msg);
   93 }
   94 
   95 static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
   96 
   97 static void report_errorf(const char *fmt, ...)
   98 {
   99     va_list va;
  100     va_start(va, fmt);
  101     report_erroraf(fmt, va);
  102     va_end(va);
  103 }
  104 
  105 static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
  106 
  107 static void report_elf_errorf(const char *fmt, ...)
  108 {
  109     va_list va;
  110     va_start(va, fmt);
  111     report_erroraf(fmt, va);
  112     va_end(va);
  113 
  114     fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
  115 }
  116 
  117 /**
  118  * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
  119  * \p part_idx.
  120  */
  121 static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
  122                         const char *name, unsigned part_idx)
  123 {
  124     util_dynarray_foreach(symbols, struct ac_rtld_symbol, symbol) {
  125         if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) &&
  126             !strcmp(name, symbol->name))
  127             return symbol;
  128     }
  129     return 0;
  130 }
  131 
  132 static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
  133 {
  134     const struct ac_rtld_symbol *lhs = lhsp;
  135     const struct ac_rtld_symbol *rhs = rhsp;
  136     if (rhs->align > lhs->align)
  137         return 1;
  138     if (rhs->align < lhs->align)
  139         return -1;
  140     return 0;
  141 }
  142 
  143 /**
  144  * Sort the given symbol list by decreasing alignment and assign offsets.
  145  */
  146 static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
  147                uint64_t *ptotal_size)
  148 {
  149     qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
  150 
  151     uint64_t total_size = *ptotal_size;
  152 
  153     for (unsigned i = 0; i < num_symbols; ++i) {
  154         struct ac_rtld_symbol *s = &symbols[i];
  155         assert(util_is_power_of_two_nonzero(s->align));
  156 
  157         total_size = align64(total_size, s->align);
  158         s->offset = total_size;
  159 
  160         if (total_size + s->size < total_size) {
  161             report_errorf("%s: size overflow", __FUNCTION__);
  162             return false;
  163         }
  164 
  165         total_size += s->size;
  166     }
  167 
  168     *ptotal_size = total_size;
  169     return true;
  170 }
  171 
  172 /**
  173  * Read LDS symbols from the given \p section of the ELF of \p part and append
  174  * them to the LDS symbols list.
  175  *
  176  * Shared LDS symbols are filtered out.
  177  */
  178 static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
  179                      unsigned part_idx,
  180                      Elf_Scn *section,
  181                      uint32_t *lds_end_align)
  182 {
  183 #define report_if(cond) \
  184     do { \
  185         if ((cond)) { \
  186             report_errorf(#cond); \
  187             return false; \
  188         } \
  189     } while (false)
  190 #define report_elf_if(cond) \
  191     do { \
  192         if ((cond)) { \
  193             report_elf_errorf(#cond); \
  194             return false; \
  195         } \
  196     } while (false)
  197 
  198     struct ac_rtld_part *part = &binary->parts[part_idx];
  199     Elf64_Shdr *shdr = elf64_getshdr(section);
  200     uint32_t strtabidx = shdr->sh_link;
  201     Elf_Data *symbols_data = elf_getdata(section, NULL);
  202     report_elf_if(!symbols_data);
  203 
  204     const Elf64_Sym *symbol = symbols_data->d_buf;
  205     size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
  206 
  207     for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
  208         struct ac_rtld_symbol s = {};
  209 
  210         if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
  211             /* old-style LDS symbols from initial prototype -- remove eventually */
  212             s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
  213         } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
  214             s.align = MIN2(symbol->st_value, 1u << 16);
  215             report_if(!util_is_power_of_two_nonzero(s.align));
  216         } else
  217             continue;
  218 
  219         report_if(symbol->st_size > 1u << 29);
  220 
  221         s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
  222         s.size = symbol->st_size;
  223         s.part_idx = part_idx;
  224 
  225         if (!strcmp(s.name, "__lds_end")) {
  226             report_elf_if(s.size != 0);
  227             *lds_end_align = MAX2(*lds_end_align, s.align);
  228             continue;
  229         }
  230 
  231         const struct ac_rtld_symbol *shared =
  232             find_symbol(&binary->lds_symbols, s.name, part_idx);
  233         if (shared) {
  234             report_elf_if(s.align > shared->align);
  235             report_elf_if(s.size > shared->size);
  236             continue;
  237         }
  238 
  239         util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
  240     }
  241 
  242     return true;
  243 
  244 #undef report_if
  245 #undef report_elf_if
  246 }
  247 
  248 /**
  249  * Open a binary consisting of one or more shader parts.
  250  *
  251  * \param binary the uninitialized struct
  252  * \param i binary opening parameters
  253  */
  254 bool ac_rtld_open(struct ac_rtld_binary *binary,
  255           struct ac_rtld_open_info i)
  256 {
  257     /* One of the libelf implementations
  258      * (http://www.mr511.de/software/english.htm) requires calling
  259      * elf_version() before elf_memory().
  260      */
  261     elf_version(EV_CURRENT);
  262 
  263     memset(binary, 0, sizeof(*binary));
  264     memcpy(&binary->options, &i.options, sizeof(binary->options));
  265     binary->wave_size = i.wave_size;
  266     binary->num_parts = i.num_parts;
  267     binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
  268     if (!binary->parts)
  269         return false;
  270 
  271     uint64_t pasted_text_size = 0;
  272     uint64_t rx_align = 1;
  273     uint64_t rx_size = 0;
  274     uint64_t exec_size = 0;
  275 
  276 #define report_if(cond) \
  277     do { \
  278         if ((cond)) { \
  279             report_errorf(#cond); \
  280             goto fail; \
  281         } \
  282     } while (false)
  283 #define report_elf_if(cond) \
  284     do { \
  285         if ((cond)) { \
  286             report_elf_errorf(#cond); \
  287             goto fail; \
  288         } \
  289     } while (false)
  290 
  291     /* Copy and layout shared LDS symbols. */
  292     if (i.num_shared_lds_symbols) {
  293         if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
  294                       i.num_shared_lds_symbols))
  295             goto fail;
  296 
  297         memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
  298     }
  299 
  300     util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol, symbol)
  301         symbol->part_idx = ~0u;
  302 
  303     unsigned max_lds_size = 64 * 1024;
  304 
  305     if (i.info->chip_class == GFX6 ||
  306         (i.shader_type != MESA_SHADER_COMPUTE &&
  307          i.shader_type != MESA_SHADER_FRAGMENT))
  308         max_lds_size = 32 * 1024;
  309 
  310     uint64_t shared_lds_size = 0;
  311     if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
  312         goto fail;
  313 
  314     if (shared_lds_size > max_lds_size) {
  315         fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
  316             (unsigned)shared_lds_size, max_lds_size);
  317         goto fail;
  318     }
  319     binary->lds_size = shared_lds_size;
  320 
  321     /* First pass over all parts: open ELFs, pre-determine the placement of
  322      * sections in the memory image, and collect and layout private LDS symbols. */
  323     uint32_t lds_end_align = 0;
  324 
  325     if (binary->options.halt_at_entry)
  326         pasted_text_size += 4;
  327 
  328     for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
  329         struct ac_rtld_part *part = &binary->parts[part_idx];
  330         unsigned part_lds_symbols_begin =
  331             util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
  332 
  333         part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
  334         report_elf_if(!part->elf);
  335 
  336         const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
  337         report_elf_if(!ehdr);
  338         report_if(ehdr->e_machine != MY_EM_AMDGPU);
  339 
  340         size_t section_str_index;
  341         size_t num_shdrs;
  342         report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
  343         report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
  344 
  345         part->num_sections = num_shdrs;
  346         part->sections = calloc(sizeof(*part->sections), num_shdrs);
  347         report_if(!part->sections);
  348 
  349         Elf_Scn *section = NULL;
  350         while ((section = elf_nextscn(part->elf, section))) {
  351             Elf64_Shdr *shdr = elf64_getshdr(section);
  352             struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
  353             s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
  354             report_elf_if(!s->name);
  355 
  356             /* Cannot actually handle linked objects yet */
  357             report_elf_if(shdr->sh_addr != 0);
  358 
  359             /* Alignment must be 0 or a power of two */
  360             report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
  361             uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
  362 
  363             if (shdr->sh_flags & SHF_ALLOC &&
  364                 shdr->sh_type != SHT_NOTE) {
  365                 report_if(shdr->sh_flags & SHF_WRITE);
  366 
  367                 s->is_rx = true;
  368 
  369                 if (shdr->sh_flags & SHF_EXECINSTR) {
  370                     report_elf_if(shdr->sh_size & 3);
  371 
  372                     if (!strcmp(s->name, ".text"))
  373                         s->is_pasted_text = true;
  374 
  375                     exec_size += shdr->sh_size;
  376                 }
  377 
  378                 if (s->is_pasted_text) {
  379                     s->offset = pasted_text_size;
  380                     pasted_text_size += shdr->sh_size;
  381                 } else {
  382                     rx_align = align(rx_align, sh_align);
  383                     rx_size = align(rx_size, sh_align);
  384                     s->offset = rx_size;
  385                     rx_size += shdr->sh_size;
  386                 }
  387             } else if (shdr->sh_type == SHT_SYMTAB) {
  388                 if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
  389                     goto fail;
  390             }
  391         }
  392 
  393         uint64_t part_lds_size = shared_lds_size;
  394         if (!layout_symbols(
  395             util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol, part_lds_symbols_begin),
  396             util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) - part_lds_symbols_begin,
  397             &part_lds_size))
  398             goto fail;
  399         binary->lds_size = MAX2(binary->lds_size, part_lds_size);
  400     }
  401 
  402     binary->rx_end_markers = pasted_text_size;
  403     pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
  404 
  405     /* __lds_end is a special symbol that points at the end of the memory
  406      * occupied by other LDS symbols. Its alignment is taken as the
  407      * maximum of its alignment over all shader parts where it occurs.
  408      */
  409     if (lds_end_align) {
  410         binary->lds_size = align(binary->lds_size, lds_end_align);
  411 
  412         struct ac_rtld_symbol *lds_end =
  413             util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
  414         lds_end->name = "__lds_end";
  415         lds_end->size = 0;
  416         lds_end->align = lds_end_align;
  417         lds_end->offset = binary->lds_size;
  418         lds_end->part_idx = ~0u;
  419     }
  420 
  421     if (binary->lds_size > max_lds_size) {
  422         fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
  423             (unsigned)binary->lds_size, max_lds_size);
  424         goto fail;
  425     }
  426 
  427     /* Second pass: Adjust offsets of non-pasted text sections. */
  428     binary->rx_size = pasted_text_size;
  429     binary->rx_size = align(binary->rx_size, rx_align);
  430 
  431     for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
  432         struct ac_rtld_part *part = &binary->parts[part_idx];
  433         size_t num_shdrs;
  434         elf_getshdrnum(part->elf, &num_shdrs);
  435 
  436         for (unsigned j = 0; j < num_shdrs; ++j) {
  437             struct ac_rtld_section *s = &part->sections[j];
  438             if (s->is_rx && !s->is_pasted_text)
  439                 s->offset += binary->rx_size;
  440         }
  441     }
  442 
  443     binary->rx_size += rx_size;
  444     binary->exec_size = exec_size;
  445 
  446     if (i.info->chip_class >= GFX10) {
  447         /* In gfx10, the SQ fetches up to 3 cache lines of 16 dwords
  448          * ahead of the PC, configurable by SH_MEM_CONFIG and
  449          * S_INST_PREFETCH. This can cause two issues:
  450          *
  451          * (1) Crossing a page boundary to an unmapped page. The logic
  452          *     does not distinguish between a required fetch and a "mere"
  453          *     prefetch and will fault.
  454          *
  455          * (2) Prefetching instructions that will be changed for a
  456          *     different shader.
  457          *
  458          * (2) is not currently an issue because we flush the I$ at IB
  459          * boundaries, but (1) needs to be addressed. Due to buffer
  460          * suballocation, we just play it safe.
  461          */
  462         binary->rx_size = align(binary->rx_size + 3 * 64, 64);
  463     }
  464 
  465     return true;
  466 
  467 #undef report_if
  468 #undef report_elf_if
  469 
  470 fail:
  471     ac_rtld_close(binary);
  472     return false;
  473 }
  474 
  475 void ac_rtld_close(struct ac_rtld_binary *binary)
  476 {
  477     for (unsigned i = 0; i < binary->num_parts; ++i) {
  478         struct ac_rtld_part *part = &binary->parts[i];
  479         free(part->sections);
  480         elf_end(part->elf);
  481     }
  482 
  483     util_dynarray_fini(&binary->lds_symbols);
  484     free(binary->parts);
  485     binary->parts = NULL;
  486     binary->num_parts = 0;
  487 }
  488 
  489 static bool get_section_by_name(struct ac_rtld_part *part, const char *name,
  490                 const char **data, size_t *nbytes)
  491 {
  492     for (unsigned i = 0; i < part->num_sections; ++i) {
  493         struct ac_rtld_section *s = &part->sections[i];
  494         if (s->name && !strcmp(name, s->name)) {
  495             Elf_Scn *target_scn = elf_getscn(part->elf, i);
  496             Elf_Data *target_data = elf_getdata(target_scn, NULL);
  497             if (!target_data) {
  498                 report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
  499                 return false;
  500             }
  501 
  502             *data = target_data->d_buf;
  503             *nbytes = target_data->d_size;
  504             return true;
  505         }
  506     }
  507     return false;
  508 }
  509 
  510 bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
  511                  const char **data, size_t *nbytes)
  512 {
  513     assert(binary->num_parts == 1);
  514     return get_section_by_name(&binary->parts[0], name, data, nbytes);
  515 }
  516 
  517 bool ac_rtld_read_config(struct ac_rtld_binary *binary,
  518              struct ac_shader_config *config)
  519 {
  520     for (unsigned i = 0; i < binary->num_parts; ++i) {
  521         struct ac_rtld_part *part = &binary->parts[i];
  522         const char *config_data;
  523         size_t config_nbytes;
  524 
  525         if (!get_section_by_name(part, ".AMDGPU.config",
  526                      &config_data, &config_nbytes))
  527             return false;
  528 
  529         /* TODO: be precise about scratch use? */
  530         struct ac_shader_config c = {};
  531         ac_parse_shader_binary_config(config_data, config_nbytes,
  532                           binary->wave_size, true, &c);
  533 
  534         config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
  535         config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
  536         config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
  537         config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
  538         config->scratch_bytes_per_wave = MAX2(config->scratch_bytes_per_wave,
  539                               c.scratch_bytes_per_wave);
  540 
  541         assert(i == 0 || config->float_mode == c.float_mode);
  542         config->float_mode = c.float_mode;
  543 
  544         /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
  545          * the main shader part is used. */
  546         assert(config->spi_ps_input_ena == 0 &&
  547                config->spi_ps_input_addr == 0);
  548         config->spi_ps_input_ena = c.spi_ps_input_ena;
  549         config->spi_ps_input_addr = c.spi_ps_input_addr;
  550 
  551         /* TODO: consistently use LDS symbols for this */
  552         config->lds_size = MAX2(config->lds_size, c.lds_size);
  553 
  554         /* TODO: Should we combine these somehow? It's currently only
  555          * used for radeonsi's compute, where multiple parts aren't used. */
  556         assert(config->rsrc1 == 0 && config->rsrc2 == 0);
  557         config->rsrc1 = c.rsrc1;
  558         config->rsrc2 = c.rsrc2;
  559     }
  560 
  561     return true;
  562 }
  563 
  564 static bool resolve_symbol(const struct ac_rtld_upload_info *u,
  565                unsigned part_idx, const Elf64_Sym *sym,
  566                const char *name, uint64_t *value)
  567 {
  568     /* TODO: properly disentangle the undef and the LDS cases once
  569      * STT_AMDGPU_LDS is retired. */
  570     if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
  571         const struct ac_rtld_symbol *lds_sym =
  572             find_symbol(&u->binary->lds_symbols, name, part_idx);
  573 
  574         if (lds_sym) {
  575             *value = lds_sym->offset;
  576             return true;
  577         }
  578 
  579         /* TODO: resolve from other parts */
  580 
  581         if (u->get_external_symbol(u->cb_data, name, value))
  582             return true;
  583 
  584         report_errorf("symbol %s: unknown", name);
  585         return false;
  586     }
  587 
  588     struct ac_rtld_part *part = &u->binary->parts[part_idx];
  589     if (sym->st_shndx >= part->num_sections) {
  590         report_errorf("symbol %s: section out of bounds", name);
  591         return false;
  592     }
  593 
  594     struct ac_rtld_section *s = &part->sections[sym->st_shndx];
  595     if (!s->is_rx) {
  596         report_errorf("symbol %s: bad section", name);
  597         return false;
  598     }
  599 
  600     uint64_t section_base = u->rx_va + s->offset;
  601 
  602     *value = section_base + sym->st_value;
  603     return true;
  604 }
  605 
  606 static bool apply_relocs(const struct ac_rtld_upload_info *u,
  607              unsigned part_idx, const Elf64_Shdr *reloc_shdr,
  608              const Elf_Data *reloc_data)
  609 {
  610 #define report_if(cond) \
  611     do { \
  612         if ((cond)) { \
  613             report_errorf(#cond); \
  614             return false; \
  615         } \
  616     } while (false)
  617 #define report_elf_if(cond) \
  618     do { \
  619         if ((cond)) { \
  620             report_elf_errorf(#cond); \
  621             return false; \
  622         } \
  623     } while (false)
  624 
  625     struct ac_rtld_part *part = &u->binary->parts[part_idx];
  626     Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
  627     report_elf_if(!target_scn);
  628 
  629     Elf_Data *target_data = elf_getdata(target_scn, NULL);
  630     report_elf_if(!target_data);
  631 
  632     Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
  633     report_elf_if(!symbols_scn);
  634 
  635     Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
  636     report_elf_if(!symbols_shdr);
  637     uint32_t strtabidx = symbols_shdr->sh_link;
  638 
  639     Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
  640     report_elf_if(!symbols_data);
  641 
  642     const Elf64_Sym *symbols = symbols_data->d_buf;
  643     size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
  644 
  645     struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
  646     report_if(!s->is_rx);
  647 
  648     const char *orig_base = target_data->d_buf;
  649     char *dst_base = u->rx_ptr + s->offset;
  650     uint64_t va_base = u->rx_va + s->offset;
  651 
  652     Elf64_Rel *rel = reloc_data->d_buf;
  653     size_t num_relocs = reloc_data->d_size / sizeof(*rel);
  654     for (size_t i = 0; i < num_relocs; ++i, ++rel) {
  655         size_t r_sym = ELF64_R_SYM(rel->r_info);
  656         unsigned r_type = ELF64_R_TYPE(rel->r_info);
  657 
  658         const char *orig_ptr = orig_base + rel->r_offset;
  659         char *dst_ptr = dst_base + rel->r_offset;
  660         uint64_t va = va_base + rel->r_offset;
  661 
  662         uint64_t symbol;
  663         uint64_t addend;
  664 
  665         if (r_sym == STN_UNDEF) {
  666             symbol = 0;
  667         } else {
  668             report_elf_if(r_sym >= num_symbols);
  669 
  670             const Elf64_Sym *sym = &symbols[r_sym];
  671             const char *symbol_name =
  672                 elf_strptr(part->elf, strtabidx, sym->st_name);
  673             report_elf_if(!symbol_name);
  674 
  675             if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
  676                 return false;
  677         }
  678 
  679         /* TODO: Should we also support .rela sections, where the
  680          * addend is part of the relocation record? */
  681 
  682         /* Load the addend from the ELF instead of the destination,
  683          * because the destination may be in VRAM. */
  684         switch (r_type) {
  685         case R_AMDGPU_ABS32:
  686         case R_AMDGPU_ABS32_LO:
  687         case R_AMDGPU_ABS32_HI:
  688         case R_AMDGPU_REL32:
  689         case R_AMDGPU_REL32_LO:
  690         case R_AMDGPU_REL32_HI:
  691             addend = *(const uint32_t *)orig_ptr;
  692             break;
  693         case R_AMDGPU_ABS64:
  694         case R_AMDGPU_REL64:
  695             addend = *(const uint64_t *)orig_ptr;
  696             break;
  697         default:
  698             report_errorf("unsupported r_type == %u", r_type);
  699             return false;
  700         }
  701 
  702         uint64_t abs = symbol + addend;
  703 
  704         switch (r_type) {
  705         case R_AMDGPU_ABS32:
  706             assert((uint32_t)abs == abs);
  707         case R_AMDGPU_ABS32_LO:
  708             *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
  709             break;
  710         case R_AMDGPU_ABS32_HI:
  711             *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
  712             break;
  713         case R_AMDGPU_ABS64:
  714             *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
  715             break;
  716         case R_AMDGPU_REL32:
  717             assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
  718         case R_AMDGPU_REL32_LO:
  719             *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
  720             break;
  721         case R_AMDGPU_REL32_HI:
  722             *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
  723             break;
  724         case R_AMDGPU_REL64:
  725             *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
  726             break;
  727         default:
  728             unreachable("bad r_type");
  729         }
  730     }
  731 
  732     return true;
  733 
  734 #undef report_if
  735 #undef report_elf_if
  736 }
  737 
  738 /**
  739  * Upload the binary or binaries to the provided GPU buffers, including
  740  * relocations.
  741  */
  742 bool ac_rtld_upload(struct ac_rtld_upload_info *u)
  743 {
  744 #define report_if(cond) \
  745     do { \
  746         if ((cond)) { \
  747             report_errorf(#cond); \
  748             return false; \
  749         } \
  750     } while (false)
  751 #define report_elf_if(cond) \
  752     do { \
  753         if ((cond)) { \
  754             report_errorf(#cond); \
  755             return false; \
  756         } \
  757     } while (false)
  758 
  759     if (u->binary->options.halt_at_entry) {
  760         /* s_sethalt 1 */
  761         *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
  762     }
  763 
  764     /* First pass: upload raw section data and lay out private LDS symbols. */
  765     for (unsigned i = 0; i < u->binary->num_parts; ++i) {
  766         struct ac_rtld_part *part = &u->binary->parts[i];
  767 
  768         Elf_Scn *section = NULL;
  769         while ((section = elf_nextscn(part->elf, section))) {
  770             Elf64_Shdr *shdr = elf64_getshdr(section);
  771             struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
  772 
  773             if (!s->is_rx)
  774                 continue;
  775 
  776             report_if(shdr->sh_type != SHT_PROGBITS);
  777 
  778             Elf_Data *data = elf_getdata(section, NULL);
  779             report_elf_if(!data || data->d_size != shdr->sh_size);
  780             memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
  781         }
  782     }
  783 
  784     if (u->binary->rx_end_markers) {
  785         uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
  786         for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
  787             *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
  788     }
  789 
  790     /* Second pass: handle relocations, overwriting uploaded data where
  791      * appropriate. */
  792     for (unsigned i = 0; i < u->binary->num_parts; ++i) {
  793         struct ac_rtld_part *part = &u->binary->parts[i];
  794         Elf_Scn *section = NULL;
  795         while ((section = elf_nextscn(part->elf, section))) {
  796             Elf64_Shdr *shdr = elf64_getshdr(section);
  797             if (shdr->sh_type == SHT_REL) {
  798                 Elf_Data *relocs = elf_getdata(section, NULL);
  799                 report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
  800                 if (!apply_relocs(u, i, shdr, relocs))
  801                     return false;
  802             } else if (shdr->sh_type == SHT_RELA) {
  803                 report_errorf("SHT_RELA not supported");
  804                 return false;
  805             }
  806         }
  807     }
  808 
  809     return true;
  810 
  811 #undef report_if
  812 #undef report_elf_if
  813 }