"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c" (16 Sep 2020, 22917 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "nv50_shader_state.c" see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright 2008 Ben Skeggs
    3  * Copyright 2010 Christoph Bumiller
    4  *
    5  * Permission is hereby granted, free of charge, to any person obtaining a
    6  * copy of this software and associated documentation files (the "Software"),
    7  * to deal in the Software without restriction, including without limitation
    8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    9  * and/or sell copies of the Software, and to permit persons to whom the
   10  * Software is furnished to do so, subject to the following conditions:
   11  *
   12  * The above copyright notice and this permission notice shall be included in
   13  * all copies or substantial portions of the Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
   19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   21  * OTHER DEALINGS IN THE SOFTWARE.
   22  */
   23 
   24 #include "pipe/p_context.h"
   25 #include "pipe/p_defines.h"
   26 #include "pipe/p_state.h"
   27 #include "util/u_inlines.h"
   28 
   29 #include "nv50/nv50_context.h"
   30 #include "nv50/nv50_query_hw.h"
   31 
   32 #include "nv50/nv50_compute.xml.h"
   33 
/* Re-emit constant buffer bindings for the vertex, geometry and fragment
 * stages (s = 0..2), clearing each stage's dirty mask as slots are handled.
 */
void
nv50_constbufs_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned s;

   for (s = 0; s < 3; ++s) {
      unsigned p;

      /* Per-stage program selector for the SET_PROGRAM_CB method. */
      if (s == PIPE_SHADER_FRAGMENT)
         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT;
      else
      if (s == PIPE_SHADER_GEOMETRY)
         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY;
      else
         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX;

      /* Consume dirty slots lowest bit first until the mask is empty. */
      while (nv50->constbuf_dirty[s]) {
         const unsigned i = (unsigned)ffs(nv50->constbuf_dirty[s]) - 1;

         assert(i < NV50_MAX_PIPE_CONSTBUFS);
         nv50->constbuf_dirty[s] &= ~(1 << i);

         if (nv50->constbuf[s][i].user) {
            /* User (CPU-provided) constants: copy the data inline into the
             * per-stage uniform buffer via CB_ADDR / CB_DATA.
             */
            const unsigned b = NV50_CB_PVP + s;
            unsigned start = 0;
            /* NOTE(review): reads slot 0's size; this is only correct
             * because user constbufs in any other slot are rejected just
             * below — i is guaranteed to be 0 when this value is used. */
            unsigned words = nv50->constbuf[s][0].size / 4;
            if (i) {
               NOUVEAU_ERR("user constbufs only supported in slot 0\n");
               continue;
            }
            if (!nv50->state.uniform_buffer_bound[s]) {
               /* Bind the uniform buffer once; low bit 1 = enable. */
               nv50->state.uniform_buffer_bound[s] = true;
               BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
               PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
            }
            /* Upload the data in chunks bounded by the max NI packet size. */
            while (words) {
               unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);

               PUSH_SPACE(push, nr + 3);
               BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
               PUSH_DATA (push, (start << 8) | b);
               BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
               PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);

               start += nr;
               words -= nr;
            }
         } else {
            struct nv04_resource *res =
               nv04_resource(nv50->constbuf[s][i].u.buf);
            if (res) {
               /* TODO: allocate persistent bindings */
               const unsigned b = s * 16 + i;

               assert(nouveau_resource_mapped_by_gpu(&res->base));

               /* Point hardware constant buffer b at the GPU resource,
                * then enable it for this stage's slot i.
                */
               BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
               PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);
               PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);
               PUSH_DATA (push, (b << 16) |
                          (nv50->constbuf[s][i].size & 0xffff));
               BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
               PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);

               BCTX_REFN(nv50->bufctx_3d, 3D_CB(s, i), res, RD);

               nv50->cb_dirty = 1; /* Force cache flush for UBO. */
               res->cb_bindings[s] |= 1 << i;
            } else {
               /* NULL buffer: unbind the slot (low bit 0 = disable). */
               BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
               PUSH_DATA (push, (i << 8) | p | 0);
            }
            if (i == 0)
               nv50->state.uniform_buffer_bound[s] = false;
         }
      }
   }
}
  113 
  114 static bool
  115 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
  116 {
  117    if (!prog->translated) {
  118       prog->translated = nv50_program_translate(
  119          prog, nv50->screen->base.device->chipset, &nv50->base.debug);
  120       if (!prog->translated)
  121          return false;
  122    } else
  123    if (prog->mem)
  124       return true;
  125 
  126    return nv50_program_upload_code(nv50, prog);
  127 }
  128 
  129 static inline void
  130 nv50_program_update_context_state(struct nv50_context *nv50,
  131                                   struct nv50_program *prog, int stage)
  132 {
  133    const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
  134 
  135    if (prog && prog->tls_space) {
  136       if (nv50->state.new_tls_space)
  137          nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS);
  138       if (!nv50->state.tls_required || nv50->state.new_tls_space)
  139          BCTX_REFN_bo(nv50->bufctx_3d, 3D_TLS, flags, nv50->screen->tls_bo);
  140       nv50->state.new_tls_space = false;
  141       nv50->state.tls_required |= 1 << stage;
  142    } else {
  143       if (nv50->state.tls_required == (1 << stage))
  144          nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS);
  145       nv50->state.tls_required &= ~(1 << stage);
  146    }
  147 }
  148 
/* Validate the bound vertex program and emit its attribute masks,
 * register allocation and code-segment entry point.
 */
void
nv50_vertprog_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *vp = nv50->vertprog;

   /* Bail out if translation/upload failed. */
   if (!nv50_program_validate(nv50, vp))
         return;
   /* Stage 0 = vertex: update the TLS bookkeeping for this program. */
   nv50_program_update_context_state(nv50, vp, 0);

   /* Attribute enable masks computed at translation time. */
   BEGIN_NV04(push, NV50_3D(VP_ATTR_EN(0)), 2);
   PUSH_DATA (push, vp->vp.attrs[0]);
   PUSH_DATA (push, vp->vp.attrs[1]);
   BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_RESULT), 1);
   PUSH_DATA (push, vp->max_out);
   BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, vp->max_gpr);
   /* Offset of the program within the code segment. */
   BEGIN_NV04(push, NV50_3D(VP_START_ID), 1);
   PUSH_DATA (push, vp->code_base);
}
  169 
/* Validate the fragment program. This may invalidate the compiled code
 * (shader-emulated alpha test, per-sample interpolation changes) before
 * re-uploading and emitting the FP hardware state.
 */
void
nv50_fragprog_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *fp = nv50->fragprog;
   struct pipe_rasterizer_state *rast = &nv50->rast->pipe;

   if (!fp || !rast)
      return;

   /* Alpha test: the fixed-function path is only usable when RT0 has a
    * blendable format; otherwise the test is baked into the shader.
    */
   if (nv50->zsa && nv50->zsa->pipe.alpha.enabled) {
      struct pipe_framebuffer_state *fb = &nv50->framebuffer;
      bool blendable = fb->nr_cbufs == 0 || !fb->cbufs[0] ||
         nv50->screen->base.base.is_format_supported(
               &nv50->screen->base.base,
               fb->cbufs[0]->format,
               fb->cbufs[0]->texture->target,
               fb->cbufs[0]->texture->nr_samples,
               fb->cbufs[0]->texture->nr_storage_samples,
               PIPE_BIND_BLENDABLE);
      /* If we already have alphatest code, we have to keep updating
       * it. However we only have to have different code if the current RT0 is
       * non-blendable. Otherwise we just set it to always pass and use the
       * hardware alpha test.
       */
      if (fp->fp.alphatest || !blendable) {
         /* alphatest stores the pipe compare func + 1 (0 means unset). */
         uint8_t alphatest = PIPE_FUNC_ALWAYS + 1;
         if (!blendable)
            alphatest = nv50->zsa->pipe.alpha.func + 1;
         if (!fp->fp.alphatest)
            /* No alphatest variant exists yet: throw the program away so
             * it gets rebuilt with the test included. */
            nv50_program_destroy(nv50, fp);
         else if (fp->mem && fp->fp.alphatest != alphatest)
            /* Function changed: drop the uploaded code to force re-upload. */
            nouveau_heap_free(&fp->mem);

         fp->fp.alphatest = alphatest;
      }
   } else if (fp->fp.alphatest && fp->fp.alphatest != PIPE_FUNC_ALWAYS + 1) {
      /* Alpha test is disabled but we have a shader where it's filled
       * in. Make sure to reset the function to 'always', otherwise it'll end
       * up discarding fragments incorrectly.
       */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.alphatest = PIPE_FUNC_ALWAYS + 1;
   }

   if (fp->fp.force_persample_interp != rast->force_persample_interp) {
      /* Force the program to be reuploaded, which will trigger interp fixups
       * to get applied
       */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.force_persample_interp = rast->force_persample_interp;
   }

   /* Code still resident and nothing relevant dirty: skip re-emission. */
   if (fp->mem && !(nv50->dirty_3d & (NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_MIN_SAMPLES)))
      return;

   if (!nv50_program_validate(nv50, fp))
      return;
   /* Stage 1 = fragment: update TLS bookkeeping. */
   nv50_program_update_context_state(nv50, fp, 1);

   BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, fp->max_gpr);
   BEGIN_NV04(push, NV50_3D(FP_RESULT_COUNT), 1);
   PUSH_DATA (push, fp->max_out);
   BEGIN_NV04(push, NV50_3D(FP_CONTROL), 1);
   PUSH_DATA (push, fp->fp.flags[0]);
   BEGIN_NV04(push, NV50_3D(FP_CTRL_UNK196C), 1);
   PUSH_DATA (push, fp->fp.flags[1]);
   BEGIN_NV04(push, NV50_3D(FP_START_ID), 1);
   PUSH_DATA (push, fp->code_base);

   /* NVA3+ supports forced per-sample shading and sample-mask export. */
   if (nv50->screen->tesla->oclass >= NVA3_3D_CLASS) {
      BEGIN_NV04(push, SUBC_3D(NVA3_3D_FP_MULTISAMPLE), 1);
      if (nv50->min_samples > 1 || fp->fp.has_samplemask)
         PUSH_DATA(push,
                   NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE |
                   (NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK *
                    fp->fp.has_samplemask));
      else
         PUSH_DATA(push, 0);
   }
}
  256 
/* Validate the (optional) geometry program and emit its register
 * allocation, output primitive type, vertex count and entry point.
 */
void
nv50_gmtyprog_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *gp = nv50->gmtyprog;

   if (gp) {
      if (!nv50_program_validate(nv50, gp))
         return;
      BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_TEMP), 1);
      PUSH_DATA (push, gp->max_gpr);
      BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_RESULT), 1);
      PUSH_DATA (push, gp->max_out);
      BEGIN_NV04(push, NV50_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1);
      PUSH_DATA (push, gp->gp.prim_type);
      BEGIN_NV04(push, NV50_3D(GP_VERTEX_OUTPUT_COUNT), 1);
      PUSH_DATA (push, gp->gp.vert_count);
      BEGIN_NV04(push, NV50_3D(GP_START_ID), 1);
      PUSH_DATA (push, gp->code_base);

      nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */
   }
   /* Run even when gp is NULL so stage 2's TLS reference gets dropped. */
   nv50_program_update_context_state(nv50, gp, 2);

   /* GP_ENABLE is updated in linkage validation */
}
  283 
  284 void
  285 nv50_compprog_validate(struct nv50_context *nv50)
  286 {
  287    struct nouveau_pushbuf *push = nv50->base.pushbuf;
  288    struct nv50_program *cp = nv50->compprog;
  289 
  290    if (cp && !nv50_program_validate(nv50, cp))
  291       return;
  292 
  293    BEGIN_NV04(push, NV50_CP(CODE_CB_FLUSH), 1);
  294    PUSH_DATA (push, 0);
  295 }
  296 
/* Program POINT_COORD_REPLACE_MAP so that the FP's generic inputs selected
 * by the rasterizer's sprite_coord_enable mask get replaced with point
 * sprite coordinates.
 */
static void
nv50_sprite_coords_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   uint32_t pntc[8], mode;
   struct nv50_program *fp = nv50->fragprog;
   unsigned i, c;
   /* Index of the first 'normal' FP input in the interpolant map, as
    * stored in bits 8..15 of the saved FP_INTERPOLANT_CTRL word (set by
    * nv50_fp_linkage_validate).
    */
   unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff;

   if (!nv50->rast->pipe.point_quad_rasterization) {
      /* Sprites off: clear the replace map once, then remember it's clear. */
      if (nv50->state.point_sprite) {
         BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
         for (i = 0; i < 8; ++i)
            PUSH_DATA(push, 0);

         nv50->state.point_sprite = false;
      }
      return;
   } else {
      nv50->state.point_sprite = true;
   }

   memset(pntc, 0, sizeof(pntc));

   for (i = 0; i < fp->in_nr; i++) {
      unsigned n = util_bitcount(fp->in[i].mask);

      /* Non-generic inputs, and generics not selected for replacement,
       * just advance the map index by their component count.
       */
      if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
         m += n;
         continue;
      }
      if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) {
         m += n;
         continue;
      }

      /* One nibble per map slot: component index + 1 (0 = no replace). */
      for (c = 0; c < 4; ++c) {
         if (fp->in[i].mask & (1 << c)) {
            pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
            ++m;
         }
      }
   }

   /* Texture coordinate origin for the sprite. */
   if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
      mode = 0x00;
   else
      mode = 0x10;

   BEGIN_NV04(push, NV50_3D(POINT_SPRITE_CTRL), 1);
   PUSH_DATA (push, mode);

   BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
   PUSH_DATAp(push, pntc, 8);
}
  352 
  353 /* Validate state derived from shaders and the rasterizer cso. */
void
nv50_validate_derived_rs(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   uint32_t color, psize;

   nv50_sprite_coords_validate(nv50);

   /* Rasterizer discard toggles RASTERIZE_ENABLE (inverted sense). */
   if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) {
      nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard;
      BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
      PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
   }

   /* If the FP is dirty, nv50_fp_linkage_validate will recompute and emit
    * the semantic words itself, so don't touch them here.
    */
   if (nv50->dirty_3d & NV50_NEW_3D_FRAGPROG)
      return;
   /* Strip the rasterizer-derived bits from the cached words, re-apply. */
   psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
   color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN;

   if (nv50->rast->pipe.clamp_vertex_color)
      color |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;

   /* Only emit when the derived word actually changed. */
   if (color != nv50->state.semantic_color) {
      nv50->state.semantic_color = color;
      BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 1);
      PUSH_DATA (push, color);
   }

   if (nv50->rast->pipe.point_size_per_vertex)
      psize |= NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;

   if (psize != nv50->state.semantic_psize) {
      nv50->state.semantic_psize = psize;
      BEGIN_NV04(push, NV50_3D(SEMANTIC_PTSZ), 1);
      PUSH_DATA (push, psize);
   }
}
  391 
  392 static int
  393 nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],
  394               struct nv50_varying *in, struct nv50_varying *out)
  395 {
  396    int c;
  397    uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
  398 
  399    for (c = 0; c < 4; ++c) {
  400       if (mf & 1) {
  401          if (in->linear)
  402             lin[mid / 32] |= 1 << (mid % 32);
  403          if (mv & 1)
  404             map[mid] = oid;
  405          else
  406          if (c == 3)
  407             map[mid] |= 1;
  408          ++mid;
  409       }
  410 
  411       oid += mv & 1;
  412       mf >>= 1;
  413       mv >>= 1;
  414    }
  415 
  416    return mid;
  417 }
  418 
/* Build the result map that routes outputs of the last vertex-side stage
 * (GP if present, else VP) to FP inputs, then emit it together with the
 * derived semantic state: color ids, clip distances, point size, layer,
 * viewport index, interpolation control and the stream output map.
 */
void
nv50_fp_linkage_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
   struct nv50_program *fp = nv50->fragprog;
   struct nv50_varying dummy;
   int i, n, c, m;
   uint32_t primid = 0;
   uint32_t layerid = 0;
   uint32_t viewportid = 0;
   uint32_t psiz = 0x000;
   uint32_t interp = fp->fp.interp;
   uint32_t colors = fp->fp.colors;
   uint32_t clpd_nr = util_last_bit(vp->vp.clip_enable | vp->vp.cull_enable);
   uint32_t lin[4];
   uint8_t map[64];
   uint8_t so_map[64];

   /* Fast path: if no program changed, only rebuild the linkage when the
    * two-sided-lighting setting disagrees with the FFC0/BFC0 ids that were
    * programmed last time.
    */
   if (!(nv50->dirty_3d & (NV50_NEW_3D_VERTPROG |
                           NV50_NEW_3D_FRAGPROG |
                           NV50_NEW_3D_GMTYPROG))) {
      uint8_t bfc, ffc;
      ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
      bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)
         >> 8;
      if (nv50->rast->pipe.light_twoside == ((ffc == bfc) ? 0 : 1))
         return;
   }

   memset(lin, 0x00, sizeof(lin));

   /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx
    *  or is it the first byte ?
    */
   memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map));

   /* Slot 0..3 is always the position output. */
   dummy.mask = 0xf; /* map all components of HPOS */
   dummy.linear = 0;
   m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);

   /* Clip/cull distances follow directly after position. */
   for (c = 0; c < clpd_nr; ++c)
      map[m++] = vp->vp.clpd[c / 4] + (c % 4);

   colors |= m << 8; /* adjust BFC0 id */

   dummy.mask = 0x0;

   /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
   if (nv50->rast->pipe.light_twoside) {
      for (i = 0; i < 2; ++i) {
         n = vp->vp.bfc[i];
         if (fp->vp.bfc[i] >= fp->in_nr)
            continue;
         m = nv50_vec4_map(map, m, lin, &fp->in[fp->vp.bfc[i]],
                           (n < vp->out_nr) ? &vp->out[n] : &dummy);
      }
   }
   colors += m - 4; /* adjust FFC0 id */
   interp |= m << 8; /* set map id where 'normal' FP inputs start */

   /* Match each FP input against a VP/GP output by semantic name/index,
    * noting the map positions of PRIMID / LAYER / VIEWPORT_INDEX inputs.
    */
   for (i = 0; i < fp->in_nr; ++i) {
      for (n = 0; n < vp->out_nr; ++n)
         if (vp->out[n].sn == fp->in[i].sn &&
             vp->out[n].si == fp->in[i].si)
            break;
      switch (fp->in[i].sn) {
      case TGSI_SEMANTIC_PRIMID:
         primid = m;
         break;
      case TGSI_SEMANTIC_LAYER:
         layerid = m;
         break;
      case TGSI_SEMANTIC_VIEWPORT_INDEX:
         viewportid = m;
         break;
      }
      /* Unmatched inputs fall back to the zero-mask dummy. */
      m = nv50_vec4_map(map, m, lin,
                        &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
   }

   /* Append layer/viewport outputs the FP doesn't consume itself. */
   if (vp->gp.has_layer && !layerid) {
      layerid = m;
      map[m++] = vp->gp.layerid;
   }

   if (vp->gp.has_viewport && !viewportid) {
      viewportid = m;
      map[m++] = vp->gp.viewportid;
   }

   if (nv50->rast->pipe.point_size_per_vertex) {
      psiz = (m << 4) | 1;
      map[m++] = vp->vp.psiz;
   }

   if (nv50->rast->pipe.clamp_vertex_color)
      colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;

   if (unlikely(vp->so)) {
      /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
       * gets written.
       *
       * TODO:
       * Inverting vp->so->map (output -> offset) would probably speed this up.
       */
      memset(so_map, 0, sizeof(so_map));
      for (i = 0; i < vp->so->map_size; ++i) {
         if (vp->so->map[i] == 0xff)
            continue;
         for (c = 0; c < m; ++c)
            if (map[c] == vp->so->map[i] && !so_map[c])
               break;
         if (c == m) {
            /* Output not present in the result map yet: append it. */
            c = m;
            map[m++] = vp->so->map[i];
         }
         so_map[c] = 0x80 | i;
      }
      /* Zero-pad to a full 32-bit word (4 entries). */
      for (c = m; c & 3; ++c)
         so_map[c] = 0;
   }

   n = (m + 3) / 4;   /* number of 32-bit words holding the byte map */
   assert(m <= 64);

   if (unlikely(nv50->gmtyprog)) {
      BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP_SIZE), 1);
      PUSH_DATA (push, m);
      BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP(0)), n);
      PUSH_DATAp(push, map, n);
   } else {
      BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
      PUSH_DATA (push, vp->vp.attrs[2] | fp->vp.attrs[2]);

      BEGIN_NV04(push, NV50_3D(SEMANTIC_PRIM_ID), 1);
      PUSH_DATA (push, primid);

      assert(m > 0);
      BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
      PUSH_DATA (push, m);
      BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
      PUSH_DATAp(push, map, n);
   }

   BEGIN_NV04(push, NV50_3D(GP_VIEWPORT_ID_ENABLE), 5);
   PUSH_DATA (push, vp->gp.has_viewport);
   PUSH_DATA (push, colors);
   PUSH_DATA (push, (clpd_nr << 8) | 4);
   PUSH_DATA (push, layerid);
   PUSH_DATA (push, psiz);

   BEGIN_NV04(push, NV50_3D(SEMANTIC_VIEWPORT), 1);
   PUSH_DATA (push, viewportid);

   BEGIN_NV04(push, NV50_3D(LAYER), 1);
   PUSH_DATA (push, vp->gp.has_layer << 16);

   BEGIN_NV04(push, NV50_3D(FP_INTERPOLANT_CTRL), 1);
   PUSH_DATA (push, interp);

   /* Cache derived words for nv50_validate_derived_rs and the point
    * sprite validation, which read them back.
    */
   nv50->state.interpolant_ctrl = interp;

   nv50->state.semantic_color = colors;
   nv50->state.semantic_psize = psiz;

   BEGIN_NV04(push, NV50_3D(NOPERSPECTIVE_BITMAP(0)), 4);
   PUSH_DATAp(push, lin, 4);

   BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);
   PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);

   if (vp->so) {
      BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);
      PUSH_DATAp(push, so_map, n);
   }
}
  596 
  597 static int
  598 nv50_vp_gp_mapping(uint8_t *map, int m,
  599                    struct nv50_program *vp, struct nv50_program *gp)
  600 {
  601    int i, j, c;
  602 
  603    for (i = 0; i < gp->in_nr; ++i) {
  604       uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
  605 
  606       for (j = 0; j < vp->out_nr; ++j) {
  607          if (vp->out[j].sn == gp->in[i].sn &&
  608              vp->out[j].si == gp->in[i].si) {
  609             mv = vp->out[j].mask;
  610             oid = vp->out[j].hw;
  611             break;
  612          }
  613       }
  614 
  615       for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
  616          if (mg & mv & 1)
  617             map[m++] = oid;
  618          else
  619          if (mg & 1)
  620             map[m++] = (c == 3) ? 0x41 : 0x40;
  621          oid += mv & 1;
  622       }
  623    }
  624    if (!m)
  625       map[m++] = 0;
  626    return m;
  627 }
  628 
  629 void
  630 nv50_gp_linkage_validate(struct nv50_context *nv50)
  631 {
  632    struct nouveau_pushbuf *push = nv50->base.pushbuf;
  633    struct nv50_program *vp = nv50->vertprog;
  634    struct nv50_program *gp = nv50->gmtyprog;
  635    int m = 0;
  636    int n;
  637    uint8_t map[64];
  638 
  639    if (!gp)
  640       return;
  641    memset(map, 0, sizeof(map));
  642 
  643    m = nv50_vp_gp_mapping(map, m, vp, gp);
  644 
  645    n = (m + 3) / 4;
  646 
  647    BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
  648    PUSH_DATA (push, vp->vp.attrs[2] | gp->vp.attrs[2]);
  649 
  650    assert(m > 0);
  651    BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
  652    PUSH_DATA (push, m);
  653    BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
  654    PUSH_DATAp(push, map, n);
  655 }
  656 
/* Program transform feedback: bind the stream output targets of the last
 * vertex-side stage, resuming at saved offsets where needed, and re-enable
 * capture. Disables TFB and returns early when nothing is to be captured.
 */
void
nv50_stream_output_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_stream_output_state *so;
   uint32_t ctrl;
   unsigned i;
   unsigned prims = ~0;

   /* Stream output state comes from GP if bound, else from VP. */
   so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;

   /* Disable TFB while reprogramming it. */
   BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
   PUSH_DATA (push, 0);
   if (!so || !nv50->num_so_targets) {
      if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
         BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
         PUSH_DATA (push, 0);
      }
      BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
      PUSH_DATA (push, 1);
      return;
   }

   /* previous TFB needs to complete */
   if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
      PUSH_DATA (push, 0);
   }

   ctrl = so->ctrl;
   if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)
      ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;

   BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
   PUSH_DATA (push, ctrl);

   for (i = 0; i < nv50->num_so_targets; ++i) {
      struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
      struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);

      /* NVA0+ takes a 4th word (buffer size); NV50 instead limits capture
       * through a global primitive count computed below.
       */
      const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;

      /* Resuming into a previously written buffer: wait for the query
       * holding the prior offset before using it.
       */
      if (n == 4 && !targ->clean)
         nv84_hw_query_fifo_wait(push, nv50_query(targ->pq));
      BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, so->num_attribs[i]);
      if (n == 4) {
         PUSH_DATA(push, targ->pipe.buffer_size);
         if (!targ->clean) {
            /* Continue at the offset recorded by the target's query. */
            assert(targ->pq);
            nv50_hw_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
                                         nv50_query(targ->pq), 0x4);
         } else {
            BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
            PUSH_DATA(push, 0);
            targ->clean = false;
         }
      } else {
         /* NV50: translate buffer capacity into a primitive limit. */
         const unsigned limit = targ->pipe.buffer_size /
            (so->stride[i] * nv50->state.prim_size);
         prims = MIN2(prims, limit);
      }
      targ->stride = so->stride[i];
      BCTX_REFN(nv50->bufctx_3d, 3D_SO, buf, WR);
   }
   /* Only set on the NV50 path, where at least one target computed a limit. */
   if (prims != ~0) {
      BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
      PUSH_DATA (push, prims);
   }
   BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
   PUSH_DATA (push, 1);
}