"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/nouveau/nv50/nv50_screen.c" (16 Sep 2020, 41621 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "nv50_screen.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /*
    2  * Copyright 2010 Christoph Bumiller
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice shall be included in
   12  * all copies or substantial portions of the Software.
   13  *
   14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
   18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   20  * OTHER DEALINGS IN THE SOFTWARE.
   21  */
   22 
   23 #include <errno.h>
   24 #include <xf86drm.h>
   25 #include <nouveau_drm.h>
   26 #include "util/format/u_format.h"
   27 #include "util/format/u_format_s3tc.h"
   28 #include "util/u_screen.h"
   29 #include "pipe/p_screen.h"
   30 #include "compiler/nir/nir.h"
   31 
   32 #include "nv50/nv50_context.h"
   33 #include "nv50/nv50_screen.h"
   34 
   35 #include "nouveau_vp3_video.h"
   36 
   37 #include "nv_object.xml.h"
   38 
   39 /* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */
   40 #define LOCAL_WARPS_ALLOC 32 /* NOTE(review): presumably a warp count used for local-memory sizing; users are outside this view */
   41 /* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */
   42 #define STACK_WARPS_ALLOC 32 /* NOTE(review): presumably a warp count used for stack sizing; users are outside this view */
   43 
   44 #define THREADS_IN_WARP 32 /* threads per warp, as the name states; users are outside this view */
   45 
   46 static bool
   47 nv50_screen_is_format_supported(struct pipe_screen *pscreen,
   48                                 enum pipe_format format,
   49                                 enum pipe_texture_target target,
   50                                 unsigned sample_count,
   51                                 unsigned storage_sample_count,
   52                                 unsigned bindings)
   53 {
   54    if (sample_count > 8)
   55       return false;
   56    if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
   57       return false;
   58    if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
   59       return false;
   60 
   61    if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
   62       return false;
   63 
   64    switch (format) {
   65    case PIPE_FORMAT_Z16_UNORM:
   66       if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
   67          return false;
   68       break;
   69    default:
   70       break;
   71    }
   72 
   73    if (bindings & PIPE_BIND_LINEAR)
   74       if (util_format_is_depth_or_stencil(format) ||
   75           (target != PIPE_TEXTURE_1D &&
   76            target != PIPE_TEXTURE_2D &&
   77            target != PIPE_TEXTURE_RECT) ||
   78           sample_count > 1)
   79          return false;
   80 
   81    /* shared is always supported */
   82    bindings &= ~(PIPE_BIND_LINEAR |
   83                  PIPE_BIND_SHARED);
   84 
   85    return (( nv50_format_table[format].usage |
   86             nv50_vertex_format[format].usage) & bindings) == bindings;
   87 }
   88 
   89 static int
   90 nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
   91 {
         /*
          * Report the integer value of a screen capability.
          *
          * Most caps are answered with constants. A few depend on the 3D
          * object class (class_3d), and two read from the device (PCI device
          * id, VRAM size). The caps listed last, and any cap without a case
          * label, are answered by u_pipe_screen_get_param_defaults().
          */
   92    const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
   93    struct nouveau_device *dev = nouveau_screen(pscreen)->device;
   94 
   95    switch (param) {
   96    /* non-boolean caps */
   97    case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
   98       return 8192;
   99    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
  100       return 12;
  101    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
  102       return 14;
  103    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
  104       return 512;
  105    case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
  106    case PIPE_CAP_MIN_TEXEL_OFFSET:
  107       return -8;
  108    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
  109    case PIPE_CAP_MAX_TEXEL_OFFSET:
  110       return 7;
  111    case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
  112       return 128 * 1024 * 1024;
  113    case PIPE_CAP_GLSL_FEATURE_LEVEL:
  114       return 330;
  115    case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
  116       return 330;
  117    case PIPE_CAP_MAX_RENDER_TARGETS:
  118       return 8;
  119    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
  120       return 1;
  121    case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
  122    case PIPE_CAP_RASTERIZER_SUBPIXEL_BITS:
  123       return 8;
  124    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
  125       return 4;
  126    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
  127    case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
  128       return 64;
  129    case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
  130    case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
  131       return 1024;
  132    case PIPE_CAP_MAX_VERTEX_STREAMS:
  133       return 1;
  134    case PIPE_CAP_MAX_GS_INVOCATIONS:
  135       return 0;
  136    case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
  137       return 0;
  138    case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
  139       return 2048;
  140    case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET:
  141       return 2047;
  142    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
  143       return 256;
  144    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
  145       return 16; /* 256 for binding as RT, but that's not possible in GL */
  146    case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
  147       return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
  148    case PIPE_CAP_MAX_VIEWPORTS:
  149       return NV50_MAX_VIEWPORTS;
  150    case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
  151       return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
  152    case PIPE_CAP_ENDIANNESS:
  153       return PIPE_ENDIAN_LITTLE;
  154    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
            /* 4 components on NVA3 and later classes, otherwise 0 */
  155       return (class_3d >= NVA3_3D_CLASS) ? 4 : 0;
  156    case PIPE_CAP_MAX_WINDOW_RECTANGLES:
  157       return NV50_MAX_WINDOW_RECTANGLES;
  158    case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
  159       return 16 * 1024 * 1024;
  160    case PIPE_CAP_MAX_VARYINGS:
  161       return 15;
  162    case PIPE_CAP_MAX_VERTEX_BUFFERS:
  163       return 16;
  164    case PIPE_CAP_GL_BEGIN_END_BUFFER_SIZE:
  165       return 512 * 1024; /* TODO: Investigate tuning this */
  166 
  167    /* supported caps */
  168    case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
  169    case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
  170    case PIPE_CAP_TEXTURE_SWIZZLE:
  171    case PIPE_CAP_TEXTURE_SHADOW_MAP:
  172    case PIPE_CAP_NPOT_TEXTURES:
  173    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
  174    case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
  175    case PIPE_CAP_ANISOTROPIC_FILTER:
  176    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
  177    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
  178    case PIPE_CAP_DEPTH_CLIP_DISABLE:
  179    case PIPE_CAP_POINT_SPRITE:
  180    case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
  181    case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
  182    case PIPE_CAP_VERTEX_SHADER_SATURATE:
  183    case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
  184    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
  185    case PIPE_CAP_VERTEX_COLOR_CLAMPED:
  186    case PIPE_CAP_QUERY_TIMESTAMP:
  187    case PIPE_CAP_QUERY_TIME_ELAPSED:
  188    case PIPE_CAP_OCCLUSION_QUERY:
  189    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
  190    case PIPE_CAP_INDEP_BLEND_ENABLE:
  191    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
  192    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
  193    case PIPE_CAP_PRIMITIVE_RESTART:
  194    case PIPE_CAP_TGSI_INSTANCEID:
  195    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
  196    case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
  197    case PIPE_CAP_CONDITIONAL_RENDER:
  198    case PIPE_CAP_TEXTURE_BARRIER:
  199    case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
  200    case PIPE_CAP_START_INSTANCE:
  201    case PIPE_CAP_USER_VERTEX_BUFFERS:
  202    case PIPE_CAP_TEXTURE_MULTISAMPLE:
  203    case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
  204    case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
  205    case PIPE_CAP_SAMPLER_VIEW_TARGET:
  206    case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
  207    case PIPE_CAP_CLIP_HALFZ:
  208    case PIPE_CAP_POLYGON_OFFSET_CLAMP:
  209    case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
  210    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
  211    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
  212    case PIPE_CAP_DEPTH_BOUNDS_TEST:
  213    case PIPE_CAP_TGSI_TXQS:
  214    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
  215    case PIPE_CAP_SHAREABLE_SHADERS:
  216    case PIPE_CAP_CLEAR_TEXTURE:
  217    case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
  218    case PIPE_CAP_INVALIDATE_BUFFER:
  219    case PIPE_CAP_STRING_MARKER:
  220    case PIPE_CAP_CULL_DISTANCE:
  221    case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
  222    case PIPE_CAP_TGSI_MUL_ZERO_WINS:
  223    case PIPE_CAP_TGSI_TEX_TXF_LZ:
  224    case PIPE_CAP_TGSI_CLOCK:
  225    case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
  226    case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
  227    case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL:
  228    case PIPE_CAP_TGSI_DIV:
  229    case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
  230    case PIPE_CAP_FLATSHADE:
  231    case PIPE_CAP_ALPHA_TEST:
  232    case PIPE_CAP_POINT_SIZE_FIXED:
  233    case PIPE_CAP_TWO_SIDED_COLOR:
  234    case PIPE_CAP_CLIP_PLANES:
  235    case PIPE_CAP_PACKED_STREAM_OUTPUT:
  236    case PIPE_CAP_DRAW_INFO_START_WITH_USER_INDICES:
  237       return 1;
  238    case PIPE_CAP_SEAMLESS_CUBE_MAP:
            /* reported unconditionally; the class check is left commented out */
  239       return 1; /* class_3d >= NVA0_3D_CLASS; */
  240    /* supported on nva0+ */
  241    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
  242       return class_3d >= NVA0_3D_CLASS;
  243    /* supported on nva3+ */
  244    case PIPE_CAP_CUBE_MAP_ARRAY:
  245    case PIPE_CAP_INDEP_BLEND_FUNC:
  246    case PIPE_CAP_TEXTURE_QUERY_LOD:
  247    case PIPE_CAP_SAMPLE_SHADING:
  248    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
  249       return class_3d >= NVA3_3D_CLASS;
  250 
  251    /* unsupported caps */
  252    case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
  253    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
  254    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
  255    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
  256    case PIPE_CAP_SHADER_STENCIL_EXPORT:
  257    case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
  258    case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
  259    case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
  260    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
  261    case PIPE_CAP_TGSI_TEXCOORD:
  262    case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
  263    case PIPE_CAP_TEXTURE_GATHER_SM5:
  264    case PIPE_CAP_FAKE_SW_MSAA:
  265    case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
  266    case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
  267    case PIPE_CAP_DRAW_INDIRECT:
  268    case PIPE_CAP_MULTI_DRAW_INDIRECT:
  269    case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
  270    case PIPE_CAP_VERTEXID_NOBASE:
  271    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
  272    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
  273    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
  274    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
  275    case PIPE_CAP_DRAW_PARAMETERS:
  276    case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
  277    case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
  278    case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL:
  279    case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
  280    case PIPE_CAP_GENERATE_MIPMAP:
  281    case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
  282    case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
  283    case PIPE_CAP_QUERY_BUFFER_OBJECT:
  284    case PIPE_CAP_QUERY_MEMORY_INFO:
  285    case PIPE_CAP_PCI_GROUP:
  286    case PIPE_CAP_PCI_BUS:
  287    case PIPE_CAP_PCI_DEVICE:
  288    case PIPE_CAP_PCI_FUNCTION:
  289    case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
  290    case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
  291    case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
  292    case PIPE_CAP_TGSI_VOTE:
  293    case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
  294    case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
  295    case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
  296    case PIPE_CAP_NATIVE_FENCE_FD:
  297    case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
  298    case PIPE_CAP_FBFETCH:
  299    case PIPE_CAP_DOUBLES:
  300    case PIPE_CAP_INT64:
  301    case PIPE_CAP_INT64_DIVMOD:
  302    case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
  303    case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
  304    case PIPE_CAP_TGSI_BALLOT:
  305    case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
  306    case PIPE_CAP_POST_DEPTH_COVERAGE:
  307    case PIPE_CAP_BINDLESS_TEXTURE:
  308    case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
  309    case PIPE_CAP_QUERY_SO_OVERFLOW:
  310    case PIPE_CAP_MEMOBJ:
  311    case PIPE_CAP_LOAD_CONSTBUF:
  312    case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
  313    case PIPE_CAP_TILE_RASTER_ORDER:
  314    case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
  315    case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS:
  316    case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
  317    case PIPE_CAP_CONTEXT_PRIORITY_MASK:
  318    case PIPE_CAP_FENCE_SIGNAL:
  319    case PIPE_CAP_CONSTBUF0_FLAGS:
  320    case PIPE_CAP_PACKED_UNIFORMS:
  321    case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:
  322    case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:
  323    case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:
  324    case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES:
  325    case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:
  326    case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS:
  327    case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
  328    case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS:
  329    case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS:
  330    case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS:
  331    case PIPE_CAP_SURFACE_SAMPLE_COUNT:
  332    case PIPE_CAP_TGSI_ATOMFADD:
  333    case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
  334    case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:
  335    case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
  336    case PIPE_CAP_NIR_COMPACT_ARRAYS:
  337    case PIPE_CAP_COMPUTE:
  338    case PIPE_CAP_IMAGE_LOAD_FORMATTED:
  339    case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
  340    case PIPE_CAP_ATOMIC_FLOAT_MINMAX:
  341    case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE:
  342    case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK:
  343    case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED:
  344    case PIPE_CAP_FBFETCH_COHERENT:
  345    case PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS:
  346    case PIPE_CAP_TGSI_ATOMINC_WRAP:
  347    case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION:
  348    case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE:
  349    case PIPE_CAP_OPENCL_INTEGER_FUNCTIONS:
  350    case PIPE_CAP_INTEGER_MULTIPLY_32X16: /* could be done */
  351    case PIPE_CAP_FRONTEND_NOOP:
  352    case PIPE_CAP_GL_SPIRV:
  353    case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
  354    case PIPE_CAP_TEXTURE_SHADOW_LOD:
  355    case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED:
  356    case PIPE_CAP_PSIZ_CLAMPED:
  357    case PIPE_CAP_VIEWPORT_SWIZZLE:
  358    case PIPE_CAP_VIEWPORT_MASK:
  359       return 0;
  360 
         /* device identification and memory */
  361    case PIPE_CAP_VENDOR_ID:
            /* NOTE(review): 0x10de is presumably NVIDIA's PCI vendor id -- confirm */
  362       return 0x10de;
  363    case PIPE_CAP_DEVICE_ID: {
  364       uint64_t device_id;
            /* a non-zero result from nouveau_getparam() is treated as failure */
  365       if (nouveau_getparam(dev, NOUVEAU_GETPARAM_PCI_DEVICE, &device_id)) {
  366          NOUVEAU_ERR("NOUVEAU_GETPARAM_PCI_DEVICE failed.\n");
  367          return -1;
  368       }
            /* the uint64_t value is narrowed to the int return type here */
  369       return device_id;
  370    }
  371    case PIPE_CAP_ACCELERATED:
  372       return 1;
  373    case PIPE_CAP_VIDEO_MEMORY:
            /* NOTE(review): presumably bytes -> MiB; confirm the unit of vram_size */
  374       return dev->vram_size >> 20;
  375    case PIPE_CAP_UMA:
  376       return 0;
  377 
  378    default:
            /* log the unlisted cap, then let the shared defaults answer it */
  379       debug_printf("%s: unhandled cap %d\n", __func__, param);
  380       /* fallthrough */
  381    /* caps where we want the default value */
  382    case PIPE_CAP_DMABUF:
  383    case PIPE_CAP_ESSL_FEATURE_LEVEL:
  384    case PIPE_CAP_THROTTLE:
  385       return u_pipe_screen_get_param_defaults(pscreen, param);
  386    }
  387 }
  388 
  389 static int
  390 nv50_screen_get_shader_param(struct pipe_screen *pscreen,
  391                              enum pipe_shader_type shader,
  392                              enum pipe_shader_cap param)
  393 {
  394    const struct nouveau_screen *screen = nouveau_screen(pscreen);
  395 
  396    switch (shader) {
  397    case PIPE_SHADER_VERTEX:
  398    case PIPE_SHADER_GEOMETRY:
  399    case PIPE_SHADER_FRAGMENT:
  400       break;
  401    case PIPE_SHADER_COMPUTE:
  402    default:
  403       return 0;
  404    }
  405 
  406    switch (param) {
  407    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
  408    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
  409    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
  410    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
  411       return 16384;
  412    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
  413       return 4;
  414    case PIPE_SHADER_CAP_MAX_INPUTS:
  415       if (shader == PIPE_SHADER_VERTEX)
  416          return 32;
  417       return 15;
  418    case PIPE_SHADER_CAP_MAX_OUTPUTS:
  419       return 16;
  420    case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
  421       return 65536;
  422    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
  423       return NV50_MAX_PIPE_CONSTBUFS;
  424    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
  425       return shader != PIPE_SHADER_FRAGMENT;
  426    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
  427    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
  428    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
  429       return 1;
  430    case PIPE_SHADER_CAP_MAX_TEMPS:
  431       return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE;
  432    case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
  433       return 1;
  434    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
  435       return 1;
  436    case PIPE_SHADER_CAP_INT64_ATOMICS:
  437    case PIPE_SHADER_CAP_FP16:
  438    case PIPE_SHADER_CAP_SUBROUTINES:
  439       return 0; /* please inline, or provide function declarations */
  440    case PIPE_SHADER_CAP_INTEGERS:
  441       return 1;
  442    case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
  443       return 1;
  444    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
  445       /* The chip could handle more sampler views than samplers */
  446    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
  447       return MIN2(16, PIPE_MAX_SAMPLERS);
  448    case PIPE_SHADER_CAP_PREFERRED_IR:
  449       return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
  450    case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
  451       return 32;
  452    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
  453    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
  454    case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
  455    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
  456    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
  457    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
  458    case PIPE_SHADER_CAP_SUPPORTED_IRS:
  459    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
  460    case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
  461    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
  462    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
  463       return 0;
  464    default:
  465       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
  466       return 0;
  467    }
  468 }
  469 
  470 static float
  471 nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
  472 {
  473    switch (param) {
  474    case PIPE_CAPF_MAX_LINE_WIDTH:
  475    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
  476       return 10.0f;
  477    case PIPE_CAPF_MAX_POINT_WIDTH:
  478    case PIPE_CAPF_MAX_POINT_WIDTH_AA:
  479       return 64.0f;
  480    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
  481       return 16.0f;
  482    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
  483       return 15.0f;
  484    case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
  485    case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
  486    case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
  487       return 0.0f;
  488    }
  489 
  490    NOUVEAU_ERR("unknown PIPE_CAPF %d\n", param);
  491    return 0.0f;
  492 }
  493 
  494 static int
  495 nv50_screen_get_compute_param(struct pipe_screen *pscreen,
  496                               enum pipe_shader_ir ir_type,
  497                               enum pipe_compute_cap param, void *data)
  498 {
  499    struct nv50_screen *screen = nv50_screen(pscreen);
  500 
  501 #define RET(x) do {                  \
  502    if (data)                         \
  503       memcpy(data, x, sizeof(x));    \
  504    return sizeof(x);                 \
  505 } while (0)
  506 
  507    switch (param) {
  508    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
  509       RET((uint64_t []) { 2 });
  510    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
  511       RET(((uint64_t []) { 65535, 65535 }));
  512    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
  513       RET(((uint64_t []) { 512, 512, 64 }));
  514    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
  515       RET((uint64_t []) { 512 });
  516    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
  517       RET((uint64_t []) { 1ULL << 32 });
  518    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
  519       RET((uint64_t []) { 16 << 10 });
  520    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
  521       RET((uint64_t []) { 16 << 10 });
  522    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
  523       RET((uint64_t []) { 4096 });
  524    case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
  525       RET((uint32_t []) { 32 });
  526    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
  527       RET((uint64_t []) { 1ULL << 40 });
  528    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
  529       RET((uint32_t []) { 0 });
  530    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
  531       RET((uint32_t []) { screen->mp_count });
  532    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
  533       RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
  534    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
  535       RET((uint32_t []) { 32 });
  536    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
  537       RET((uint64_t []) { 0 });
  538    default:
  539       return 0;
  540    }
  541 
  542 #undef RET
  543 }
  544 
  545 static void
  546 nv50_screen_destroy(struct pipe_screen *pscreen)
  547 {
  548    struct nv50_screen *screen = nv50_screen(pscreen);
  549 
  550    if (!nouveau_drm_screen_unref(&screen->base))
  551       return;
  552 
  553    if (screen->base.fence.current) {
  554       struct nouveau_fence *current = NULL;
  555 
  556       /* nouveau_fence_wait will create a new current fence, so wait on the
  557        * _current_ one, and remove both.
  558        */
  559       nouveau_fence_ref(screen->base.fence.current, &current);
  560       nouveau_fence_wait(current, NULL);
  561       nouveau_fence_ref(NULL, &current);
  562       nouveau_fence_ref(NULL, &screen->base.fence.current);
  563    }
  564    if (screen->base.pushbuf)
  565       screen->base.pushbuf->user_priv = NULL;
  566 
  567    if (screen->blitter)
  568       nv50_blitter_destroy(screen);
  569    if (screen->pm.prog) {
  570       screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
  571       nv50_program_destroy(NULL, screen->pm.prog);
  572       FREE(screen->pm.prog);
  573    }
  574 
  575    nouveau_bo_ref(NULL, &screen->code);
  576    nouveau_bo_ref(NULL, &screen->tls_bo);
  577    nouveau_bo_ref(NULL, &screen->stack_bo);
  578    nouveau_bo_ref(NULL, &screen->txc);
  579    nouveau_bo_ref(NULL, &screen->uniforms);
  580    nouveau_bo_ref(NULL, &screen->fence.bo);
  581 
  582    nouveau_heap_destroy(&screen->vp_code_heap);
  583    nouveau_heap_destroy(&screen->gp_code_heap);
  584    nouveau_heap_destroy(&screen->fp_code_heap);
  585 
  586    FREE(screen->tic.entries);
  587 
  588    nouveau_object_del(&screen->tesla);
  589    nouveau_object_del(&screen->eng2d);
  590    nouveau_object_del(&screen->m2mf);
  591    nouveau_object_del(&screen->compute);
  592    nouveau_object_del(&screen->sync);
  593 
  594    nouveau_screen_fini(&screen->base);
  595 
  596    FREE(screen);
  597 }
  598 
  599 static void
  600 nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
  601 {
  602    struct nv50_screen *screen = nv50_screen(pscreen);
  603    struct nouveau_pushbuf *push = screen->base.pushbuf;
  604 
  605    /* we need to do it after possible flush in MARK_RING */
  606    *sequence = ++screen->base.fence.sequence;
  607 
  608    assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
  609    PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
  610    PUSH_DATAh(push, screen->fence.bo->offset);
  611    PUSH_DATA (push, screen->fence.bo->offset);
  612    PUSH_DATA (push, *sequence);
  613    PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 |
  614                     NV50_3D_QUERY_GET_UNK4 |
  615                     NV50_3D_QUERY_GET_UNIT_CROP |
  616                     NV50_3D_QUERY_GET_TYPE_QUERY |
  617                     NV50_3D_QUERY_GET_QUERY_SELECT_ZERO |
  618                     NV50_3D_QUERY_GET_SHORT);
  619 }
  620 
  621 static u32
  622 nv50_screen_fence_update(struct pipe_screen *pscreen)
  623 {
  624    return nv50_screen(pscreen)->fence.map[0];
  625 }
  626 
  627 static void
  628 nv50_screen_init_hwctx(struct nv50_screen *screen)
  629 {
  630    struct nouveau_pushbuf *push = screen->base.pushbuf;
  631    struct nv04_fifo *fifo;
  632    unsigned i;
  633 
  634    fifo = (struct nv04_fifo *)screen->base.channel->data;
  635 
  636    BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
  637    PUSH_DATA (push, screen->m2mf->handle);
  638    BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3);
  639    PUSH_DATA (push, screen->sync->handle);
  640    PUSH_DATA (push, fifo->vram);
  641    PUSH_DATA (push, fifo->vram);
  642 
  643    BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
  644    PUSH_DATA (push, screen->eng2d->handle);
  645    BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4);
  646    PUSH_DATA (push, screen->sync->handle);
  647    PUSH_DATA (push, fifo->vram);
  648    PUSH_DATA (push, fifo->vram);
  649    PUSH_DATA (push, fifo->vram);
  650    BEGIN_NV04(push, NV50_2D(OPERATION), 1);
  651    PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
  652    BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);
  653    PUSH_DATA (push, 0);
  654    BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1);
  655    PUSH_DATA (push, 0);
  656    BEGIN_NV04(push, SUBC_2D(0x0888), 1);
  657    PUSH_DATA (push, 1);
  658    BEGIN_NV04(push, NV50_2D(COND_MODE), 1);
  659    PUSH_DATA (push, NV50_2D_COND_MODE_ALWAYS);
  660 
  661    BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
  662    PUSH_DATA (push, screen->tesla->handle);
  663 
  664    BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
  665    PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
  666 
  667    BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1);
  668    PUSH_DATA (push, screen->sync->handle);
  669    BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11);
  670    for (i = 0; i < 11; ++i)
  671       PUSH_DATA(push, fifo->vram);
  672    BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN);
  673    for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i)
  674       PUSH_DATA(push, fifo->vram);
  675 
  676    BEGIN_NV04(push, NV50_3D(REG_MODE), 1);
  677    PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED);
  678    BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
  679    PUSH_DATA (push, 0xf);
  680 
  681    if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
  682       BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
  683       PUSH_DATA (push, 0x18);
  684    }
  685 
  686    BEGIN_NV04(push, NV50_3D(ZETA_COMP_ENABLE), 1);
  687    PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
  688 
  689    BEGIN_NV04(push, NV50_3D(RT_COMP_ENABLE(0)), 8);
  690    for (i = 0; i < 8; ++i)
  691       PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
  692 
  693    BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
  694    PUSH_DATA (push, 1);
  695 
  696    BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1);
  697    PUSH_DATA (push, 0);
  698    BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
  699    PUSH_DATA (push, 0);
  700    BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
  701    PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
  702    BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
  703    PUSH_DATA (push, 0);
  704    BEGIN_NV04(push, NV50_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1);
  705    PUSH_DATA (push, 1);
  706    BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
  707    PUSH_DATA (push, 1);
  708 
  709    if (screen->tesla->oclass >= NVA0_3D_CLASS) {
  710       BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
  711       PUSH_DATA (push, 0);
  712    }
  713 
  714    BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
  715    PUSH_DATA (push, 0);
  716    BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2);
  717    PUSH_DATA (push, 0);
  718    PUSH_DATA (push, 0);
  719    BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1);
  720    PUSH_DATA (push, 0x3f);
  721 
  722    BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2);
  723    PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
  724    PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
  725 
  726    BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2);
  727    PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
  728    PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
  729 
  730    BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2);
  731    PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
  732    PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
  733 
  734    BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
  735    PUSH_DATAh(push, screen->tls_bo->offset);
  736    PUSH_DATA (push, screen->tls_bo->offset);
  737    PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
  738 
  739    BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3);
  740    PUSH_DATAh(push, screen->stack_bo->offset);
  741    PUSH_DATA (push, screen->stack_bo->offset);
  742    PUSH_DATA (push, 4);
  743 
  744    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
  745    PUSH_DATAh(push, screen->uniforms->offset + (0 << 16));
  746    PUSH_DATA (push, screen->uniforms->offset + (0 << 16));
  747    PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000);
  748 
  749    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
  750    PUSH_DATAh(push, screen->uniforms->offset + (1 << 16));
  751    PUSH_DATA (push, screen->uniforms->offset + (1 << 16));
  752    PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000);
  753 
  754    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
  755    PUSH_DATAh(push, screen->uniforms->offset + (2 << 16));
  756    PUSH_DATA (push, screen->uniforms->offset + (2 << 16));
  757    PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000);
  758 
  759    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
  760    PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
  761    PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
  762    PUSH_DATA (push, (NV50_CB_AUX << 16) | (NV50_CB_AUX_SIZE & 0xffff));
  763 
  764    BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3);
  765    PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01);
  766    PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21);
  767    PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31);
  768 
  769    /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */
  770    BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
  771    PUSH_DATA (push, (NV50_CB_AUX_RUNOUT_OFFSET << (8 - 2)) | NV50_CB_AUX);
  772    BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);
  773    PUSH_DATAf(push, 0.0f);
  774    PUSH_DATAf(push, 0.0f);
  775    PUSH_DATAf(push, 0.0f);
  776    PUSH_DATAf(push, 0.0f);
  777    BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
  778    PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
  779    PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
  780 
  781    nv50_upload_ms_info(push);
  782 
  783    /* max TIC (bits 4:8) & TSC bindings, per program type */
  784    for (i = 0; i < 3; ++i) {
  785       BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);
  786       PUSH_DATA (push, 0x54);
  787    }
  788 
  789    BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3);
  790    PUSH_DATAh(push, screen->txc->offset);
  791    PUSH_DATA (push, screen->txc->offset);
  792    PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
  793 
  794    BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3);
  795    PUSH_DATAh(push, screen->txc->offset + 65536);
  796    PUSH_DATA (push, screen->txc->offset + 65536);
  797    PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
  798 
  799    BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1);
  800    PUSH_DATA (push, 0);
  801 
  802    BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1);
  803    PUSH_DATA (push, 0);
  804    BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1);
  805    PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY);
  806    BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
  807    for (i = 0; i < 8 * 2; ++i)
  808       PUSH_DATA(push, 0);
  809    BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1);
  810    PUSH_DATA (push, 0);
  811 
  812    BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
  813    PUSH_DATA (push, 1);
  814    for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {
  815       BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2);
  816       PUSH_DATAf(push, 0.0f);
  817       PUSH_DATAf(push, 1.0f);
  818       BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(i)), 2);
  819       PUSH_DATA (push, 8192 << 16);
  820       PUSH_DATA (push, 8192 << 16);
  821    }
  822 
  823    BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
  824 #ifdef NV50_SCISSORS_CLIPPING
  825    PUSH_DATA (push, 0x0000);
  826 #else
  827    PUSH_DATA (push, 0x1080);
  828 #endif
  829 
  830    BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1);
  831    PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT);
  832 
  833    /* We use scissors instead of exact view volume clipping,
  834     * so they're always enabled.
  835     */
  836    for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {
  837       BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(i)), 3);
  838       PUSH_DATA (push, 1);
  839       PUSH_DATA (push, 8192 << 16);
  840       PUSH_DATA (push, 8192 << 16);
  841    }
  842 
  843    BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
  844    PUSH_DATA (push, 1);
  845    BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1);
  846    PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL);
  847    BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
  848    PUSH_DATA (push, 0x11111111);
  849    BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
  850    PUSH_DATA (push, 1);
  851 
  852    BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
  853    PUSH_DATA (push, 0);
  854    if (screen->base.class_3d >= NV84_3D_CLASS) {
  855       BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
  856       PUSH_DATA (push, 0);
  857    }
  858 
  859    BEGIN_NV04(push, NV50_3D(UNK0FDC), 1);
  860    PUSH_DATA (push, 1);
  861    BEGIN_NV04(push, NV50_3D(UNK19C0), 1);
  862    PUSH_DATA (push, 1);
  863 
  864    PUSH_KICK (push);
  865 }
  866 
  867 static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space,
  868       uint64_t *tls_size)
  869 {
  870    struct nouveau_device *dev = screen->base.device;
  871    int ret;
  872 
  873    screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) *
  874          ONE_TEMP_SIZE;
  875    if (nouveau_mesa_debug)
  876       debug_printf("allocating space for %u temps\n",
  877             util_next_power_of_two(tls_space / ONE_TEMP_SIZE));
  878    *tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) *
  879          screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP;
  880 
  881    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
  882                         *tls_size, NULL, &screen->tls_bo);
  883    if (ret) {
  884       NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);
  885       return ret;
  886    }
  887 
  888    return 0;
  889 }
  890 
  891 int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
  892 {
  893    struct nouveau_pushbuf *push = screen->base.pushbuf;
  894    int ret;
  895    uint64_t tls_size;
  896 
  897    if (tls_space < screen->cur_tls_space)
  898       return 0;
  899    if (tls_space > screen->max_tls_space) {
  900       /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC /
  901        * LOCAL_WARPS_NO_CLAMP) */
  902       NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n",
  903             (unsigned)(tls_space / ONE_TEMP_SIZE),
  904             (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE));
  905       return -ENOMEM;
  906    }
  907 
  908    nouveau_bo_ref(NULL, &screen->tls_bo);
  909    ret = nv50_tls_alloc(screen, tls_space, &tls_size);
  910    if (ret)
  911       return ret;
  912 
  913    BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
  914    PUSH_DATAh(push, screen->tls_bo->offset);
  915    PUSH_DATA (push, screen->tls_bo->offset);
  916    PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
  917 
  918    return 1;
  919 }
  920 
  921 static const nir_shader_compiler_options nir_options = {
  922    .fuse_ffma = false, /* nir doesn't track mad vs fma */
  923    .lower_flrp32 = true,
  924    .lower_flrp64 = true,
  925    .lower_fpow = false,
  926    .lower_uadd_carry = true,
  927    .lower_usub_borrow = true,
  928    .lower_sub = true,
  929    .lower_ffract = true,
  930    .lower_pack_half_2x16 = true,
  931    .lower_pack_unorm_2x16 = true,
  932    .lower_pack_snorm_2x16 = true,
  933    .lower_pack_unorm_4x8 = true,
  934    .lower_pack_snorm_4x8 = true,
  935    .lower_unpack_half_2x16 = true,
  936    .lower_unpack_unorm_2x16 = true,
  937    .lower_unpack_snorm_2x16 = true,
  938    .lower_unpack_unorm_4x8 = true,
  939    .lower_unpack_snorm_4x8 = true,
  940    .lower_extract_byte = true,
  941    .lower_extract_word = true,
  942    .lower_all_io_to_temps = false,
  943    .lower_cs_local_index_from_id = true,
  944    .lower_rotate = true,
  945    .lower_to_scalar = true,
  946    .use_interpolated_input_intrinsics = true,
  947    .max_unroll_iterations = 32,
  948 };
  949 
  950 static const void *
  951 nv50_screen_get_compiler_options(struct pipe_screen *pscreen,
  952                                  enum pipe_shader_ir ir,
  953                                  enum pipe_shader_type shader)
  954 {
  955    if (ir == PIPE_SHADER_IR_NIR)
  956       return &nir_options;
  957    return NULL;
  958 }
  959 
  960 struct nouveau_screen *
  961 nv50_screen_create(struct nouveau_device *dev)
  962 {
  963    struct nv50_screen *screen;
  964    struct pipe_screen *pscreen;
  965    struct nouveau_object *chan;
  966    uint64_t value;
  967    uint32_t tesla_class;
  968    unsigned stack_size;
  969    int ret;
  970 
  971    screen = CALLOC_STRUCT(nv50_screen);
  972    if (!screen)
  973       return NULL;
  974    pscreen = &screen->base.base;
  975    pscreen->destroy = nv50_screen_destroy;
  976 
  977    ret = nouveau_screen_init(&screen->base, dev);
  978    if (ret) {
  979       NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret);
  980       goto fail;
  981    }
  982 
  983    /* TODO: Prevent FIFO prefetch before transfer of index buffers and
  984     *  admit them to VRAM.
  985     */
  986    screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
  987       PIPE_BIND_VERTEX_BUFFER;
  988    screen->base.sysmem_bindings |=
  989       PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
  990 
  991    screen->base.pushbuf->user_priv = screen;
  992    screen->base.pushbuf->rsvd_kick = 5;
  993 
  994    chan = screen->base.channel;
  995 
  996    pscreen->context_create = nv50_create;
  997    pscreen->is_format_supported = nv50_screen_is_format_supported;
  998    pscreen->get_param = nv50_screen_get_param;
  999    pscreen->get_shader_param = nv50_screen_get_shader_param;
 1000    pscreen->get_paramf = nv50_screen_get_paramf;
 1001    pscreen->get_compute_param = nv50_screen_get_compute_param;
 1002    pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
 1003    pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info;
 1004 
 1005    /* nir stuff */
 1006    pscreen->get_compiler_options = nv50_screen_get_compiler_options;
 1007 
 1008    nv50_screen_init_resource_functions(pscreen);
 1009 
 1010    if (screen->base.device->chipset < 0x84 ||
 1011        debug_get_bool_option("NOUVEAU_PMPEG", false)) {
 1012       /* PMPEG */
 1013       nouveau_screen_init_vdec(&screen->base);
 1014    } else if (screen->base.device->chipset < 0x98 ||
 1015               screen->base.device->chipset == 0xa0) {
 1016       /* VP2 */
 1017       screen->base.base.get_video_param = nv84_screen_get_video_param;
 1018       screen->base.base.is_video_format_supported = nv84_screen_video_supported;
 1019    } else {
 1020       /* VP3/4 */
 1021       screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
 1022       screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
 1023    }
 1024 
 1025    ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
 1026                         NULL, &screen->fence.bo);
 1027    if (ret) {
 1028       NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret);
 1029       goto fail;
 1030    }
 1031 
 1032    nouveau_bo_map(screen->fence.bo, 0, NULL);
 1033    screen->fence.map = screen->fence.bo->map;
 1034    screen->base.fence.emit = nv50_screen_fence_emit;
 1035    screen->base.fence.update = nv50_screen_fence_update;
 1036 
 1037    ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS,
 1038                             &(struct nv04_notify){ .length = 32 },
 1039                             sizeof(struct nv04_notify), &screen->sync);
 1040    if (ret) {
 1041       NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret);
 1042       goto fail;
 1043    }
 1044 
 1045    ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,
 1046                             NULL, 0, &screen->m2mf);
 1047    if (ret) {
 1048       NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret);
 1049       goto fail;
 1050    }
 1051 
 1052    ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS,
 1053                             NULL, 0, &screen->eng2d);
 1054    if (ret) {
 1055       NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret);
 1056       goto fail;
 1057    }
 1058 
 1059    switch (dev->chipset & 0xf0) {
 1060    case 0x50:
 1061       tesla_class = NV50_3D_CLASS;
 1062       break;
 1063    case 0x80:
 1064    case 0x90:
 1065       tesla_class = NV84_3D_CLASS;
 1066       break;
 1067    case 0xa0:
 1068       switch (dev->chipset) {
 1069       case 0xa0:
 1070       case 0xaa:
 1071       case 0xac:
 1072          tesla_class = NVA0_3D_CLASS;
 1073          break;
 1074       case 0xaf:
 1075          tesla_class = NVAF_3D_CLASS;
 1076          break;
 1077       default:
 1078          tesla_class = NVA3_3D_CLASS;
 1079          break;
 1080       }
 1081       break;
 1082    default:
 1083       NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset);
 1084       goto fail;
 1085    }
 1086    screen->base.class_3d = tesla_class;
 1087 
 1088    ret = nouveau_object_new(chan, 0xbeef5097, tesla_class,
 1089                             NULL, 0, &screen->tesla);
 1090    if (ret) {
 1091       NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret);
 1092       goto fail;
 1093    }
 1094 
 1095    /* This over-allocates by a page. The GP, which would execute at the end of
 1096     * the last page, would trigger faults. The going theory is that it
 1097     * prefetches up to a certain amount.
 1098     */
 1099    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
 1100                         (3 << NV50_CODE_BO_SIZE_LOG2) + 0x1000,
 1101                         NULL, &screen->code);
 1102    if (ret) {
 1103       NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret);
 1104       goto fail;
 1105    }
 1106 
 1107    nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
 1108    nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
 1109    nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
 1110 
 1111    nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
 1112 
 1113    screen->TPs = util_bitcount(value & 0xffff);
 1114    screen->MPsInTP = util_bitcount(value & 0x0f000000);
 1115 
 1116    screen->mp_count = screen->TPs * screen->MPsInTP;
 1117 
 1118    stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
 1119          STACK_WARPS_ALLOC * 64 * 8;
 1120 
 1121    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL,
 1122                         &screen->stack_bo);
 1123    if (ret) {
 1124       NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret);
 1125       goto fail;
 1126    }
 1127 
 1128    uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) *
 1129          screen->MPsInTP * LOCAL_WARPS_ALLOC *  THREADS_IN_WARP *
 1130          ONE_TEMP_SIZE;
 1131    screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE;
 1132    screen->max_tls_space /= 2; /* half of vram */
 1133 
 1134    /* hw can address max 64 KiB */
 1135    screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10);
 1136 
 1137    uint64_t tls_size;
 1138    unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE;
 1139    ret = nv50_tls_alloc(screen, tls_space, &tls_size);
 1140    if (ret)
 1141       goto fail;
 1142 
 1143    if (nouveau_mesa_debug)
 1144       debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n",
 1145             screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10);
 1146 
 1147    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL,
 1148                         &screen->uniforms);
 1149    if (ret) {
 1150       NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret);
 1151       goto fail;
 1152    }
 1153 
 1154    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL,
 1155                         &screen->txc);
 1156    if (ret) {
 1157       NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret);
 1158       goto fail;
 1159    }
 1160 
 1161    screen->tic.entries = CALLOC(4096, sizeof(void *));
 1162    screen->tsc.entries = screen->tic.entries + 2048;
 1163 
 1164    if (!nv50_blitter_create(screen))
 1165       goto fail;
 1166 
 1167    nv50_screen_init_hwctx(screen);
 1168 
 1169    ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);
 1170    if (ret) {
 1171       NOUVEAU_ERR("Failed to init compute context: %d\n", ret);
 1172       goto fail;
 1173    }
 1174 
 1175    nouveau_fence_new(&screen->base, &screen->base.fence.current);
 1176 
 1177    return &screen->base;
 1178 
 1179 fail:
 1180    screen->base.base.context_create = NULL;
 1181    return &screen->base;
 1182 }
 1183 
 1184 int
 1185 nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry)
 1186 {
 1187    int i = screen->tic.next;
 1188 
 1189    while (screen->tic.lock[i / 32] & (1 << (i % 32)))
 1190       i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
 1191 
 1192    screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
 1193 
 1194    if (screen->tic.entries[i])
 1195       nv50_tic_entry(screen->tic.entries[i])->id = -1;
 1196 
 1197    screen->tic.entries[i] = entry;
 1198    return i;
 1199 }
 1200 
 1201 int
 1202 nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry)
 1203 {
 1204    int i = screen->tsc.next;
 1205 
 1206    while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
 1207       i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
 1208 
 1209    screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
 1210 
 1211    if (screen->tsc.entries[i])
 1212       nv50_tsc_entry(screen->tsc.entries[i])->id = -1;
 1213 
 1214    screen->tsc.entries[i] = entry;
 1215    return i;
 1216 }