"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/amd/compiler/aco_opcodes.py" (16 Sep 2020, 79197 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "aco_opcodes.py", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 #
    2 # Copyright (c) 2018 Valve Corporation
    3 #
    4 # Permission is hereby granted, free of charge, to any person obtaining a
    5 # copy of this software and associated documentation files (the "Software"),
    6 # to deal in the Software without restriction, including without limitation
    7 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8 # and/or sell copies of the Software, and to permit persons to whom the
    9 # Software is furnished to do so, subject to the following conditions:
   10 #
   11 # The above copyright notice and this permission notice (including the next
   12 # paragraph) shall be included in all copies or substantial portions of the
   13 # Software.
   14 #
   15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21 # IN THE SOFTWARE.
   22 #
   23 # Authors:
   24 #    Daniel Schuermann (daniel.schuermann@campus.tu-berlin.de)
   25 
   26 
   27 # Class that represents all the information we have about the opcode
   28 # NOTE: this must be kept in sync with aco_op_info
   29 
   30 import sys
   31 from enum import Enum
   32 
   33 class Format(Enum):
   34    PSEUDO = 0
   35    SOP1 = 1
   36    SOP2 = 2
   37    SOPK = 3
   38    SOPP = 4
   39    SOPC = 5
   40    SMEM = 6
   41    DS = 8
   42    MTBUF = 9
   43    MUBUF = 10
   44    MIMG = 11
   45    EXP = 12
   46    FLAT = 13
   47    GLOBAL = 14
   48    SCRATCH = 15
   49    PSEUDO_BRANCH = 16
   50    PSEUDO_BARRIER = 17
   51    PSEUDO_REDUCTION = 18
   52    VOP3P = 19
   53    VOP1 = 1 << 8
   54    VOP2 = 1 << 9
   55    VOPC = 1 << 10
   56    VOP3A = 1 << 11
   57    VOP3B = 1 << 11
   58    VINTRP = 1 << 12
   59    DPP = 1 << 13
   60    SDWA = 1 << 14
   61 
   62    def get_builder_fields(self):
   63       if self == Format.SOPK:
   64          return [('uint16_t', 'imm', None)]
   65       elif self == Format.SOPP:
   66          return [('uint32_t', 'block', '-1'),
   67                  ('uint32_t', 'imm', '0')]
   68       elif self == Format.SMEM:
   69          return [('bool', 'can_reorder', 'true'),
   70                  ('bool', 'glc', 'false'),
   71                  ('bool', 'dlc', 'false'),
   72                  ('bool', 'nv', 'false')]
   73       elif self == Format.DS:
   74          return [('int16_t', 'offset0', '0'),
   75                  ('int8_t', 'offset1', '0'),
   76                  ('bool', 'gds', 'false')]
   77       elif self == Format.MTBUF:
   78          return [('unsigned', 'dfmt', None),
   79                  ('unsigned', 'nfmt', None),
   80                  ('unsigned', 'offset', None),
   81                  ('bool', 'offen', None),
   82                  ('bool', 'idxen', 'false'),
   83                  ('bool', 'disable_wqm', 'false'),
   84                  ('bool', 'glc', 'false'),
   85                  ('bool', 'dlc', 'false'),
   86                  ('bool', 'slc', 'false'),
   87                  ('bool', 'tfe', 'false')]
   88       elif self == Format.MUBUF:
   89          return [('unsigned', 'offset', None),
   90                  ('bool', 'offen', None),
   91                  ('bool', 'idxen', 'false'),
   92                  ('bool', 'addr64', 'false'),
   93                  ('bool', 'disable_wqm', 'false'),
   94                  ('bool', 'glc', 'false'),
   95                  ('bool', 'dlc', 'false'),
   96                  ('bool', 'slc', 'false'),
   97                  ('bool', 'tfe', 'false'),
   98                  ('bool', 'lds', 'false')]
   99       elif self == Format.MIMG:
  100          return [('unsigned', 'dmask', '0xF'),
  101                  ('bool', 'da', 'false'),
  102                  ('bool', 'unrm', 'true'),
  103                  ('bool', 'disable_wqm', 'false'),
  104                  ('bool', 'glc', 'false'),
  105                  ('bool', 'dlc', 'false'),
  106                  ('bool', 'slc', 'false'),
  107                  ('bool', 'tfe', 'false'),
  108                  ('bool', 'lwe', 'false'),
  109                  ('bool', 'r128_a16', 'false', 'r128'),
  110                  ('bool', 'd16', 'false')]
  111          return [('unsigned', 'attribute', None),
  112                  ('unsigned', 'component', None)]
  113       elif self == Format.EXP:
  114          return [('unsigned', 'enabled_mask', None),
  115                  ('unsigned', 'dest', None),
  116                  ('bool', 'compr', 'false', 'compressed'),
  117                  ('bool', 'done', 'false'),
  118                  ('bool', 'vm', 'false', 'valid_mask')]
  119       elif self == Format.PSEUDO_BRANCH:
  120          return [('uint32_t', 'target0', '0', 'target[0]'),
  121                  ('uint32_t', 'target1', '0', 'target[1]')]
  122       elif self == Format.PSEUDO_REDUCTION:
  123          return [('ReduceOp', 'op', None, 'reduce_op'),
  124                  ('unsigned', 'cluster_size', '0')]
  125       elif self == Format.VINTRP:
  126          return [('unsigned', 'attribute', None),
  127                  ('unsigned', 'component', None)]
  128       elif self == Format.DPP:
  129          return [('uint16_t', 'dpp_ctrl', None),
  130                  ('uint8_t', 'row_mask', '0xF'),
  131                  ('uint8_t', 'bank_mask', '0xF'),
  132                  ('bool', 'bound_ctrl', 'false')]
  133       elif self in [Format.FLAT, Format.GLOBAL, Format.SCRATCH]:
  134          return [('uint16_t', 'offset', 0),
  135                  ('bool', 'can_reorder', 'true'),
  136                  ('bool', 'glc', 'false'),
  137                  ('bool', 'slc', 'false'),
  138                  ('bool', 'lds', 'false'),
  139                  ('bool', 'nv', 'false')]
  140       else:
  141          return []
  142 
  143    def get_builder_field_names(self):
  144       return [f[1] for f in self.get_builder_fields()]
  145 
  146    def get_builder_field_dests(self):
  147       return [(f[3] if len(f) >= 4 else f[1]) for f in self.get_builder_fields()]
  148 
  149    def get_builder_field_decls(self):
  150       return [('%s %s=%s' % (f[0], f[1], f[2]) if f[2] != None else '%s %s' % (f[0], f[1])) for f in self.get_builder_fields()]
  151 
  152    def get_builder_initialization(self, num_operands):
  153       res = ''
  154       if self == Format.SDWA:
  155          for i in range(min(num_operands, 2)):
  156             res += 'instr->sel[{0}] = op{0}.op.bytes() == 2 ? sdwa_uword : (op{0}.op.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'.format(i)
  157          res += 'instr->dst_sel = def0.bytes() == 2 ? sdwa_uword : (def0.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'
  158          res += 'instr->dst_preserve = true;'
  159       return res
  160 
  161 
  162 class Opcode(object):
  163    """Class that represents all the information we have about the opcode
  164    NOTE: this must be kept in sync with aco_op_info
  165    """
  166    def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic):
  167       """Parameters:
  168 
  169       - name is the name of the opcode (prepend nir_op_ for the enum name)
  170       - all types are strings that get nir_type_ prepended to them
  171       - input_types is a list of types
  172       - algebraic_properties is a space-seperated string, where nir_op_is_ is
  173         prepended before each entry
  174       - const_expr is an expression or series of statements that computes the
  175         constant value of the opcode given the constant values of its inputs.
  176       """
  177       assert isinstance(name, str)
  178       assert isinstance(opcode_gfx7, int)
  179       assert isinstance(opcode_gfx9, int)
  180       assert isinstance(opcode_gfx10, int)
  181       assert isinstance(format, Format)
  182       assert isinstance(input_mod, bool)
  183       assert isinstance(output_mod, bool)
  184 
  185       self.name = name
  186       self.opcode_gfx7 = opcode_gfx7
  187       self.opcode_gfx9 = opcode_gfx9
  188       self.opcode_gfx10 = opcode_gfx10
  189       self.input_mod = "1" if input_mod else "0"
  190       self.output_mod = "1" if output_mod else "0"
  191       self.is_atomic = "1" if is_atomic else "0"
  192       self.format = format
  193 
  194 
  195 # global dictionary of opcodes
  196 opcodes = {}
  197 
  198 def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False, is_atomic = False):
  199    assert name not in opcodes
  200    opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic)
  201 
# exp is registered with opcode number 0 for all three generations.
opcode("exp", 0, 0, 0, format = Format.EXP)

# The remaining entries in this section pass no opcode numbers, so all three
# default to -1; unless a format is given they use Format.PSEUDO.
opcode("p_parallelcopy")
opcode("p_startpgm")
opcode("p_phi")
opcode("p_linear_phi")
opcode("p_as_uniform")

opcode("p_create_vector")
opcode("p_extract_vector")
opcode("p_split_vector")

# start/end the parts where we can use exec based instructions
# implicitly
opcode("p_logical_start")
opcode("p_logical_end")

# Subgroup operations, all registered as Format.PSEUDO_REDUCTION.
# e.g. subgroupMin() in SPIR-V
opcode("p_reduce", format=Format.PSEUDO_REDUCTION)
# e.g. subgroupInclusiveMin()
opcode("p_inclusive_scan", format=Format.PSEUDO_REDUCTION)
# e.g. subgroupExclusiveMin()
opcode("p_exclusive_scan", format=Format.PSEUDO_REDUCTION)
# simulates proper bpermute behavior on GFX10 wave64
opcode("p_wave64_bpermute", format=Format.PSEUDO_REDUCTION)

# Branches, all registered as Format.PSEUDO_BRANCH.
opcode("p_branch", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch_z", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch_nz", format=Format.PSEUDO_BRANCH)

# Memory barriers, all registered as Format.PSEUDO_BARRIER.
opcode("p_memory_barrier_common", format=Format.PSEUDO_BARRIER) # atomic, buffer, image and shared
opcode("p_memory_barrier_atomic", format=Format.PSEUDO_BARRIER)
opcode("p_memory_barrier_buffer", format=Format.PSEUDO_BARRIER)
opcode("p_memory_barrier_image", format=Format.PSEUDO_BARRIER)
opcode("p_memory_barrier_shared", format=Format.PSEUDO_BARRIER)
opcode("p_memory_barrier_gs_data", format=Format.PSEUDO_BARRIER)
opcode("p_memory_barrier_gs_sendmsg", format=Format.PSEUDO_BARRIER)

opcode("p_spill")
opcode("p_reload")

# start/end linear vgprs
opcode("p_start_linear_vgpr")
opcode("p_end_linear_vgpr")

opcode("p_wqm")
opcode("p_discard_if")
opcode("p_load_helper")
opcode("p_demote_to_helper")
opcode("p_is_helper")
opcode("p_exit_early_if")

# Registered as SMEM even though no opcode numbers are given (all stay -1).
opcode("p_fs_buffer_store_smem", format=Format.SMEM)
  255 
  256 
# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
# Each entry holds one opcode number per hardware generation followed by the
# instruction name. -1 presumably marks a generation that lacks the
# instruction (it is also the default opcode number used by opcode()).
# NOTE: this is a set literal, so iteration order is not the source order.
SOP2 = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name
   (0x00, 0x00, 0x00, 0x00, 0x00, "s_add_u32"),
   (0x01, 0x01, 0x01, 0x01, 0x01, "s_sub_u32"),
   (0x02, 0x02, 0x02, 0x02, 0x02, "s_add_i32"),
   (0x03, 0x03, 0x03, 0x03, 0x03, "s_sub_i32"),
   (0x04, 0x04, 0x04, 0x04, 0x04, "s_addc_u32"),
   (0x05, 0x05, 0x05, 0x05, 0x05, "s_subb_u32"),
   (0x06, 0x06, 0x06, 0x06, 0x06, "s_min_i32"),
   (0x07, 0x07, 0x07, 0x07, 0x07, "s_min_u32"),
   (0x08, 0x08, 0x08, 0x08, 0x08, "s_max_i32"),
   (0x09, 0x09, 0x09, 0x09, 0x09, "s_max_u32"),
   (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cselect_b32"),
   (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cselect_b64"),
   (0x0e, 0x0e, 0x0c, 0x0c, 0x0e, "s_and_b32"),
   (0x0f, 0x0f, 0x0d, 0x0d, 0x0f, "s_and_b64"),
   (0x10, 0x10, 0x0e, 0x0e, 0x10, "s_or_b32"),
   (0x11, 0x11, 0x0f, 0x0f, 0x11, "s_or_b64"),
   (0x12, 0x12, 0x10, 0x10, 0x12, "s_xor_b32"),
   (0x13, 0x13, 0x11, 0x11, 0x13, "s_xor_b64"),
   (0x14, 0x14, 0x12, 0x12, 0x14, "s_andn2_b32"),
   (0x15, 0x15, 0x13, 0x13, 0x15, "s_andn2_b64"),
   (0x16, 0x16, 0x14, 0x14, 0x16, "s_orn2_b32"),
   (0x17, 0x17, 0x15, 0x15, 0x17, "s_orn2_b64"),
   (0x18, 0x18, 0x16, 0x16, 0x18, "s_nand_b32"),
   (0x19, 0x19, 0x17, 0x17, 0x19, "s_nand_b64"),
   (0x1a, 0x1a, 0x18, 0x18, 0x1a, "s_nor_b32"),
   (0x1b, 0x1b, 0x19, 0x19, 0x1b, "s_nor_b64"),
   (0x1c, 0x1c, 0x1a, 0x1a, 0x1c, "s_xnor_b32"),
   (0x1d, 0x1d, 0x1b, 0x1b, 0x1d, "s_xnor_b64"),
   (0x1e, 0x1e, 0x1c, 0x1c, 0x1e, "s_lshl_b32"),
   (0x1f, 0x1f, 0x1d, 0x1d, 0x1f, "s_lshl_b64"),
   (0x20, 0x20, 0x1e, 0x1e, 0x20, "s_lshr_b32"),
   (0x21, 0x21, 0x1f, 0x1f, 0x21, "s_lshr_b64"),
   (0x22, 0x22, 0x20, 0x20, 0x22, "s_ashr_i32"),
   (0x23, 0x23, 0x21, 0x21, 0x23, "s_ashr_i64"),
   (0x24, 0x24, 0x22, 0x22, 0x24, "s_bfm_b32"),
   (0x25, 0x25, 0x23, 0x23, 0x25, "s_bfm_b64"),
   (0x26, 0x26, 0x24, 0x24, 0x26, "s_mul_i32"),
   (0x27, 0x27, 0x25, 0x25, 0x27, "s_bfe_u32"),
   (0x28, 0x28, 0x26, 0x26, 0x28, "s_bfe_i32"),
   (0x29, 0x29, 0x27, 0x27, 0x29, "s_bfe_u64"),
   (0x2a, 0x2a, 0x28, 0x28, 0x2a, "s_bfe_i64"),
   (0x2b, 0x2b, 0x29, 0x29,   -1, "s_cbranch_g_fork"),
   (0x2c, 0x2c, 0x2a, 0x2a, 0x2c, "s_absdiff_i32"),
   (  -1,   -1, 0x2b, 0x2b,   -1, "s_rfe_restore_b64"),
   (  -1,   -1,   -1, 0x2e, 0x2e, "s_lshl1_add_u32"),
   (  -1,   -1,   -1, 0x2f, 0x2f, "s_lshl2_add_u32"),
   (  -1,   -1,   -1, 0x30, 0x30, "s_lshl3_add_u32"),
   (  -1,   -1,   -1, 0x31, 0x31, "s_lshl4_add_u32"),
   (  -1,   -1,   -1, 0x32, 0x32, "s_pack_ll_b32_b16"),
   (  -1,   -1,   -1, 0x33, 0x33, "s_pack_lh_b32_b16"),
   (  -1,   -1,   -1, 0x34, 0x34, "s_pack_hh_b32_b16"),
   (  -1,   -1,   -1, 0x2c, 0x35, "s_mul_hi_u32"),
   (  -1,   -1,   -1, 0x2d, 0x36, "s_mul_hi_i32"),
}
# Register every entry; only the GFX7, GFX9 and GFX10 columns are passed on,
# the GFX6 and GFX8 columns are unpacked but not used here.
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP2:
    opcode(name, gfx7, gfx9, gfx10, Format.SOP2)
  316 
  317 
# SOPK instructions: 0 input (+ imm), 1 output + optional scc
# Each entry holds one opcode number per hardware generation followed by the
# instruction name. -1 presumably marks a generation that lacks the
# instruction (it is also the default opcode number used by opcode()).
# NOTE: this is a set literal, so iteration order is not the source order.
SOPK = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name
   (0x00, 0x00, 0x00, 0x00, 0x00, "s_movk_i32"),
   (  -1,   -1,   -1,   -1, 0x01, "s_version"), # GFX10+
   (0x02, 0x02, 0x01, 0x01, 0x02, "s_cmovk_i32"), # GFX8_GFX9
   (0x03, 0x03, 0x02, 0x02, 0x03, "s_cmpk_eq_i32"),
   (0x04, 0x04, 0x03, 0x03, 0x04, "s_cmpk_lg_i32"),
   (0x05, 0x05, 0x04, 0x04, 0x05, "s_cmpk_gt_i32"),
   (0x06, 0x06, 0x05, 0x05, 0x06, "s_cmpk_ge_i32"),
   (0x07, 0x07, 0x06, 0x06, 0x07, "s_cmpk_lt_i32"),
   (0x08, 0x08, 0x07, 0x07, 0x08, "s_cmpk_le_i32"),
   (0x09, 0x09, 0x08, 0x08, 0x09, "s_cmpk_eq_u32"),
   (0x0a, 0x0a, 0x09, 0x09, 0x0a, "s_cmpk_lg_u32"),
   (0x0b, 0x0b, 0x0a, 0x0a, 0x0b, "s_cmpk_gt_u32"),
   (0x0c, 0x0c, 0x0b, 0x0b, 0x0c, "s_cmpk_ge_u32"),
   (0x0d, 0x0d, 0x0c, 0x0c, 0x0d, "s_cmpk_lt_u32"),
   (0x0e, 0x0e, 0x0d, 0x0d, 0x0e, "s_cmpk_le_u32"),
   (0x0f, 0x0f, 0x0e, 0x0e, 0x0f, "s_addk_i32"),
   (0x10, 0x10, 0x0f, 0x0f, 0x10, "s_mulk_i32"),
   (0x11, 0x11, 0x10, 0x10,   -1, "s_cbranch_i_fork"),
   (0x12, 0x12, 0x11, 0x11, 0x12, "s_getreg_b32"),
   (0x13, 0x13, 0x12, 0x12, 0x13, "s_setreg_b32"),
   (0x15, 0x15, 0x14, 0x14, 0x15, "s_setreg_imm32_b32"), # requires 32bit literal
   (  -1,   -1, 0x15, 0x15, 0x16, "s_call_b64"),
   (  -1,   -1,   -1,   -1, 0x17, "s_waitcnt_vscnt"),
   (  -1,   -1,   -1,   -1, 0x18, "s_waitcnt_vmcnt"),
   (  -1,   -1,   -1,   -1, 0x19, "s_waitcnt_expcnt"),
   (  -1,   -1,   -1,   -1, 0x1a, "s_waitcnt_lgkmcnt"),
   (  -1,   -1,   -1,   -1, 0x1b, "s_subvector_loop_begin"),
   (  -1,   -1,   -1,   -1, 0x1c, "s_subvector_loop_end"),
}
# Register every entry; only the GFX7, GFX9 and GFX10 columns are passed on,
# the GFX6 and GFX8 columns are unpacked but not used here.
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPK:
   opcode(name, gfx7, gfx9, gfx10, Format.SOPK)
  352 
  353 
# SOP1 instructions: 1 input, 1 output (+optional SCC)
# Each entry holds one opcode number per hardware generation followed by the
# instruction name. -1 presumably marks a generation that lacks the
# instruction (it is also the default opcode number used by opcode()).
# NOTE: this is a set literal, so iteration order is not the source order.
SOP1 = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name
   (0x03, 0x03, 0x00, 0x00, 0x03, "s_mov_b32"),
   (0x04, 0x04, 0x01, 0x01, 0x04, "s_mov_b64"),
   (0x05, 0x05, 0x02, 0x02, 0x05, "s_cmov_b32"),
   (0x06, 0x06, 0x03, 0x03, 0x06, "s_cmov_b64"),
   (0x07, 0x07, 0x04, 0x04, 0x07, "s_not_b32"),
   (0x08, 0x08, 0x05, 0x05, 0x08, "s_not_b64"),
   (0x09, 0x09, 0x06, 0x06, 0x09, "s_wqm_b32"),
   (0x0a, 0x0a, 0x07, 0x07, 0x0a, "s_wqm_b64"),
   (0x0b, 0x0b, 0x08, 0x08, 0x0b, "s_brev_b32"),
   (0x0c, 0x0c, 0x09, 0x09, 0x0c, "s_brev_b64"),
   (0x0d, 0x0d, 0x0a, 0x0a, 0x0d, "s_bcnt0_i32_b32"),
   (0x0e, 0x0e, 0x0b, 0x0b, 0x0e, "s_bcnt0_i32_b64"),
   (0x0f, 0x0f, 0x0c, 0x0c, 0x0f, "s_bcnt1_i32_b32"),
   (0x10, 0x10, 0x0d, 0x0d, 0x10, "s_bcnt1_i32_b64"),
   (0x11, 0x11, 0x0e, 0x0e, 0x11, "s_ff0_i32_b32"),
   (0x12, 0x12, 0x0f, 0x0f, 0x12, "s_ff0_i32_b64"),
   (0x13, 0x13, 0x10, 0x10, 0x13, "s_ff1_i32_b32"),
   (0x14, 0x14, 0x11, 0x11, 0x14, "s_ff1_i32_b64"),
   (0x15, 0x15, 0x12, 0x12, 0x15, "s_flbit_i32_b32"),
   (0x16, 0x16, 0x13, 0x13, 0x16, "s_flbit_i32_b64"),
   (0x17, 0x17, 0x14, 0x14, 0x17, "s_flbit_i32"),
   (0x18, 0x18, 0x15, 0x15, 0x18, "s_flbit_i32_i64"),
   (0x19, 0x19, 0x16, 0x16, 0x19, "s_sext_i32_i8"),
   (0x1a, 0x1a, 0x17, 0x17, 0x1a, "s_sext_i32_i16"),
   (0x1b, 0x1b, 0x18, 0x18, 0x1b, "s_bitset0_b32"),
   (0x1c, 0x1c, 0x19, 0x19, 0x1c, "s_bitset0_b64"),
   (0x1d, 0x1d, 0x1a, 0x1a, 0x1d, "s_bitset1_b32"),
   (0x1e, 0x1e, 0x1b, 0x1b, 0x1e, "s_bitset1_b64"),
   (0x1f, 0x1f, 0x1c, 0x1c, 0x1f, "s_getpc_b64"),
   (0x20, 0x20, 0x1d, 0x1d, 0x20, "s_setpc_b64"),
   (0x21, 0x21, 0x1e, 0x1e, 0x21, "s_swappc_b64"),
   (0x22, 0x22, 0x1f, 0x1f, 0x22, "s_rfe_b64"),
   (0x24, 0x24, 0x20, 0x20, 0x24, "s_and_saveexec_b64"),
   (0x25, 0x25, 0x21, 0x21, 0x25, "s_or_saveexec_b64"),
   (0x26, 0x26, 0x22, 0x22, 0x26, "s_xor_saveexec_b64"),
   (0x27, 0x27, 0x23, 0x23, 0x27, "s_andn2_saveexec_b64"),
   (0x28, 0x28, 0x24, 0x24, 0x28, "s_orn2_saveexec_b64"),
   (0x29, 0x29, 0x25, 0x25, 0x29, "s_nand_saveexec_b64"),
   (0x2a, 0x2a, 0x26, 0x26, 0x2a, "s_nor_saveexec_b64"),
   (0x2b, 0x2b, 0x27, 0x27, 0x2b, "s_xnor_saveexec_b64"),
   (0x2c, 0x2c, 0x28, 0x28, 0x2c, "s_quadmask_b32"),
   (0x2d, 0x2d, 0x29, 0x29, 0x2d, "s_quadmask_b64"),
   (0x2e, 0x2e, 0x2a, 0x2a, 0x2e, "s_movrels_b32"),
   (0x2f, 0x2f, 0x2b, 0x2b, 0x2f, "s_movrels_b64"),
   (0x30, 0x30, 0x2c, 0x2c, 0x30, "s_movreld_b32"),
   (0x31, 0x31, 0x2d, 0x2d, 0x31, "s_movreld_b64"),
   (0x32, 0x32, 0x2e, 0x2e,   -1, "s_cbranch_join"),
   (0x34, 0x34, 0x30, 0x30, 0x34, "s_abs_i32"),
   (0x35, 0x35,   -1,   -1, 0x35, "s_mov_fed_b32"),
   (  -1,   -1, 0x32, 0x32,   -1, "s_set_gpr_idx_idx"),
   (  -1,   -1,   -1, 0x33, 0x37, "s_andn1_saveexec_b64"),
   (  -1,   -1,   -1, 0x34, 0x38, "s_orn1_saveexec_b64"),
   (  -1,   -1,   -1, 0x35, 0x39, "s_andn1_wrexec_b64"),
   (  -1,   -1,   -1, 0x36, 0x3a, "s_andn2_wrexec_b64"),
   (  -1,   -1,   -1, 0x37, 0x3b, "s_bitreplicate_b64_b32"),
   (  -1,   -1,   -1,   -1, 0x3c, "s_and_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x3d, "s_or_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x3e, "s_xor_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x3f, "s_andn2_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x40, "s_orn2_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x41, "s_nand_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x42, "s_nor_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x43, "s_xnor_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x44, "s_andn1_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x45, "s_orn1_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x46, "s_andn1_wrexec_b32"),
   (  -1,   -1,   -1,   -1, 0x47, "s_andn2_wrexec_b32"),
   (  -1,   -1,   -1,   -1, 0x49, "s_movrelsd_2_b32"),
   # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.
   (  -1,   -1,   -1,   -1,   -1, "p_constaddr"),
}
# Register every entry; only the GFX7, GFX9 and GFX10 columns are passed on,
# the GFX6 and GFX8 columns are unpacked but not used here.
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP1:
   opcode(name, gfx7, gfx9, gfx10, Format.SOP1)
  430 
  431 
# SOPC instructions: 2 inputs and 0 outputs (+SCC)
# Each entry holds one opcode number per hardware generation followed by the
# instruction name. -1 presumably marks a generation that lacks the
# instruction (it is also the default opcode number used by opcode()).
# NOTE: this is a set literal, so iteration order is not the source order.
SOPC = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name
   (0x00, 0x00, 0x00, 0x00, 0x00, "s_cmp_eq_i32"),
   (0x01, 0x01, 0x01, 0x01, 0x01, "s_cmp_lg_i32"),
   (0x02, 0x02, 0x02, 0x02, 0x02, "s_cmp_gt_i32"),
   (0x03, 0x03, 0x03, 0x03, 0x03, "s_cmp_ge_i32"),
   (0x04, 0x04, 0x04, 0x04, 0x04, "s_cmp_lt_i32"),
   (0x05, 0x05, 0x05, 0x05, 0x05, "s_cmp_le_i32"),
   (0x06, 0x06, 0x06, 0x06, 0x06, "s_cmp_eq_u32"),
   (0x07, 0x07, 0x07, 0x07, 0x07, "s_cmp_lg_u32"),
   (0x08, 0x08, 0x08, 0x08, 0x08, "s_cmp_gt_u32"),
   (0x09, 0x09, 0x09, 0x09, 0x09, "s_cmp_ge_u32"),
   (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cmp_lt_u32"),
   (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cmp_le_u32"),
   (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_bitcmp0_b32"),
   (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_bitcmp1_b32"),
   (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_bitcmp0_b64"),
   (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_bitcmp1_b64"),
   (0x10, 0x10, 0x10, 0x10,   -1, "s_setvskip"),
   (  -1,   -1, 0x11, 0x11,   -1, "s_set_gpr_idx_on"),
   (  -1,   -1, 0x12, 0x12, 0x12, "s_cmp_eq_u64"),
   (  -1,   -1, 0x13, 0x13, 0x13, "s_cmp_lg_u64"),
}
# Register every entry; only the GFX7, GFX9 and GFX10 columns are passed on,
# the GFX6 and GFX8 columns are unpacked but not used here.
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPC:
   opcode(name, gfx7, gfx9, gfx10, Format.SOPC)
  458 
  459 
# SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs
# Each entry holds one opcode number per hardware generation followed by the
# instruction name. -1 presumably marks a generation that lacks the
# instruction (it is also the default opcode number used by opcode()).
# NOTE: this is a set literal, so iteration order is not the source order.
SOPP = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name
   (0x00, 0x00, 0x00, 0x00, 0x00, "s_nop"),
   (0x01, 0x01, 0x01, 0x01, 0x01, "s_endpgm"),
   (0x02, 0x02, 0x02, 0x02, 0x02, "s_branch"),
   (  -1,   -1, 0x03, 0x03, 0x03, "s_wakeup"),
   (0x04, 0x04, 0x04, 0x04, 0x04, "s_cbranch_scc0"),
   (0x05, 0x05, 0x05, 0x05, 0x05, "s_cbranch_scc1"),
   (0x06, 0x06, 0x06, 0x06, 0x06, "s_cbranch_vccz"),
   (0x07, 0x07, 0x07, 0x07, 0x07, "s_cbranch_vccnz"),
   (0x08, 0x08, 0x08, 0x08, 0x08, "s_cbranch_execz"),
   (0x09, 0x09, 0x09, 0x09, 0x09, "s_cbranch_execnz"),
   (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_barrier"),
   (  -1, 0x0b, 0x0b, 0x0b, 0x0b, "s_setkill"),
   (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_waitcnt"),
   (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_sethalt"),
   (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_sleep"),
   (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_setprio"),
   (0x10, 0x10, 0x10, 0x10, 0x10, "s_sendmsg"),
   (0x11, 0x11, 0x11, 0x11, 0x11, "s_sendmsghalt"),
   (0x12, 0x12, 0x12, 0x12, 0x12, "s_trap"),
   (0x13, 0x13, 0x13, 0x13, 0x13, "s_icache_inv"),
   (0x14, 0x14, 0x14, 0x14, 0x14, "s_incperflevel"),
   (0x15, 0x15, 0x15, 0x15, 0x15, "s_decperflevel"),
   (0x16, 0x16, 0x16, 0x16, 0x16, "s_ttracedata"),
   (  -1, 0x17, 0x17, 0x17, 0x17, "s_cbranch_cdbgsys"),
   (  -1, 0x18, 0x18, 0x18, 0x18, "s_cbranch_cdbguser"),
   (  -1, 0x19, 0x19, 0x19, 0x19, "s_cbranch_cdbgsys_or_user"),
   (  -1, 0x1a, 0x1a, 0x1a, 0x1a, "s_cbranch_cdbgsys_and_user"),
   (  -1,   -1, 0x1b, 0x1b, 0x1b, "s_endpgm_saved"),
   (  -1,   -1, 0x1c, 0x1c,   -1, "s_set_gpr_idx_off"),
   (  -1,   -1, 0x1d, 0x1d,   -1, "s_set_gpr_idx_mode"),
   (  -1,   -1,   -1, 0x1e, 0x1e, "s_endpgm_ordered_ps_done"),
   (  -1,   -1,   -1,   -1, 0x1f, "s_code_end"),
   (  -1,   -1,   -1,   -1, 0x20, "s_inst_prefetch"),
   (  -1,   -1,   -1,   -1, 0x21, "s_clause"),
   (  -1,   -1,   -1,   -1, 0x22, "s_wait_idle"),
   (  -1,   -1,   -1,   -1, 0x23, "s_waitcnt_depctr"),
   (  -1,   -1,   -1,   -1, 0x24, "s_round_mode"),
   (  -1,   -1,   -1,   -1, 0x25, "s_denorm_mode"),
   (  -1,   -1,   -1,   -1, 0x26, "s_ttracedata_imm"),
}
# Register every entry; only the GFX7, GFX9 and GFX10 columns are passed on,
# the GFX6 and GFX8 columns are unpacked but not used here.
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPP:
   opcode(name, gfx7, gfx9, gfx10, Format.SOPP)
  505 
  506 
# SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output
# Each entry holds one opcode number per hardware generation followed by the
# instruction name. -1 presumably marks a generation that lacks the
# instruction (it is also the default opcode number used by opcode()).
# NOTE: this is a set literal, so iteration order is not the source order.
SMEM = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name
   (0x00, 0x00, 0x00, 0x00, 0x00, "s_load_dword"),
   (0x01, 0x01, 0x01, 0x01, 0x01, "s_load_dwordx2"),
   (0x02, 0x02, 0x02, 0x02, 0x02, "s_load_dwordx4"),
   (0x03, 0x03, 0x03, 0x03, 0x03, "s_load_dwordx8"),
   (0x04, 0x04, 0x04, 0x04, 0x04, "s_load_dwordx16"),
   (  -1,   -1,   -1, 0x05, 0x05, "s_scratch_load_dword"),
   (  -1,   -1,   -1, 0x06, 0x06, "s_scratch_load_dwordx2"),
   (  -1,   -1,   -1, 0x07, 0x07, "s_scratch_load_dwordx4"),
   (0x08, 0x08, 0x08, 0x08, 0x08, "s_buffer_load_dword"),
   (0x09, 0x09, 0x09, 0x09, 0x09, "s_buffer_load_dwordx2"),
   (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_buffer_load_dwordx4"),
   (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_buffer_load_dwordx8"),
   (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_buffer_load_dwordx16"),
   (  -1,   -1, 0x10, 0x10, 0x10, "s_store_dword"),
   (  -1,   -1, 0x11, 0x11, 0x11, "s_store_dwordx2"),
   (  -1,   -1, 0x12, 0x12, 0x12, "s_store_dwordx4"),
   (  -1,   -1,   -1, 0x15, 0x15, "s_scratch_store_dword"),
   (  -1,   -1,   -1, 0x16, 0x16, "s_scratch_store_dwordx2"),
   (  -1,   -1,   -1, 0x17, 0x17, "s_scratch_store_dwordx4"),
   (  -1,   -1, 0x18, 0x18, 0x18, "s_buffer_store_dword"),
   (  -1,   -1, 0x19, 0x19, 0x19, "s_buffer_store_dwordx2"),
   (  -1,   -1, 0x1a, 0x1a, 0x1a, "s_buffer_store_dwordx4"),
   (  -1,   -1, 0x1f, 0x1f, 0x1f, "s_gl1_inv"),
   (0x1f, 0x1f, 0x20, 0x20, 0x20, "s_dcache_inv"),
   (  -1,   -1, 0x21, 0x21, 0x21, "s_dcache_wb"),
   (  -1, 0x1d, 0x22, 0x22,   -1, "s_dcache_inv_vol"),
   (  -1,   -1, 0x23, 0x23,   -1, "s_dcache_wb_vol"),
   (0x1e, 0x1e, 0x24, 0x24, 0x24, "s_memtime"),
   (  -1,   -1, 0x25, 0x25, 0x25, "s_memrealtime"),
   (  -1,   -1, 0x26, 0x26, 0x26, "s_atc_probe"),
   (  -1,   -1, 0x27, 0x27, 0x27, "s_atc_probe_buffer"),
   (  -1,   -1,   -1, 0x28, 0x28, "s_dcache_discard"),
   (  -1,   -1,   -1, 0x29, 0x29, "s_dcache_discard_x2"),
   (  -1,   -1,   -1,   -1, 0x2a, "s_get_waveid_in_workgroup"),
   (  -1,   -1,   -1, 0x40, 0x40, "s_buffer_atomic_swap"),
   (  -1,   -1,   -1, 0x41, 0x41, "s_buffer_atomic_cmpswap"),
   (  -1,   -1,   -1, 0x42, 0x42, "s_buffer_atomic_add"),
   (  -1,   -1,   -1, 0x43, 0x43, "s_buffer_atomic_sub"),
   (  -1,   -1,   -1, 0x44, 0x44, "s_buffer_atomic_smin"),
   (  -1,   -1,   -1, 0x45, 0x45, "s_buffer_atomic_umin"),
   (  -1,   -1,   -1, 0x46, 0x46, "s_buffer_atomic_smax"),
   (  -1,   -1,   -1, 0x47, 0x47, "s_buffer_atomic_umax"),
   (  -1,   -1,   -1, 0x48, 0x48, "s_buffer_atomic_and"),
   (  -1,   -1,   -1, 0x49, 0x49, "s_buffer_atomic_or"),
   (  -1,   -1,   -1, 0x4a, 0x4a, "s_buffer_atomic_xor"),
   (  -1,   -1,   -1, 0x4b, 0x4b, "s_buffer_atomic_inc"),
   (  -1,   -1,   -1, 0x4c, 0x4c, "s_buffer_atomic_dec"),
   (  -1,   -1,   -1, 0x60, 0x60, "s_buffer_atomic_swap_x2"),
   (  -1,   -1,   -1, 0x61, 0x61, "s_buffer_atomic_cmpswap_x2"),
   (  -1,   -1,   -1, 0x62, 0x62, "s_buffer_atomic_add_x2"),
   (  -1,   -1,   -1, 0x63, 0x63, "s_buffer_atomic_sub_x2"),
   (  -1,   -1,   -1, 0x64, 0x64, "s_buffer_atomic_smin_x2"),
   (  -1,   -1,   -1, 0x65, 0x65, "s_buffer_atomic_umin_x2"),
   (  -1,   -1,   -1, 0x66, 0x66, "s_buffer_atomic_smax_x2"),
   (  -1,   -1,   -1, 0x67, 0x67, "s_buffer_atomic_umax_x2"),
   (  -1,   -1,   -1, 0x68, 0x68, "s_buffer_atomic_and_x2"),
   (  -1,   -1,   -1, 0x69, 0x69, "s_buffer_atomic_or_x2"),
   (  -1,   -1,   -1, 0x6a, 0x6a, "s_buffer_atomic_xor_x2"),
   (  -1,   -1,   -1, 0x6b, 0x6b, "s_buffer_atomic_inc_x2"),
   (  -1,   -1,   -1, 0x6c, 0x6c, "s_buffer_atomic_dec_x2"),
   (  -1,   -1,   -1, 0x80, 0x80, "s_atomic_swap"),
   (  -1,   -1,   -1, 0x81, 0x81, "s_atomic_cmpswap"),
   (  -1,   -1,   -1, 0x82, 0x82, "s_atomic_add"),
   (  -1,   -1,   -1, 0x83, 0x83, "s_atomic_sub"),
   (  -1,   -1,   -1, 0x84, 0x84, "s_atomic_smin"),
   (  -1,   -1,   -1, 0x85, 0x85, "s_atomic_umin"),
   (  -1,   -1,   -1, 0x86, 0x86, "s_atomic_smax"),
   (  -1,   -1,   -1, 0x87, 0x87, "s_atomic_umax"),
   (  -1,   -1,   -1, 0x88, 0x88, "s_atomic_and"),
   (  -1,   -1,   -1, 0x89, 0x89, "s_atomic_or"),
   (  -1,   -1,   -1, 0x8a, 0x8a, "s_atomic_xor"),
   (  -1,   -1,   -1, 0x8b, 0x8b, "s_atomic_inc"),
   (  -1,   -1,   -1, 0x8c, 0x8c, "s_atomic_dec"),
   (  -1,   -1,   -1, 0xa0, 0xa0, "s_atomic_swap_x2"),
   (  -1,   -1,   -1, 0xa1, 0xa1, "s_atomic_cmpswap_x2"),
   (  -1,   -1,   -1, 0xa2, 0xa2, "s_atomic_add_x2"),
   (  -1,   -1,   -1, 0xa3, 0xa3, "s_atomic_sub_x2"),
   (  -1,   -1,   -1, 0xa4, 0xa4, "s_atomic_smin_x2"),
   (  -1,   -1,   -1, 0xa5, 0xa5, "s_atomic_umin_x2"),
   (  -1,   -1,   -1, 0xa6, 0xa6, "s_atomic_smax_x2"),
   (  -1,   -1,   -1, 0xa7, 0xa7, "s_atomic_umax_x2"),
   (  -1,   -1,   -1, 0xa8, 0xa8, "s_atomic_and_x2"),
   (  -1,   -1,   -1, 0xa9, 0xa9, "s_atomic_or_x2"),
   (  -1,   -1,   -1, 0xaa, 0xaa, "s_atomic_xor_x2"),
   (  -1,   -1,   -1, 0xab, 0xab, "s_atomic_inc_x2"),
   (  -1,   -1,   -1, 0xac, 0xac, "s_atomic_dec_x2"),
}
# Register every entry; only the GFX7, GFX9 and GFX10 columns are passed on,
# the GFX6 and GFX8 columns are unpacked but not used here. An instruction is
# flagged atomic exactly when its name contains the substring "atomic".
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SMEM:
   opcode(name, gfx7, gfx9, gfx10, Format.SMEM, is_atomic = "atomic" in name)
  599 
  600 
  601 # VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
  602 # TODO: misses some GFX6_7 opcodes which were shifted to VOP3 in GFX8
  603 VOP2 = {
  604   # GFX6, GFX7, GFX8, GFX9, GFX10, name, input/output modifiers
  605    (0x01, 0x01,   -1,   -1,   -1, "v_readlane_b32", False),
  606    (0x02, 0x02,   -1,   -1,   -1, "v_writelane_b32", False),
  607    (0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True),
  608    (0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True),
  609    (0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True),
  610    (0x06, 0x06,   -1,   -1, 0x06, "v_mac_legacy_f32", True),
  611    (0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True),
  612    (0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True),
  613    (0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False),
  614    (0x0a, 0x0a, 0x07, 0x07, 0x0a, "v_mul_hi_i32_i24", False),
  615    (0x0b, 0x0b, 0x08, 0x08, 0x0b, "v_mul_u32_u24", False),
  616    (0x0c, 0x0c, 0x09, 0x09, 0x0c, "v_mul_hi_u32_u24", False),
  617    (0x0d, 0x0d,   -1,   -1,   -1, "v_min_legacy_f32", True),
  618    (0x0e, 0x0e,   -1,   -1,   -1, "v_max_legacy_f32", True),
  619    (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, "v_min_f32", True),
  620    (0x10, 0x10, 0x0b, 0x0b, 0x10, "v_max_f32", True),
  621    (0x11, 0x11, 0x0c, 0x0c, 0x11, "v_min_i32", False),
  622    (0x12, 0x12, 0x0d, 0x0d, 0x12, "v_max_i32", False),
  623    (0x13, 0x13, 0x0e, 0x0e, 0x13, "v_min_u32", False),
  624    (0x14, 0x14, 0x0f, 0x0f, 0x14, "v_max_u32", False),
  625    (0x15, 0x15,   -1,   -1,   -1, "v_lshr_b32", False),
  626    (0x16, 0x16, 0x10, 0x10, 0x16, "v_lshrrev_b32", False),
  627    (0x17, 0x17,   -1,   -1,   -1, "v_ashr_i32", False),
  628    (0x18, 0x18, 0x11, 0x11, 0x18, "v_ashrrev_i32", False),
  629    (0x19, 0x19,   -1,   -1,   -1, "v_lshl_b32", False),
  630    (0x1a, 0x1a, 0x12, 0x12, 0x1a, "v_lshlrev_b32", False),
  631    (0x1b, 0x1b, 0x13, 0x13, 0x1b, "v_and_b32", False),
  632    (0x1c, 0x1c, 0x14, 0x14, 0x1c, "v_or_b32", False),
  633    (0x1d, 0x1d, 0x15, 0x15, 0x1d, "v_xor_b32", False),
  634    (  -1,   -1,   -1,   -1, 0x1e, "v_xnor_b32", False),
  635    (0x1f, 0x1f, 0x16, 0x16, 0x1f, "v_mac_f32", True),
  636    (0x20, 0x20, 0x17, 0x17, 0x20, "v_madmk_f32", False),
  637    (0x21, 0x21, 0x18, 0x18, 0x21, "v_madak_f32", False),
  638    (0x25, 0x25, 0x19, 0x19,   -1, "v_add_co_u32", False), # VOP3B only in RDNA
  639    (0x26, 0x26, 0x1a, 0x1a,   -1, "v_sub_co_u32", False), # VOP3B only in RDNA
  640    (0x27, 0x27, 0x1b, 0x1b,   -1, "v_subrev_co_u32", False), # VOP3B only in RDNA
  641    (0x28, 0x28, 0x1c, 0x1c, 0x28, "v_addc_co_u32", False), # v_add_co_ci_u32 in RDNA
  642    (0x29, 0x29, 0x1d, 0x1d, 0x29, "v_subb_co_u32", False), # v_sub_co_ci_u32 in RDNA
  643    (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, "v_subbrev_co_u32", False), # v_subrev_co_ci_u32 in RDNA
  644    (  -1,   -1,   -1,   -1, 0x2b, "v_fmac_f32", True),
  645    (  -1,   -1,   -1,   -1, 0x2c, "v_fmamk_f32", True),
  646    (  -1,   -1,   -1,   -1, 0x2d, "v_fmaak_f32", True),
  647    (  -1,   -1, 0x1f, 0x1f, 0x32, "v_add_f16", True),
  648    (  -1,   -1, 0x20, 0x20, 0x33, "v_sub_f16", True),
  649    (  -1,   -1, 0x21, 0x21, 0x34, "v_subrev_f16", True),
  650    (  -1,   -1, 0x22, 0x22, 0x35, "v_mul_f16", True),
  651    (  -1,   -1, 0x23, 0x23,   -1, "v_mac_f16", True),
  652    (  -1,   -1, 0x24, 0x24,   -1, "v_madmk_f16", False),
  653    (  -1,   -1, 0x25, 0x25,   -1, "v_madak_f16", False),
  654    (  -1,   -1, 0x26, 0x26,   -1, "v_add_u16", False),
  655    (  -1,   -1, 0x27, 0x27,   -1, "v_sub_u16", False),
  656    (  -1,   -1, 0x28, 0x28,   -1, "v_subrev_u16", False),
  657    (  -1,   -1, 0x29, 0x29,   -1, "v_mul_lo_u16", False),
  658    (  -1,   -1, 0x2a, 0x2a,   -1, "v_lshlrev_b16", False),
  659    (  -1,   -1, 0x2b, 0x2b,   -1, "v_lshrrev_b16", False),
  660    (  -1,   -1, 0x2c, 0x2c,   -1, "v_ashrrev_i16", False),
  661    (  -1,   -1, 0x2d, 0x2d, 0x39, "v_max_f16", True),
  662    (  -1,   -1, 0x2e, 0x2e, 0x3a, "v_min_f16", True),
  663    (  -1,   -1, 0x2f, 0x2f,   -1, "v_max_u16", False),
  664    (  -1,   -1, 0x30, 0x30,   -1, "v_max_i16", False),
  665    (  -1,   -1, 0x31, 0x31,   -1, "v_min_u16", False),
  666    (  -1,   -1, 0x32, 0x32,   -1, "v_min_i16", False),
  667    (  -1,   -1, 0x33, 0x33, 0x3b, "v_ldexp_f16", False),
  668    (  -1,   -1, 0x34, 0x34, 0x25, "v_add_u32", False), # v_add_nc_u32 in RDNA
  669    (  -1,   -1, 0x35, 0x35, 0x26, "v_sub_u32", False), # v_sub_nc_u32 in RDNA
  670    (  -1,   -1, 0x36, 0x36, 0x27, "v_subrev_u32", False), # v_subrev_nc_u32 in RDNA
  671    (  -1,   -1,   -1,   -1, 0x36, "v_fmac_f16", False),
  672    (  -1,   -1,   -1,   -1, 0x37, "v_fmamk_f16", False),
  673    (  -1,   -1,   -1,   -1, 0x38, "v_fmaak_f16", False),
  674    (  -1,   -1,   -1,   -1, 0x3c, "v_pk_fmac_f16", False),
  675 }
  676 for (gfx6, gfx7, gfx8, gfx9, gfx10, name, modifiers) in VOP2:
  677    opcode(name, gfx7, gfx9, gfx10, Format.VOP2, modifiers, modifiers)
  678 
  679 if True:
  680     # v_cndmask_b32 can use input modifiers but not output modifiers
  681     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00, 0x00, 0x00, 0x00, 0x01, "v_cndmask_b32")
  682     opcode(name, gfx7, gfx9, gfx10, Format.VOP2, True, False)
  683 
  684 
  685 # VOP1 instructions: instructions with 1 input and 1 output
  686 VOP1 = {
  687   # GFX6, GFX7, GFX8, GFX9, GFX10, name, input_modifiers, output_modifiers
  688    (0x00, 0x00, 0x00, 0x00, 0x00, "v_nop", False, False),
  689    (0x01, 0x01, 0x01, 0x01, 0x01, "v_mov_b32", False, False),
  690    (0x02, 0x02, 0x02, 0x02, 0x02, "v_readfirstlane_b32", False, False),
  691    (0x03, 0x03, 0x03, 0x03, 0x03, "v_cvt_i32_f64", True, False),
  692    (0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True),
  693    (0x05, 0x05, 0x05, 0x05, 0x05, "v_cvt_f32_i32", False, True),
  694    (0x06, 0x06, 0x06, 0x06, 0x06, "v_cvt_f32_u32", False, True),
  695    (0x07, 0x07, 0x07, 0x07, 0x07, "v_cvt_u32_f32", True, False),
  696    (0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),
  697    (0x09, 0x09,   -1,   -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9
  698    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),
  699    (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),
  700    (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False),
  701    (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),
  702    (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "v_cvt_off_f32_i4", False, True),
  703    (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True),
  704    (0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True),
  705    (0x11, 0x11, 0x11, 0x11, 0x11, "v_cvt_f32_ubyte0", False, True),
  706    (0x12, 0x12, 0x12, 0x12, 0x12, "v_cvt_f32_ubyte1", False, True),
  707    (0x13, 0x13, 0x13, 0x13, 0x13, "v_cvt_f32_ubyte2", False, True),
  708    (0x14, 0x14, 0x14, 0x14, 0x14, "v_cvt_f32_ubyte3", False, True),
  709    (0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False),
  710    (0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True),
  711    (  -1, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True),
  712    (  -1, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True),
  713    (  -1, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True),
  714    (  -1, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True),
  715    (  -1,   -1,   -1,   -1, 0x1b, "v_pipeflush", False, False),
  716    (0x20, 0x20, 0x1b, 0x1b, 0x20, "v_fract_f32", True, True),
  717    (0x21, 0x21, 0x1c, 0x1c, 0x21, "v_trunc_f32", True, True),
  718    (0x22, 0x22, 0x1d, 0x1d, 0x22, "v_ceil_f32", True, True),
  719    (0x23, 0x23, 0x1e, 0x1e, 0x23, "v_rndne_f32", True, True),
  720    (0x24, 0x24, 0x1f, 0x1f, 0x24, "v_floor_f32", True, True),
  721    (0x25, 0x25, 0x20, 0x20, 0x25, "v_exp_f32", True, True),
  722    (0x26, 0x26,   -1,   -1,   -1, "v_log_clamp_f32", True, True),
  723    (0x27, 0x27, 0x21, 0x21, 0x27, "v_log_f32", True, True),
  724    (0x28, 0x28,   -1,   -1,   -1, "v_rcp_clamp_f32", True, True),
  725    (0x29, 0x29,   -1,   -1,   -1, "v_rcp_legacy_f32", True, True),
  726    (0x2a, 0x2a, 0x22, 0x22, 0x2a, "v_rcp_f32", True, True),
  727    (0x2b, 0x2b, 0x23, 0x23, 0x2b, "v_rcp_iflag_f32", True, True),
  728    (0x2c, 0x2c,   -1,   -1,   -1, "v_rsq_clamp_f32", True, True),
  729    (0x2d, 0x2d,   -1,   -1,   -1, "v_rsq_legacy_f32", True, True),
  730    (0x2e, 0x2e, 0x24, 0x24, 0x2e, "v_rsq_f32", True, True),
  731    (0x2f, 0x2f, 0x25, 0x25, 0x2f, "v_rcp_f64", True, True),
  732    (0x30, 0x30,   -1,   -1,   -1, "v_rcp_clamp_f64", True, True),
  733    (0x31, 0x31, 0x26, 0x26, 0x31, "v_rsq_f64", True, True),
  734    (0x32, 0x32,   -1,   -1,   -1, "v_rsq_clamp_f64", True, True),
  735    (0x33, 0x33, 0x27, 0x27, 0x33, "v_sqrt_f32", True, True),
  736    (0x34, 0x34, 0x28, 0x28, 0x34, "v_sqrt_f64", True, True),
  737    (0x35, 0x35, 0x29, 0x29, 0x35, "v_sin_f32", True, True),
  738    (0x36, 0x36, 0x2a, 0x2a, 0x36, "v_cos_f32", True, True),
  739    (0x37, 0x37, 0x2b, 0x2b, 0x37, "v_not_b32", False, False),
  740    (0x38, 0x38, 0x2c, 0x2c, 0x38, "v_bfrev_b32", False, False),
  741    (0x39, 0x39, 0x2d, 0x2d, 0x39, "v_ffbh_u32", False, False),
  742    (0x3a, 0x3a, 0x2e, 0x2e, 0x3a, "v_ffbl_b32", False, False),
  743    (0x3b, 0x3b, 0x2f, 0x2f, 0x3b, "v_ffbh_i32", False, False),
  744    (0x3c, 0x3c, 0x30, 0x30, 0x3c, "v_frexp_exp_i32_f64", True, False),
  745    (0x3d, 0x3d, 0x31, 0x31, 0x3d, "v_frexp_mant_f64", True, False),
  746    (0x3e, 0x3e, 0x32, 0x32, 0x3e, "v_fract_f64", True, True),
  747    (0x3f, 0x3f, 0x33, 0x33, 0x3f, "v_frexp_exp_i32_f32", True, False),
  748    (0x40, 0x40, 0x34, 0x34, 0x40, "v_frexp_mant_f32", True, False),
  749    (0x41, 0x41, 0x35, 0x35, 0x41, "v_clrexcp", False, False),
  750    (0x42, 0x42, 0x36,   -1, 0x42, "v_movreld_b32", False, False),
  751    (0x43, 0x43, 0x37,   -1, 0x43, "v_movrels_b32", False, False),
  752    (0x44, 0x44, 0x38,   -1, 0x44, "v_movrelsd_b32", False, False),
  753    (  -1,   -1,   -1,   -1, 0x48, "v_movrelsd_2_b32", False, False),
  754    (  -1,   -1,   -1, 0x37,   -1, "v_screen_partition_4se_b32", False, False),
  755    (  -1,   -1, 0x39, 0x39, 0x50, "v_cvt_f16_u16", False, True),
  756    (  -1,   -1, 0x3a, 0x3a, 0x51, "v_cvt_f16_i16", False, True),
  757    (  -1,   -1, 0x3b, 0x3b, 0x52, "v_cvt_u16_f16", True, False),
  758    (  -1,   -1, 0x3c, 0x3c, 0x53, "v_cvt_i16_f16", True, False),
  759    (  -1,   -1, 0x3d, 0x3d, 0x54, "v_rcp_f16", True, True),
  760    (  -1,   -1, 0x3e, 0x3e, 0x55, "v_sqrt_f16", True, True),
  761    (  -1,   -1, 0x3f, 0x3f, 0x56, "v_rsq_f16", True, True),
  762    (  -1,   -1, 0x40, 0x40, 0x57, "v_log_f16", True, True),
  763    (  -1,   -1, 0x41, 0x41, 0x58, "v_exp_f16", True, True),
  764    (  -1,   -1, 0x42, 0x42, 0x59, "v_frexp_mant_f16", True, False),
  765    (  -1,   -1, 0x43, 0x43, 0x5a, "v_frexp_exp_i16_f16", True, False),
  766    (  -1,   -1, 0x44, 0x44, 0x5b, "v_floor_f16", True, True),
  767    (  -1,   -1, 0x45, 0x45, 0x5c, "v_ceil_f16", True, True),
  768    (  -1,   -1, 0x46, 0x46, 0x5d, "v_trunc_f16", True, True),
  769    (  -1,   -1, 0x47, 0x47, 0x5e, "v_rndne_f16", True, True),
  770    (  -1,   -1, 0x48, 0x48, 0x5f, "v_fract_f16", True, True),
  771    (  -1,   -1, 0x49, 0x49, 0x60, "v_sin_f16", True, True),
  772    (  -1,   -1, 0x4a, 0x4a, 0x61, "v_cos_f16", True, True),
  773    (  -1, 0x46, 0x4b, 0x4b,   -1, "v_exp_legacy_f32", True, True),
  774    (  -1, 0x45, 0x4c, 0x4c,   -1, "v_log_legacy_f32", True, True),
  775    (  -1,   -1,   -1, 0x4f, 0x62, "v_sat_pk_u8_i16", False, False),
  776    (  -1,   -1,   -1, 0x4d, 0x63, "v_cvt_norm_i16_f16", True, False),
  777    (  -1,   -1,   -1, 0x4e, 0x64, "v_cvt_norm_u16_f16", True, False),
  778    (  -1,   -1,   -1, 0x51, 0x65, "v_swap_b32", False, False),
  779    (  -1,   -1,   -1,   -1, 0x68, "v_swaprel_b32", False, False),
  780 }
  781 for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP1:
  782    opcode(name, gfx7, gfx9, gfx10, Format.VOP1, in_mod, out_mod)
  783 
  784 
  785 # VOPC instructions:
  786 
  787 VOPC_CLASS = {
  788    (0x88, 0x88, 0x10, 0x10, 0x88, "v_cmp_class_f32"),
  789    (  -1,   -1, 0x14, 0x14, 0x8f, "v_cmp_class_f16"),
  790    (0x98, 0x98, 0x11, 0x11, 0x98, "v_cmpx_class_f32"),
  791    (  -1,   -1, 0x15, 0x15, 0x9f, "v_cmpx_class_f16"),
  792    (0xa8, 0xa8, 0x12, 0x12, 0xa8, "v_cmp_class_f64"),
  793    (0xb8, 0xb8, 0x13, 0x13, 0xb8, "v_cmpx_class_f64"),
  794 }
  795 for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in VOPC_CLASS:
  796     opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
  797 
  798 COMPF = ["f", "lt", "eq", "le", "gt", "lg", "ge", "o", "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru"]
  799 
  800 for i in range(8):
  801    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x20+i, 0x20+i, 0xc8+i, "v_cmp_"+COMPF[i]+"_f16")
  802    opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
  803    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x30+i, 0x30+i, 0xd8+i, "v_cmpx_"+COMPF[i]+"_f16")
  804    opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
  805    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x28+i, 0x28+i, 0xe8+i, "v_cmp_"+COMPF[i+8]+"_f16")
  806    opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
  807    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x38+i, 0x38+i, 0xf8+i, "v_cmpx_"+COMPF[i+8]+"_f16")
  808    opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
  809 
  810 for i in range(16):
  811    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00+i, 0x00+i, 0x40+i, 0x40+i, 0x00+i, "v_cmp_"+COMPF[i]+"_f32")
  812    opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
  813    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x10+i, 0x10+i, 0x50+i, 0x50+i, 0x10+i, "v_cmpx_"+COMPF[i]+"_f32")
  814    opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
  815    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x20+i, 0x20+i, 0x60+i, 0x60+i, 0x20+i, "v_cmp_"+COMPF[i]+"_f64")
  816    opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
  817    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x30+i, 0x30+i, 0x70+i, 0x70+i, 0x30+i, "v_cmpx_"+COMPF[i]+"_f64")
  818    opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
  819    # GFX_6_7
  820    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x40+i, 0x40+i, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f32")
  821    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x50+i, 0x50+i, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f32")
  822    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x60+i, 0x60+i, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f64")
  823    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x70+i, 0x70+i, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f64")
  824 
  825 COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"]
  826 
  827 # GFX_8_9
  828 for i in [0,7]: # only 0 and 7
  829    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, -1, "v_cmp_"+COMPI[i]+"_i16")
  830    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  831    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, -1, "v_cmpx_"+COMPI[i]+"_i16")
  832    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  833    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, -1, "v_cmp_"+COMPI[i]+"_u16")
  834    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  835    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, -1, "v_cmpx_"+COMPI[i]+"_u16")
  836    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  837 
  838 for i in range(1, 7): # [1..6]
  839    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, 0x88+i, "v_cmp_"+COMPI[i]+"_i16")
  840    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  841    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, 0x98+i, "v_cmpx_"+COMPI[i]+"_i16")
  842    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  843    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, 0xa8+i, "v_cmp_"+COMPI[i]+"_u16")
  844    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  845    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, 0xb8+i, "v_cmpx_"+COMPI[i]+"_u16")
  846    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  847 
  848 for i in range(8):
  849    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x80+i, 0x80+i, 0xc0+i, 0xc0+i, 0x80+i, "v_cmp_"+COMPI[i]+"_i32")
  850    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  851    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x90+i, 0x90+i, 0xd0+i, 0xd0+i, 0x90+i, "v_cmpx_"+COMPI[i]+"_i32")
  852    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  853    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xa0+i, 0xa0+i, 0xe0+i, 0xe0+i, 0xa0+i, "v_cmp_"+COMPI[i]+"_i64")
  854    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  855    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xb0+i, 0xb0+i, 0xf0+i, 0xf0+i, 0xb0+i, "v_cmpx_"+COMPI[i]+"_i64")
  856    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  857    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xc0+i, 0xc0+i, 0xc8+i, 0xc8+i, 0xc0+i, "v_cmp_"+COMPI[i]+"_u32")
  858    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  859    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xd0+i, 0xd0+i, 0xd8+i, 0xd8+i, 0xd0+i, "v_cmpx_"+COMPI[i]+"_u32")
  860    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  861    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xe0+i, 0xe0+i, 0xe8+i, 0xe8+i, 0xe0+i, "v_cmp_"+COMPI[i]+"_u64")
  862    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  863    (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xf0+i, 0xf0+i, 0xf8+i, 0xf8+i, 0xf0+i, "v_cmpx_"+COMPI[i]+"_u64")
  864    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  865 
  866 
  867 # VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output
  868 VOPP = {
  869    (0x00, "v_pk_mad_i16"),
  870    (0x01, "v_pk_mul_lo_u16"),
  871    (0x02, "v_pk_add_i16"),
  872    (0x03, "v_pk_sub_i16"),
  873    (0x04, "v_pk_lshlrev_b16"),
  874    (0x05, "v_pk_lshrrev_b16"),
  875    (0x06, "v_pk_ashrrev_i16"),
  876    (0x07, "v_pk_max_i16"),
  877    (0x08, "v_pk_min_i16"),
  878    (0x09, "v_pk_mad_u16"),
  879    (0x0a, "v_pk_add_u16"),
  880    (0x0b, "v_pk_sub_u16"),
  881    (0x0c, "v_pk_max_u16"),
  882    (0x0d, "v_pk_min_u16"),
  883    (0x0e, "v_pk_fma_f16"),
  884    (0x0f, "v_pk_add_f16"),
  885    (0x10, "v_pk_mul_f16"),
  886    (0x11, "v_pk_min_f16"),
  887    (0x12, "v_pk_max_f16"),
  888    (0x20, "v_pk_fma_mix_f32"), # v_mad_mix_f32 in VEGA ISA, v_fma_mix_f32 in RDNA ISA
  889    (0x21, "v_pk_fma_mixlo_f16"), # v_mad_mixlo_f16 in VEGA ISA, v_fma_mixlo_f16 in RDNA ISA
  890    (0x22, "v_pk_fma_mixhi_f16"), # v_mad_mixhi_f16 in VEGA ISA, v_fma_mixhi_f16 in RDNA ISA
  891 }
  892 # note that these are only supported on gfx9+ so we'll need to distinguish between gfx8 and gfx9 here
  893 # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, -1, code, code, name)
  894 for (code, name) in VOPP:
  895    opcode(name, -1, code, code, Format.VOP3P)
  896 
  897 
  898 # VINTERP instructions: 
  899 VINTRP = {
  900    (0x00, "v_interp_p1_f32"),
  901    (0x01, "v_interp_p2_f32"),
  902    (0x02, "v_interp_mov_f32"),
  903 }
  904 # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
  905 for (code, name) in VINTRP:
  906    opcode(name, code, code, code, Format.VINTRP)
  907 
  908 # VOP3 instructions: 3 inputs, 1 output
  909 # VOP3b instructions: have a unique scalar output, e.g. VOP2 with vcc out
  910 VOP3 = {
  911    (0x140, 0x140, 0x1c0, 0x1c0, 0x140, "v_mad_legacy_f32", True, True),
  912    (0x141, 0x141, 0x1c1, 0x1c1, 0x141, "v_mad_f32", True, True),
  913    (0x142, 0x142, 0x1c2, 0x1c2, 0x142, "v_mad_i32_i24", False, False),
  914    (0x143, 0x143, 0x1c3, 0x1c3, 0x143, "v_mad_u32_u24", False, False),
  915    (0x144, 0x144, 0x1c4, 0x1c4, 0x144, "v_cubeid_f32", True, True),
  916    (0x145, 0x145, 0x1c5, 0x1c5, 0x145, "v_cubesc_f32", True, True),
  917    (0x146, 0x146, 0x1c6, 0x1c6, 0x146, "v_cubetc_f32", True, True),
  918    (0x147, 0x147, 0x1c7, 0x1c7, 0x147, "v_cubema_f32", True, True),
  919    (0x148, 0x148, 0x1c8, 0x1c8, 0x148, "v_bfe_u32", False, False),
  920    (0x149, 0x149, 0x1c9, 0x1c9, 0x149, "v_bfe_i32", False, False),
  921    (0x14a, 0x14a, 0x1ca, 0x1ca, 0x14a, "v_bfi_b32", False, False),
  922    (0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, "v_fma_f32", True, True),
  923    (0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, "v_fma_f64", True, True),
  924    (0x14d, 0x14d, 0x1cd, 0x1cd, 0x14d, "v_lerp_u8", False, False),
  925    (0x14e, 0x14e, 0x1ce, 0x1ce, 0x14e, "v_alignbit_b32", False, False),
  926    (0x14f, 0x14f, 0x1cf, 0x1cf, 0x14f, "v_alignbyte_b32", False, False),
  927    (0x150, 0x150,    -1,    -1, 0x150, "v_mullit_f32", True, True),
  928    (0x151, 0x151, 0x1d0, 0x1d0, 0x151, "v_min3_f32", True, True),
  929    (0x152, 0x152, 0x1d1, 0x1d1, 0x152, "v_min3_i32", False, False),
  930    (0x153, 0x153, 0x1d2, 0x1d2, 0x153, "v_min3_u32", False, False),
  931    (0x154, 0x154, 0x1d3, 0x1d3, 0x154, "v_max3_f32", True, True),
  932    (0x155, 0x155, 0x1d4, 0x1d4, 0x155, "v_max3_i32", False, False),
  933    (0x156, 0x156, 0x1d5, 0x1d5, 0x156, "v_max3_u32", False, False),
  934    (0x157, 0x157, 0x1d6, 0x1d6, 0x157, "v_med3_f32", True, True),
  935    (0x158, 0x158, 0x1d7, 0x1d7, 0x158, "v_med3_i32", False, False),
  936    (0x159, 0x159, 0x1d8, 0x1d8, 0x159, "v_med3_u32", False, False),
  937    (0x15a, 0x15a, 0x1d9, 0x1d9, 0x15a, "v_sad_u8", False, False),
  938    (0x15b, 0x15b, 0x1da, 0x1da, 0x15b, "v_sad_hi_u8", False, False),
  939    (0x15c, 0x15c, 0x1db, 0x1db, 0x15c, "v_sad_u16", False, False),
  940    (0x15d, 0x15d, 0x1dc, 0x1dc, 0x15d, "v_sad_u32", False, False),
  941    (0x15e, 0x15e, 0x1dd, 0x1dd, 0x15e, "v_cvt_pk_u8_f32", True, False),
  942    (0x15f, 0x15f, 0x1de, 0x1de, 0x15f, "v_div_fixup_f32", True, True),
  943    (0x160, 0x160, 0x1df, 0x1df, 0x160, "v_div_fixup_f64", True, True),
  944    (0x161, 0x161,    -1,    -1,    -1, "v_lshl_b64", False, False),
  945    (0x162, 0x162,    -1,    -1,    -1, "v_lshr_b64", False, False),
  946    (0x163, 0x163,    -1,    -1,    -1, "v_ashr_i64", False, False),
  947    (0x164, 0x164, 0x280, 0x280, 0x164, "v_add_f64", True, True),
  948    (0x165, 0x165, 0x281, 0x281, 0x165, "v_mul_f64", True, True),
  949    (0x166, 0x166, 0x282, 0x282, 0x166, "v_min_f64", True, True),
  950    (0x167, 0x167, 0x283, 0x283, 0x167, "v_max_f64", True, True),
  951    (0x168, 0x168, 0x284, 0x284, 0x168, "v_ldexp_f64", False, True), # src1 can take input modifiers
  952    (0x169, 0x169, 0x285, 0x285, 0x169, "v_mul_lo_u32", False, False),
  953    (0x16a, 0x16a, 0x286, 0x286, 0x16a, "v_mul_hi_u32", False, False),
  954    (0x16b, 0x16b, 0x285, 0x285, 0x16b, "v_mul_lo_i32", False, False), # identical to v_mul_lo_u32
  955    (0x16c, 0x16c, 0x287, 0x287, 0x16c, "v_mul_hi_i32", False, False),
  956    (0x16d, 0x16d, 0x1e0, 0x1e0, 0x16d, "v_div_scale_f32", True, True), # writes to VCC
  957    (0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, "v_div_scale_f64", True, True), # writes to VCC
  958    (0x16f, 0x16f, 0x1e2, 0x1e2, 0x16f, "v_div_fmas_f32", True, True), # takes VCC input
  959    (0x170, 0x170, 0x1e3, 0x1e3, 0x170, "v_div_fmas_f64", True, True), # takes VCC input
  960    (0x171, 0x171, 0x1e4, 0x1e4, 0x171, "v_msad_u8", False, False),
  961    (0x172, 0x172, 0x1e5, 0x1e5, 0x172, "v_qsad_pk_u16_u8", False, False),
  962    (0x172,    -1,    -1,    -1,    -1, "v_qsad_u8", False, False), # what's the difference?
  963    (0x173, 0x173, 0x1e6, 0x1e6, 0x173, "v_mqsad_pk_u16_u8", False, False),
  964    (0x173,    -1,    -1,    -1,    -1, "v_mqsad_u8", False, False), # what's the difference?
  965    (0x174, 0x174, 0x292, 0x292, 0x174, "v_trig_preop_f64", False, False),
  966    (   -1, 0x175, 0x1e7, 0x1e7, 0x175, "v_mqsad_u32_u8", False, False),
  967    (   -1, 0x176, 0x1e8, 0x1e8, 0x176, "v_mad_u64_u32", False, False),
  968    (   -1, 0x177, 0x1e9, 0x1e9, 0x177, "v_mad_i64_i32", False, False),
  969    (   -1,    -1, 0x1ea, 0x1ea,    -1, "v_mad_legacy_f16", True, True),
  970    (   -1,    -1, 0x1eb, 0x1eb,    -1, "v_mad_legacy_u16", False, False),
  971    (   -1,    -1, 0x1ec, 0x1ec,    -1, "v_mad_legacy_i16", False, False),
  972    (   -1,    -1, 0x1ed, 0x1ed, 0x344, "v_perm_b32", False, False),
  973    (   -1,    -1, 0x1ee, 0x1ee,    -1, "v_fma_legacy_f16", True, True),
  974    (   -1,    -1, 0x1ef, 0x1ef,    -1, "v_div_fixup_legacy_f16", True, True),
  975    (0x12c, 0x12c, 0x1f0, 0x1f0,    -1, "v_cvt_pkaccum_u8_f32", True, False),
  976    (   -1,    -1,    -1, 0x1f1, 0x373, "v_mad_u32_u16", False, False),
  977    (   -1,    -1,    -1, 0x1f2, 0x375, "v_mad_i32_i16", False, False),
  978    (   -1,    -1,    -1, 0x1f3, 0x345, "v_xad_u32", False, False),
  979    (   -1,    -1,    -1, 0x1f4, 0x351, "v_min3_f16", True, True),
  980    (   -1,    -1,    -1, 0x1f5, 0x352, "v_min3_i16", False, False),
  981    (   -1,    -1,    -1, 0x1f6, 0x353, "v_min3_u16", False, False),
  982    (   -1,    -1,    -1, 0x1f7, 0x354, "v_max3_f16", True, True),
  983    (   -1,    -1,    -1, 0x1f8, 0x355, "v_max3_i16", False, False),
  984    (   -1,    -1,    -1, 0x1f9, 0x356, "v_max3_u16", False, False),
  985    (   -1,    -1,    -1, 0x1fa, 0x357, "v_med3_f16", True, True),
  986    (   -1,    -1,    -1, 0x1fb, 0x358, "v_med3_i16", False, False),
  987    (   -1,    -1,    -1, 0x1fc, 0x359, "v_med3_u16", False, False),
  988    (   -1,    -1,    -1, 0x1fd, 0x346, "v_lshl_add_u32", False, False),
  989    (   -1,    -1,    -1, 0x1fe, 0x347, "v_add_lshl_u32", False, False),
  990    (   -1,    -1,    -1, 0x1ff, 0x36d, "v_add3_u32", False, False),
  991    (   -1,    -1,    -1, 0x200, 0x36f, "v_lshl_or_b32", False, False),
  992    (   -1,    -1,    -1, 0x201, 0x371, "v_and_or_b32", False, False),
  993    (   -1,    -1,    -1, 0x202, 0x372, "v_or3_b32", False, False),
  994    (   -1,    -1,    -1, 0x203,    -1, "v_mad_f16", True, True),
  995    (   -1,    -1,    -1, 0x204, 0x340, "v_mad_u16", False, False),
  996    (   -1,    -1,    -1, 0x205, 0x35e, "v_mad_i16", False, False),
  997    (   -1,    -1,    -1, 0x206, 0x34b, "v_fma_f16", True, True),
  998    (   -1,    -1,    -1, 0x207, 0x35f, "v_div_fixup_f16", True, True),
  999    (   -1,    -1, 0x274, 0x274, 0x342, "v_interp_p1ll_f16", True, True),
 1000    (   -1,    -1, 0x275, 0x275, 0x343, "v_interp_p1lv_f16", True, True),
 1001    (   -1,    -1, 0x276, 0x276,    -1, "v_interp_p2_legacy_f16", True, True),
 1002    (   -1,    -1,    -1, 0x277, 0x35a, "v_interp_p2_f16", True, True),
 1003    (0x12b, 0x12b, 0x288, 0x288, 0x362, "v_ldexp_f32", False, True),
 1004    (   -1,    -1, 0x289, 0x289, 0x360, "v_readlane_b32_e64", False, False),
 1005    (   -1,    -1, 0x28a, 0x28a, 0x361, "v_writelane_b32_e64", False, False),
 1006    (0x122, 0x122, 0x28b, 0x28b, 0x364, "v_bcnt_u32_b32", False, False),
 1007    (0x123, 0x123, 0x28c, 0x28c, 0x365, "v_mbcnt_lo_u32_b32", False, False),
 1008    (0x124, 0x124, 0x28d, 0x28d, 0x366, "v_mbcnt_hi_u32_b32", False, False),
 1009    (   -1,    -1, 0x28f, 0x28f, 0x2ff, "v_lshlrev_b64", False, False),
 1010    (   -1,    -1, 0x290, 0x290, 0x300, "v_lshrrev_b64", False, False),
 1011    (   -1,    -1, 0x291, 0x291, 0x301, "v_ashrrev_i64", False, False),
 1012    (0x11e, 0x11e, 0x293, 0x293, 0x363, "v_bfm_b32", False, False),
 1013    (0x12d, 0x12d, 0x294, 0x294, 0x368, "v_cvt_pknorm_i16_f32", True, False),
 1014    (0x12e, 0x12e, 0x295, 0x295, 0x369, "v_cvt_pknorm_u16_f32", True, False),
 1015    (0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f
 1016    (0x130, 0x130, 0x297, 0x297, 0x36a, "v_cvt_pk_u16_u32", False, False),
 1017    (0x131, 0x131, 0x298, 0x298, 0x36b, "v_cvt_pk_i16_i32", False, False),
 1018    (   -1,    -1,    -1, 0x299, 0x312, "v_cvt_pknorm_i16_f16", True, False),
 1019    (   -1,    -1,    -1, 0x29a, 0x313, "v_cvt_pknorm_u16_f16", True, False),
 1020    (   -1,    -1,    -1, 0x29c, 0x37f, "v_add_i32", False, False),
 1021    (   -1,    -1,    -1, 0x29d, 0x376, "v_sub_i32", False, False),
 1022    (   -1,    -1,    -1, 0x29e, 0x30d, "v_add_i16", False, False),
 1023    (   -1,    -1,    -1, 0x29f, 0x30e, "v_sub_i16", False, False),
 1024    (   -1,    -1,    -1, 0x2a0, 0x311, "v_pack_b32_f16", True, False),
 1025    (   -1,    -1,    -1,    -1, 0x178, "v_xor3_b32", False, False),
 1026    (   -1,    -1,    -1,    -1, 0x377, "v_permlane16_b32", False, False),
 1027    (   -1,    -1,    -1,    -1, 0x378, "v_permlanex16_b32", False, False),
 1028    (   -1,    -1,    -1,    -1, 0x30f, "v_add_co_u32_e64", False, False),
 1029    (   -1,    -1,    -1,    -1, 0x310, "v_sub_co_u32_e64", False, False),
 1030    (   -1,    -1,    -1,    -1, 0x319, "v_subrev_co_u32_e64", False, False),
 1031    (   -1,    -1,    -1,    -1, 0x303, "v_add_u16_e64", False, False),
 1032    (   -1,    -1,    -1,    -1, 0x304, "v_sub_u16_e64", False, False),
 1033    (   -1,    -1,    -1,    -1, 0x305, "v_mul_lo_u16_e64", False, False),
 1034    (   -1,    -1,    -1,    -1, 0x309, "v_max_u16_e64", False, False),
 1035    (   -1,    -1,    -1,    -1, 0x30a, "v_max_i16_e64", False, False),
 1036    (   -1,    -1,    -1,    -1, 0x30b, "v_min_u16_e64", False, False),
 1037    (   -1,    -1,    -1,    -1, 0x30c, "v_min_i16_e64", False, False),
 1038    (   -1,    -1,    -1,    -1, 0x307, "v_lshrrev_b16_e64", False, False),
 1039    (   -1,    -1,    -1,    -1, 0x308, "v_ashrrev_i16_e64", False, False),
 1040    (   -1,    -1,    -1,    -1, 0x314, "v_lshlrev_b16_e64", False, False),
 1041 }
 1042 for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP3:
 1043    opcode(name, gfx7, gfx9, gfx10, Format.VOP3A, in_mod, out_mod)
 1044 
 1045 
 1046 # DS instructions: 3 inputs (1 addr, 2 data), 1 output
 1047 DS = {
 1048    (0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),
 1049    (0x01, 0x01, 0x01, 0x01, 0x01, "ds_sub_u32"),
 1050    (0x02, 0x02, 0x02, 0x02, 0x02, "ds_rsub_u32"),
 1051    (0x03, 0x03, 0x03, 0x03, 0x03, "ds_inc_u32"),
 1052    (0x04, 0x04, 0x04, 0x04, 0x04, "ds_dec_u32"),
 1053    (0x05, 0x05, 0x05, 0x05, 0x05, "ds_min_i32"),
 1054    (0x06, 0x06, 0x06, 0x06, 0x06, "ds_max_i32"),
 1055    (0x07, 0x07, 0x07, 0x07, 0x07, "ds_min_u32"),
 1056    (0x08, 0x08, 0x08, 0x08, 0x08, "ds_max_u32"),
 1057    (0x09, 0x09, 0x09, 0x09, 0x09, "ds_and_b32"),
 1058    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "ds_or_b32"),
 1059    (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "ds_xor_b32"),
 1060    (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "ds_mskor_b32"),
 1061    (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "ds_write_b32"),
 1062    (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "ds_write2_b32"),
 1063    (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "ds_write2st64_b32"),
 1064    (0x10, 0x10, 0x10, 0x10, 0x10, "ds_cmpst_b32"),
 1065    (0x11, 0x11, 0x11, 0x11, 0x11, "ds_cmpst_f32"),
 1066    (0x12, 0x12, 0x12, 0x12, 0x12, "ds_min_f32"),
 1067    (0x13, 0x13, 0x13, 0x13, 0x13, "ds_max_f32"),
 1068    (  -1, 0x14, 0x14, 0x14, 0x14, "ds_nop"),
 1069    (  -1,   -1, 0x15, 0x15, 0x15, "ds_add_f32"),
 1070    (  -1,   -1, 0x1d, 0x1d, 0xb0, "ds_write_addtid_b32"),
 1071    (0x1e, 0x1e, 0x1e, 0x1e, 0x1e, "ds_write_b8"),
 1072    (0x1f, 0x1f, 0x1f, 0x1f, 0x1f, "ds_write_b16"),
 1073    (0x20, 0x20, 0x20, 0x20, 0x20, "ds_add_rtn_u32"),
 1074    (0x21, 0x21, 0x21, 0x21, 0x21, "ds_sub_rtn_u32"),
 1075    (0x22, 0x22, 0x22, 0x22, 0x22, "ds_rsub_rtn_u32"),
 1076    (0x23, 0x23, 0x23, 0x23, 0x23, "ds_inc_rtn_u32"),
 1077    (0x24, 0x24, 0x24, 0x24, 0x24, "ds_dec_rtn_u32"),
 1078    (0x25, 0x25, 0x25, 0x25, 0x25, "ds_min_rtn_i32"),
 1079    (0x26, 0x26, 0x26, 0x26, 0x26, "ds_max_rtn_i32"),
 1080    (0x27, 0x27, 0x27, 0x27, 0x27, "ds_min_rtn_u32"),
 1081    (0x28, 0x28, 0x28, 0x28, 0x28, "ds_max_rtn_u32"),
 1082    (0x29, 0x29, 0x29, 0x29, 0x29, "ds_and_rtn_b32"),
 1083    (0x2a, 0x2a, 0x2a, 0x2a, 0x2a, "ds_or_rtn_b32"),
 1084    (0x2b, 0x2b, 0x2b, 0x2b, 0x2b, "ds_xor_rtn_b32"),
 1085    (0x2c, 0x2c, 0x2c, 0x2c, 0x2c, "ds_mskor_rtn_b32"),
 1086    (0x2d, 0x2d, 0x2d, 0x2d, 0x2d, "ds_wrxchg_rtn_b32"),
 1087    (0x2e, 0x2e, 0x2e, 0x2e, 0x2e, "ds_wrxchg2_rtn_b32"),
 1088    (0x2f, 0x2f, 0x2f, 0x2f, 0x2f, "ds_wrxchg2st64_rtn_b32"),
 1089    (0x30, 0x30, 0x30, 0x30, 0x30, "ds_cmpst_rtn_b32"),
 1090    (0x31, 0x31, 0x31, 0x31, 0x31, "ds_cmpst_rtn_f32"),
 1091    (0x32, 0x32, 0x32, 0x32, 0x32, "ds_min_rtn_f32"),
 1092    (0x33, 0x33, 0x33, 0x33, 0x33, "ds_max_rtn_f32"),
 1093    (  -1, 0x34, 0x34, 0x34, 0x34, "ds_wrap_rtn_b32"),
 1094    (  -1,   -1, 0x35, 0x35, 0x55, "ds_add_rtn_f32"),
 1095    (0x36, 0x36, 0x36, 0x36, 0x36, "ds_read_b32"),
 1096    (0x37, 0x37, 0x37, 0x37, 0x37, "ds_read2_b32"),
 1097    (0x38, 0x38, 0x38, 0x38, 0x38, "ds_read2st64_b32"),
 1098    (0x39, 0x39, 0x39, 0x39, 0x39, "ds_read_i8"),
 1099    (0x3a, 0x3a, 0x3a, 0x3a, 0x3a, "ds_read_u8"),
 1100    (0x3b, 0x3b, 0x3b, 0x3b, 0x3b, "ds_read_i16"),
 1101    (0x3c, 0x3c, 0x3c, 0x3c, 0x3c, "ds_read_u16"),
 1102    (0x35, 0x35, 0x3d, 0x3d, 0x35, "ds_swizzle_b32"), #data1 & offset, no addr/data2
 1103    (  -1,   -1, 0x3e, 0x3e, 0xb2, "ds_permute_b32"),
 1104    (  -1,   -1, 0x3f, 0x3f, 0xb3, "ds_bpermute_b32"),
 1105    (0x40, 0x40, 0x40, 0x40, 0x40, "ds_add_u64"),
 1106    (0x41, 0x41, 0x41, 0x41, 0x41, "ds_sub_u64"),
 1107    (0x42, 0x42, 0x42, 0x42, 0x42, "ds_rsub_u64"),
 1108    (0x43, 0x43, 0x43, 0x43, 0x43, "ds_inc_u64"),
 1109    (0x44, 0x44, 0x44, 0x44, 0x44, "ds_dec_u64"),
 1110    (0x45, 0x45, 0x45, 0x45, 0x45, "ds_min_i64"),
 1111    (0x46, 0x46, 0x46, 0x46, 0x46, "ds_max_i64"),
 1112    (0x47, 0x47, 0x47, 0x47, 0x47, "ds_min_u64"),
 1113    (0x48, 0x48, 0x48, 0x48, 0x48, "ds_max_u64"),
 1114    (0x49, 0x49, 0x49, 0x49, 0x49, "ds_and_b64"),
 1115    (0x4a, 0x4a, 0x4a, 0x4a, 0x4a, "ds_or_b64"),
 1116    (0x4b, 0x4b, 0x4b, 0x4b, 0x4b, "ds_xor_b64"),
 1117    (0x4c, 0x4c, 0x4c, 0x4c, 0x4c, "ds_mskor_b64"),
 1118    (0x4d, 0x4d, 0x4d, 0x4d, 0x4d, "ds_write_b64"),
 1119    (0x4e, 0x4e, 0x4e, 0x4e, 0x4e, "ds_write2_b64"),
 1120    (0x4f, 0x4f, 0x4f, 0x4f, 0x4f, "ds_write2st64_b64"),
 1121    (0x50, 0x50, 0x50, 0x50, 0x50, "ds_cmpst_b64"),
 1122    (0x51, 0x51, 0x51, 0x51, 0x51, "ds_cmpst_f64"),
 1123    (0x52, 0x52, 0x52, 0x52, 0x52, "ds_min_f64"),
 1124    (0x53, 0x53, 0x53, 0x53, 0x53, "ds_max_f64"),
 1125    (  -1,   -1, 0x54, 0x54, 0xa0, "ds_write_b8_d16_hi"),
 1126    (  -1,   -1, 0x55, 0x55, 0xa1, "ds_write_b16_d16_hi"),
 1127    (  -1,   -1, 0x56, 0x56, 0xa2, "ds_read_u8_d16"),
 1128    (  -1,   -1, 0x57, 0x57, 0xa3, "ds_read_u8_d16_hi"),
 1129    (  -1,   -1, 0x58, 0x58, 0xa4, "ds_read_i8_d16"),
 1130    (  -1,   -1, 0x59, 0x59, 0xa5, "ds_read_i8_d16_hi"),
 1131    (  -1,   -1, 0x5a, 0x5a, 0xa6, "ds_read_u16_d16"),
 1132    (  -1,   -1, 0x5b, 0x5b, 0xa7, "ds_read_u16_d16_hi"),
 1133    (0x60, 0x60, 0x60, 0x60, 0x60, "ds_add_rtn_u64"),
 1134    (0x61, 0x61, 0x61, 0x61, 0x61, "ds_sub_rtn_u64"),
 1135    (0x62, 0x62, 0x62, 0x62, 0x62, "ds_rsub_rtn_u64"),
 1136    (0x63, 0x63, 0x63, 0x63, 0x63, "ds_inc_rtn_u64"),
 1137    (0x64, 0x64, 0x64, 0x64, 0x64, "ds_dec_rtn_u64"),
 1138    (0x65, 0x65, 0x65, 0x65, 0x65, "ds_min_rtn_i64"),
 1139    (0x66, 0x66, 0x66, 0x66, 0x66, "ds_max_rtn_i64"),
 1140    (0x67, 0x67, 0x67, 0x67, 0x67, "ds_min_rtn_u64"),
 1141    (0x68, 0x68, 0x68, 0x68, 0x68, "ds_max_rtn_u64"),
 1142    (0x69, 0x69, 0x69, 0x69, 0x69, "ds_and_rtn_b64"),
 1143    (0x6a, 0x6a, 0x6a, 0x6a, 0x6a, "ds_or_rtn_b64"),
 1144    (0x6b, 0x6b, 0x6b, 0x6b, 0x6b, "ds_xor_rtn_b64"),
 1145    (0x6c, 0x6c, 0x6c, 0x6c, 0x6c, "ds_mskor_rtn_b64"),
 1146    (0x6d, 0x6d, 0x6d, 0x6d, 0x6d, "ds_wrxchg_rtn_b64"),
 1147    (0x6e, 0x6e, 0x6e, 0x6e, 0x6e, "ds_wrxchg2_rtn_b64"),
 1148    (0x6f, 0x6f, 0x6f, 0x6f, 0x6f, "ds_wrxchg2st64_rtn_b64"),
 1149    (0x70, 0x70, 0x70, 0x70, 0x70, "ds_cmpst_rtn_b64"),
 1150    (0x71, 0x71, 0x71, 0x71, 0x71, "ds_cmpst_rtn_f64"),
 1151    (0x72, 0x72, 0x72, 0x72, 0x72, "ds_min_rtn_f64"),
 1152    (0x73, 0x73, 0x73, 0x73, 0x73, "ds_max_rtn_f64"),
 1153    (0x76, 0x76, 0x76, 0x76, 0x76, "ds_read_b64"),
 1154    (0x77, 0x77, 0x77, 0x77, 0x77, "ds_read2_b64"),
 1155    (0x78, 0x78, 0x78, 0x78, 0x78, "ds_read2st64_b64"),
 1156    (  -1, 0x7e, 0x7e, 0x7e, 0x7e, "ds_condxchg32_rtn_b64"),
 1157    (0x80, 0x80, 0x80, 0x80, 0x80, "ds_add_src2_u32"),
 1158    (0x81, 0x81, 0x81, 0x81, 0x81, "ds_sub_src2_u32"),
 1159    (0x82, 0x82, 0x82, 0x82, 0x82, "ds_rsub_src2_u32"),
 1160    (0x83, 0x83, 0x83, 0x83, 0x83, "ds_inc_src2_u32"),
 1161    (0x84, 0x84, 0x84, 0x84, 0x84, "ds_dec_src2_u32"),
 1162    (0x85, 0x85, 0x85, 0x85, 0x85, "ds_min_src2_i32"),
 1163    (0x86, 0x86, 0x86, 0x86, 0x86, "ds_max_src2_i32"),
 1164    (0x87, 0x87, 0x87, 0x87, 0x87, "ds_min_src2_u32"),
 1165    (0x88, 0x88, 0x88, 0x88, 0x88, "ds_max_src2_u32"),
 1166    (0x89, 0x89, 0x89, 0x89, 0x89, "ds_and_src2_b32"),
 1167    (0x8a, 0x8a, 0x8a, 0x8a, 0x8a, "ds_or_src2_b32"),
 1168    (0x8b, 0x8b, 0x8b, 0x8b, 0x8b, "ds_xor_src2_b32"),
 1169    (0x8d, 0x8d, 0x8d, 0x8d, 0x8d, "ds_write_src2_b32"),
 1170    (0x92, 0x92, 0x92, 0x92, 0x92, "ds_min_src2_f32"),
 1171    (0x93, 0x93, 0x93, 0x93, 0x93, "ds_max_src2_f32"),
 1172    (  -1,   -1, 0x95, 0x95, 0x95, "ds_add_src2_f32"),
 1173    (  -1, 0x18, 0x98, 0x98, 0x18, "ds_gws_sema_release_all"),
 1174    (0x19, 0x19, 0x99, 0x99, 0x19, "ds_gws_init"),
 1175    (0x1a, 0x1a, 0x9a, 0x9a, 0x1a, "ds_gws_sema_v"),
 1176    (0x1b, 0x1b, 0x9b, 0x9b, 0x1b, "ds_gws_sema_br"),
 1177    (0x1c, 0x1c, 0x9c, 0x9c, 0x1c, "ds_gws_sema_p"),
 1178    (0x1d, 0x1d, 0x9d, 0x9d, 0x1d, "ds_gws_barrier"),
 1179    (  -1,   -1, 0xb6, 0xb6, 0xb1, "ds_read_addtid_b32"),
 1180    (0x3d, 0x3d, 0xbd, 0xbd, 0x3d, "ds_consume"),
 1181    (0x3e, 0x3e, 0xbe, 0xbe, 0x3e, "ds_append"),
 1182    (0x3f, 0x3f, 0xbf, 0xbf, 0x3f, "ds_ordered_count"),
 1183    (0xc0, 0xc0, 0xc0, 0xc0, 0xc0, "ds_add_src2_u64"),
 1184    (0xc1, 0xc1, 0xc1, 0xc1, 0xc1, "ds_sub_src2_u64"),
 1185    (0xc2, 0xc2, 0xc2, 0xc2, 0xc2, "ds_rsub_src2_u64"),
 1186    (0xc3, 0xc3, 0xc3, 0xc3, 0xc3, "ds_inc_src2_u64"),
 1187    (0xc4, 0xc4, 0xc4, 0xc4, 0xc4, "ds_dec_src2_u64"),
 1188    (0xc5, 0xc5, 0xc5, 0xc5, 0xc5, "ds_min_src2_i64"),
 1189    (0xc6, 0xc6, 0xc6, 0xc6, 0xc6, "ds_max_src2_i64"),
 1190    (0xc7, 0xc7, 0xc7, 0xc7, 0xc7, "ds_min_src2_u64"),
 1191    (0xc8, 0xc8, 0xc8, 0xc8, 0xc8, "ds_max_src2_u64"),
 1192    (0xc9, 0xc9, 0xc9, 0xc9, 0xc9, "ds_and_src2_b64"),
 1193    (0xca, 0xca, 0xca, 0xca, 0xca, "ds_or_src2_b64"),
 1194    (0xcb, 0xcb, 0xcb, 0xcb, 0xcb, "ds_xor_src2_b64"),
 1195    (0xcd, 0xcd, 0xcd, 0xcd, 0xcd, "ds_write_src2_b64"),
 1196    (0xd2, 0xd2, 0xd2, 0xd2, 0xd2, "ds_min_src2_f64"),
 1197    (0xd3, 0xd3, 0xd3, 0xd3, 0xd3, "ds_max_src2_f64"),
 1198    (  -1, 0xde, 0xde, 0xde, 0xde, "ds_write_b96"),
 1199    (  -1, 0xdf, 0xdf, 0xdf, 0xdf, "ds_write_b128"),
 1200    (  -1, 0xfd, 0xfd,   -1,   -1, "ds_condxchg32_rtn_b128"),
 1201    (  -1, 0xfe, 0xfe, 0xfe, 0xfe, "ds_read_b96"),
 1202    (  -1, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"),
 1203 }
 1204 for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in DS:
 1205     opcode(name, gfx7, gfx9, gfx10, Format.DS)
 1206 
 1207 # MUBUF instructions:
 1208 MUBUF = {
 1209    (0x00, 0x00, 0x00, 0x00, 0x00, "buffer_load_format_x"),
 1210    (0x01, 0x01, 0x01, 0x01, 0x01, "buffer_load_format_xy"),
 1211    (0x02, 0x02, 0x02, 0x02, 0x02, "buffer_load_format_xyz"),
 1212    (0x03, 0x03, 0x03, 0x03, 0x03, "buffer_load_format_xyzw"),
 1213    (0x04, 0x04, 0x04, 0x04, 0x04, "buffer_store_format_x"),
 1214    (0x05, 0x05, 0x05, 0x05, 0x05, "buffer_store_format_xy"),
 1215    (0x06, 0x06, 0x06, 0x06, 0x06, "buffer_store_format_xyz"),
 1216    (0x07, 0x07, 0x07, 0x07, 0x07, "buffer_store_format_xyzw"),
 1217    (  -1,   -1, 0x08, 0x08, 0x80, "buffer_load_format_d16_x"),
 1218    (  -1,   -1, 0x09, 0x09, 0x81, "buffer_load_format_d16_xy"),
 1219    (  -1,   -1, 0x0a, 0x0a, 0x82, "buffer_load_format_d16_xyz"),
 1220    (  -1,   -1, 0x0b, 0x0b, 0x83, "buffer_load_format_d16_xyzw"),
 1221    (  -1,   -1, 0x0c, 0x0c, 0x84, "buffer_store_format_d16_x"),
 1222    (  -1,   -1, 0x0d, 0x0d, 0x85, "buffer_store_format_d16_xy"),
 1223    (  -1,   -1, 0x0e, 0x0e, 0x86, "buffer_store_format_d16_xyz"),
 1224    (  -1,   -1, 0x0f, 0x0f, 0x87, "buffer_store_format_d16_xyzw"),
 1225    (0x08, 0x08, 0x10, 0x10, 0x08, "buffer_load_ubyte"),
 1226    (0x09, 0x09, 0x11, 0x11, 0x09, "buffer_load_sbyte"),
 1227    (0x0a, 0x0a, 0x12, 0x12, 0x0a, "buffer_load_ushort"),
 1228    (0x0b, 0x0b, 0x13, 0x13, 0x0b, "buffer_load_sshort"),
 1229    (0x0c, 0x0c, 0x14, 0x14, 0x0c, "buffer_load_dword"),
 1230    (0x0d, 0x0d, 0x15, 0x15, 0x0d, "buffer_load_dwordx2"),
 1231    (  -1, 0x0f, 0x16, 0x16, 0x0f, "buffer_load_dwordx3"),
 1232    (0x0f, 0x0e, 0x17, 0x17, 0x0e, "buffer_load_dwordx4"),
 1233    (0x18, 0x18, 0x18, 0x18, 0x18, "buffer_store_byte"),
 1234    (  -1,   -1,   -1, 0x19, 0x19, "buffer_store_byte_d16_hi"),
 1235    (0x1a, 0x1a, 0x1a, 0x1a, 0x1a, "buffer_store_short"),
 1236    (  -1,   -1,   -1, 0x1b, 0x1b, "buffer_store_short_d16_hi"),
 1237    (0x1c, 0x1c, 0x1c, 0x1c, 0x1c, "buffer_store_dword"),
 1238    (0x1d, 0x1d, 0x1d, 0x1d, 0x1d, "buffer_store_dwordx2"),
 1239    (  -1, 0x1f, 0x1e, 0x1e, 0x1f, "buffer_store_dwordx3"),
 1240    (0x1e, 0x1e, 0x1f, 0x1f, 0x1e, "buffer_store_dwordx4"),
 1241    (  -1,   -1,   -1, 0x20, 0x20, "buffer_load_ubyte_d16"),
 1242    (  -1,   -1,   -1, 0x21, 0x21, "buffer_load_ubyte_d16_hi"),
 1243    (  -1,   -1,   -1, 0x22, 0x22, "buffer_load_sbyte_d16"),
 1244    (  -1,   -1,   -1, 0x23, 0x23, "buffer_load_sbyte_d16_hi"),
 1245    (  -1,   -1,   -1, 0x24, 0x24, "buffer_load_short_d16"),
 1246    (  -1,   -1,   -1, 0x25, 0x25, "buffer_load_short_d16_hi"),
 1247    (  -1,   -1,   -1, 0x26, 0x26, "buffer_load_format_d16_hi_x"),
 1248    (  -1,   -1,   -1, 0x27, 0x27, "buffer_store_format_d16_hi_x"),
 1249    (  -1,   -1, 0x3d, 0x3d,   -1, "buffer_store_lds_dword"),
 1250    (0x71, 0x71, 0x3e, 0x3e,   -1, "buffer_wbinvl1"),
 1251    (0x70, 0x70, 0x3f, 0x3f,   -1, "buffer_wbinvl1_vol"),
 1252    (0x30, 0x30, 0x40, 0x40, 0x30, "buffer_atomic_swap"),
 1253    (0x31, 0x31, 0x41, 0x41, 0x31, "buffer_atomic_cmpswap"),
 1254    (0x32, 0x32, 0x42, 0x42, 0x32, "buffer_atomic_add"),
 1255    (0x33, 0x33, 0x43, 0x43, 0x33, "buffer_atomic_sub"),
 1256    (0x34,   -1,   -1,   -1,   -1, "buffer_atomic_rsub"),
 1257    (0x35, 0x35, 0x44, 0x44, 0x35, "buffer_atomic_smin"),
 1258    (0x36, 0x36, 0x45, 0x45, 0x36, "buffer_atomic_umin"),
 1259    (0x37, 0x37, 0x46, 0x46, 0x37, "buffer_atomic_smax"),
 1260    (0x38, 0x38, 0x47, 0x47, 0x38, "buffer_atomic_umax"),
 1261    (0x39, 0x39, 0x48, 0x48, 0x39, "buffer_atomic_and"),
 1262    (0x3a, 0x3a, 0x49, 0x49, 0x3a, "buffer_atomic_or"),
 1263    (0x3b, 0x3b, 0x4a, 0x4a, 0x3b, "buffer_atomic_xor"),
 1264    (0x3c, 0x3c, 0x4b, 0x4b, 0x3c, "buffer_atomic_inc"),
 1265    (0x3d, 0x3d, 0x4c, 0x4c, 0x3d, "buffer_atomic_dec"),
 1266    (0x3e, 0x3e,   -1,   -1, 0x3e, "buffer_atomic_fcmpswap"),
 1267    (0x3f, 0x3f,   -1,   -1, 0x3f, "buffer_atomic_fmin"),
 1268    (0x40, 0x40,   -1,   -1, 0x40, "buffer_atomic_fmax"),
 1269    (0x50, 0x50, 0x60, 0x60, 0x50, "buffer_atomic_swap_x2"),
 1270    (0x51, 0x51, 0x61, 0x61, 0x51, "buffer_atomic_cmpswap_x2"),
 1271    (0x52, 0x52, 0x62, 0x62, 0x52, "buffer_atomic_add_x2"),
 1272    (0x53, 0x53, 0x63, 0x63, 0x53, "buffer_atomic_sub_x2"),
 1273    (0x54,   -1,   -1,   -1,   -1, "buffer_atomic_rsub_x2"),
 1274    (0x55, 0x55, 0x64, 0x64, 0x55, "buffer_atomic_smin_x2"),
 1275    (0x56, 0x56, 0x65, 0x65, 0x56, "buffer_atomic_umin_x2"),
 1276    (0x57, 0x57, 0x66, 0x66, 0x57, "buffer_atomic_smax_x2"),
 1277    (0x58, 0x58, 0x67, 0x67, 0x58, "buffer_atomic_umax_x2"),
 1278    (0x59, 0x59, 0x68, 0x68, 0x59, "buffer_atomic_and_x2"),
 1279    (0x5a, 0x5a, 0x69, 0x69, 0x5a, "buffer_atomic_or_x2"),
 1280    (0x5b, 0x5b, 0x6a, 0x6a, 0x5b, "buffer_atomic_xor_x2"),
 1281    (0x5c, 0x5c, 0x6b, 0x6b, 0x5c, "buffer_atomic_inc_x2"),
 1282    (0x5d, 0x5d, 0x6c, 0x6c, 0x5d, "buffer_atomic_dec_x2"),
 1283    (0x5e, 0x5e,   -1,   -1, 0x5e, "buffer_atomic_fcmpswap_x2"),
 1284    (0x5f, 0x5f,   -1,   -1, 0x5f, "buffer_atomic_fmin_x2"),
 1285    (0x60, 0x60,   -1,   -1, 0x60, "buffer_atomic_fmax_x2"),
 1286    (  -1,   -1,   -1,   -1, 0x71, "buffer_gl0_inv"),
 1287    (  -1,   -1,   -1,   -1, 0x72, "buffer_gl1_inv"),
 1288 }
 1289 for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MUBUF:
 1290     opcode(name, gfx7, gfx9, gfx10, Format.MUBUF, is_atomic = "atomic" in name)
 1291 
 1292 MTBUF = {
 1293    (0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),
 1294    (0x01, 0x01, 0x01, 0x01, 0x01, "tbuffer_load_format_xy"),
 1295    (0x02, 0x02, 0x02, 0x02, 0x02, "tbuffer_load_format_xyz"),
 1296    (0x03, 0x03, 0x03, 0x03, 0x03, "tbuffer_load_format_xyzw"),
 1297    (0x04, 0x04, 0x04, 0x04, 0x04, "tbuffer_store_format_x"),
 1298    (0x05, 0x05, 0x05, 0x05, 0x05, "tbuffer_store_format_xy"),
 1299    (0x06, 0x06, 0x06, 0x06, 0x06, "tbuffer_store_format_xyz"),
 1300    (0x07, 0x07, 0x07, 0x07, 0x07, "tbuffer_store_format_xyzw"),
 1301    (  -1,   -1, 0x08, 0x08, 0x08, "tbuffer_load_format_d16_x"),
 1302    (  -1,   -1, 0x09, 0x09, 0x09, "tbuffer_load_format_d16_xy"),
 1303    (  -1,   -1, 0x0a, 0x0a, 0x0a, "tbuffer_load_format_d16_xyz"),
 1304    (  -1,   -1, 0x0b, 0x0b, 0x0b, "tbuffer_load_format_d16_xyzw"),
 1305    (  -1,   -1, 0x0c, 0x0c, 0x0c, "tbuffer_store_format_d16_x"),
 1306    (  -1,   -1, 0x0d, 0x0d, 0x0d, "tbuffer_store_format_d16_xy"),
 1307    (  -1,   -1, 0x0e, 0x0e, 0x0e, "tbuffer_store_format_d16_xyz"),
 1308    (  -1,   -1, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"),
 1309 }
 1310 for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MTBUF:
 1311     opcode(name, gfx7, gfx9, gfx10, Format.MTBUF)
 1312 
 1313 
 1314 IMAGE = {
 1315    (0x00, "image_load"),
 1316    (0x01, "image_load_mip"),
 1317    (0x02, "image_load_pck"),
 1318    (0x03, "image_load_pck_sgn"),
 1319    (0x04, "image_load_mip_pck"),
 1320    (0x05, "image_load_mip_pck_sgn"),
 1321    (0x08, "image_store"),
 1322    (0x09, "image_store_mip"),
 1323    (0x0a, "image_store_pck"),
 1324    (0x0b, "image_store_mip_pck"),
 1325    (0x0e, "image_get_resinfo"),
 1326    (0x60, "image_get_lod"),
 1327 }
 1328 # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
 1329 for (code, name) in IMAGE:
 1330    opcode(name, code, code, code, Format.MIMG)
 1331 
 1332 IMAGE_ATOMIC = {
 1333    (0x0f, 0x0f, 0x10, "image_atomic_swap"),
 1334    (0x10, 0x10, 0x11, "image_atomic_cmpswap"),
 1335    (0x11, 0x11, 0x12, "image_atomic_add"),
 1336    (0x12, 0x12, 0x13, "image_atomic_sub"),
 1337    (0x13,   -1,   -1, "image_atomic_rsub"),
 1338    (0x14, 0x14, 0x14, "image_atomic_smin"),
 1339    (0x15, 0x15, 0x15, "image_atomic_umin"),
 1340    (0x16, 0x16, 0x16, "image_atomic_smax"),
 1341    (0x17, 0x17, 0x17, "image_atomic_umax"),
 1342    (0x18, 0x18, 0x18, "image_atomic_and"),
 1343    (0x19, 0x19, 0x19, "image_atomic_or"),
 1344    (0x1a, 0x1a, 0x1a, "image_atomic_xor"),
 1345    (0x1b, 0x1b, 0x1b, "image_atomic_inc"),
 1346    (0x1c, 0x1c, 0x1c, "image_atomic_dec"),
 1347    (0x1d, 0x1d,   -1, "image_atomic_fcmpswap"),
 1348    (0x1e, 0x1e,   -1, "image_atomic_fmin"),
 1349    (0x1f, 0x1f,   -1, "image_atomic_fmax"),
 1350 }
 1351 # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name)
 1352 # gfx7 and gfx10 opcodes are the same here
 1353 for (gfx6, gfx7, gfx89, name) in IMAGE_ATOMIC:
 1354    opcode(name, gfx7, gfx89, gfx7, Format.MIMG, is_atomic = True)
 1355 
 1356 IMAGE_SAMPLE = {
 1357    (0x20, "image_sample"),
 1358    (0x21, "image_sample_cl"),
 1359    (0x22, "image_sample_d"),
 1360    (0x23, "image_sample_d_cl"),
 1361    (0x24, "image_sample_l"),
 1362    (0x25, "image_sample_b"),
 1363    (0x26, "image_sample_b_cl"),
 1364    (0x27, "image_sample_lz"),
 1365    (0x28, "image_sample_c"),
 1366    (0x29, "image_sample_c_cl"),
 1367    (0x2a, "image_sample_c_d"),
 1368    (0x2b, "image_sample_c_d_cl"),
 1369    (0x2c, "image_sample_c_l"),
 1370    (0x2d, "image_sample_c_b"),
 1371    (0x2e, "image_sample_c_b_cl"),
 1372    (0x2f, "image_sample_c_lz"),
 1373    (0x30, "image_sample_o"),
 1374    (0x31, "image_sample_cl_o"),
 1375    (0x32, "image_sample_d_o"),
 1376    (0x33, "image_sample_d_cl_o"),
 1377    (0x34, "image_sample_l_o"),
 1378    (0x35, "image_sample_b_o"),
 1379    (0x36, "image_sample_b_cl_o"),
 1380    (0x37, "image_sample_lz_o"),
 1381    (0x38, "image_sample_c_o"),
 1382    (0x39, "image_sample_c_cl_o"),
 1383    (0x3a, "image_sample_c_d_o"),
 1384    (0x3b, "image_sample_c_d_cl_o"),
 1385    (0x3c, "image_sample_c_l_o"),
 1386    (0x3d, "image_sample_c_b_o"),
 1387    (0x3e, "image_sample_c_b_cl_o"),
 1388    (0x3f, "image_sample_c_lz_o"),
 1389    (0x68, "image_sample_cd"),
 1390    (0x69, "image_sample_cd_cl"),
 1391    (0x6a, "image_sample_c_cd"),
 1392    (0x6b, "image_sample_c_cd_cl"),
 1393    (0x6c, "image_sample_cd_o"),
 1394    (0x6d, "image_sample_cd_cl_o"),
 1395    (0x6e, "image_sample_c_cd_o"),
 1396    (0x6f, "image_sample_c_cd_cl_o"),
 1397 }
 1398 # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
 1399 for (code, name) in IMAGE_SAMPLE:
 1400    opcode(name, code, code, code, Format.MIMG)
 1401 
 1402 IMAGE_GATHER4 = {
 1403    (0x40, "image_gather4"),
 1404    (0x41, "image_gather4_cl"),
 1405    #(0x42, "image_gather4h"), VEGA only?
 1406    (0x44, "image_gather4_l"), # following instructions have different opcodes according to ISA sheet.
 1407    (0x45, "image_gather4_b"),
 1408    (0x46, "image_gather4_b_cl"),
 1409    (0x47, "image_gather4_lz"),
 1410    (0x48, "image_gather4_c"),
 1411    (0x49, "image_gather4_c_cl"), # previous instructions have different opcodes according to ISA sheet.
 1412    #(0x4a, "image_gather4h_pck"), VEGA only?
 1413    #(0x4b, "image_gather8h_pck"), VGEA only?
 1414    (0x4c, "image_gather4_c_l"),
 1415    (0x4d, "image_gather4_c_b"),
 1416    (0x4e, "image_gather4_c_b_cl"),
 1417    (0x4f, "image_gather4_c_lz"),
 1418    (0x50, "image_gather4_o"),
 1419    (0x51, "image_gather4_cl_o"),
 1420    (0x54, "image_gather4_l_o"),
 1421    (0x55, "image_gather4_b_o"),
 1422    (0x56, "image_gather4_b_cl_o"),
 1423    (0x57, "image_gather4_lz_o"),
 1424    (0x58, "image_gather4_c_o"),
 1425    (0x59, "image_gather4_c_cl_o"),
 1426    (0x5c, "image_gather4_c_l_o"),
 1427    (0x5d, "image_gather4_c_b_o"),
 1428    (0x5e, "image_gather4_c_b_cl_o"),
 1429    (0x5f, "image_gather4_c_lz_o"),
 1430 }
 1431 # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
 1432 for (code, name) in IMAGE_GATHER4:
 1433    opcode(name, code, code, code, Format.MIMG)
 1434 
 1435 
 1436 FLAT = {
 1437    #GFX7, GFX8_9, GFX10
 1438    (0x08, 0x10, 0x08, "flat_load_ubyte"),
 1439    (0x09, 0x11, 0x09, "flat_load_sbyte"),
 1440    (0x0a, 0x12, 0x0a, "flat_load_ushort"),
 1441    (0x0b, 0x13, 0x0b, "flat_load_sshort"),
 1442    (0x0c, 0x14, 0x0c, "flat_load_dword"),
 1443    (0x0d, 0x15, 0x0d, "flat_load_dwordx2"),
 1444    (0x0f, 0x16, 0x0f, "flat_load_dwordx3"),
 1445    (0x0e, 0x17, 0x0e, "flat_load_dwordx4"),
 1446    (0x18, 0x18, 0x18, "flat_store_byte"),
 1447    (  -1, 0x19, 0x19, "flat_store_byte_d16_hi"),
 1448    (0x1a, 0x1a, 0x1a, "flat_store_short"),
 1449    (  -1, 0x1b, 0x1b, "flat_store_short_d16_hi"),
 1450    (0x1c, 0x1c, 0x1c, "flat_store_dword"),
 1451    (0x1d, 0x1d, 0x1d, "flat_store_dwordx2"),
 1452    (0x1f, 0x1e, 0x1f, "flat_store_dwordx3"),
 1453    (0x1e, 0x1f, 0x1e, "flat_store_dwordx4"),
 1454    (  -1, 0x20, 0x20, "flat_load_ubyte_d16"),
 1455    (  -1, 0x21, 0x21, "flat_load_ubyte_d16_hi"),
 1456    (  -1, 0x22, 0x22, "flat_load_sbyte_d16"),
 1457    (  -1, 0x23, 0x23, "flat_load_sbyte_d16_hi"),
 1458    (  -1, 0x24, 0x24, "flat_load_short_d16"),
 1459    (  -1, 0x25, 0x25, "flat_load_short_d16_hi"),
 1460    (0x30, 0x40, 0x30, "flat_atomic_swap"),
 1461    (0x31, 0x41, 0x31, "flat_atomic_cmpswap"),
 1462    (0x32, 0x42, 0x32, "flat_atomic_add"),
 1463    (0x33, 0x43, 0x33, "flat_atomic_sub"),
 1464    (0x35, 0x44, 0x35, "flat_atomic_smin"),
 1465    (0x36, 0x45, 0x36, "flat_atomic_umin"),
 1466    (0x37, 0x46, 0x37, "flat_atomic_smax"),
 1467    (0x38, 0x47, 0x38, "flat_atomic_umax"),
 1468    (0x39, 0x48, 0x39, "flat_atomic_and"),
 1469    (0x3a, 0x49, 0x3a, "flat_atomic_or"),
 1470    (0x3b, 0x4a, 0x3b, "flat_atomic_xor"),
 1471    (0x3c, 0x4b, 0x3c, "flat_atomic_inc"),
 1472    (0x3d, 0x4c, 0x3d, "flat_atomic_dec"),
 1473    (0x3e,   -1, 0x3e, "flat_atomic_fcmpswap"),
 1474    (0x3f,   -1, 0x3f, "flat_atomic_fmin"),
 1475    (0x40,   -1, 0x40, "flat_atomic_fmax"),
 1476    (0x50, 0x60, 0x50, "flat_atomic_swap_x2"),
 1477    (0x51, 0x61, 0x51, "flat_atomic_cmpswap_x2"),
 1478    (0x52, 0x62, 0x52, "flat_atomic_add_x2"),
 1479    (0x53, 0x63, 0x53, "flat_atomic_sub_x2"),
 1480    (0x55, 0x64, 0x55, "flat_atomic_smin_x2"),
 1481    (0x56, 0x65, 0x56, "flat_atomic_umin_x2"),
 1482    (0x57, 0x66, 0x57, "flat_atomic_smax_x2"),
 1483    (0x58, 0x67, 0x58, "flat_atomic_umax_x2"),
 1484    (0x59, 0x68, 0x59, "flat_atomic_and_x2"),
 1485    (0x5a, 0x69, 0x5a, "flat_atomic_or_x2"),
 1486    (0x5b, 0x6a, 0x5b, "flat_atomic_xor_x2"),
 1487    (0x5c, 0x6b, 0x5c, "flat_atomic_inc_x2"),
 1488    (0x5d, 0x6c, 0x5d, "flat_atomic_dec_x2"),
 1489    (0x5e,   -1, 0x5e, "flat_atomic_fcmpswap_x2"),
 1490    (0x5f,   -1, 0x5f, "flat_atomic_fmin_x2"),
 1491    (0x60,   -1, 0x60, "flat_atomic_fmax_x2"),
 1492 }
 1493 for (gfx7, gfx8, gfx10, name) in FLAT:
 1494     opcode(name, gfx7, gfx8, gfx10, Format.FLAT, is_atomic = "atomic" in name)
 1495 
 1496 GLOBAL = {
 1497    #GFX8_9, GFX10
 1498    (0x10, 0x08, "global_load_ubyte"),
 1499    (0x11, 0x09, "global_load_sbyte"),
 1500    (0x12, 0x0a, "global_load_ushort"),
 1501    (0x13, 0x0b, "global_load_sshort"),
 1502    (0x14, 0x0c, "global_load_dword"),
 1503    (0x15, 0x0d, "global_load_dwordx2"),
 1504    (0x16, 0x0f, "global_load_dwordx3"),
 1505    (0x17, 0x0e, "global_load_dwordx4"),
 1506    (0x18, 0x18, "global_store_byte"),
 1507    (0x19, 0x19, "global_store_byte_d16_hi"),
 1508    (0x1a, 0x1a, "global_store_short"),
 1509    (0x1b, 0x1b, "global_store_short_d16_hi"),
 1510    (0x1c, 0x1c, "global_store_dword"),
 1511    (0x1d, 0x1d, "global_store_dwordx2"),
 1512    (0x1e, 0x1f, "global_store_dwordx3"),
 1513    (0x1f, 0x1e, "global_store_dwordx4"),
 1514    (0x20, 0x20, "global_load_ubyte_d16"),
 1515    (0x21, 0x21, "global_load_ubyte_d16_hi"),
 1516    (0x22, 0x22, "global_load_sbyte_d16"),
 1517    (0x23, 0x23, "global_load_sbyte_d16_hi"),
 1518    (0x24, 0x24, "global_load_short_d16"),
 1519    (0x25, 0x25, "global_load_short_d16_hi"),
 1520    (0x40, 0x30, "global_atomic_swap"),
 1521    (0x41, 0x31, "global_atomic_cmpswap"),
 1522    (0x42, 0x32, "global_atomic_add"),
 1523    (0x43, 0x33, "global_atomic_sub"),
 1524    (0x44, 0x35, "global_atomic_smin"),
 1525    (0x45, 0x36, "global_atomic_umin"),
 1526    (0x46, 0x37, "global_atomic_smax"),
 1527    (0x47, 0x38, "global_atomic_umax"),
 1528    (0x48, 0x39, "global_atomic_and"),
 1529    (0x49, 0x3a, "global_atomic_or"),
 1530    (0x4a, 0x3b, "global_atomic_xor"),
 1531    (0x4b, 0x3c, "global_atomic_inc"),
 1532    (0x4c, 0x3d, "global_atomic_dec"),
 1533    (  -1, 0x3e, "global_atomic_fcmpswap"),
 1534    (  -1, 0x3f, "global_atomic_fmin"),
 1535    (  -1, 0x40, "global_atomic_fmax"),
 1536    (0x60, 0x50, "global_atomic_swap_x2"),
 1537    (0x61, 0x51, "global_atomic_cmpswap_x2"),
 1538    (0x62, 0x52, "global_atomic_add_x2"),
 1539    (0x63, 0x53, "global_atomic_sub_x2"),
 1540    (0x64, 0x55, "global_atomic_smin_x2"),
 1541    (0x65, 0x56, "global_atomic_umin_x2"),
 1542    (0x66, 0x57, "global_atomic_smax_x2"),
 1543    (0x67, 0x58, "global_atomic_umax_x2"),
 1544    (0x68, 0x59, "global_atomic_and_x2"),
 1545    (0x69, 0x5a, "global_atomic_or_x2"),
 1546    (0x6a, 0x5b, "global_atomic_xor_x2"),
 1547    (0x6b, 0x5c, "global_atomic_inc_x2"),
 1548    (0x6c, 0x5d, "global_atomic_dec_x2"),
 1549    (  -1, 0x5e, "global_atomic_fcmpswap_x2"),
 1550    (  -1, 0x5f, "global_atomic_fmin_x2"),
 1551    (  -1, 0x60, "global_atomic_fmax_x2"),
 1552 }
 1553 for (gfx8, gfx10, name) in GLOBAL:
 1554     opcode(name, -1, gfx8, gfx10, Format.GLOBAL, is_atomic = "atomic" in name)
 1555 
 1556 SCRATCH = {
 1557    #GFX8_9, GFX10
 1558    (0x10, 0x08, "scratch_load_ubyte"),
 1559    (0x11, 0x09, "scratch_load_sbyte"),
 1560    (0x12, 0x0a, "scratch_load_ushort"),
 1561    (0x13, 0x0b, "scratch_load_sshort"),
 1562    (0x14, 0x0c, "scratch_load_dword"),
 1563    (0x15, 0x0d, "scratch_load_dwordx2"),
 1564    (0x16, 0x0f, "scratch_load_dwordx3"),
 1565    (0x17, 0x0e, "scratch_load_dwordx4"),
 1566    (0x18, 0x18, "scratch_store_byte"),
 1567    (0x19, 0x19, "scratch_store_byte_d16_hi"),
 1568    (0x1a, 0x1a, "scratch_store_short"),
 1569    (0x1b, 0x1b, "scratch_store_short_d16_hi"),
 1570    (0x1c, 0x1c, "scratch_store_dword"),
 1571    (0x1d, 0x1d, "scratch_store_dwordx2"),
 1572    (0x1e, 0x1f, "scratch_store_dwordx3"),
 1573    (0x1f, 0x1e, "scratch_store_dwordx4"),
 1574    (0x20, 0x20, "scratch_load_ubyte_d16"),
 1575    (0x21, 0x21, "scratch_load_ubyte_d16_hi"),
 1576    (0x22, 0x22, "scratch_load_sbyte_d16"),
 1577    (0x23, 0x23, "scratch_load_sbyte_d16_hi"),
 1578    (0x24, 0x24, "scratch_load_short_d16"),
 1579    (0x25, 0x25, "scratch_load_short_d16_hi"),
 1580 }
 1581 for (gfx8, gfx10, name) in SCRATCH:
 1582     opcode(name, -1, gfx8, gfx10, Format.SCRATCH)
 1583 
 1584 # check for duplicate opcode numbers
 1585 for ver in ['gfx9', 'gfx10']:
 1586     op_to_name = {}
 1587     for op in opcodes.values():
 1588         if op.format in [Format.PSEUDO, Format.PSEUDO_BRANCH, Format.PSEUDO_BARRIER, Format.PSEUDO_REDUCTION]:
 1589             continue
 1590 
 1591         num = getattr(op, 'opcode_' + ver)
 1592         if num == -1:
 1593             continue
 1594 
 1595         key = (op.format, num)
 1596 
 1597         if key in op_to_name:
 1598             # exceptions
 1599             names = set([op_to_name[key], op.name])
 1600             if ver in ['gfx8', 'gfx9'] and names == set(['v_mul_lo_i32', 'v_mul_lo_u32']):
 1601                 continue
 1602 
 1603             print('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver))
 1604             sys.exit(1)
 1605         else:
 1606             op_to_name[key] = op.name