"Fossies" - the Fresh Open Source Software Archive

Member "dmd-2.089.0/test/runnable/iasm64.d" (2 Nov 2019, 253072 Bytes) of package /linux/misc/dmd-2.089.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) D source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "iasm64.d": 2.088.1_vs_2.089.0.

    1 // PERMUTE_ARGS:
    2 
    3 // Copyright (c) 1999-2016 by The D Language Foundation
    4 // All Rights Reserved
    5 // written by Walter Bright
    6 // http://www.digitalmars.com
    7 
    8 import core.stdc.stdio;
    9 
   10 version (D_PIC)
   11 {
          // D_PIC build: the tests in the D_InlineAsm_X86_64 branch further
          // down are not compiled; the program is only this stub, which
          // returns 0.
   12     int main() { return 0; }
   13 }
   14 else version (D_PIE)
   15 {
          // D_PIE build: same stub as the D_PIC case.
   16     int main() { return 0; }
   17 }
   18 else version (D_InlineAsm_X86_64)
   19 {
   20 
   21 struct M128 { int a,b,c,d; }; // four ints; declared as a local (m128) in test13
   22 struct M64 { int a,b; }; // two ints
   23 
   24 /+
   25 __gshared byte b;
   26 __gshared short w;
   27 __gshared int i;
   28 __gshared long l;
   29 +/
   30 
   31 /****************************************************/
   32 
   33 void test1()
   34 {
          // Exercises two inline-asm features: the `align` directive taking a
          // compile-time constant (x), and the `__LOCAL_SIZE` symbol. The
          // value of __LOCAL_SIZE is stored into `foo` through an RBP-relative
          // operand and is expected to be 16.
          // NOTE(review): `bar` is never referenced; presumably it only
          // contributes to the size of the locals - confirm before removing.
   35     int foo;
   36     int bar;
   37     static const int x = 4;
   38 
   39     asm
   40     {
   41         align x;                ;
   42         mov EAX, __LOCAL_SIZE   ;
   43         mov foo[RBP], EAX       ;
   44     }
   45     assert(foo == 16); // stack must be 16 byte aligned
   46 }
   47 
   48 /****************************************************/
   49 
   50 void test2()
   51 {
          // Exercises the `even` directive: the instructions that follow it
          // must still run normally (EAX = 0, incremented once, stored to foo).
          // `bar` is never referenced in this function.
   52     int foo;
   53     int bar;
   54 
   55     asm
   56     {
   57         even                    ;
   58         mov EAX,0               ;
   59         inc EAX                 ;
   60         mov foo[RBP], EAX       ;
   61     }
   62     assert(foo == 1);
   63 }
   64 
   65 /****************************************************/
   66 
   67 void test3()
   68 {
          // Exercises a `$`-relative jump together with raw `dw` data used as
          // instructions. EAX starts at 5 and the assert expects 4, so the
          // jump has to land on the second word (dec EAX) and skip the first
          // (inc EAX).
          // NOTE(review): this relies on `$` denoting the address of the
          // instruction following the jmp, as the D inline assembler
          // documentation describes - confirm.
   69     int foo;
   70     int bar;
   71 
   72     asm
   73     {
   74         mov     EAX,5           ;
   75         jmp     $ + 2           ;
   76         dw      0xC0FF,0xC8FF   ;       // inc EAX, dec EAX
   77         mov     foo[RBP],EAX    ;
   78     }
   79     assert(foo == 4);
   80 }
   81 
   82 /****************************************************/
   83 
   84 void test4()
   85 {
          // Exercises a conditional jump to a label defined inside the asm
          // block. After `add EAX,5` the result is non-zero, so `jne L1` is
          // taken and both words of the first `dw` are skipped; the single
          // word at L1 (0xC8FF, annotated as dec EAX above) then takes EAX
          // from 5 to 4.
   86     int foo;
   87     int bar;
   88 
   89     asm
   90     {
   91         xor     EAX,EAX         ;
   92         add     EAX,5           ;
   93         jne     L1              ;
   94         dw      0xC0FF,0xC8FF   ;       // inc EAX, dec EAX
   95 L1:
   96         dw      0xC8FF          ;
   97         mov     foo[RBP],EAX    ;
   98     }
   99     assert(foo == 4);
  100 }
  101 
  102 /****************************************************/
  103 
  104 void test5()
  105 {
          // Exercises the data-definition directives db, ds, di, dl, df, dd
          // and de. The data is emitted directly after a `call`; the call
          // pushes the address of the first data byte and jumps over the data
          // to L1, where that address is popped and saved in `p`. The D code
          // afterwards walks the emitted data through pointers of increasing
          // element width and checks every item.
          // `foo` is never referenced in this function.
  106     int foo;
  107     ubyte *p;
  108     ushort *w;
  109     uint *u;
  110     ulong *ul;
  111     float *f;
  112     double *d;
  113     real *e;
  114 
          // Reference values that df/dd/de 1.1/1.2/1.3 are compared against.
  115     static float fs = 1.1;
  116     static double ds = 1.2;
  117     static real es = 1.3;
  118 
  119     asm
  120     {
  121         call    L1              ;
  122         db      0xFF,0xC0;      ;       // inc EAX
  123         db      "abc"           ;
  124         ds      "def"           ;
  125         di      "ghi"           ;
  126         dl      0x12345678ABCDEF;
  127         df      1.1             ;
  128         dd      1.2             ;
  129         de      1.3             ;
  130 L1:
  131         pop     RBX             ;
  132         mov     p[RBP],RBX      ;
  133     }
          // db: one byte per item / per string character.
  134     assert(p[0] == 0xFF);
  135     assert(p[1] == 0xC0);
  136     assert(p[2] == 'a');
  137     assert(p[3] == 'b');
  138     assert(p[4] == 'c');
          // ds: each string character occupies one 16-bit element.
  139     w = cast(ushort *)(p + 5);
  140     assert(w[0] == 'd');
  141     assert(w[1] == 'e');
  142     assert(w[2] == 'f');
          // di: each string character occupies one 32-bit element.
  143     u = cast(uint *)(w + 3);
  144     assert(u[0] == 'g');
  145     assert(u[1] == 'h');
  146     assert(u[2] == 'i');
          // dl: one 64-bit element.
  147     ul = cast(ulong *)(u + 3);
  148     assert(ul[0] == 0x12345678ABCDEF);
          // df / dd / de: float, double and real, laid out back to back.
  149     f = cast(float *)(ul + 1);
  150     assert(*f == fs);
  151     d = cast(double *)(f + 1);
  152     assert(*d == ds);
  153     e = cast(real *)(d + 1);
  154     assert(*e == es);
  155 }
  156 
  157 /****************************************************/
  158 
  159 void test6()
  160 {
          // Checks the machine code generated for base+index*scale memory
          // operands (mov/sub/cmp/add/xor/test) against the byte table `data`.
          // The instructions between `call L1` and `L1:` are never executed:
          // the call jumps over them, and the pushed return address - which
          // points at their first byte - is popped into `p` for comparison.
  161     ubyte *p;
          // Expected encodings, in the same order as the asm statements below.
  162     static ubyte[] data =
  163     [
  164         0x8B, 0x01,             // mov  EAX,[RCX]
  165         0x8B, 0x04, 0x19,       // mov  EAX,[RBX][RCX]
  166         0x8B, 0x04, 0x4B,       // mov  EAX,[RCX*2][RBX]
  167         0x8B, 0x04, 0x5A,       // mov  EAX,[RBX*2][RDX]
  168         0x8B, 0x04, 0x8E,       // mov  EAX,[RCX*4][RSI]
  169         0x8B, 0x04, 0xF9,       // mov  EAX,[RDI*8][RCX]
  170 
  171         0x2B, 0x1C, 0x19,       // sub  EBX,[RBX][RCX]
  172         0x3B, 0x0C, 0x4B,       // cmp  ECX,[RCX*2][RBX]
  173         0x03, 0x14, 0x5A,       // add  EDX,[RBX*2][RDX]
  174         0x33, 0x34, 0x8E,       // xor  ESI,[RCX*4][RSI]
  175 
  176         0x29, 0x1C, 0x19,       // sub  [RBX][RCX],EBX
  177         0x39, 0x0C, 0x4B,       // cmp  [RCX*2][RBX],ECX
  178         0x01, 0x24, 0x5A,       // add  [RBX*2][RDX],ESP
  179         0x31, 0x2C, 0x8E,       // xor  [RCX*4][RSI],EBP
  180 
  181         0xA8, 0x03,                     // test AL,3
  182         0x66, 0xA9, 0x04, 0x00,         // test AX,4
  183         0xA9, 0x05, 0x00, 0x00, 0x00,   // test EAX,5
  184         0x85, 0x3C, 0xF9,               // test [RDI*8][RCX],EDI
  185     ];
  186     int i;
  187 
  188     asm
  189     {
  190         call    L1                      ;
  191 
              // Some operands below list base and index in the opposite
              // order from the table comments above; the same bytes are
              // expected either way.
  192         mov     EAX,[RCX]               ;
  193         mov     EAX,[RCX][RBX]          ;
  194         mov     EAX,[RCX*2][RBX]        ;
  195         mov     EAX,[RDX][RBX*2]        ;
  196         mov     EAX,[RCX*4][RSI]        ;
  197         mov     EAX,[RCX][RDI*8]        ;
  198 
  199         sub     EBX,[RCX][RBX]          ;
  200         cmp     ECX,[RCX*2][RBX]        ;
  201         add     EDX,[RDX][RBX*2]        ;
  202         xor     ESI,[RCX*4][RSI]        ;
  203 
  204         sub     [RCX][RBX],EBX          ;
  205         cmp     [RCX*2][RBX],ECX        ;
  206         add     [RDX][RBX*2],ESP        ;
  207         xor     [RCX*4][RSI],EBP        ;
  208 
  209         test    AL,3                    ;
  210         test    AX,4                    ;
  211         test    EAX,5                   ;
  212         test    [RCX][RDI*8],EDI        ;
  213 L1:
  214         pop     RBX                     ;
  215         mov     p[RBP],RBX              ;
  216     }
          // Byte-for-byte comparison of the emitted code with the table.
  217     for (i = 0; i < data.length; i++)
  218     {
  219         assert(p[i] == data[i]);
  220     }
  221 }
  222 
  223 /****************************************************/
  224 /+
  225 void test7()
  226 {
  227     ubyte *p;
  228     static ubyte[] data =
  229     [
  230         0x26,0xA1,0x24,0x13,0x00,0x00,          // mov  EAX,ES:[01324h]
  231         0x36,0x66,0xA1,0x78,0x56,0x00,0x00,     // mov  AX,SS:[05678h]
  232         0xA0,0x78,0x56,0x00,0x00,               // mov  AL,[05678h]
  233         0x2E,0x8A,0x25,0x78,0x56,0x00,0x00,     // mov  AH,CS:[05678h]
  234         0x64,0x8A,0x1D,0x78,0x56,0x00,0x00,     // mov  BL,FS:[05678h]
  235         0x65,0x8A,0x3D,0x78,0x56,0x00,0x00,     // mov  BH,GS:[05678h]
  236     ];
  237     int i;
  238 
  239     asm
  240     {
  241         call    L1                      ;
  242 
  243         mov     EAX,ES:[0x1324]         ;
  244         mov     AX,SS:[0x5678]          ;
  245         mov     AL,DS:[0x5678]          ;
  246         mov     AH,CS:[0x5678]          ;
  247         mov     BL,FS:[0x5678]          ;
  248         mov     BH,GS:[0x5678]          ;
  249 
  250 L1:                                     ;
  251         pop     RBX                     ;
  252         mov     p[RBP],RBX              ;
  253     }
  254     for (i = 0; i < data.length; i++)
  255     {
  256         assert(p[i] == data[i]);
  257     }
  258 }
  259 +/
  260 /****************************************************/
  261 
  262 void test8()
  263 {
          // Checks the machine code generated for moves to/from segment
          // registers, control registers (CR0/CR2/CR3/CR4) and debug registers
          // (DR0-DR7), plus movsb/movsw/movsd, against the byte table `data`.
          // The instructions between `call L1` and `L1:` are never executed:
          // the call jumps over them, and the pushed return address - which
          // points at their first byte - is popped into `p` for comparison.
  264     ubyte *p;
          // Expected encodings, in the same order as the asm statements below.
  265     static ubyte[] data =
  266     [
  267         0x8C,0xD0,              // mov  AX,SS
  268         0x8C,0xDB,              // mov  BX,DS
  269         0x8C,0xC1,              // mov  CX,ES
  270         0x8C,0xCA,              // mov  DX,CS
  271         0x8C,0xE6,              // mov  SI,FS
  272         0x8C,0xEF,              // mov  DI,GS
  273         0x8E,0xD0,              // mov  SS,AX
  274         0x8E,0xDB,              // mov  DS,BX
  275         0x8E,0xC1,              // mov  ES,CX
  276         0x8E,0xCA,              // mov  CS,DX
  277         0x8E,0xE6,              // mov  FS,SI
  278         0x8E,0xEF,              // mov  GS,DI
  279         0x0F,0x22,0xC0,         // mov  CR0,EAX
  280         0x0F,0x22,0xD3,         // mov  CR2,EBX
  281         0x0F,0x22,0xD9,         // mov  CR3,ECX
  282         0x0F,0x22,0xE2,         // mov  CR4,EDX
  283         0x0F,0x20,0xC0,         // mov  EAX,CR0
  284         0x0F,0x20,0xD3,         // mov  EBX,CR2
  285         0x0F,0x20,0xD9,         // mov  ECX,CR3
  286         0x0F,0x20,0xE2,         // mov  EDX,CR4
  287         0x0F,0x23,0xC0,         // mov  DR0,EAX
  288         0x0F,0x23,0xCE,         // mov  DR1,ESI
  289         0x0F,0x23,0xD3,         // mov  DR2,EBX
  290         0x0F,0x23,0xD9,         // mov  DR3,ECX
  291         0x0F,0x23,0xE2,         // mov  DR4,EDX
  292         0x0F,0x23,0xEF,         // mov  DR5,EDI
  293         0x0F,0x23,0xF4,         // mov  DR6,ESP
  294         0x0F,0x23,0xFD,         // mov  DR7,EBP
  295         0x0F,0x21,0xC4,         // mov  ESP,DR0
  296         0x0F,0x21,0xCD,         // mov  EBP,DR1
  297         0x0F,0x21,0xD0,         // mov  EAX,DR2
  298         0x0F,0x21,0xDB,         // mov  EBX,DR3
  299         0x0F,0x21,0xE1,         // mov  ECX,DR4
  300         0x0F,0x21,0xEA,         // mov  EDX,DR5
  301         0x0F,0x21,0xF6,         // mov  ESI,DR6
  302         0x0F,0x21,0xFF,         // mov  EDI,DR7
  303         0xA4,                   // movsb
  304         0x66,0xA5,              // movsw
  305         0xA5,                   // movsd
  306     ];
  307     int i;
  308 
  309     asm
  310     {
  311         call    L1                      ;
  312 
              // general register <- segment register
  313         mov     AX,SS                   ;
  314         mov     BX,DS                   ;
  315         mov     CX,ES                   ;
  316         mov     DX,CS                   ;
  317         mov     SI,FS                   ;
  318         mov     DI,GS                   ;
  319 
              // segment register <- general register
  320         mov     SS,AX                   ;
  321         mov     DS,BX                   ;
  322         mov     ES,CX                   ;
  323         mov     CS,DX                   ;
  324         mov     FS,SI                   ;
  325         mov     GS,DI                   ;
  326 
              // control register <- general register
  327         mov     CR0,EAX                 ;
  328         mov     CR2,EBX                 ;
  329         mov     CR3,ECX                 ;
  330         mov     CR4,EDX                 ;
  331 
              // general register <- control register
  332         mov     EAX,CR0                 ;
  333         mov     EBX,CR2                 ;
  334         mov     ECX,CR3                 ;
  335         mov     EDX,CR4                 ;
  336 
              // debug register <- general register
  337         mov     DR0,EAX                 ;
  338         mov     DR1,ESI                 ;
  339         mov     DR2,EBX                 ;
  340         mov     DR3,ECX                 ;
  341         mov     DR4,EDX                 ;
  342         mov     DR5,EDI                 ;
  343         mov     DR6,ESP                 ;
  344         mov     DR7,EBP                 ;
  345 
              // general register <- debug register
  346         mov     ESP,DR0                 ;
  347         mov     EBP,DR1                 ;
  348         mov     EAX,DR2                 ;
  349         mov     EBX,DR3                 ;
  350         mov     ECX,DR4                 ;
  351         mov     EDX,DR5                 ;
  352         mov     ESI,DR6                 ;
  353         mov     EDI,DR7                 ;
  354 
  355         movsb                           ;
  356         movsw                           ;
  357         movsd                           ;
  358 L1:                                     ;
  359         pop     RBX                     ;
  360         mov     p[RBP],RBX              ;
  361     }
          // Byte-for-byte comparison of the emitted code with the table.
  362     for (i = 0; i < data.length; i++)
  363     {
  364         assert(p[i] == data[i]);
  365     }
  366 }
  367 
  368 /****************************************************/
  369 
  370 void test9()
  371 {
          // Checks the machine code generated for memory operands written with
          // 16-bit register names (BX, BP, SI, DI), with and without
          // displacements, against the byte table `data`. The instructions
          // between `call L1` and `L1:` are never executed: the call jumps
          // over them, and the pushed return address - which points at their
          // first byte - is popped into `p` for comparison.
          // NOTE(review): `data` still contains a row for `mov AX,[DI]`, but
          // the matching asm statement below is commented out, so as written
          // the table and the emitted code do not line up from that row on.
          // Confirm whether this test is actually invoked by the test driver.
  372     ubyte *p;
  373     static ubyte[] data =
  374     [
  375         0x67,0x66,0x8B,0x00,            // mov  AX,[BX+SI]
  376         0x67,0x66,0x8B,0x01,            // mov  AX,[BX+DI]
  377         0x67,0x66,0x8B,0x02,            // mov  AX,[BP+SI]
  378         0x67,0x66,0x8B,0x03,            // mov  AX,[BP+DI]
  379         0x67,0x66,0x8B,0x04,            // mov  AX,[SI]
  380         0x67,0x66,0x8B,0x05,            // mov  AX,[DI]
  381         0x66,0xB8,0xD2,0x04,            // mov  AX,04D2h
  382         0x67,0x66,0x8B,0x07,            // mov  AX,[BX]
  383         0x67,0x66,0x8B,0x40,0x01,       // mov  AX,1[BX+SI]
  384         0x67,0x66,0x8B,0x41,0x02,       // mov  AX,2[BX+DI]
  385         0x67,0x66,0x8B,0x42,0x03,       // mov  AX,3[BP+SI]
  386         0x67,0x66,0x8B,0x43,0x04,       // mov  AX,4[BP+DI]
  387         0x67,0x66,0x8B,0x44,0x05,       // mov  AX,5[SI]
  388         0x67,0x66,0x8B,0x45,0x06,       // mov  AX,6[DI]
  389         0x67,0x66,0x8B,0x43,0x07,       // mov  AX,7[BP+DI]
  390         0x67,0x66,0x8B,0x47,0x08,       // mov  AX,8[BX]
  391         0x67,0x8B,0x80,0x21,0x01,       // mov  EAX,0121h[BX+SI]
  392         0x67,0x66,0x8B,0x81,0x22,0x01,  // mov  AX,0122h[BX+DI]
  393         0x67,0x66,0x8B,0x82,0x43,0x23,  // mov  AX,02343h[BP+SI]
  394         0x67,0x66,0x8B,0x83,0x54,0x45,  // mov  AX,04554h[BP+DI]
  395         0x67,0x66,0x8B,0x84,0x45,0x66,  // mov  AX,06645h[SI]
  396         0x67,0x66,0x8B,0x85,0x36,0x12,  // mov  AX,01236h[DI]
  397         0x67,0x66,0x8B,0x86,0x67,0x45,  // mov  AX,04567h[BP]
  398         0x67,0x8A,0x87,0x08,0x01,       // mov  AL,0108h[BX]
  399     ];
  400     int i;
  401 
  402     asm
  403     {
  404         call    L1                      ;
  405 
              // no displacement
  406         mov     AX,[BX+SI]              ;
  407         mov     AX,[BX+DI]              ;
  408         mov     AX,[BP+SI]              ;
  409         mov     AX,[BP+DI]              ;
  410         mov     AX,[SI]                 ;
  411 //      mov     AX,[DI]                 ; Internal error: backend/cod3.c 4652
  412         mov     AX,[1234]               ;
  413         mov     AX,[BX]                 ;
  414 
              // small displacements (1..8)
  415         mov     AX,1[BX+SI]             ;
  416         mov     AX,2[BX+DI]             ;
  417         mov     AX,3[BP+SI]             ;
  418         mov     AX,4[BP+DI]             ;
  419         mov     AX,5[SI]                ;
  420         mov     AX,6[DI]                ;
  421         mov     AX,7[DI+BP]             ;
  422         mov     AX,8[BX]                ;
  423 
              // larger displacements
  424         mov     EAX,0x121[BX+SI]        ;
  425         mov     AX,0x122[BX+DI]         ;
  426         mov     AX,0x2343[BP+SI]        ;
  427         mov     AX,0x4554[BP+DI]        ;
  428         mov     AX,0x6645[SI]           ;
  429         mov     AX,0x1236[DI]           ;
  430         mov     AX,0x4567[BP]           ;
  431         mov     AL,0x108[BX]            ;
  432 
  433 L1:                                     ;
  434         pop     RBX                     ;
  435         mov     p[RBP],RBX              ;
  436     }
          // Byte-for-byte comparison of the emitted code with the table.
  437     for (i = 0; i < data.length; i++)
  438     {
  439         assert(p[i] == data[i]);
  440     }
  441 }
  442 
  443 /****************************************************/
  444 
  445 shared int bar10 = 78; // overwritten with 0x12 from inline asm in test10
  446 shared int[2] baz10; // both elements are written from inline asm in test10
  447 
  448 void test10()
  449 {
          // Exercises stores from inline asm directly to module-level `shared`
          // variables: a scalar (bar10), an array named without an index but
          // with an explicit `int ptr` size override (baz10, element 0), and
          // an array element addressed with a scaled index register
          // (baz10[RSI*4] with RSI = 1, i.e. element 1).
          // `p`, `foo`, `data` (empty) and `i` are never referenced here.
  450     ubyte *p;
  451     int foo;
  452     static ubyte[] data =
  453     [
  454     ];
  455     int i;
  456 
  457     asm
  458     {
  459         mov     bar10,0x12              ;
  460 //      mov     baz10,0x13              ;// does not compile, ( should it? )
  461         mov     int ptr baz10,0x13      ;// but this does
  462         mov     ESI,1                   ;
  463         mov     baz10[RSI*4],0x14       ;
  464     }
  465     assert(bar10 == 0x12);
  466     assert(baz10[0] == 0x13);
  467     assert(baz10[1] == 0x14);
  468 }
  469 
  470 /****************************************************/
  471 
  472 struct Foo11
  473 {
          // Three int fields. test11 reads this type's .sizeof and the
          // .sizeof/.offsetof of fields `a` and `b` from inline asm and
          // expects b at offset 8 and a total size of 12, so the field order
          // matters.
  474     int c;
  475     int a;
  476     int b;
  477 }
  478 
  479 void test11()
  480 {
          // Exercises the use of type properties as immediate operands in
          // inline asm: a field's .sizeof, a field's .offsetof, a struct's
          // .sizeof, and a constant expression built from one of them.
          // Each value is stored straight into a local and checked afterwards.
          // `p` is never referenced in this function.
  481     ubyte *p;
  482     int x1;
  483     int x2;
  484     int x3;
  485     int x4;
  486 
  487     asm
  488     {
  489         mov     x1,Foo11.a.sizeof       ;
  490         mov     x2,Foo11.b.offsetof     ;
  491         mov     x3,Foo11.sizeof         ;
  492         mov     x4,Foo11.sizeof + 7     ;
  493     }
  494     assert(x1 == int.sizeof);
  495     assert(x2 == 8);
  496     assert(x3 == 12);
  497     assert(x4 == 19);
  498 }
  499 
  500 /****************************************************/
  501 
  502 void test12()
  503 {
          // Checks the machine code generated for the ALU instructions adc,
          // add, and, cmp, or, sbb, sub, test and xor against the byte table
          // `data`. Each mnemonic is tried with: AL,imm / EAX,imm /
          // byte-mem,imm / dword-mem,small imm / dword-mem,large imm /
          // mem,reg / reg,mem (test has no reg,mem rows).
          // The instructions between `call L1` and `L1:` are never executed:
          // the call jumps over them, and the pushed return address - which
          // points at their first byte - is popped into `p` for comparison.
  504     ubyte *p;
          // Expected encodings, in the same order as the asm statements below.
          // The comments show the locals as -8[RBP] (rm8) and -4[RBP] (rm32).
  505     static ubyte[] data =
  506     [
  507         0x14,0x05,                      // adc  AL,5
  508         0x83,0xD0,0x14,                 // adc  EAX,014h
  509         0x80,0x55,0xF8,0x17,            // adc  byte ptr -8[RBP],017h
  510         0x83,0x55,0xFC,0x17,            // adc  dword ptr -4[RBP],017h
  511         0x81,0x55,0xFC,0x34,0x12,0x00,0x00,     // adc  dword ptr -4[RBP],01234h
  512         0x10,0x7D,0xF8,                 // adc  -8[RBP],BH
  513         0x11,0x5D,0xFC,                 // adc  -4[RBP],EBX
  514         0x12,0x5D,0xF8,                 // adc  BL,-8[RBP]
  515         0x13,0x55,0xFC,                 // adc  EDX,-4[RBP]
  516         0x04,0x05,                      // add  AL,5
  517         0x83,0xC0,0x14,                 // add  EAX,014h
  518         0x80,0x45,0xF8,0x17,            // add  byte ptr -8[RBP],017h
  519         0x83,0x45,0xFC,0x17,            // add  dword ptr -4[RBP],017h
  520         0x81,0x45,0xFC,0x34,0x12,0x00,0x00,     // add  dword ptr -4[RBP],01234h
  521         0x00,0x7D,0xF8,                 // add  -8[RBP],BH
  522         0x01,0x5D,0xFC,                 // add  -4[RBP],EBX
  523         0x02,0x5D,0xF8,                 // add  BL,-8[RBP]
  524         0x03,0x55,0xFC,                 // add  EDX,-4[RBP]
  525         0x24,0x05,                      // and  AL,5
  526         0x83,0xE0,0x14,                 // and  EAX,014h
  527         0x80,0x65,0xF8,0x17,            // and  byte ptr -8[RBP],017h
  528         0x83,0x65,0xFC,0x17,            // and  dword ptr -4[RBP],017h
  529         0x81,0x65,0xFC,0x34,0x12,0x00,0x00,     // and  dword ptr -4[RBP],01234h
  530         0x20,0x7D,0xF8,                 // and  -8[RBP],BH
  531         0x21,0x5D,0xFC,                 // and  -4[RBP],EBX
  532         0x22,0x5D,0xF8,                 // and  BL,-8[RBP]
  533         0x23,0x55,0xFC,                 // and  EDX,-4[RBP]
  534         0x3C,0x05,                      // cmp  AL,5
  535         0x83,0xF8,0x14,                 // cmp  EAX,014h
  536         0x80,0x7D,0xF8,0x17,            // cmp  byte ptr -8[RBP],017h
  537         0x83,0x7D,0xFC,0x17,            // cmp  dword ptr -4[RBP],017h
  538         0x81,0x7D,0xFC,0x34,0x12,0x00,0x00,     // cmp  dword ptr -4[RBP],01234h
  539         0x38,0x7D,0xF8,                 // cmp  -8[RBP],BH
  540         0x39,0x5D,0xFC,                 // cmp  -4[RBP],EBX
  541         0x3A,0x5D,0xF8,                 // cmp  BL,-8[RBP]
  542         0x3B,0x55,0xFC,                 // cmp  EDX,-4[RBP]
  543         0x0C,0x05,                      // or   AL,5
  544         0x83,0xC8,0x14,                 // or   EAX,014h
  545         0x80,0x4D,0xF8,0x17,            // or   byte ptr -8[RBP],017h
  546         0x83,0x4D,0xFC,0x17,            // or   dword ptr -4[RBP],017h
  547         0x81,0x4D,0xFC,0x34,0x12,0x00,0x00,     // or   dword ptr -4[RBP],01234h
  548         0x08,0x7D,0xF8,                 // or   -8[RBP],BH
  549         0x09,0x5D,0xFC,                 // or   -4[RBP],EBX
  550         0x0A,0x5D,0xF8,                 // or   BL,-8[RBP]
  551         0x0B,0x55,0xFC,                 // or   EDX,-4[RBP]
  552         0x1C,0x05,                      // sbb  AL,5
  553         0x83,0xD8,0x14,                 // sbb  EAX,014h
  554         0x80,0x5D,0xF8,0x17,            // sbb  byte ptr -8[RBP],017h
  555         0x83,0x5D,0xFC,0x17,            // sbb  dword ptr -4[RBP],017h
  556         0x81,0x5D,0xFC,0x34,0x12,0x00,0x00,     // sbb  dword ptr -4[RBP],01234h
  557         0x18,0x7D,0xF8,                 // sbb  -8[RBP],BH
  558         0x19,0x5D,0xFC,                 // sbb  -4[RBP],EBX
  559         0x1A,0x5D,0xF8,                 // sbb  BL,-8[RBP]
  560         0x1B,0x55,0xFC,                 // sbb  EDX,-4[RBP]
  561         0x2C,0x05,                      // sub  AL,5
  562         0x83,0xE8,0x14,                 // sub  EAX,014h
  563         0x80,0x6D,0xF8,0x17,            // sub  byte ptr -8[RBP],017h
  564         0x83,0x6D,0xFC,0x17,            // sub  dword ptr -4[RBP],017h
  565         0x81,0x6D,0xFC,0x34,0x12,0x00,0x00,     // sub  dword ptr -4[RBP],01234h
  566         0x28,0x7D,0xF8,                 // sub  -8[RBP],BH
  567         0x29,0x5D,0xFC,                 // sub  -4[RBP],EBX
  568         0x2A,0x5D,0xF8,                 // sub  BL,-8[RBP]
  569         0x2B,0x55,0xFC,                 // sub  EDX,-4[RBP]
  570         0xA8,0x05,                      // test AL,5
  571         0xA9,0x14,0x00,0x00,0x00,       // test EAX,014h
  572         0xF6,0x45,0xF8,0x17,            // test byte ptr -8[RBP],017h
  573         0xF7,0x45,0xFC,0x17,0x00,0x00,0x00,     // test dword ptr -4[RBP],017h
  574         0xF7,0x45,0xFC,0x34,0x12,0x00,0x00,     // test dword ptr -4[RBP],01234h
  575         0x84,0x7D,0xF8,                 // test -8[RBP],BH
  576         0x85,0x5D,0xFC,                 // test -4[RBP],EBX
  577         0x34,0x05,                      // xor  AL,5
  578         0x83,0xF0,0x14,                 // xor  EAX,014h
  579         0x80,0x75,0xF8,0x17,            // xor  byte ptr -8[RBP],017h
  580         0x83,0x75,0xFC,0x17,            // xor  dword ptr -4[RBP],017h
  581         0x81,0x75,0xFC,0x34,0x12,0x00,0x00,     // xor  dword ptr -4[RBP],01234h
  582         0x30,0x7D,0xF8,                 // xor  -8[RBP],BH
  583         0x31,0x5D,0xFC,                 // xor  -4[RBP],EBX
  584         0x32,0x5D,0xF8,                 // xor  BL,-8[RBP]
  585         0x33,0x55,0xFC,                 // xor  EDX,-4[RBP]
  586     ];
          // NOTE(review): the declaration order of i, padding, rm8 and rm32
          // presumably produces the -8/-4 frame offsets the table expects -
          // do not reorder or remove these locals without re-checking.
          // `m32` is never referenced in this function.
  587     int i;
  588     int padding;
  589     byte rm8;
  590     int rm32;
  591     static int m32;
  592 
  593     asm
  594     {
  595         call    L1                      ;
  596         /*
  597         aaa                             ;
  598         aad                             ;
  599         aam                             ;
  600         aas                             ;
  601         arpl    [SI],DI                 ;
  602         */
  603 
  604         adc     AL,5                    ;
  605         adc     EAX,20                  ;
  606         adc     rm8[RBP],23             ;
  607         adc     rm32[RBP],23            ;
  608         adc     rm32[RBP],0x1234        ;
  609         adc     rm8[RBP],BH             ;
  610         adc     rm32[RBP],EBX           ;
  611         adc     BL,rm8[RBP]             ;
  612         adc     EDX,rm32[RBP]           ;
  613 
  614         add     AL,5                    ;
  615         add     EAX,20                  ;
  616         add     rm8[RBP],23             ;
  617         add     rm32[RBP],23            ;
  618         add     rm32[RBP],0x1234        ;
  619         add     rm8[RBP],BH             ;
  620         add     rm32[RBP],EBX           ;
  621         add     BL,rm8[RBP]             ;
  622         add     EDX,rm32[RBP]           ;
  623 
  624         and     AL,5                    ;
  625         and     EAX,20                  ;
  626         and     rm8[RBP],23             ;
  627         and     rm32[RBP],23            ;
  628         and     rm32[RBP],0x1234        ;
  629         and     rm8[RBP],BH             ;
  630         and     rm32[RBP],EBX           ;
  631         and     BL,rm8[RBP]             ;
  632         and     EDX,rm32[RBP]           ;
  633 
  634         cmp     AL,5                    ;
  635         cmp     EAX,20                  ;
  636         cmp     rm8[RBP],23             ;
  637         cmp     rm32[RBP],23            ;
  638         cmp     rm32[RBP],0x1234        ;
  639         cmp     rm8[RBP],BH             ;
  640         cmp     rm32[RBP],EBX           ;
  641         cmp     BL,rm8[RBP]             ;
  642         cmp     EDX,rm32[RBP]           ;
  643 
  644         or      AL,5                    ;
  645         or      EAX,20                  ;
  646         or      rm8[RBP],23             ;
  647         or      rm32[RBP],23            ;
  648         or      rm32[RBP],0x1234        ;
  649         or      rm8[RBP],BH             ;
  650         or      rm32[RBP],EBX           ;
  651         or      BL,rm8[RBP]             ;
  652         or      EDX,rm32[RBP]           ;
  653 
  654         sbb     AL,5                    ;
  655         sbb     EAX,20                  ;
  656         sbb     rm8[RBP],23             ;
  657         sbb     rm32[RBP],23            ;
  658         sbb     rm32[RBP],0x1234        ;
  659         sbb     rm8[RBP],BH             ;
  660         sbb     rm32[RBP],EBX           ;
  661         sbb     BL,rm8[RBP]             ;
  662         sbb     EDX,rm32[RBP]           ;
  663 
  664         sub     AL,5                    ;
  665         sub     EAX,20                  ;
  666         sub     rm8[RBP],23             ;
  667         sub     rm32[RBP],23            ;
  668         sub     rm32[RBP],0x1234        ;
  669         sub     rm8[RBP],BH             ;
  670         sub     rm32[RBP],EBX           ;
  671         sub     BL,rm8[RBP]             ;
  672         sub     EDX,rm32[RBP]           ;
  673 
  674         test    AL,5                    ;
  675         test    EAX,20                  ;
  676         test    rm8[RBP],23             ;
  677         test    rm32[RBP],23            ;
  678         test    rm32[RBP],0x1234        ;
  679         test    rm8[RBP],BH             ;
  680         test    rm32[RBP],EBX           ;
  681 
  682         xor     AL,5                    ;
  683         xor     EAX,20                  ;
  684         xor     rm8[RBP],23             ;
  685         xor     rm32[RBP],23            ;
  686         xor     rm32[RBP],0x1234        ;
  687         xor     rm8[RBP],BH             ;
  688         xor     rm32[RBP],EBX           ;
  689         xor     BL,rm8[RBP]             ;
  690         xor     EDX,rm32[RBP]           ;
  691 L1:                                     ;
  692         pop     RBX                     ;
  693         mov     p[RBP],RBX              ;
  694     }
          // Byte-for-byte comparison of the emitted code with the table.
  695     for (i = 0; i < data.length; i++)
  696     {
  697         //printf("p[%d] = x%02x, data = x%02x\n", i, p[i], data[i]);
  698         assert(p[i] == data[i]);
  699     }
  700 }
  701 
  702 /****************************************************/
  703 
  704 void test13()
  705 {
  706     int m32;
  707     long m64;
  708     M128 m128;
  709     ubyte *p;
  710     static ubyte[] data =
  711     [
  712         0x0F,0x0B,                      // ud2
  713         0x0F,0x05,                      // syscall
  714         0x0F,0x34,                      // sysenter
  715         0x0F,0x35,                      // sysexit
  716         0x0F,0x07,                      // sysret
  717         0x0F,0xAE,0xE8,                 // lfence
  718         0x0F,0xAE,0xF0,                 // mfence
  719         0x0F,0xAE,0xF8,                 // sfence
  720         0x0F,0xAE,0x00,                 // fxsave       [RAX]
  721         0x0F,0xAE,0x08,                 // fxrstor      [RAX]
  722         0x0F,0xAE,0x10,                 // ldmxcsr      [RAX]
  723         0x0F,0xAE,0x18,                 // stmxcsr      [RAX]
  724         0x0F,0xAE,0x38,                 // clflush      [RAX]
  725 
  726         0x0F,0x58,0x08,                 // addps        XMM1,[RAX]
  727         0x0F,0x58,0xCA,                 // addps        XMM1,XMM2
  728 0x66,   0x0F,0x58,0x03,                 // addpd        XMM0,[RBX]
  729 0x66,   0x0F,0x58,0xD1,                 // addpd        XMM2,XMM1
  730         0xF2,0x0F,0x58,0x08,            // addsd        XMM1,[RAX]
  731         0xF2,0x0F,0x58,0xCA,            // addsd        XMM1,XMM2
  732         0xF3,0x0F,0x58,0x2E,            // addss        XMM5,[RSI]
  733         0xF3,0x0F,0x58,0xF7,            // addss        XMM6,XMM7
  734         0x0F,0x54,0x08,                 // andps        XMM1,[RAX]
  735         0x0F,0x54,0xCA,                 // andps        XMM1,XMM2
  736 0x66,   0x0F,0x54,0x03,                 // andpd        XMM0,[RBX]
  737 0x66,   0x0F,0x54,0xD1,                 // andpd        XMM2,XMM1
  738         0x0F,0x55,0x08,                 // andnps       XMM1,[RAX]
  739         0x0F,0x55,0xCA,                 // andnps       XMM1,XMM2
  740 0x66,   0x0F,0x55,0x03,                 // andnpd       XMM0,[RBX]
  741 0x66,   0x0F,0x55,0xD1,                 // andnpd       XMM2,XMM1
  742         0xA7,                           // cmpsd
  743         0x0F,0xC2,0x08,0x01,            // cmpps        XMM1,[RAX],1
  744         0x0F,0xC2,0xCA,0x02,            // cmpps        XMM1,XMM2,2
  745 0x66,   0x0F,0xC2,0x03,0x03,            // cmppd        XMM0,[RBX],3
  746 0x66,   0x0F,0xC2,0xD1,0x04,            // cmppd        XMM2,XMM1,4
  747         0xF2,0x0F,0xC2,0x08,0x05,       // cmpsd        XMM1,[RAX],5
  748         0xF2,0x0F,0xC2,0xCA,0x06,       // cmpsd        XMM1,XMM2,6
  749         0xF3,0x0F,0xC2,0x2E,0x07,       // cmpss        XMM5,[RSI],7
  750         0xF3,0x0F,0xC2,0xF7,0x00,       // cmpss        XMM6,XMM7,0
  751 0x66,   0x0F,0x2F,0x08,                 // comisd       XMM1,[RAX]
  752 0x66,   0x0F,0x2F,0x4D,0xD8,            // comisd       XMM1,-028h[RBP]
  753 0x66,   0x0F,0x2F,0xCA,                 // comisd       XMM1,XMM2
  754         0x0F,0x2F,0x2E,                 // comiss       XMM5,[RSI]
  755         0x0F,0x2F,0xF7,                 // comiss       XMM6,XMM7
  756         0xF3,0x0F,0xE6,0xDC,            // cvtdq2pd     XMM3,XMM4
  757         0xF3,0x0F,0xE6,0x5D,0xD8,       // cvtdq2pd     XMM3,-028h[RBP]
  758         0x0F,0x5B,0xDC,                 // cvtdq2ps     XMM3,XMM4
  759         0x0F,0x5B,0x5D,0xE0,            // cvtdq2ps     XMM3,-020h[RBP]
  760         0xF2,0x0F,0xE6,0xDC,            // cvtpd2dq     XMM3,XMM4
  761         0xF2,0x0F,0xE6,0x5D,0xE0,       // cvtpd2dq     XMM3,-020h[RBP]
  762 0x66,   0x0F,0x2D,0xDC,                 // cvtpd2pi     MM3,XMM4
  763 0x66,   0x0F,0x2D,0x5D,0xE0,            // cvtpd2pi     MM3,-020h[RBP]
  764 0x66,   0x0F,0x5A,0xDC,                 // cvtpd2ps     XMM3,XMM4
  765 0x66,   0x0F,0x5A,0x5D,0xE0,            // cvtpd2ps     XMM3,-020h[RBP]
  766 0x66,   0x0F,0x2A,0xDC,                 // cvtpi2pd     XMM3,MM4
  767 0x66,   0x0F,0x2A,0x5D,0xD8,            // cvtpi2pd     XMM3,-028h[RBP]
  768         0x0F,0x2A,0xDC,                 // cvtpi2ps     XMM3,MM4
  769         0x0F,0x2A,0x5D,0xD8,            // cvtpi2ps     XMM3,-028h[RBP]
  770 0x66,   0x0F,0x5B,0xDC,                 // cvtps2dq     XMM3,XMM4
  771 0x66,   0x0F,0x5B,0x5D,0xE0,            // cvtps2dq     XMM3,-020h[RBP]
  772         0x0F,0x5A,0xDC,                 // cvtps2pd     XMM3,XMM4
  773         0x0F,0x5A,0x5D,0xD8,            // cvtps2pd     XMM3,-028h[RBP]
  774         0x0F,0x2D,0xDC,                 // cvtps2pi     MM3,XMM4
  775         0x0F,0x2D,0x5D,0xD8,            // cvtps2pi     MM3,-028h[RBP]
  776         0xF2,0x0F,0x2D,0xCC,            // cvtsd2si     ECX,XMM4
  777         0xF2,0x0F,0x2D,0x55,0xD8,       // cvtsd2si     EDX,-028h[RBP]
  778         0xF2,0x0F,0x5A,0xDC,            // cvtsd2ss     XMM3,XMM4
  779         0xF2,0x0F,0x5A,0x5D,0xD8,       // cvtsd2ss     XMM3,-028h[RBP]
  780         0xF2,0x0F,0x2A,0xDA,            // cvtsi2sd     XMM3,EDX
  781         0xF2,0x0F,0x2A,0x5D,0xD0,       // cvtsi2sd     XMM3,-030h[RBP]
  782         0xF3,0x0F,0x2A,0xDA,            // cvtsi2ss     XMM3,EDX
  783         0xF3,0x0F,0x2A,0x5D,0xD0,       // cvtsi2ss     XMM3,-030h[RBP]
  784         0xF3,0x0F,0x5A,0xDC,            // cvtss2sd     XMM3,XMM4
  785         0xF3,0x0F,0x5A,0x5D,0xD0,       // cvtss2sd     XMM3,-030h[RBP]
  786         0xF3,0x0F,0x2D,0xFC,            // cvtss2si     EDI,XMM4
  787         0xF3,0x0F,0x2D,0x7D,0xD0,       // cvtss2si     EDI,-030h[RBP]
  788 0x66,   0x0F,0x2C,0xDC,                 // cvttpd2pi    MM3,XMM4
  789 0x66,   0x0F,0x2C,0x7D,0xE0,            // cvttpd2pi    MM7,-020h[RBP]
  790 0x66,   0x0F,0xE6,0xDC,                 // cvttpd2dq    XMM3,XMM4
  791 0x66,   0x0F,0xE6,0x7D,0xE0,            // cvttpd2dq    XMM7,-020h[RBP]
  792         0xF3,0x0F,0x5B,0xDC,            // cvttps2dq    XMM3,XMM4
  793         0xF3,0x0F,0x5B,0x7D,0xE0,       // cvttps2dq    XMM7,-020h[RBP]
  794         0x0F,0x2C,0xDC,                 // cvttps2pi    MM3,XMM4
  795         0x0F,0x2C,0x7D,0xD8,            // cvttps2pi    MM7,-028h[RBP]
  796         0xF2,0x0F,0x2C,0xC4,            // cvttsd2si    EAX,XMM4
  797         0xF2,0x0F,0x2C,0x4D,0xE0,       // cvttsd2si    ECX,-020h[RBP]
  798         0xF3,0x0F,0x2C,0xC4,            // cvttss2si    EAX,XMM4
  799         0xF3,0x0F,0x2C,0x4D,0xD0,       // cvttss2si    ECX,-030h[RBP]
  800 0x66,   0x0F,0x5E,0xE8,                 // divpd        XMM5,XMM0
  801 0x66,   0x0F,0x5E,0x6D,0xE0,            // divpd        XMM5,-020h[RBP]
  802         0x0F,0x5E,0xE8,                 // divps        XMM5,XMM0
  803         0x0F,0x5E,0x6D,0xE0,            // divps        XMM5,-020h[RBP]
  804         0xF2,0x0F,0x5E,0xE8,            // divsd        XMM5,XMM0
  805         0xF2,0x0F,0x5E,0x6D,0xD8,       // divsd        XMM5,-028h[RBP]
  806         0xF3,0x0F,0x5E,0xE8,            // divss        XMM5,XMM0
  807         0xF3,0x0F,0x5E,0x6D,0xD0,       // divss        XMM5,-030h[RBP]
  808 0x66,   0x0F,0xF7,0xD1,                 // maskmovdqu   XMM2,XMM1
  809         0x0F,0xF7,0xE3,                 // maskmovq     MM4,MM3
  810 0x66,   0x0F,0x5F,0xC0,                 // maxpd        XMM0,XMM0
  811 0x66,   0x0F,0x5F,0x4D,0xE0,            // maxpd        XMM1,-020h[RBP]
  812         0x0F,0x5F,0xD1,                 // maxps        XMM2,XMM1
  813         0x0F,0x5F,0x5D,0xE0,            // maxps        XMM3,-020h[RBP]
  814         0xF2,0x0F,0x5F,0xE2,            // maxsd        XMM4,XMM2
  815         0xF2,0x0F,0x5F,0x6D,0xD8,       // maxsd        XMM5,-028h[RBP]
  816         0xF3,0x0F,0x5F,0xF3,            // maxss        XMM6,XMM3
  817         0xF3,0x0F,0x5F,0x7D,0xD0,       // maxss        XMM7,-030h[RBP]
  818 0x66,   0x0F,0x5D,0xC0,                 // minpd        XMM0,XMM0
  819 0x66,   0x0F,0x5D,0x4D,0xE0,            // minpd        XMM1,-020h[RBP]
  820         0x0F,0x5D,0xD1,                 // minps        XMM2,XMM1
  821         0x0F,0x5D,0x5D,0xE0,            // minps        XMM3,-020h[RBP]
  822         0xF2,0x0F,0x5D,0xE2,            // minsd        XMM4,XMM2
  823         0xF2,0x0F,0x5D,0x6D,0xD8,       // minsd        XMM5,-028h[RBP]
  824         0xF3,0x0F,0x5D,0xF3,            // minss        XMM6,XMM3
  825         0xF3,0x0F,0x5D,0x7D,0xD0,       // minss        XMM7,-030h[RBP]
  826 0x66,   0x0F,0x28,0xCA,                 // movapd       XMM1,XMM2
  827 0x66,   0x0F,0x28,0x5D,0xE0,            // movapd       XMM3,-020h[RBP]
  828 0x66,   0x0F,0x29,0x65,0xE0,            // movapd       -020h[RBP],XMM4
  829         0x0F,0x28,0xCA,                 // movaps       XMM1,XMM2
  830         0x0F,0x28,0x5D,0xE0,            // movaps       XMM3,-020h[RBP]
  831         0x0F,0x29,0x65,0xE0,            // movaps       -020h[RBP],XMM4
  832         0x0F,0x6E,0xCB,                 // movd         MM1,EBX
  833         0x0F,0x6E,0x55,0xD0,            // movd         MM2,-030h[RBP]
  834         0x0F,0x7E,0xDB,                 // movd         EBX,MM3
  835         0x0F,0x7E,0x65,0xD0,            // movd         -030h[RBP],MM4
  836 0x66,   0x0F,0x6E,0xCB,                 // movd         XMM1,EBX
  837 0x66,   0x0F,0x6E,0x55,0xD0,            // movd         XMM2,-030h[RBP]
  838 0x66,   0x0F,0x7E,0xDB,                 // movd         EBX,XMM3
  839 0x66,   0x0F,0x7E,0x65,0xD0,            // movd         -030h[RBP],XMM4
  840 0x66,   0x0F,0x6F,0xCA,                 // movdqa       XMM1,XMM2
  841 0x66,   0x0F,0x6F,0x55,0xE0,            // movdqa       XMM2,-020h[RBP]
  842 0x66,   0x0F,0x7F,0x65,0xE0,            // movdqa       -020h[RBP],XMM4
  843         0xF3,0x0F,0x6F,0xCA,            // movdqu       XMM1,XMM2
  844         0xF3,0x0F,0x6F,0x55,0xE0,       // movdqu       XMM2,-020h[RBP]
  845         0xF3,0x0F,0x7F,0x65,0xE0,       // movdqu       -020h[RBP],XMM4
  846         0xF2,0x0F,0xD6,0xDC,            // movdq2q      MM3,XMM4
  847         0x0F,0x12,0xDC,                 // movhlps      XMM3,XMM4
  848 0x66,   0x0F,0x16,0x55,0xD8,            // movhpd       XMM2,-028h[RBP]
  849 0x66,   0x0F,0x17,0x7D,0xD8,            // movhpd       -028h[RBP],XMM7
  850         0x0F,0x16,0x55,0xD8,            // movhps       XMM2,-028h[RBP]
  851         0x0F,0x17,0x7D,0xD8,            // movhps       -028h[RBP],XMM7
  852         0x0F,0x16,0xDC,                 // movlhps      XMM3,XMM4
  853 0x66,   0x0F,0x12,0x55,0xD8,            // movlpd       XMM2,-028h[RBP]
  854 0x66,   0x0F,0x13,0x7D,0xD8,            // movlpd       -028h[RBP],XMM7
  855         0x0F,0x12,0x55,0xD8,            // movlps       XMM2,-028h[RBP]
  856         0x0F,0x13,0x7D,0xD8,            // movlps       -028h[RBP],XMM7
  857 0x66,   0x0F,0x50,0xF3,                 // movmskpd     ESI,XMM3
  858         0x0F,0x50,0xF3,                 // movmskps     ESI,XMM3
  859 0x66,   0x0F,0x59,0xC0,                 // mulpd        XMM0,XMM0
  860 0x66,   0x0F,0x59,0x4D,0xE0,            // mulpd        XMM1,-020h[RBP]
  861         0x0F,0x59,0xD1,                 // mulps        XMM2,XMM1
  862         0x0F,0x59,0x5D,0xE0,            // mulps        XMM3,-020h[RBP]
  863         0xF2,0x0F,0x59,0xE2,            // mulsd        XMM4,XMM2
  864         0xF2,0x0F,0x59,0x6D,0xD8,       // mulsd        XMM5,-028h[RBP]
  865         0xF3,0x0F,0x59,0xF3,            // mulss        XMM6,XMM3
  866         0xF3,0x0F,0x59,0x7D,0xD0,       // mulss        XMM7,-030h[RBP]
  867 0x66,   0x0F,0x51,0xC4,                 // sqrtpd       XMM0,XMM4
  868 0x66,   0x0F,0x51,0x4D,0xE0,            // sqrtpd       XMM1,-020h[RBP]
  869         0x0F,0x51,0xD5,                 // sqrtps       XMM2,XMM5
  870         0x0F,0x51,0x5D,0xE0,            // sqrtps       XMM3,-020h[RBP]
  871         0xF2,0x0F,0x51,0xE6,            // sqrtsd       XMM4,XMM6
  872         0xF2,0x0F,0x51,0x6D,0xD8,       // sqrtsd       XMM5,-028h[RBP]
  873         0xF3,0x0F,0x51,0xF7,            // sqrtss       XMM6,XMM7
  874         0xF3,0x0F,0x51,0x7D,0xD0,       // sqrtss       XMM7,-030h[RBP]
  875 0x66,   0x0F,0x5C,0xC4,                 // subpd        XMM0,XMM4
  876 0x66,   0x0F,0x5C,0x4D,0xE0,            // subpd        XMM1,-020h[RBP]
  877         0x0F,0x5C,0xD5,                 // subps        XMM2,XMM5
  878         0x0F,0x5C,0x5D,0xE0,            // subps        XMM3,-020h[RBP]
  879         0xF2,0x0F,0x5C,0xE6,            // subsd        XMM4,XMM6
  880         0xF2,0x0F,0x5C,0x6D,0xD8,       // subsd        XMM5,-028h[RBP]
  881         0xF3,0x0F,0x5C,0xF7,            // subss        XMM6,XMM7
  882         0xF3,0x0F,0x5C,0x7D,0xD0,       // subss        XMM7,-030h[RBP]
  883         0x0F,0x01,0xE0,                 // smsw EAX
  884     ];
  885     int i;
  886 
  887     asm
  888     {
  889         call    L1                      ;
  890         ud2                             ;
  891         syscall                         ;
  892         sysenter                        ;
  893         sysexit                         ;
  894         sysret                          ;
  895         lfence                          ;
  896         mfence                          ;
  897         sfence                          ;
  898         fxsave  [RAX]                   ;
  899         fxrstor [RAX]                   ;
  900         ldmxcsr [RAX]                   ;
  901         stmxcsr [RAX]                   ;
  902         clflush [RAX]                   ;
  903 
  904         addps XMM1,[RAX]                ;
  905         addps XMM1,XMM2                 ;
  906         addpd XMM0,[RBX]                ;
  907         addpd XMM2,XMM1                 ;
  908         addsd XMM1,[RAX]                ;
  909         addsd XMM1,XMM2                 ;
  910         addss XMM5,[RSI]                ;
  911         addss XMM6,XMM7                 ;
  912 
  913         andps XMM1,[RAX]                ;
  914         andps XMM1,XMM2                 ;
  915         andpd XMM0,[RBX]                ;
  916         andpd XMM2,XMM1                 ;
  917 
  918         andnps XMM1,[RAX]               ;
  919         andnps XMM1,XMM2                ;
  920         andnpd XMM0,[RBX]               ;
  921         andnpd XMM2,XMM1                ;
  922 
  923         cmpsd                           ;
  924         cmpps XMM1,[RAX],1              ;
  925         cmpps XMM1,XMM2,2               ;
  926         cmppd XMM0,[RBX],3              ;
  927         cmppd XMM2,XMM1,4               ;
  928         cmpsd XMM1,[RAX],5              ;
  929         cmpsd XMM1,XMM2,6               ;
  930         cmpss XMM5,[RSI],7              ;
  931         cmpss XMM6,XMM7,0               ;
  932 
  933         comisd XMM1,[RAX]               ;
  934         comisd XMM1,m64[RBP]            ;
  935         comisd XMM1,XMM2                ;
  936         comiss XMM5,[RSI]               ;
  937         comiss XMM6,XMM7                ;
  938 
  939         cvtdq2pd XMM3,XMM4              ;
  940         cvtdq2pd XMM3,m64[RBP]          ;
  941 
  942         cvtdq2ps XMM3,XMM4              ;
  943         cvtdq2ps XMM3,m128[RBP]         ;
  944 
  945         cvtpd2dq XMM3,XMM4              ;
  946         cvtpd2dq XMM3,m128[RBP]         ;
  947 
  948         cvtpd2pi MM3,XMM4               ;
  949         cvtpd2pi MM3,m128[RBP]          ;
  950 
  951         cvtpd2ps XMM3,XMM4              ;
  952         cvtpd2ps XMM3,m128[RBP]         ;
  953 
  954         cvtpi2pd XMM3,MM4               ;
  955         cvtpi2pd XMM3,m64[RBP]          ;
  956 
  957         cvtpi2ps XMM3,MM4               ;
  958         cvtpi2ps XMM3,m64[RBP]          ;
  959 
  960         cvtps2dq XMM3,XMM4              ;
  961         cvtps2dq XMM3,m128[RBP]         ;
  962 
  963         cvtps2pd XMM3,XMM4              ;
  964         cvtps2pd XMM3,m64[RBP]          ;
  965 
  966         cvtps2pi MM3,XMM4               ;
  967         cvtps2pi MM3,m64[RBP]           ;
  968 
  969         cvtsd2si ECX,XMM4               ;
  970         cvtsd2si EDX,m64[RBP]           ;
  971 
  972         cvtsd2ss XMM3,XMM4              ;
  973         cvtsd2ss XMM3,m64[RBP]          ;
  974 
  975         cvtsi2sd XMM3,EDX               ;
  976         cvtsi2sd XMM3,m32[RBP]          ;
  977 
  978         cvtsi2ss XMM3,EDX               ;
  979         cvtsi2ss XMM3,m32[RBP]          ;
  980 
  981         cvtss2sd XMM3,XMM4              ;
  982         cvtss2sd XMM3,m32[RBP]          ;
  983 
  984         cvtss2si EDI,XMM4               ;
  985         cvtss2si EDI,m32[RBP]           ;
  986 
  987         cvttpd2pi MM3,XMM4              ;
  988         cvttpd2pi MM7,m128[RBP]         ;
  989 
  990         cvttpd2dq XMM3,XMM4             ;
  991         cvttpd2dq XMM7,m128[RBP]        ;
  992 
  993         cvttps2dq XMM3,XMM4             ;
  994         cvttps2dq XMM7,m128[RBP]        ;
  995 
  996         cvttps2pi MM3,XMM4              ;
  997         cvttps2pi MM7,m64[RBP]          ;
  998 
  999         cvttsd2si EAX,XMM4              ;
 1000         cvttsd2si ECX,m128[RBP]         ;
 1001 
 1002         cvttss2si EAX,XMM4              ;
 1003         cvttss2si ECX,m32[RBP]          ;
 1004 
 1005         divpd   XMM5,XMM0               ;
 1006         divpd   XMM5,m128[RBP]          ;
 1007         divps   XMM5,XMM0               ;
 1008         divps   XMM5,m128[RBP]          ;
 1009         divsd   XMM5,XMM0               ;
 1010         divsd   XMM5,m64[RBP]           ;
 1011         divss   XMM5,XMM0               ;
 1012         divss   XMM5,m32[RBP]           ;
 1013 
 1014         maskmovdqu XMM1,XMM2            ;
 1015         maskmovq   MM3,MM4              ;
 1016 
 1017         maxpd   XMM0,XMM0               ;
 1018         maxpd   XMM1,m128[RBP]          ;
 1019         maxps   XMM2,XMM1               ;
 1020         maxps   XMM3,m128[RBP]          ;
 1021         maxsd   XMM4,XMM2               ;
 1022         maxsd   XMM5,m64[RBP]           ;
 1023         maxss   XMM6,XMM3               ;
 1024         maxss   XMM7,m32[RBP]           ;
 1025 
 1026         minpd   XMM0,XMM0               ;
 1027         minpd   XMM1,m128[RBP]          ;
 1028         minps   XMM2,XMM1               ;
 1029         minps   XMM3,m128[RBP]          ;
 1030         minsd   XMM4,XMM2               ;
 1031         minsd   XMM5,m64[RBP]           ;
 1032         minss   XMM6,XMM3               ;
 1033         minss   XMM7,m32[RBP]           ;
 1034 
 1035         movapd  XMM1,XMM2               ;
 1036         movapd  XMM3,m128[RBP]          ;
 1037         movapd  m128[RBP],XMM4          ;
 1038 
 1039         movaps  XMM1,XMM2               ;
 1040         movaps  XMM3,m128[RBP]          ;
 1041         movaps  m128[RBP],XMM4          ;
 1042 
 1043         movd    MM1,EBX                 ;
 1044         movd    MM2,m32[RBP]            ;
 1045         movd    EBX,MM3                 ;
 1046         movd    m32[RBP],MM4            ;
 1047 
 1048         movd    XMM1,EBX                ;
 1049         movd    XMM2,m32[RBP]           ;
 1050         movd    EBX,XMM3                ;
 1051         movd    m32[RBP],XMM4           ;
 1052 
 1053         movdqa  XMM1,XMM2               ;
 1054         movdqa  XMM2,m128[RBP]          ;
 1055         movdqa  m128[RBP],XMM4          ;
 1056 
 1057         movdqu  XMM1,XMM2               ;
 1058         movdqu  XMM2,m128[RBP]          ;
 1059         movdqu  m128[RBP],XMM4          ;
 1060 
 1061         movdq2q MM3,XMM4                ;
 1062         movhlps XMM3,XMM4               ;
 1063         movhpd  XMM2,m64[RBP]           ;
 1064         movhpd  m64[RBP],XMM7           ;
 1065         movhps  XMM2,m64[RBP]           ;
 1066         movhps  m64[RBP],XMM7           ;
 1067         movlhps XMM3,XMM4               ;
 1068         movlpd  XMM2,m64[RBP]           ;
 1069         movlpd  m64[RBP],XMM7           ;
 1070         movlps  XMM2,m64[RBP]           ;
 1071         movlps  m64[RBP],XMM7           ;
 1072 
 1073         movmskpd ESI,XMM3               ;
 1074         movmskps ESI,XMM3               ;
 1075 
 1076         mulpd   XMM0,XMM0               ;
 1077         mulpd   XMM1,m128[RBP]          ;
 1078         mulps   XMM2,XMM1               ;
 1079         mulps   XMM3,m128[RBP]          ;
 1080         mulsd   XMM4,XMM2               ;
 1081         mulsd   XMM5,m64[RBP]           ;
 1082         mulss   XMM6,XMM3               ;
 1083         mulss   XMM7,m32[RBP]           ;
 1084 
 1085         sqrtpd  XMM0,XMM4               ;
 1086         sqrtpd  XMM1,m128[RBP]          ;
 1087         sqrtps  XMM2,XMM5               ;
 1088         sqrtps  XMM3,m128[RBP]          ;
 1089         sqrtsd  XMM4,XMM6               ;
 1090         sqrtsd  XMM5,m64[RBP]           ;
 1091         sqrtss  XMM6,XMM7               ;
 1092         sqrtss  XMM7,m32[RBP]           ;
 1093 
 1094         subpd   XMM0,XMM4               ;
 1095         subpd   XMM1,m128[RBP]          ;
 1096         subps   XMM2,XMM5               ;
 1097         subps   XMM3,m128[RBP]          ;
 1098         subsd   XMM4,XMM6               ;
 1099         subsd   XMM5,m64[RBP]           ;
 1100         subss   XMM6,XMM7               ;
 1101         subss   XMM7,m32[RBP]           ;
 1102 
 1103         smsw    EAX                     ;
 1104 
 1105 L1:                                     ;
 1106         pop     RBX                     ;
 1107         mov     p[RBP],RBX              ;
 1108     }
 1109     for (i = 0; i < data.length; i++)
 1110     {
 1111         //printf("[%d] = %02x %02x\n", i, p[i], data[i]);
 1112         assert(p[i] == data[i]);
 1113     }
 1114 }
 1115 
 1116 /****************************************************/
 1117 
 1118 void test14()
 1119 {
 1120     byte m8;
 1121     short m16;
 1122     int m32;
 1123     long m64;
 1124     M128 m128;
 1125     ubyte *p;
 1126     static ubyte[] data =
 1127     [
 1128 0x66,   0x0F,0x50,0xF3,                 // movmskpd     ESI,XMM3
 1129         0x0F,0x50,0xF3,                 // movmskps     ESI,XMM3
 1130 0x66,   0x0F,0xE7,0x55,0xE0,            // movntdq      -020h[RBP],XMM2
 1131         0x0F,0xC3,0x4D,0xD4,            // movnti       -02Ch[RBP],ECX
 1132 0x66,   0x0F,0x2B,0x5D,0xE0,            // movntpd      -020h[RBP],XMM3
 1133         0x0F,0x2B,0x65,0xE0,            // movntps      -020h[RBP],XMM4
 1134         0x0F,0xE7,0x6D,0xD8,            // movntq       -028h[RBP],MM5
 1135         0x0F,0x6F,0xCA,                 // movq         MM1,MM2
 1136         0x0F,0x6F,0x55,0xD8,            // movq         MM2,-028h[RBP]
 1137         0x0F,0x7F,0x5D,0xD8,            // movq         -028h[RBP],MM3
 1138         0xF3,0x0F,0x7E,0xCA,            // movq         XMM1,XMM2
 1139         0xF3,0x0F,0x7E,0x55,0xD8,       // movq         XMM2,-028h[RBP]
 1140 0x66,   0x0F,0xD6,0x5D,0xD8,            // movq         -028h[RBP],XMM3
 1141         0xF3,0x0F,0xD6,0xDA,            // movq2dq      XMM3,MM2
 1142         0xA5,                           // movsd
 1143         0xF2,0x0F,0x10,0xCA,            // movsd        XMM1,XMM2
 1144         0xF2,0x0F,0x10,0x5D,0xD8,       // movsd        XMM3,-028h[RBP]
 1145         0xF2,0x0F,0x11,0x65,0xD8,       // movsd        -028h[RBP],XMM4
 1146         0xF3,0x0F,0x10,0xCA,            // movss        XMM1,XMM2
 1147         0xF3,0x0F,0x10,0x5D,0xD4,       // movss        XMM3,-02Ch[RBP]
 1148         0xF3,0x0F,0x11,0x65,0xD4,       // movss        -02Ch[RBP],XMM4
 1149 0x66,   0x0F,0x10,0xCA,                 // movupd       XMM1,XMM2
 1150 0x66,   0x0F,0x10,0x5D,0xE0,            // movupd       XMM3,-020h[RBP]
 1151 0x66,   0x0F,0x11,0x65,0xE0,            // movupd       -020h[RBP],XMM4
 1152         0x0F,0x10,0xCA,                 // movups       XMM1,XMM2
 1153         0x0F,0x10,0x5D,0xE0,            // movups       XMM3,-020h[RBP]
 1154         0x0F,0x11,0x65,0xE0,            // movups       -020h[RBP],XMM4
 1155 0x66,   0x0F,0x56,0xCA,                 // orpd         XMM1,XMM2
 1156 0x66,   0x0F,0x56,0x5D,0xE0,            // orpd         XMM3,-020h[RBP]
 1157         0x0F,0x56,0xCA,                 // orps         XMM1,XMM2
 1158         0x0F,0x56,0x5D,0xE0,            // orps         XMM3,-020h[RBP]
 1159         0x0F,0x63,0xCA,                 // packsswb     MM1,MM2
 1160         0x0F,0x63,0x5D,0xD8,            // packsswb     MM3,-028h[RBP]
 1161 0x66,   0x0F,0x63,0xCA,                 // packsswb     XMM1,XMM2
 1162 0x66,   0x0F,0x63,0x5D,0xE0,            // packsswb     XMM3,-020h[RBP]
 1163         0x0F,0x6B,0xCA,                 // packssdw     MM1,MM2
 1164         0x0F,0x6B,0x5D,0xD8,            // packssdw     MM3,-028h[RBP]
 1165 0x66,   0x0F,0x6B,0xCA,                 // packssdw     XMM1,XMM2
 1166 0x66,   0x0F,0x6B,0x5D,0xE0,            // packssdw     XMM3,-020h[RBP]
 1167         0x0F,0x67,0xCA,                 // packuswb     MM1,MM2
 1168         0x0F,0x67,0x5D,0xD8,            // packuswb     MM3,-028h[RBP]
 1169 0x66,   0x0F,0x67,0xCA,                 // packuswb     XMM1,XMM2
 1170 0x66,   0x0F,0x67,0x5D,0xE0,            // packuswb     XMM3,-020h[RBP]
 1171         0x0F,0xFC,0xCA,                 // paddb        MM1,MM2
 1172         0x0F,0xFC,0x5D,0xD8,            // paddb        MM3,-028h[RBP]
 1173 0x66,   0x0F,0xFC,0xCA,                 // paddb        XMM1,XMM2
 1174 0x66,   0x0F,0xFC,0x5D,0xE0,            // paddb        XMM3,-020h[RBP]
 1175         0x0F,0xFD,0xCA,                 // paddw        MM1,MM2
 1176         0x0F,0xFD,0x5D,0xD8,            // paddw        MM3,-028h[RBP]
 1177 0x66,   0x0F,0xFD,0xCA,                 // paddw        XMM1,XMM2
 1178 0x66,   0x0F,0xFD,0x5D,0xE0,            // paddw        XMM3,-020h[RBP]
 1179         0x0F,0xFE,0xCA,                 // paddd        MM1,MM2
 1180         0x0F,0xFE,0x5D,0xD8,            // paddd        MM3,-028h[RBP]
 1181 0x66,   0x0F,0xFE,0xCA,                 // paddd        XMM1,XMM2
 1182 0x66,   0x0F,0xFE,0x5D,0xE0,            // paddd        XMM3,-020h[RBP]
 1183         0x0F,0xD4,0xCA,                 // paddq        MM1,MM2
 1184         0x0F,0xD4,0x5D,0xD8,            // paddq        MM3,-028h[RBP]
 1185 0x66,   0x0F,0xD4,0xCA,                 // paddq        XMM1,XMM2
 1186 0x66,   0x0F,0xD4,0x5D,0xE0,            // paddq        XMM3,-020h[RBP]
 1187         0x0F,0xEC,0xCA,                 // paddsb       MM1,MM2
 1188         0x0F,0xEC,0x5D,0xD8,            // paddsb       MM3,-028h[RBP]
 1189 0x66,   0x0F,0xEC,0xCA,                 // paddsb       XMM1,XMM2
 1190 0x66,   0x0F,0xEC,0x5D,0xE0,            // paddsb       XMM3,-020h[RBP]
 1191         0x0F,0xED,0xCA,                 // paddsw       MM1,MM2
 1192         0x0F,0xED,0x5D,0xD8,            // paddsw       MM3,-028h[RBP]
 1193 0x66,   0x0F,0xED,0xCA,                 // paddsw       XMM1,XMM2
 1194 0x66,   0x0F,0xED,0x5D,0xE0,            // paddsw       XMM3,-020h[RBP]
 1195         0x0F,0xDC,0xCA,                 // paddusb      MM1,MM2
 1196         0x0F,0xDC,0x5D,0xD8,            // paddusb      MM3,-028h[RBP]
 1197 0x66,   0x0F,0xDC,0xCA,                 // paddusb      XMM1,XMM2
 1198 0x66,   0x0F,0xDC,0x5D,0xE0,            // paddusb      XMM3,-020h[RBP]
 1199         0x0F,0xDD,0xCA,                 // paddusw      MM1,MM2
 1200         0x0F,0xDD,0x5D,0xD8,            // paddusw      MM3,-028h[RBP]
 1201 0x66,   0x0F,0xDD,0xCA,                 // paddusw      XMM1,XMM2
 1202 0x66,   0x0F,0xDD,0x5D,0xE0,            // paddusw      XMM3,-020h[RBP]
 1203         0x0F,0xDB,0xCA,                 // pand         MM1,MM2
 1204         0x0F,0xDB,0x5D,0xD8,            // pand         MM3,-028h[RBP]
 1205 0x66,   0x0F,0xDB,0xCA,                 // pand         XMM1,XMM2
 1206 0x66,   0x0F,0xDB,0x5D,0xE0,            // pand         XMM3,-020h[RBP]
 1207         0x0F,0xDF,0xCA,                 // pandn        MM1,MM2
 1208         0x0F,0xDF,0x5D,0xD8,            // pandn        MM3,-028h[RBP]
 1209 0x66,   0x0F,0xDF,0xCA,                 // pandn        XMM1,XMM2
 1210 0x66,   0x0F,0xDF,0x5D,0xE0,            // pandn        XMM3,-020h[RBP]
 1211         0x0F,0xE0,0xCA,                 // pavgb        MM1,MM2
 1212         0x0F,0xE0,0x5D,0xD8,            // pavgb        MM3,-028h[RBP]
 1213 0x66,   0x0F,0xE0,0xCA,                 // pavgb        XMM1,XMM2
 1214 0x66,   0x0F,0xE0,0x5D,0xE0,            // pavgb        XMM3,-020h[RBP]
 1215         0x0F,0xE3,0xCA,                 // pavgw        MM1,MM2
 1216         0x0F,0xE3,0x5D,0xD8,            // pavgw        MM3,-028h[RBP]
 1217 0x66,   0x0F,0xE3,0xCA,                 // pavgw        XMM1,XMM2
 1218 0x66,   0x0F,0xE3,0x5D,0xE0,            // pavgw        XMM3,-020h[RBP]
 1219         0x0F,0x74,0xCA,                 // pcmpeqb      MM1,MM2
 1220         0x0F,0x74,0x5D,0xD8,            // pcmpeqb      MM3,-028h[RBP]
 1221 0x66,   0x0F,0x74,0xCA,                 // pcmpeqb      XMM1,XMM2
 1222 0x66,   0x0F,0x74,0x5D,0xE0,            // pcmpeqb      XMM3,-020h[RBP]
 1223         0x0F,0x75,0xCA,                 // pcmpeqw      MM1,MM2
 1224         0x0F,0x75,0x5D,0xD8,            // pcmpeqw      MM3,-028h[RBP]
 1225 0x66,   0x0F,0x75,0xCA,                 // pcmpeqw      XMM1,XMM2
 1226 0x66,   0x0F,0x75,0x5D,0xE0,            // pcmpeqw      XMM3,-020h[RBP]
 1227         0x0F,0x76,0xCA,                 // pcmpeqd      MM1,MM2
 1228         0x0F,0x76,0x5D,0xD8,            // pcmpeqd      MM3,-028h[RBP]
 1229 0x66,   0x0F,0x76,0xCA,                 // pcmpeqd      XMM1,XMM2
 1230 0x66,   0x0F,0x76,0x5D,0xE0,            // pcmpeqd      XMM3,-020h[RBP]
 1231         0x0F,0x64,0xCA,                 // pcmpgtb      MM1,MM2
 1232         0x0F,0x64,0x5D,0xD8,            // pcmpgtb      MM3,-028h[RBP]
 1233 0x66,   0x0F,0x64,0xCA,                 // pcmpgtb      XMM1,XMM2
 1234 0x66,   0x0F,0x64,0x5D,0xE0,            // pcmpgtb      XMM3,-020h[RBP]
 1235         0x0F,0x65,0xCA,                 // pcmpgtw      MM1,MM2
 1236         0x0F,0x65,0x5D,0xD8,            // pcmpgtw      MM3,-028h[RBP]
 1237 0x66,   0x0F,0x65,0xCA,                 // pcmpgtw      XMM1,XMM2
 1238 0x66,   0x0F,0x65,0x5D,0xE0,            // pcmpgtw      XMM3,-020h[RBP]
 1239         0x0F,0x66,0xCA,                 // pcmpgtd      MM1,MM2
 1240         0x0F,0x66,0x5D,0xD8,            // pcmpgtd      MM3,-028h[RBP]
 1241 0x66,   0x0F,0x66,0xCA,                 // pcmpgtd      XMM1,XMM2
 1242 0x66,   0x0F,0x66,0x5D,0xE0,            // pcmpgtd      XMM3,-020h[RBP]
 1243         0x0F,0xC5,0xD6,0x07,            // pextrw       EDX,MM6,7
 1244 0x66,   0x0F,0xC5,0xD6,0x07,            // pextrw       EDX,XMM6,7
 1245         0x0F,0xC4,0xF2,0x07,            // pinsrw       MM6,EDX,7
 1246         0x0F,0xC4,0x75,0xD2,0x07,       // pinsrw       MM6,-02Eh[RBP],7
 1247 0x66,   0x0F,0xC4,0xF2,0x07,            // pinsrw       XMM6,EDX,7
 1248 0x66,   0x0F,0xC4,0x75,0xD2,0x07,       // pinsrw       XMM6,-02Eh[RBP],7
 1249         0x0F,0xF5,0xCA,                 // pmaddwd      MM1,MM2
 1250         0x0F,0xF5,0x5D,0xD8,            // pmaddwd      MM3,-028h[RBP]
 1251 0x66,   0x0F,0xF5,0xCA,                 // pmaddwd      XMM1,XMM2
 1252 0x66,   0x0F,0xF5,0x5D,0xE0,            // pmaddwd      XMM3,-020h[RBP]
 1253         0x0F,0xEE,0xCA,                 // pmaxsw       MM1,MM2
 1254         0x0F,0xEE,0x5D,0xD8,            // pmaxsw       MM3,-028h[RBP]
 1255 0x66,   0x0F,0xEE,0xCA,                 // pmaxsw       XMM1,XMM2
 1256 0x66,   0x0F,0xEE,0x5D,0xE0,            // pmaxsw       XMM3,-020h[RBP]
 1257         0x0F,0xDE,0xCA,                 // pmaxub       MM1,MM2
 1258         0x0F,0xDE,0x5D,0xD8,            // pmaxub       MM3,-028h[RBP]
 1259 0x66,   0x0F,0xDE,0xCA,                 // pmaxub       XMM1,XMM2
 1260 0x66,   0x0F,0xDE,0x5D,0xE0,            // pmaxub       XMM3,-020h[RBP]
 1261         0x0F,0xEA,0xCA,                 // pminsw       MM1,MM2
 1262         0x0F,0xEA,0x5D,0xD8,            // pminsw       MM3,-028h[RBP]
 1263 0x66,   0x0F,0xEA,0xCA,                 // pminsw       XMM1,XMM2
 1264 0x66,   0x0F,0xEA,0x5D,0xE0,            // pminsw       XMM3,-020h[RBP]
 1265         0x0F,0xDA,0xCA,                 // pminub       MM1,MM2
 1266         0x0F,0xDA,0x5D,0xD8,            // pminub       MM3,-028h[RBP]
 1267 0x66,   0x0F,0xDA,0xCA,                 // pminub       XMM1,XMM2
 1268 0x66,   0x0F,0xDA,0x5D,0xE0,            // pminub       XMM3,-020h[RBP]
 1269         0x0F,0xD7,0xC8,                 // pmovmskb     ECX,MM0
 1270 0x66,   0x0F,0xD7,0xCE,                 // pmovmskb     ECX,XMM6
 1271         0x0F,0xE4,0xCA,                 // pmulhuw      MM1,MM2
 1272         0x0F,0xE4,0x5D,0xD8,            // pmulhuw      MM3,-028h[RBP]
 1273 0x66,   0x0F,0xE4,0xCA,                 // pmulhuw      XMM1,XMM2
 1274 0x66,   0x0F,0xE4,0x5D,0xE0,            // pmulhuw      XMM3,-020h[RBP]
 1275         0x0F,0xE5,0xCA,                 // pmulhw       MM1,MM2
 1276         0x0F,0xE5,0x5D,0xD8,            // pmulhw       MM3,-028h[RBP]
 1277 0x66,   0x0F,0xE5,0xCA,                 // pmulhw       XMM1,XMM2
 1278 0x66,   0x0F,0xE5,0x5D,0xE0,            // pmulhw       XMM3,-020h[RBP]
 1279         0x0F,0xD5,0xCA,                 // pmullw       MM1,MM2
 1280         0x0F,0xD5,0x5D,0xD8,            // pmullw       MM3,-028h[RBP]
 1281 0x66,   0x0F,0xD5,0xCA,                 // pmullw       XMM1,XMM2
 1282 0x66,   0x0F,0xD5,0x5D,0xE0,            // pmullw       XMM3,-020h[RBP]
 1283         0x0F,0xF4,0xCA,                 // pmuludq      MM1,MM2
 1284         0x0F,0xF4,0x5D,0xD8,            // pmuludq      MM3,-028h[RBP]
 1285 0x66,   0x0F,0xF4,0xCA,                 // pmuludq      XMM1,XMM2
 1286 0x66,   0x0F,0xF4,0x5D,0xE0,            // pmuludq      XMM3,-020h[RBP]
 1287         0x0F,0xEB,0xCA,                 // por          MM1,MM2
 1288         0x0F,0xEB,0x5D,0xD8,            // por          MM3,-028h[RBP]
 1289 0x66,   0x0F,0xEB,0xCA,                 // por          XMM1,XMM2
 1290 0x66,   0x0F,0xEB,0x5D,0xE0,            // por          XMM3,-020h[RBP]
 1291         0x0F,0x18,0x4D,0xD0,            // prefetcht0   -030h[RBP]
 1292         0x0F,0x18,0x55,0xD0,            // prefetcht1   -030h[RBP]
 1293         0x0F,0x18,0x5D,0xD0,            // prefetcht2   -030h[RBP]
 1294         0x0F,0x18,0x45,0xD0,            // prefetchnta  -030h[RBP]
 1295         0x0F,0x0D,0x4D,0xD0,            // prefetchw    -030h[RBP]
 1296         0x0F,0x0D,0x55,0xD0,            // prefetchwt1  -030h[RBP]
 1297         0x0F,0xF6,0xCA,                 // psadbw       MM1,MM2
 1298         0x0F,0xF6,0x5D,0xD8,            // psadbw       MM3,-028h[RBP]
 1299 0x66,   0x0F,0xF6,0xCA,                 // psadbw       XMM1,XMM2
 1300 0x66,   0x0F,0xF6,0x5D,0xE0,            // psadbw       XMM3,-020h[RBP]
 1301 0x66,   0x0F,0x70,0xCA,0x03,            // pshufd       XMM1,XMM2,3
 1302 0x66,   0x0F,0x70,0x5D,0xE0,0x03,       // pshufd       XMM3,-020h[RBP],3
 1303         0xF3,0x0F,0x70,0xCA,0x03,       // pshufhw      XMM1,XMM2,3
 1304         0xF3,0x0F,0x70,0x5D,0xE0,0x03,  // pshufhw      XMM3,-020h[RBP],3
 1305         0xF2,0x0F,0x70,0xCA,0x03,       // pshuflw      XMM1,XMM2,3
 1306         0xF2,0x0F,0x70,0x5D,0xE0,0x03,  // pshuflw      XMM3,-020h[RBP],3
 1307         0x0F,0x70,0xCA,0x03,            // pshufw       MM1,MM2,3
 1308         0x0F,0x70,0x5D,0xD8,0x03,       // pshufw       MM3,-028h[RBP],3
 1309 0x66,   0x0F,0x73,0xF9,0x18,            // pslldq       XMM1,018h
 1310         0x0F,0xF1,0xCA,                 // psllw        MM1,MM2
 1311         0x0F,0xF1,0x4D,0xD8,            // psllw        MM1,-028h[RBP]
 1312 0x66,   0x0F,0xF1,0xCA,                 // psllw        XMM1,XMM2
 1313 0x66,   0x0F,0xF1,0x4D,0xE0,            // psllw        XMM1,-020h[RBP]
 1314         0x0F,0x71,0xF1,0x15,            // psllw        MM1,015h
 1315 0x66,   0x0F,0x71,0xF1,0x15,            // psllw        XMM1,015h
 1316         0x0F,0xF2,0xCA,                 // pslld        MM1,MM2
 1317         0x0F,0xF2,0x4D,0xD8,            // pslld        MM1,-028h[RBP]
 1318 0x66,   0x0F,0xF2,0xCA,                 // pslld        XMM1,XMM2
 1319 0x66,   0x0F,0xF2,0x4D,0xE0,            // pslld        XMM1,-020h[RBP]
 1320         0x0F,0x72,0xF1,0x15,            // psrad        MM1,015h
 1321 0x66,   0x0F,0x72,0xF1,0x15,            // psrad        XMM1,015h
 1322         0x0F,0xF3,0xCA,                 // psllq        MM1,MM2
 1323         0x0F,0xF3,0x4D,0xD8,            // psllq        MM1,-028h[RBP]
 1324 0x66,   0x0F,0xF3,0xCA,                 // psllq        XMM1,XMM2
 1325 0x66,   0x0F,0xF3,0x4D,0xE0,            // psllq        XMM1,-020h[RBP]
 1326         0x0F,0x73,0xF1,0x15,            // psllq        MM1,015h
 1327 0x66,   0x0F,0x73,0xF1,0x15,            // psllq        XMM1,015h
 1328         0x0F,0xE1,0xCA,                 // psraw        MM1,MM2
 1329         0x0F,0xE1,0x4D,0xD8,            // psraw        MM1,-028h[RBP]
 1330 0x66,   0x0F,0xE1,0xCA,                 // psraw        XMM1,XMM2
 1331 0x66,   0x0F,0xE1,0x4D,0xE0,            // psraw        XMM1,-020h[RBP]
 1332         0x0F,0x71,0xE1,0x15,            // psraw        MM1,015h
 1333 0x66,   0x0F,0x71,0xE1,0x15,            // psraw        XMM1,015h
 1334         0x0F,0xE2,0xCA,                 // psrad        MM1,MM2
 1335         0x0F,0xE2,0x4D,0xD8,            // psrad        MM1,-028h[RBP]
 1336 0x66,   0x0F,0xE2,0xCA,                 // psrad        XMM1,XMM2
 1337 0x66,   0x0F,0xE2,0x4D,0xE0,            // psrad        XMM1,-020h[RBP]
 1338         0x0F,0x72,0xE1,0x15,            // psrad        MM1,015h
 1339 0x66,   0x0F,0x72,0xE1,0x15,            // psrad        XMM1,015h
 1340 0x66,   0x0F,0x73,0xD9,0x18,            // psrldq       XMM1,020h
 1341         0x0F,0xD1,0xCA,                 // psrlw        MM1,MM2
 1342         0x0F,0xD1,0x4D,0xD8,            // psrlw        MM1,-028h[RBP]
 1343 0x66,   0x0F,0xD1,0xCA,                 // psrlw        XMM1,XMM2
 1344 0x66,   0x0F,0xD1,0x4D,0xE0,            // psrlw        XMM1,-020h[RBP]
 1345         0x0F,0x71,0xD1,0x15,            // psrlw        MM1,015h
 1346 0x66,   0x0F,0x71,0xD1,0x15,            // psrlw        XMM1,015h
 1347         0x0F,0xD2,0xCA,                 // psrld        MM1,MM2
 1348         0x0F,0xD2,0x4D,0xD8,            // psrld        MM1,-028h[RBP]
 1349 0x66,   0x0F,0xD2,0xCA,                 // psrld        XMM1,XMM2
 1350 0x66,   0x0F,0xD2,0x4D,0xE0,            // psrld        XMM1,-020h[RBP]
 1351         0x0F,0x72,0xD1,0x15,            // psrld        MM1,015h
 1352 0x66,   0x0F,0x72,0xD1,0x15,            // psrld        XMM1,015h
 1353         0x0F,0xD3,0xCA,                 // psrlq        MM1,MM2
 1354         0x0F,0xD3,0x4D,0xD8,            // psrlq        MM1,-028h[RBP]
 1355 0x66,   0x0F,0xD3,0xCA,                 // psrlq        XMM1,XMM2
 1356 0x66,   0x0F,0xD3,0x4D,0xE0,            // psrlq        XMM1,-020h[RBP]
 1357         0x0F,0x73,0xD1,0x15,            // psrlq        MM1,015h
 1358 0x66,   0x0F,0x73,0xD1,0x15,            // psrlq        XMM1,015h
 1359         0x0F,0xF8,0xCA,                 // psubb        MM1,MM2
 1360         0x0F,0xF8,0x4D,0xD8,            // psubb        MM1,-028h[RBP]
 1361 0x66,   0x0F,0xF8,0xCA,                 // psubb        XMM1,XMM2
 1362 0x66,   0x0F,0xF8,0x4D,0xE0,            // psubb        XMM1,-020h[RBP]
 1363         0x0F,0xF9,0xCA,                 // psubw        MM1,MM2
 1364         0x0F,0xF9,0x4D,0xD8,            // psubw        MM1,-028h[RBP]
 1365 0x66,   0x0F,0xF9,0xCA,                 // psubw        XMM1,XMM2
 1366 0x66,   0x0F,0xF9,0x4D,0xE0,            // psubw        XMM1,-020h[RBP]
 1367         0x0F,0xFA,0xCA,                 // psubd        MM1,MM2
 1368         0x0F,0xFA,0x4D,0xD8,            // psubd        MM1,-028h[RBP]
 1369 0x66,   0x0F,0xFA,0xCA,                 // psubd        XMM1,XMM2
 1370 0x66,   0x0F,0xFA,0x4D,0xE0,            // psubd        XMM1,-020h[RBP]
 1371         0x0F,0xFB,0xCA,                 // psubq        MM1,MM2
 1372         0x0F,0xFB,0x4D,0xD8,            // psubq        MM1,-028h[RBP]
 1373 0x66,   0x0F,0xFB,0xCA,                 // psubq        XMM1,XMM2
 1374 0x66,   0x0F,0xFB,0x4D,0xE0,            // psubq        XMM1,-020h[RBP]
 1375         0x0F,0xE8,0xCA,                 // psubsb       MM1,MM2
 1376         0x0F,0xE8,0x4D,0xD8,            // psubsb       MM1,-028h[RBP]
 1377 0x66,   0x0F,0xE8,0xCA,                 // psubsb       XMM1,XMM2
 1378 0x66,   0x0F,0xE8,0x4D,0xE0,            // psubsb       XMM1,-020h[RBP]
 1379         0x0F,0xE9,0xCA,                 // psubsw       MM1,MM2
 1380         0x0F,0xE9,0x4D,0xD8,            // psubsw       MM1,-028h[RBP]
 1381 0x66,   0x0F,0xE9,0xCA,                 // psubsw       XMM1,XMM2
 1382 0x66,   0x0F,0xE9,0x4D,0xE0,            // psubsw       XMM1,-020h[RBP]
 1383         0x0F,0xD8,0xCA,                 // psubusb      MM1,MM2
 1384         0x0F,0xD8,0x4D,0xD8,            // psubusb      MM1,-028h[RBP]
 1385 0x66,   0x0F,0xD8,0xCA,                 // psubusb      XMM1,XMM2
 1386 0x66,   0x0F,0xD8,0x4D,0xE0,            // psubusb      XMM1,-020h[RBP]
 1387         0x0F,0xD9,0xCA,                 // psubusw      MM1,MM2
 1388         0x0F,0xD9,0x4D,0xD8,            // psubusw      MM1,-028h[RBP]
 1389 0x66,   0x0F,0xD9,0xCA,                 // psubusw      XMM1,XMM2
 1390 0x66,   0x0F,0xD9,0x4D,0xE0,            // psubusw      XMM1,-020h[RBP]
 1391         0x0F,0x68,0xCA,                 // punpckhbw    MM1,MM2
 1392         0x0F,0x68,0x4D,0xD8,            // punpckhbw    MM1,-028h[RBP]
 1393 0x66,   0x0F,0x68,0xCA,                 // punpckhbw    XMM1,XMM2
 1394 0x66,   0x0F,0x68,0x4D,0xE0,            // punpckhbw    XMM1,-020h[RBP]
 1395         0x0F,0x69,0xCA,                 // punpckhwd    MM1,MM2
 1396         0x0F,0x69,0x4D,0xD8,            // punpckhwd    MM1,-028h[RBP]
 1397 0x66,   0x0F,0x69,0xCA,                 // punpckhwd    XMM1,XMM2
 1398 0x66,   0x0F,0x69,0x4D,0xE0,            // punpckhwd    XMM1,-020h[RBP]
 1399         0x0F,0x6A,0xCA,                 // punpckhdq    MM1,MM2
 1400         0x0F,0x6A,0x4D,0xD8,            // punpckhdq    MM1,-028h[RBP]
 1401 0x66,   0x0F,0x6A,0xCA,                 // punpckhdq    XMM1,XMM2
 1402 0x66,   0x0F,0x6A,0x4D,0xE0,            // punpckhdq    XMM1,-020h[RBP]
 1403 0x66,   0x0F,0x6D,0xCA,                 // punpckhqdq   XMM1,XMM2
 1404 0x66,   0x0F,0x6D,0x4D,0xE0,            // punpckhqdq   XMM1,-020h[RBP]
 1405         0x0F,0x60,0xCA,                 // punpcklbw    MM1,MM2
 1406         0x0F,0x60,0x4D,0xD8,            // punpcklbw    MM1,-028h[RBP]
 1407 0x66,   0x0F,0x60,0xCA,                 // punpcklbw    XMM1,XMM2
 1408 0x66,   0x0F,0x60,0x4D,0xE0,            // punpcklbw    XMM1,-020h[RBP]
 1409         0x0F,0x61,0xCA,                 // punpcklwd    MM1,MM2
 1410         0x0F,0x61,0x4D,0xD8,            // punpcklwd    MM1,-028h[RBP]
 1411 0x66,   0x0F,0x61,0xCA,                 // punpcklwd    XMM1,XMM2
 1412 0x66,   0x0F,0x61,0x4D,0xE0,            // punpcklwd    XMM1,-020h[RBP]
 1413         0x0F,0x62,0xCA,                 // punpckldq    MM1,MM2
 1414         0x0F,0x62,0x4D,0xD8,            // punpckldq    MM1,-028h[RBP]
 1415 0x66,   0x0F,0x62,0xCA,                 // punpckldq    XMM1,XMM2
 1416 0x66,   0x0F,0x62,0x4D,0xE0,            // punpckldq    XMM1,-020h[RBP]
 1417 0x66,   0x0F,0x6C,0xCA,                 // punpcklqdq   XMM1,XMM2
 1418 0x66,   0x0F,0x6C,0x4D,0xE0,            // punpcklqdq   XMM1,-020h[RBP]
 1419         0x0F,0xEF,0xCA,                 // pxor         MM1,MM2
 1420         0x0F,0xEF,0x4D,0xD8,            // pxor         MM1,-028h[RBP]
 1421 0x66,   0x0F,0xEF,0xCA,                 // pxor         XMM1,XMM2
 1422 0x66,   0x0F,0xEF,0x4D,0xE0,            // pxor         XMM1,-020h[RBP]
 1423         0x0F,0x53,0xCA,                 // rcpps        XMM1,XMM2
 1424         0x0F,0x53,0x4D,0xE0,            // rcpps        XMM1,-020h[RBP]
 1425         0xF3,0x0F,0x53,0xCA,            // rcpss        XMM1,XMM2
 1426         0xF3,0x0F,0x53,0x4D,0xD4,       // rcpss        XMM1,-02Ch[RBP]
 1427         0x0F,0x52,0xCA,                 // rsqrtps      XMM1,XMM2
 1428         0x0F,0x52,0x4D,0xE0,            // rsqrtps      XMM1,-020h[RBP]
 1429         0xF3,0x0F,0x52,0xCA,            // rsqrtss      XMM1,XMM2
 1430         0xF3,0x0F,0x52,0x4D,0xD4,       // rsqrtss      XMM1,-02Ch[RBP]
 1431 0x66,   0x0F,0xC6,0xCA,0x03,            // shufpd       XMM1,XMM2,3
 1432 0x66,   0x0F,0xC6,0x4D,0xE0,0x04,       // shufpd       XMM1,-020h[RBP],4
 1433         0x0F,0xC6,0xCA,0x03,            // shufps       XMM1,XMM2,3
 1434         0x0F,0xC6,0x4D,0xE0,0x04,       // shufps       XMM1,-020h[RBP],4
 1435 0x66,   0x0F,0x2E,0xE6,                 // ucimisd      XMM4,XMM6
 1436 0x66,   0x0F,0x2E,0x6D,0xD8,            // ucimisd      XMM5,-028h[RBP]
 1437         0x0F,0x2E,0xF7,                 // ucomiss      XMM6,XMM7
 1438         0x0F,0x2E,0x7D,0xD4,            // ucomiss      XMM7,-02Ch[RBP]
 1439 0x66,   0x0F,0x15,0xE6,                 // uppckhpd     XMM4,XMM6
 1440 0x66,   0x0F,0x15,0x6D,0xE0,            // uppckhpd     XMM5,-020h[RBP]
 1441         0x0F,0x15,0xE6,                 // unpckhps     XMM4,XMM6
 1442         0x0F,0x15,0x6D,0xE0,            // unpckhps     XMM5,-020h[RBP]
 1443 0x66,   0x0F,0x14,0xE6,                 // uppcklpd     XMM4,XMM6
 1444 0x66,   0x0F,0x14,0x6D,0xE0,            // uppcklpd     XMM5,-020h[RBP]
 1445         0x0F,0x14,0xE6,                 // unpcklps     XMM4,XMM6
 1446         0x0F,0x14,0x6D,0xE0,            // unpcklps     XMM5,-020h[RBP]
 1447 0x66,   0x0F,0x57,0xCA,                 // xorpd        XMM1,XMM2
 1448 0x66,   0x0F,0x57,0x4D,0xE0,            // xorpd        XMM1,-020h[RBP]
 1449         0x0F,0x57,0xCA,                 // xorps        XMM1,XMM2
 1450         0x0F,0x57,0x4D,0xE0,            // xorps        XMM1,-020h[RBP]
 1451     ];
 1452     int i;
 1453 
 1454     asm
 1455     {
 1456         call    L1                      ;
 1457 
 1458         movmskpd ESI,XMM3               ;
 1459         movmskps ESI,XMM3               ;
 1460 
 1461         movntdq m128[RBP],XMM2          ;
 1462         movnti  m32[RBP],ECX            ;
 1463         movntpd m128[RBP],XMM3          ;
 1464         movntps m128[RBP],XMM4          ;
 1465         movntq  m64[RBP],MM5            ;
 1466 
 1467         movq    MM1,MM2                 ;
 1468         movq    MM2,m64[RBP]            ;
 1469         movq    m64[RBP],MM3            ;
 1470         movq    XMM1,XMM2               ;
 1471         movq    XMM2,m64[RBP]           ;
 1472         movq    m64[RBP],XMM3           ;
 1473 
 1474         movq2dq XMM3,MM2                ;
 1475 
 1476         movsd                           ;
 1477         movsd   XMM1,XMM2               ;
 1478         movsd   XMM3,m64[RBP]           ;
 1479         movsd   m64[RBP],XMM4           ;
 1480 
 1481         movss   XMM1,XMM2               ;
 1482         movss   XMM3,m32[RBP]           ;
 1483         movss   m32[RBP],XMM4           ;
 1484 
 1485         movupd  XMM1,XMM2               ;
 1486         movupd  XMM3,m128[RBP]          ;
 1487         movupd  m128[RBP],XMM4          ;
 1488 
 1489         movups  XMM1,XMM2               ;
 1490         movups  XMM3,m128[RBP]          ;
 1491         movups  m128[RBP],XMM4          ;
 1492 
 1493         orpd    XMM1,XMM2               ;
 1494         orpd    XMM3,m128[RBP]          ;
 1495         orps    XMM1,XMM2               ;
 1496         orps    XMM3,m128[RBP]          ;
 1497 
 1498         packsswb MM1,MM2                ;
 1499         packsswb MM3,m64[RBP]           ;
 1500         packsswb XMM1,XMM2              ;
 1501         packsswb XMM3,m128[RBP]         ;
 1502 
 1503         packssdw MM1,MM2                ;
 1504         packssdw MM3,m64[RBP]           ;
 1505         packssdw XMM1,XMM2              ;
 1506         packssdw XMM3,m128[RBP]         ;
 1507 
 1508         packuswb MM1,MM2                ;
 1509         packuswb MM3,m64[RBP]           ;
 1510         packuswb XMM1,XMM2              ;
 1511         packuswb XMM3,m128[RBP]         ;
 1512 
 1513         paddb   MM1,MM2                 ;
 1514         paddb   MM3,m64[RBP]            ;
 1515         paddb   XMM1,XMM2               ;
 1516         paddb   XMM3,m128[RBP]          ;
 1517 
 1518         paddw   MM1,MM2                 ;
 1519         paddw   MM3,m64[RBP]            ;
 1520         paddw   XMM1,XMM2               ;
 1521         paddw   XMM3,m128[RBP]          ;
 1522 
 1523         paddd   MM1,MM2                 ;
 1524         paddd   MM3,m64[RBP]            ;
 1525         paddd   XMM1,XMM2               ;
 1526         paddd   XMM3,m128[RBP]          ;
 1527 
 1528         paddq   MM1,MM2                 ;
 1529         paddq   MM3,m64[RBP]            ;
 1530         paddq   XMM1,XMM2               ;
 1531         paddq   XMM3,m128[RBP]          ;
 1532 
 1533         paddsb  MM1,MM2                 ;
 1534         paddsb  MM3,m64[RBP]            ;
 1535         paddsb  XMM1,XMM2               ;
 1536         paddsb  XMM3,m128[RBP]          ;
 1537 
 1538         paddsw  MM1,MM2                 ;
 1539         paddsw  MM3,m64[RBP]            ;
 1540         paddsw  XMM1,XMM2               ;
 1541         paddsw  XMM3,m128[RBP]          ;
 1542 
 1543         paddusb MM1,MM2                 ;
 1544         paddusb MM3,m64[RBP]            ;
 1545         paddusb XMM1,XMM2               ;
 1546         paddusb XMM3,m128[RBP]          ;
 1547 
 1548         paddusw MM1,MM2                 ;
 1549         paddusw MM3,m64[RBP]            ;
 1550         paddusw XMM1,XMM2               ;
 1551         paddusw XMM3,m128[RBP]          ;
 1552 
 1553         pand    MM1,MM2                 ;
 1554         pand    MM3,m64[RBP]            ;
 1555         pand    XMM1,XMM2               ;
 1556         pand    XMM3,m128[RBP]          ;
 1557 
 1558         pandn   MM1,MM2                 ;
 1559         pandn   MM3,m64[RBP]            ;
 1560         pandn   XMM1,XMM2               ;
 1561         pandn   XMM3,m128[RBP]          ;
 1562 
 1563         pavgb   MM1,MM2                 ;
 1564         pavgb   MM3,m64[RBP]            ;
 1565         pavgb   XMM1,XMM2               ;
 1566         pavgb   XMM3,m128[RBP]          ;
 1567 
 1568         pavgw   MM1,MM2                 ;
 1569         pavgw   MM3,m64[RBP]            ;
 1570         pavgw   XMM1,XMM2               ;
 1571         pavgw   XMM3,m128[RBP]          ;
 1572 
 1573         pcmpeqb MM1,MM2                 ;
 1574         pcmpeqb MM3,m64[RBP]            ;
 1575         pcmpeqb XMM1,XMM2               ;
 1576         pcmpeqb XMM3,m128[RBP]          ;
 1577 
 1578         pcmpeqw MM1,MM2                 ;
 1579         pcmpeqw MM3,m64[RBP]            ;
 1580         pcmpeqw XMM1,XMM2               ;
 1581         pcmpeqw XMM3,m128[RBP]          ;
 1582 
 1583         pcmpeqd MM1,MM2                 ;
 1584         pcmpeqd MM3,m64[RBP]            ;
 1585         pcmpeqd XMM1,XMM2               ;
 1586         pcmpeqd XMM3,m128[RBP]          ;
 1587 
 1588         pcmpgtb MM1,MM2                 ;
 1589         pcmpgtb MM3,m64[RBP]            ;
 1590         pcmpgtb XMM1,XMM2               ;
 1591         pcmpgtb XMM3,m128[RBP]          ;
 1592 
 1593         pcmpgtw MM1,MM2                 ;
 1594         pcmpgtw MM3,m64[RBP]            ;
 1595         pcmpgtw XMM1,XMM2               ;
 1596         pcmpgtw XMM3,m128[RBP]          ;
 1597 
 1598         pcmpgtd MM1,MM2                 ;
 1599         pcmpgtd MM3,m64[RBP]            ;
 1600         pcmpgtd XMM1,XMM2               ;
 1601         pcmpgtd XMM3,m128[RBP]          ;
 1602 
 1603         pextrw  EDX,MM6,7               ;
 1604         pextrw  EDX,XMM6,7              ;
 1605 
 1606         pinsrw  MM6,EDX,7               ;
 1607         pinsrw  MM6,m16[RBP],7          ;
 1608         pinsrw  XMM6,EDX,7              ;
 1609         pinsrw  XMM6,m16[RBP],7         ;
 1610 
 1611         pmaddwd MM1,MM2                 ;
 1612         pmaddwd MM3,m64[RBP]            ;
 1613         pmaddwd XMM1,XMM2               ;
 1614         pmaddwd XMM3,m128[RBP]          ;
 1615 
 1616         pmaxsw  MM1,MM2                 ;
 1617         pmaxsw  MM3,m64[RBP]            ;
 1618         pmaxsw  XMM1,XMM2               ;
 1619         pmaxsw  XMM3,m128[RBP]          ;
 1620 
 1621         pmaxub  MM1,MM2                 ;
 1622         pmaxub  MM3,m64[RBP]            ;
 1623         pmaxub  XMM1,XMM2               ;
 1624         pmaxub  XMM3,m128[RBP]          ;
 1625 
 1626         pminsw  MM1,MM2                 ;
 1627         pminsw  MM3,m64[RBP]            ;
 1628         pminsw  XMM1,XMM2               ;
 1629         pminsw  XMM3,m128[RBP]          ;
 1630 
 1631         pminub  MM1,MM2                 ;
 1632         pminub  MM3,m64[RBP]            ;
 1633         pminub  XMM1,XMM2               ;
 1634         pminub  XMM3,m128[RBP]          ;
 1635 
 1636         pmovmskb ECX,MM0                ;
 1637         pmovmskb ECX,XMM6               ;
 1638 
 1639         pmulhuw MM1,MM2                 ;
 1640         pmulhuw MM3,m64[RBP]            ;
 1641         pmulhuw XMM1,XMM2               ;
 1642         pmulhuw XMM3,m128[RBP]          ;
 1643 
 1644         pmulhw  MM1,MM2                 ;
 1645         pmulhw  MM3,m64[RBP]            ;
 1646         pmulhw  XMM1,XMM2               ;
 1647         pmulhw  XMM3,m128[RBP]          ;
 1648 
 1649         pmullw  MM1,MM2                 ;
 1650         pmullw  MM3,m64[RBP]            ;
 1651         pmullw  XMM1,XMM2               ;
 1652         pmullw  XMM3,m128[RBP]          ;
 1653 
 1654         pmuludq MM1,MM2                 ;
 1655         pmuludq MM3,m64[RBP]            ;
 1656         pmuludq XMM1,XMM2               ;
 1657         pmuludq XMM3,m128[RBP]          ;
 1658 
 1659         por     MM1,MM2                 ;
 1660         por     MM3,m64[RBP]            ;
 1661         por     XMM1,XMM2               ;
 1662         por     XMM3,m128[RBP]          ;
 1663 
 1664         prefetcht0  m8[RBP]             ;
 1665         prefetcht1  m8[RBP]             ;
 1666         prefetcht2  m8[RBP]             ;
 1667         prefetchnta m8[RBP]             ;
 1668         prefetchw   m8[EBP]             ;
 1669         prefetchwt1 m8[EBP]             ;
 1670 
 1671         psadbw  MM1,MM2                 ;
 1672         psadbw  MM3,m64[RBP]            ;
 1673         psadbw  XMM1,XMM2               ;
 1674         psadbw  XMM3,m128[RBP]          ;
 1675 
 1676         pshufd  XMM1,XMM2,3             ;
 1677         pshufd  XMM3,m128[RBP],3        ;
 1678         pshufhw XMM1,XMM2,3             ;
 1679         pshufhw XMM3,m128[RBP],3        ;
 1680         pshuflw XMM1,XMM2,3             ;
 1681         pshuflw XMM3,m128[RBP],3        ;
 1682         pshufw  MM1,MM2,3               ;
 1683         pshufw  MM3,m64[RBP],3          ;
 1684 
 1685         pslldq  XMM1,0x18               ;
 1686 
 1687         psllw   MM1,MM2                 ;
 1688         psllw   MM1,m64[RBP]            ;
 1689         psllw   XMM1,XMM2               ;
 1690         psllw   XMM1,m128[RBP]          ;
 1691         psllw   MM1,0x15                ;
 1692         psllw   XMM1,0x15               ;
 1693 
 1694         pslld   MM1,MM2                 ;
 1695         pslld   MM1,m64[RBP]            ;
 1696         pslld   XMM1,XMM2               ;
 1697         pslld   XMM1,m128[RBP]          ;
 1698         pslld   MM1,0x15                ;
 1699         pslld   XMM1,0x15               ;
 1700 
 1701         psllq   MM1,MM2                 ;
 1702         psllq   MM1,m64[RBP]            ;
 1703         psllq   XMM1,XMM2               ;
 1704         psllq   XMM1,m128[RBP]          ;
 1705         psllq   MM1,0x15                ;
 1706         psllq   XMM1,0x15               ;
 1707 
 1708         psraw   MM1,MM2                 ;
 1709         psraw   MM1,m64[RBP]            ;
 1710         psraw   XMM1,XMM2               ;
 1711         psraw   XMM1,m128[RBP]          ;
 1712         psraw   MM1,0x15                ;
 1713         psraw   XMM1,0x15               ;
 1714 
 1715         psrad   MM1,MM2                 ;
 1716         psrad   MM1,m64[RBP]            ;
 1717         psrad   XMM1,XMM2               ;
 1718         psrad   XMM1,m128[RBP]          ;
 1719         psrad   MM1,0x15                ;
 1720         psrad   XMM1,0x15               ;
 1721 
 1722         psrldq  XMM1,0x18               ;
 1723 
 1724         psrlw   MM1,MM2                 ;
 1725         psrlw   MM1,m64[RBP]            ;
 1726         psrlw   XMM1,XMM2               ;
 1727         psrlw   XMM1,m128[RBP]          ;
 1728         psrlw   MM1,0x15                ;
 1729         psrlw   XMM1,0x15               ;
 1730 
 1731         psrld   MM1,MM2                 ;
 1732         psrld   MM1,m64[RBP]            ;
 1733         psrld   XMM1,XMM2               ;
 1734         psrld   XMM1,m128[RBP]          ;
 1735         psrld   MM1,0x15                ;
 1736         psrld   XMM1,0x15               ;
 1737 
 1738         psrlq   MM1,MM2                 ;
 1739         psrlq   MM1,m64[RBP]            ;
 1740         psrlq   XMM1,XMM2               ;
 1741         psrlq   XMM1,m128[RBP]          ;
 1742         psrlq   MM1,0x15                ;
 1743         psrlq   XMM1,0x15               ;
 1744 
 1745         psubb   MM1,MM2                 ;
 1746         psubb   MM1,m64[RBP]            ;
 1747         psubb   XMM1,XMM2               ;
 1748         psubb   XMM1,m128[RBP]          ;
 1749 
 1750         psubw   MM1,MM2                 ;
 1751         psubw   MM1,m64[RBP]            ;
 1752         psubw   XMM1,XMM2               ;
 1753         psubw   XMM1,m128[RBP]          ;
 1754 
 1755         psubd   MM1,MM2                 ;
 1756         psubd   MM1,m64[RBP]            ;
 1757         psubd   XMM1,XMM2               ;
 1758         psubd   XMM1,m128[RBP]          ;
 1759 
 1760         psubq   MM1,MM2                 ;
 1761         psubq   MM1,m64[RBP]            ;
 1762         psubq   XMM1,XMM2               ;
 1763         psubq   XMM1,m128[RBP]          ;
 1764 
 1765         psubsb  MM1,MM2                 ;
 1766         psubsb  MM1,m64[RBP]            ;
 1767         psubsb  XMM1,XMM2               ;
 1768         psubsb  XMM1,m128[RBP]          ;
 1769 
 1770         psubsw  MM1,MM2                 ;
 1771         psubsw  MM1,m64[RBP]            ;
 1772         psubsw  XMM1,XMM2               ;
 1773         psubsw  XMM1,m128[RBP]          ;
 1774 
 1775         psubusb MM1,MM2                 ;
 1776         psubusb MM1,m64[RBP]            ;
 1777         psubusb XMM1,XMM2               ;
 1778         psubusb XMM1,m128[RBP]          ;
 1779 
 1780         psubusw MM1,MM2                 ;
 1781         psubusw MM1,m64[RBP]            ;
 1782         psubusw XMM1,XMM2               ;
 1783         psubusw XMM1,m128[RBP]          ;
 1784 
 1785         punpckhbw MM1,MM2               ;
 1786         punpckhbw MM1,m64[RBP]          ;
 1787         punpckhbw XMM1,XMM2             ;
 1788         punpckhbw XMM1,m128[RBP]        ;
 1789 
 1790         punpckhwd MM1,MM2               ;
 1791         punpckhwd MM1,m64[RBP]          ;
 1792         punpckhwd XMM1,XMM2             ;
 1793         punpckhwd XMM1,m128[RBP]        ;
 1794 
 1795         punpckhdq MM1,MM2               ;
 1796         punpckhdq MM1,m64[RBP]          ;
 1797         punpckhdq XMM1,XMM2             ;
 1798         punpckhdq XMM1,m128[RBP]        ;
 1799 
 1800         punpckhqdq XMM1,XMM2            ;
 1801         punpckhqdq XMM1,m128[RBP]       ;
 1802 
 1803         punpcklbw MM1,MM2               ;
 1804         punpcklbw MM1,m64[RBP]          ;
 1805         punpcklbw XMM1,XMM2             ;
 1806         punpcklbw XMM1,m128[RBP]        ;
 1807 
 1808         punpcklwd MM1,MM2               ;
 1809         punpcklwd MM1,m64[RBP]          ;
 1810         punpcklwd XMM1,XMM2             ;
 1811         punpcklwd XMM1,m128[RBP]        ;
 1812 
 1813         punpckldq MM1,MM2               ;
 1814         punpckldq MM1,m64[RBP]          ;
 1815         punpckldq XMM1,XMM2             ;
 1816         punpckldq XMM1,m128[RBP]        ;
 1817 
 1818         punpcklqdq XMM1,XMM2            ;
 1819         punpcklqdq XMM1,m128[RBP]       ;
 1820 
 1821         pxor    MM1,MM2                 ;
 1822         pxor    MM1,m64[RBP]            ;
 1823         pxor    XMM1,XMM2               ;
 1824         pxor    XMM1,m128[RBP]          ;
 1825 
 1826         rcpps   XMM1,XMM2               ;
 1827         rcpps   XMM1,m128[RBP]          ;
 1828         rcpss   XMM1,XMM2               ;
 1829         rcpss   XMM1,m32[RBP]           ;
 1830 
 1831         rsqrtps XMM1,XMM2               ;
 1832         rsqrtps XMM1,m128[RBP]          ;
 1833         rsqrtss XMM1,XMM2               ;
 1834         rsqrtss XMM1,m32[RBP]           ;
 1835 
 1836         shufpd  XMM1,XMM2,3             ;
 1837         shufpd  XMM1,m128[RBP],4        ;
 1838         shufps  XMM1,XMM2,3             ;
 1839         shufps  XMM1,m128[RBP],4        ;
 1840 
 1841         ucomisd XMM4,XMM6               ;
 1842         ucomisd XMM5,m64[RBP]           ;
 1843         ucomiss XMM6,XMM7               ;
 1844         ucomiss XMM7,m32[RBP]           ;
 1845 
 1846         unpckhpd XMM4,XMM6              ;
 1847         unpckhpd XMM5,m128[RBP]         ;
 1848         unpckhps XMM4,XMM6              ;
 1849         unpckhps XMM5,m128[RBP]         ;
 1850         unpcklpd XMM4,XMM6              ;
 1851         unpcklpd XMM5,m128[RBP]         ;
 1852         unpcklps XMM4,XMM6              ;
 1853         unpcklps XMM5,m128[RBP]         ;
 1854 
 1855         xorpd   XMM1,XMM2               ;
 1856         xorpd   XMM1,m128[RBP]          ;
 1857         xorps   XMM1,XMM2               ;
 1858         xorps   XMM1,m128[RBP]          ;
 1859 L1:                                     ;
 1860         pop     RBX                     ;
 1861         mov     p[RBP],RBX              ;
 1862     }
 1863     for (i = 0; i < data.length; i++)
 1864     {
 1865         //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], data[i]);
 1866         assert(p[i] == data[i]);
 1867     }
 1868 }
 1869 
 1870 /****************************************************/
 1871 
 1872 void test15()
 1873 {
          // Encoding test for the 3DNow! mnemonics named in `data`: each asm
          // statement below is assembled in a register form (MM3,MM4) or a
          // memory form (MM3,m64[RBP]) and the emitted bytes are compared
          // with `data`.  The instructions are only assembled, never
          // executed: `call L1` transfers control past them.
          //
          // m32 and m128 are not referenced by the asm in this function.
          // NOTE(review): the expected bytes hard-code m64 at -028h[RBP], so
          // these declarations presumably also pin the frame layout --
          // confirm before adding, removing or reordering locals.
 1874     int m32;
 1875     long m64;
 1876     M128 m128;
          // Receives the address of the first assembled instruction (see L1).
 1877     ubyte *p;
          // Expected machine code, one row per asm statement, in the same
          // order.  Each row is 0F 0F, a ModRM byte (followed by the 8-bit
          // displacement 0xD8 in the memory form) and a final byte that
          // differs per mnemonic.
 1878     static ubyte[] data =
 1879     [
 1880         0x0F,0x0F,0xDC,0xBF,            // pavgusb      MM3,MM4
 1881         0x0F,0x0F,0x5D,0xD8,0xBF,       // pavgusb      MM3,-028h[RBP]
 1882         0x0F,0x0F,0xDC,0x1D,            // pf2id        MM3,MM4
 1883         0x0F,0x0F,0x5D,0xD8,0x1D,       // pf2id        MM3,-028h[RBP]
 1884         0x0F,0x0F,0xDC,0xAE,            // pfacc        MM3,MM4
 1885         0x0F,0x0F,0x5D,0xD8,0xAE,       // pfacc        MM3,-028h[RBP]
 1886         0x0F,0x0F,0xDC,0x9E,            // pfadd        MM3,MM4
 1887         0x0F,0x0F,0x5D,0xD8,0x9E,       // pfadd        MM3,-028h[RBP]
 1888         0x0F,0x0F,0xDC,0xB0,            // pfcmpeq      MM3,MM4
 1889         0x0F,0x0F,0x5D,0xD8,0xB0,       // pfcmpeq      MM3,-028h[RBP]
 1890         0x0F,0x0F,0xDC,0x90,            // pfcmpge      MM3,MM4
 1891         0x0F,0x0F,0x5D,0xD8,0x90,       // pfcmpge      MM3,-028h[RBP]
 1892         0x0F,0x0F,0xDC,0xA0,            // pfcmpgt      MM3,MM4
 1893         0x0F,0x0F,0x5D,0xD8,0xA0,       // pfcmpgt      MM3,-028h[RBP]
 1894         0x0F,0x0F,0xDC,0xA4,            // pfmax        MM3,MM4
 1895         0x0F,0x0F,0x5D,0xD8,0x94,       // pfmin        MM3,-028h[RBP]
 1896         0x0F,0x0F,0xDC,0xB4,            // pfmul        MM3,MM4
 1897         0x0F,0x0F,0x5D,0xD8,0xB4,       // pfmul        MM3,-028h[RBP]
 1898         0x0F,0x0F,0xDC,0x8A,            // pfnacc       MM3,MM4
 1899         0x0F,0x0F,0x5D,0xD8,0x8E,       // pfpnacc      MM3,-028h[RBP]
 1900         0x0F,0x0F,0xDC,0x96,            // pfrcp        MM3,MM4
 1901         0x0F,0x0F,0x5D,0xD8,0x96,       // pfrcp        MM3,-028h[RBP]
 1902         0x0F,0x0F,0xDC,0xA6,            // pfrcpit1     MM3,MM4
 1903         0x0F,0x0F,0x5D,0xD8,0xA6,       // pfrcpit1     MM3,-028h[RBP]
 1904         0x0F,0x0F,0xDC,0xB6,            // pfrcpit2     MM3,MM4
 1905         0x0F,0x0F,0x5D,0xD8,0xB6,       // pfrcpit2     MM3,-028h[RBP]
 1906         0x0F,0x0F,0xDC,0x97,            // pfrsqrt      MM3,MM4
 1907         0x0F,0x0F,0x5D,0xD8,0xA7,       // pfrsqit1     MM3,-028h[RBP]
 1908         0x0F,0x0F,0xDC,0x9A,            // pfsub        MM3,MM4
 1909         0x0F,0x0F,0x5D,0xD8,0x9A,       // pfsub        MM3,-028h[RBP]
 1910         0x0F,0x0F,0xDC,0xAA,            // pfsubr       MM3,MM4
 1911         0x0F,0x0F,0x5D,0xD8,0xAA,       // pfsubr       MM3,-028h[RBP]
 1912         0x0F,0x0F,0xDC,0x0D,            // pi2fd        MM3,MM4
 1913         0x0F,0x0F,0x5D,0xD8,0x0D,       // pi2fd        MM3,-028h[RBP]
 1914         0x0F,0x0F,0xDC,0xB7,            // pmulhrw      MM3,MM4
 1915         0x0F,0x0F,0x5D,0xD8,0xB7,       // pmulhrw      MM3,-028h[RBP]
 1916         0x0F,0x0F,0xDC,0xBB,            // pswapd       MM3,MM4
 1917         0x0F,0x0F,0x5D,0xD8,0xBB,       // pswapd       MM3,-028h[RBP]
 1918     ];
 1919     int i;
 1920 
 1921     asm
 1922     {
              // Pushes the address of the next instruction (the first one
              // under test) and jumps to L1, skipping everything in between.
 1923         call    L1                      ;
 1924 
 1925         pavgusb MM3,MM4                 ;
 1926         pavgusb MM3,m64[RBP]            ;
 1927 
 1928         pf2id   MM3,MM4                 ;
 1929         pf2id   MM3,m64[RBP]            ;
 1930 
 1931         pfacc   MM3,MM4                 ;
 1932         pfacc   MM3,m64[RBP]            ;
 1933 
 1934         pfadd   MM3,MM4                 ;
 1935         pfadd   MM3,m64[RBP]            ;
 1936 
 1937         pfcmpeq MM3,MM4                 ;
 1938         pfcmpeq MM3,m64[RBP]            ;
 1939 
 1940         pfcmpge MM3,MM4                 ;
 1941         pfcmpge MM3,m64[RBP]            ;
 1942 
 1943         pfcmpgt MM3,MM4                 ;
 1944         pfcmpgt MM3,m64[RBP]            ;
 1945 
              // This pair covers two different mnemonics: pfmax in register
              // form, pfmin in memory form.
 1946         pfmax   MM3,MM4                 ;
 1947         pfmin   MM3,m64[RBP]            ;
 1948 
 1949         pfmul   MM3,MM4                 ;
 1950         pfmul   MM3,m64[RBP]            ;
 1951 
              // Two different mnemonics again: pfnacc (register form) and
              // pfpnacc (memory form).
 1952         pfnacc  MM3,MM4                 ;
 1953         pfpnacc MM3,m64[RBP]            ;
 1954 
 1955         pfrcp   MM3,MM4                 ;
 1956         pfrcp   MM3,m64[RBP]            ;
 1957 
 1958         pfrcpit1 MM3,MM4                ;
 1959         pfrcpit1 MM3,m64[RBP]           ;
 1960 
 1961         pfrcpit2 MM3,MM4                ;
 1962         pfrcpit2 MM3,m64[RBP]           ;
 1963 
              // Two different mnemonics: pfrsqrt (register form) and
              // pfrsqit1 (memory form).
 1964         pfrsqrt  MM3,MM4                ;
 1965         pfrsqit1 MM3,m64[RBP]           ;
 1966 
 1967         pfsub   MM3,MM4                 ;
 1968         pfsub   MM3,m64[RBP]            ;
 1969 
 1970         pfsubr  MM3,MM4                 ;
 1971         pfsubr  MM3,m64[RBP]            ;
 1972 
 1973         pi2fd   MM3,MM4                 ;
 1974         pi2fd   MM3,m64[RBP]            ;
 1975 
 1976         pmulhrw MM3,MM4                 ;
 1977         pmulhrw MM3,m64[RBP]            ;
 1978 
 1979         pswapd  MM3,MM4                 ;
 1980         pswapd  MM3,m64[RBP]            ;
 1981 L1:                                     ;
              // Pop the address pushed by `call L1` and store it in p, so
              // that p points at the first assembled byte after the call.
 1982         pop     RBX                     ;
 1983         mov     p[RBP],RBX              ;
 1984     }
          // Compare the assembled code byte for byte with the expected table.
 1985     for (i = 0; i < data.length; i++)
 1986     {
 1987         assert(p[i] == data[i]);
 1988     }
 1989 }
 1990 
 1991 /****************************************************/
 1992 
 1993 struct S17 { char[6] x; }   // 6-byte aggregate; type of xx17 below
 1994 __gshared S17 xx17;         // memory operand of the `lgdt/lidt/sgdt/sidt xx17` statements in test17
 1995 
 1996 void test17()
 1997 {
          // Encoding test for lgdt/lidt/sgdt/sidt.  The instructions are only
          // assembled, never executed: `call L1` transfers control past them.
          // p receives the address of the first assembled instruction (see L1).
 1998     ubyte *p;
          // Expected bytes for the four [RAX] forms only.  All four rows
          // share the bytes 0F 01 and differ only in the third byte.
 1999     static ubyte[] data =
 2000     [
 2001         0x0F, 0x01, 0x10,       // lgdt [RAX]
 2002         0x0F, 0x01, 0x18,       // lidt [RAX]
 2003         0x0F, 0x01, 0x00,       // sgdt [RAX]
 2004         0x0F, 0x01, 0x08,       // sidt [RAX]
 2005     ];
 2006     int i;
 2007 
 2008     asm
 2009     {
              // Pushes the address of the next instruction (the first one
              // under test) and jumps to L1, skipping everything in between.
 2010         call    L1                      ;
 2011 
 2012         lgdt [RAX]                      ;
 2013         lidt [RAX]                      ;
 2014         sgdt [RAX]                      ;
 2015         sidt [RAX]                      ;
 2016 
              // The forms taking the global xx17 are assembled after the
              // bytes covered by `data`, so their encodings are not compared;
              // this only checks that the assembler accepts them.
 2017         lgdt xx17                       ;
 2018         lidt xx17                       ;
 2019         sgdt xx17                       ;
 2020         sidt xx17                       ;
 2021 
 2022 L1:
              // Pop the address pushed by `call L1` and store it in p, so
              // that p points at the first assembled byte after the call.
 2023         pop     RBX                     ;
 2024         mov     p[RBP],RBX              ;
 2025     }
          // Compare the first data.length assembled bytes with the table.
 2026     for (i = 0; i < data.length; i++)
 2027     {
 2028         assert(p[i] == data[i]);
 2029     }
 2030 }
 2031 
 2032 /****************************************************/
 2033 
 2034 void test18()
 2035 {
          // Checks the encodings of fcomi/fcomip/fucomi/fucomip in their
          // no-operand, one-operand and two-operand spellings.
          // Per the expected bytes, the no-operand form encodes ST(1).
          // The instructions after `call L1` are skipped at run time; the call
          // only serves to obtain their address (popped into RBX, stored in p).
 2036     ubyte *p;
 2037     static ubyte[] data =
 2038     [
 2039         0xDB, 0xF1,             // fcomi ST,ST(1)
 2040         0xDB, 0xF0,             // fcomi ST,ST(0)
 2041         0xDB, 0xF2,             // fcomi ST,ST(2)
 2042
 2043         0xDF, 0xF1,             // fcomip ST,ST(1)
 2044         0xDF, 0xF0,             // fcomip ST,ST(0)
 2045         0xDF, 0xF2,             // fcomip ST,ST(2)
 2046
 2047         0xDB, 0xE9,             // fucomi ST,ST(1)
 2048         0xDB, 0xE8,             // fucomi ST,ST(0)
 2049         0xDB, 0xEB,             // fucomi ST,ST(3)
 2050
 2051         0xDF, 0xE9,             // fucomip ST,ST(1)
 2052         0xDF, 0xED,             // fucomip ST,ST(5)
 2053         0xDF, 0xEC,             // fucomip ST,ST(4)
 2054     ];
 2055     int i;
 2056
 2057     asm
 2058     {
 2059         call    L1                      ;
 2060
 2061         fcomi                           ;
 2062         fcomi   ST(0)                   ;
 2063         fcomi   ST,ST(2)                ;
 2064
 2065         fcomip                          ;
 2066         fcomip  ST(0)                   ;
 2067         fcomip  ST,ST(2)                ;
 2068
 2069         fucomi                          ;
 2070         fucomi  ST(0)                   ;
 2071         fucomi  ST,ST(3)                ;
 2072
 2073         fucomip                         ;
 2074         fucomip ST(5)                   ;
 2075         fucomip ST,ST(4)                ;
 2076
 2077 L1:
 2078         pop     RBX                     ;
 2079         mov     p[RBP],RBX              ;
 2080     }
          // Compare the assembled bytes against the expected table.
 2081     for (i = 0; i < data.length; i++)
 2082     {
 2083         assert(p[i] == data[i]);
 2084     }
 2085 }
 2086 
 2087 /****************************************************/
 2088 
 2089 extern (C) {
         // Empty C-linkage function; test19 takes its address and calls it.
 2090    void foo19() { }
 2091 }
 2092 
 2093 void test19()
 2094 {   void function() fp;
          // Loads the address of foo19 with lea and stores it from asm into
          // three differently typed locals (function pointer, ulong, ulong*),
          // then calls through fp once from asm and once from D code.
 2095     ulong  x;
 2096     ulong *p;
 2097
 2098     asm
 2099     {
 2100         lea     RAX, qword ptr [foo19];
 2101         mov     fp, RAX;
 2102         mov     x, RAX;
 2103         mov     p, RAX;
 2104         call    fp;
 2105     }
 2106     (*fp)();
 2107 }
 2108 
 2109 /****************************************************/
 2110 /+
 2111 void test20()
 2112 {
 2113     ubyte *p;
 2114     static ubyte[] data =
 2115     [
 2116         0x9B, 0xDB, 0xE0,       // feni
 2117         0xDB, 0xE0,             // fneni
 2118 
 2119         0x9B, 0xDB, 0xE1,       // fdisi
 2120         0xDB, 0xE1,             // fndisi
 2121 
 2122         0x9B, 0xDB, 0xE2,       // fclex
 2123         0xDB, 0xE2,             // fnclex
 2124 
 2125         0x9B, 0xDB, 0xE3,       // finit
 2126         0xDB, 0xE3,             // fninit
 2127 
 2128         0xDB, 0xE4,             // fsetpm
 2129     ];
 2130     int i;
 2131 
 2132     asm
 2133     {
 2134         call    L1                      ;
 2135 
 2136         feni                            ;
 2137         fneni                           ;
 2138         fdisi                           ;
 2139         fndisi                          ;
 2140         finit                           ;
 2141         fninit                          ;
 2142         fclex                           ;
 2143         fnclex                          ;
 2144         finit                           ;
 2145         fninit                          ;
 2146         fsetpm                          ;
 2147 L1:
 2148         pop     RBX                     ;
 2149         mov     p[RBP],RBX              ;
 2150     }
 2151     for (i = 0; i < data.length; i++)
 2152     {
 2153         assert(p[i] == data[i]);
 2154     }
 2155 }
 2156 +/
 2157 /****************************************************/
 2158 
 2159 void test21()
 2160 {
          // Checks the encodings of in/out for AL, AX and EAX with both an
          // immediate port number and the DX register as port.
          // The port instructions sit between `call L1` and L1, so the call
          // jumps over them and they are never executed.
 2161     ubyte *p;
 2162     static ubyte[] data =
 2163     [
 2164         0xE4, 0x06,             // in   AL,6
 2165         0x66, 0xE5, 0x07,       // in   AX,7
 2166         0xE5, 0x08,             // in   EAX,8
 2167         0xEC,                   // in   AL,DX
 2168         0x66, 0xED,             // in   AX,DX
 2169         0xED,                   // in   EAX,DX
 2170         0xE6, 0x06,             // out  6,AL
 2171         0x66, 0xE7, 0x07,       // out  7,AX
 2172         0xE7, 0x08,             // out  8,EAX
 2173         0xEE,                   // out  DX,AL
 2174         0x66, 0xEF,             // out  DX,AX
 2175         0xEF,                   // out  DX,EAX
 2176     ];
 2177     int i;
 2178
 2179     asm
 2180     {
 2181         call    L1      ;
 2182
 2183         in AL,6         ;
 2184         in AX,7         ;
 2185         in EAX,8        ;
 2186         in AL,DX        ;
 2187         in AX,DX        ;
 2188         in EAX,DX       ;
 2189
 2190         out 6,AL        ;
 2191         out 7,AX        ;
 2192         out 8,EAX       ;
 2193         out DX,AL       ;
 2194         out DX,AX       ;
 2195         out DX,EAX      ;
 2196 L1:
 2197         pop     RBX             ;
 2198         mov     p[RBP],RBX      ;
 2199     }
          // p now points at the first assembled `in`; compare byte by byte.
 2200     for (i = 0; i < data.length; i++)
 2201     {
 2202         assert(p[i] == data[i]);
 2203     }
 2204 }
 2205 
 2206 /****************************************************/
 2207 
 2208 void test22()
 2209 {
          // Checks the encodings of cmpxchg8b, cmpxchg16b and a byte cmpxchg
          // with DIL. In the table the leading 0x48 / 0x40 bytes are written in
          // the first column to set them apart from the opcode bytes.
          // NOTE(review): the 0xE0 / 0xF0 bytes look like RBP-relative
          // displacements of m64 / m128, so the expected data presumably
          // depends on the order and size of the locals declared here.
 2210     ubyte *p;
 2211     static ubyte[] data =
 2212     [
 2213         0x0F, 0xC7, 0x4D, 0xE0, // cmpxchg8b
 2214 0x48,   0x0F, 0xC7, 0x4D, 0xF0, // cmpxchg16b
 2215 0x40,   0x0F, 0xB0, 0x3A        // cmpxchg [RDX],DIL
 2216     ];
 2217     int i;
 2218     M64  m64;
 2219     M128 m128;
 2220
 2221     asm
 2222     {
 2223         call    L1                      ;
 2224
 2225         cmpxchg8b  m64                  ;
 2226         cmpxchg16b m128                 ;
 2227         cmpxchg [RDX],DIL               ;
 2228 L1:
 2229         pop     RBX                     ;
 2230         mov     p[RBP],RBX              ;
 2231     }
          // The instructions above are skipped by the call; only their bytes
          // are inspected here.
 2232     for (i = 0; i < data.length; i++)
 2233     {
 2234         assert(p[i] == data[i]);
 2235     }
 2236 }
 2237 
 2238 /****************************************************/
 2239 
 2240 void test23()
 2241 {
          // Checks the encodings of fxch, fistp/fisttp with 16-, 32- and 64-bit
          // memory operands, monitor/mwait/xgetbv, and the addsub*/hadd*/hsub*/
          // lddqu/movddup/movshdup/movsldup instructions in register and memory
          // forms.
          // NOTE(review): the displacement bytes in the table (0xD0, 0xD4, 0xD8,
          // 0xE0) appear to be the RBP-relative offsets of m16, m32, m64 and
          // m128, so the declaration order of these locals presumably matters.
 2242     short m16;
 2243     int m32;
 2244     long m64;
 2245     M128 m128;
 2246     ubyte *p;
 2247     static ubyte[] data =
 2248     [
 2249         0xD9, 0xC9,             // fxch         ST(1), ST(0)
 2250
 2251         0xDF, 0x5D, 0xD0,       // fistp        word ptr -030h[RBP]
 2252         0xDB, 0x5D, 0xD4,       // fistp        dword ptr -02Ch[RBP]
 2253         0xDF, 0x7D, 0xD8,       // fistp        long64 ptr -028h[RBP]
 2254         0xDF, 0x4D, 0xD0,       // fisttp       short ptr -030h[RBP]
 2255         0xDB, 0x4D, 0xD4,       // fisttp       dword ptr -02Ch[RBP]
 2256         0xDD, 0x4D, 0xD8,       // fisttp       long64 ptr -028h[RBP]
 2257         0x0F, 0x01, 0xC8,       // monitor
 2258         0x0F, 0x01, 0xC9,       // mwait
 2259         0x0F, 0x01, 0xD0,       // xgetbv
 2260
 2261         0x66, 0x0F, 0xD0, 0xCA,         // addsubpd     XMM1,XMM2
 2262         0x66, 0x0F, 0xD0, 0x4D, 0xE0,   // addsubpd     XMM1,-020h[RBP]
 2263         0xF2, 0x0F, 0xD0, 0xCA,         // addsubps     XMM1,XMM2
 2264         0xF2, 0x0F, 0xD0, 0x4D, 0xE0,   // addsubps     XMM1,-020h[RBP]
 2265         0x66, 0x0F, 0x7C, 0xCA,         // haddpd       XMM1,XMM2
 2266         0x66, 0x0F, 0x7C, 0x4D, 0xE0,   // haddpd       XMM1,-020h[RBP]
 2267         0xF2, 0x0F, 0x7C, 0xCA,         // haddps       XMM1,XMM2
 2268         0xF2, 0x0F, 0x7C, 0x4D, 0xE0,   // haddps       XMM1,-020h[RBP]
 2269         0x66, 0x0F, 0x7D, 0xCA,         // hsubpd       XMM1,XMM2
 2270         0x66, 0x0F, 0x7D, 0x4D, 0xE0,   // hsubpd       XMM1,-020h[RBP]
 2271         0xF2, 0x0F, 0x7D, 0xCA,         // hsubps       XMM1,XMM2
 2272         0xF2, 0x0F, 0x7D, 0x4D, 0xE0,   // hsubps       XMM1,-020h[RBP]
 2273         0xF2, 0x0F, 0xF0, 0x4D, 0xE0,   // lddqu        XMM1,-020h[RBP]
 2274         0xF2, 0x0F, 0x12, 0xCA,         // movddup      XMM1,XMM2
 2275         0xF2, 0x0F, 0x12, 0x4D, 0xD8,   // movddup      XMM1,-028h[RBP]
 2276         0xF3, 0x0F, 0x16, 0xCA,         // movshdup     XMM1,XMM2
 2277         0xF3, 0x0F, 0x16, 0x4D, 0xE0,   // movshdup     XMM1,-020h[RBP]
 2278         0xF3, 0x0F, 0x12, 0xCA,         // movsldup     XMM1,XMM2
 2279         0xF3, 0x0F, 0x12, 0x4D, 0xE0,   // movsldup     XMM1,-020h[RBP]
 2280     ];
 2281     int i;
 2282
 2283     asm
 2284     {
 2285         call    L1                      ;
 2286
 2287         fxch    ST(1), ST(0)            ;
 2288
 2289         fistp   m16[RBP]                ;
 2290         fistp   m32[RBP]                ;
 2291         fistp   m64[RBP]                ;
 2292
 2293         fisttp  m16[RBP]                ;
 2294         fisttp  m32[RBP]                ;
 2295         fisttp  m64[RBP]                ;
 2296
 2297         monitor                         ;
 2298         mwait                           ;
 2299         xgetbv                          ;
 2300
 2301         addsubpd        XMM1,XMM2       ;
 2302         addsubpd        XMM1,m128[RBP]  ;
 2303
 2304         addsubps        XMM1,XMM2       ;
 2305         addsubps        XMM1,m128[RBP]  ;
 2306
 2307         haddpd          XMM1,XMM2       ;
 2308         haddpd          XMM1,m128[RBP]  ;
 2309
 2310         haddps          XMM1,XMM2       ;
 2311         haddps          XMM1,m128[RBP]  ;
 2312
 2313         hsubpd          XMM1,XMM2       ;
 2314         hsubpd          XMM1,m128[RBP]  ;
 2315
 2316         hsubps          XMM1,XMM2       ;
 2317         hsubps          XMM1,m128[RBP]  ;
 2318
 2319         lddqu           XMM1,m128[RBP]  ;
 2320
 2321         movddup         XMM1,XMM2       ;
 2322         movddup         XMM1,m64[RBP]   ;
 2323
 2324         movshdup        XMM1,XMM2       ;
 2325         movshdup        XMM1,m128[RBP]  ;
 2326
 2327         movsldup        XMM1,XMM2       ;
 2328         movsldup        XMM1,m128[RBP]  ;
 2329
 2330 L1:                                     ;
 2331         pop     RBX                     ;
 2332         mov     p[RBP],RBX              ;
 2333     }
          // The call jumped over the instructions; p holds their start address.
 2334     for (i = 0; i < data.length; i++)
 2335     {
 2336         assert(p[i] == data[i]);
 2337     }
 2338 }
 2339 
 2340 /****************************************************/
 2341 
 2342 void test24()
 2343 {
              // Uses lea with a 16-bit destination: AX receives the low 16 bits
              // of the address of i, which is then stored into i itself and
              // compared with the same truncation computed in D.
 2344         ushort i;
 2345
 2346         asm
 2347         {
 2348             lea AX, i;
 2349             mov i, AX;
 2350         }
 2351         assert(cast(ushort)&i == i);
 2352 }
 2353 
 2354 /****************************************************/
 2355 
 2356 void test25()
 2357 {
          // Checks the encodings of movd between 32-bit general registers and
          // XMM / MM registers, in both directions, first sweeping the vector
          // register with ECX fixed and then sweeping the general register with
          // XMM1 / MM1 fixed.
          // The instructions are skipped by `call L1`, so EBP/ESP are never
          // actually overwritten at run time.
          // m16, m32, m64 and m128 are declared but not referenced below.
 2358     short m16;
 2359     int m32;
 2360     long m64;
 2361     M128 m128;
 2362     ubyte *p;
 2363     static ubyte[] data =
 2364     [
 2365         0x66, 0x0F, 0x7E, 0xC1,         // movd ECX,XMM0
 2366         0x66, 0x0F, 0x7E, 0xC9,         // movd ECX,XMM1
 2367         0x66, 0x0F, 0x7E, 0xD1,         // movd ECX,XMM2
 2368         0x66, 0x0F, 0x7E, 0xD9,         // movd ECX,XMM3
 2369         0x66, 0x0F, 0x7E, 0xE1,         // movd ECX,XMM4
 2370         0x66, 0x0F, 0x7E, 0xE9,         // movd ECX,XMM5
 2371         0x66, 0x0F, 0x7E, 0xF1,         // movd ECX,XMM6
 2372         0x66, 0x0F, 0x7E, 0xF9,         // movd ECX,XMM7
 2373         0x0F, 0x7E, 0xC1,               // movd ECX,MM0
 2374         0x0F, 0x7E, 0xC9,               // movd ECX,MM1
 2375         0x0F, 0x7E, 0xD1,               // movd ECX,MM2
 2376         0x0F, 0x7E, 0xD9,               // movd ECX,MM3
 2377         0x0F, 0x7E, 0xE1,               // movd ECX,MM4
 2378         0x0F, 0x7E, 0xE9,               // movd ECX,MM5
 2379         0x0F, 0x7E, 0xF1,               // movd ECX,MM6
 2380         0x0F, 0x7E, 0xF9,               // movd ECX,MM7
 2381         0x66, 0x0F, 0x6E, 0xC1,         // movd XMM0,ECX
 2382         0x66, 0x0F, 0x6E, 0xC9,         // movd XMM1,ECX
 2383         0x66, 0x0F, 0x6E, 0xD1,         // movd XMM2,ECX
 2384         0x66, 0x0F, 0x6E, 0xD9,         // movd XMM3,ECX
 2385         0x66, 0x0F, 0x6E, 0xE1,         // movd XMM4,ECX
 2386         0x66, 0x0F, 0x6E, 0xE9,         // movd XMM5,ECX
 2387         0x66, 0x0F, 0x6E, 0xF1,         // movd XMM6,ECX
 2388         0x66, 0x0F, 0x6E, 0xF9,         // movd XMM7,ECX
 2389         0x0F, 0x6E, 0xC1,               // movd MM0,ECX
 2390         0x0F, 0x6E, 0xC9,               // movd MM1,ECX
 2391         0x0F, 0x6E, 0xD1,               // movd MM2,ECX
 2392         0x0F, 0x6E, 0xD9,               // movd MM3,ECX
 2393         0x0F, 0x6E, 0xE1,               // movd MM4,ECX
 2394         0x0F, 0x6E, 0xE9,               // movd MM5,ECX
 2395         0x0F, 0x6E, 0xF1,               // movd MM6,ECX
 2396         0x0F, 0x6E, 0xF9,               // movd MM7,ECX
 2397         0x66, 0x0F, 0x7E, 0xC8,         // movd EAX,XMM1
 2398         0x66, 0x0F, 0x7E, 0xCB,         // movd EBX,XMM1
 2399         0x66, 0x0F, 0x7E, 0xC9,         // movd ECX,XMM1
 2400         0x66, 0x0F, 0x7E, 0xCA,         // movd EDX,XMM1
 2401         0x66, 0x0F, 0x7E, 0xCE,         // movd ESI,XMM1
 2402         0x66, 0x0F, 0x7E, 0xCF,         // movd EDI,XMM1
 2403         0x66, 0x0F, 0x7E, 0xCD,         // movd EBP,XMM1
 2404         0x66, 0x0F, 0x7E, 0xCC,         // movd ESP,XMM1
 2405         0x0F, 0x7E, 0xC8,               // movd EAX,MM1
 2406         0x0F, 0x7E, 0xCB,               // movd EBX,MM1
 2407         0x0F, 0x7E, 0xC9,               // movd ECX,MM1
 2408         0x0F, 0x7E, 0xCA,               // movd EDX,MM1
 2409         0x0F, 0x7E, 0xCE,               // movd ESI,MM1
 2410         0x0F, 0x7E, 0xCF,               // movd EDI,MM1
 2411         0x0F, 0x7E, 0xCD,               // movd EBP,MM1
 2412         0x0F, 0x7E, 0xCC,               // movd ESP,MM1
 2413         0x66, 0x0F, 0x6E, 0xC8,         // movd XMM1,EAX
 2414         0x66, 0x0F, 0x6E, 0xCB,         // movd XMM1,EBX
 2415         0x66, 0x0F, 0x6E, 0xC9,         // movd XMM1,ECX
 2416         0x66, 0x0F, 0x6E, 0xCA,         // movd XMM1,EDX
 2417         0x66, 0x0F, 0x6E, 0xCE,         // movd XMM1,ESI
 2418         0x66, 0x0F, 0x6E, 0xCF,         // movd XMM1,EDI
 2419         0x66, 0x0F, 0x6E, 0xCD,         // movd XMM1,EBP
 2420         0x66, 0x0F, 0x6E, 0xCC,         // movd XMM1,ESP
 2421         0x0F, 0x6E, 0xC8,               // movd MM1,EAX
 2422         0x0F, 0x6E, 0xCB,               // movd MM1,EBX
 2423         0x0F, 0x6E, 0xC9,               // movd MM1,ECX
 2424         0x0F, 0x6E, 0xCA,               // movd MM1,EDX
 2425         0x0F, 0x6E, 0xCE,               // movd MM1,ESI
 2426         0x0F, 0x6E, 0xCF,               // movd MM1,EDI
 2427         0x0F, 0x6E, 0xCD,               // movd MM1,EBP
 2428         0x0F, 0x6E, 0xCC,               // movd MM1,ESP
 2429     ];
 2430     int i;
 2431
 2432     asm
 2433     {
 2434         call    L1                      ;
 2435
 2436         movd ECX, XMM0;
 2437         movd ECX, XMM1;
 2438         movd ECX, XMM2;
 2439         movd ECX, XMM3;
 2440         movd ECX, XMM4;
 2441         movd ECX, XMM5;
 2442         movd ECX, XMM6;
 2443         movd ECX, XMM7;
 2444
 2445         movd ECX, MM0;
 2446         movd ECX, MM1;
 2447         movd ECX, MM2;
 2448         movd ECX, MM3;
 2449         movd ECX, MM4;
 2450         movd ECX, MM5;
 2451         movd ECX, MM6;
 2452         movd ECX, MM7;
 2453
 2454         movd XMM0, ECX;
 2455         movd XMM1, ECX;
 2456         movd XMM2, ECX;
 2457         movd XMM3, ECX;
 2458         movd XMM4, ECX;
 2459         movd XMM5, ECX;
 2460         movd XMM6, ECX;
 2461         movd XMM7, ECX;
 2462
 2463         movd MM0, ECX;
 2464         movd MM1, ECX;
 2465         movd MM2, ECX;
 2466         movd MM3, ECX;
 2467         movd MM4, ECX;
 2468         movd MM5, ECX;
 2469         movd MM6, ECX;
 2470         movd MM7, ECX;
 2471
 2472         movd EAX, XMM1;
 2473         movd EBX, XMM1;
 2474         movd ECX, XMM1;
 2475         movd EDX, XMM1;
 2476         movd ESI, XMM1;
 2477         movd EDI, XMM1;
 2478         movd EBP, XMM1;
 2479         movd ESP, XMM1;
 2480
 2481         movd EAX, MM1;
 2482         movd EBX, MM1;
 2483         movd ECX, MM1;
 2484         movd EDX, MM1;
 2485         movd ESI, MM1;
 2486         movd EDI, MM1;
 2487         movd EBP, MM1;
 2488         movd ESP, MM1;
 2489
 2490         movd XMM1, EAX;
 2491         movd XMM1, EBX;
 2492         movd XMM1, ECX;
 2493         movd XMM1, EDX;
 2494         movd XMM1, ESI;
 2495         movd XMM1, EDI;
 2496         movd XMM1, EBP;
 2497         movd XMM1, ESP;
 2498
 2499         movd MM1, EAX;
 2500         movd MM1, EBX;
 2501         movd MM1, ECX;
 2502         movd MM1, EDX;
 2503         movd MM1, ESI;
 2504         movd MM1, EDI;
 2505         movd MM1, EBP;
 2506         movd MM1, ESP;
 2507
 2508 L1:                                     ;
 2509         pop     RBX                     ;
 2510         mov     p[RBP],RBX              ;
 2511     }
          // Compare the assembled bytes with the expected table.
 2512     for (i = 0; i < data.length; i++)
 2513     {
 2514         assert(p[i] == data[i]);
 2515     }
 2516 }
 2517 
 2518 /****************************************************/
 2519 
 2520 void fn26(ref byte val)
 2521 {
          // Increments the caller's byte through a ref parameter from asm:
          // `mov RAX, val` is expected to load the address behind the ref, and
          // the byte at that address is then incremented (test26 asserts this).
 2522     asm
 2523     {
 2524         mov RAX, val;
 2525         inc byte ptr [RAX];
 2526     }
 2527 }
 2528 
 2529 void test26()
 2530 {
          // b is default-initialised (asserted to be 0); fn26 must bump it to 1.
 2531     byte b;
 2532     //printf( "%i\n", b );
 2533     assert(b == 0);
 2534     fn26(b);
 2535     //printf( "%i\n", b );
 2536     assert(b == 1);
 2537 }
 2538 
 2539 /****************************************************/
 2540 
 2541 void test27()
 2542 {
          // Uses a static const 16-byte array as the source operand of movdqu
          // and then shifts XMM0 with pslldq. Nothing is asserted, and the asm
          // block is only compiled under version (Windows); elsewhere the
          // function body is just the array declaration.
 2543     static const ubyte[16] a =
 2544     [0, 1, 2, 3, 4, 5, 6, 7, 8 ,9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF];
 2545
 2546     version (Windows)
 2547     {
 2548     asm
 2549     {
 2550             movdqu XMM0, a;
 2551             pslldq XMM0, 2;
 2552     }
 2553     }
 2554 }
 2555 
 2556 /****************************************************/
 2557 
 2558 /*
 2559 PASS:
 2560         cfloat z;
 2561         cfloat[1] z;
 2562         double z;
 2563         double[1] b;
 2564         long z;
 2565         long[1] z;
 2566 
 2567 FAIL: (bad type/size of operands 'movq')
 2568         byte[8] z;
 2569         char[8] z;
 2570         dchar[2] z;
 2571         float[2] z;
 2572         int[2] z;
 2573         short[4] z;
 2574         wchar[4] z;
 2575 
 2576 XPASS: (too small, but accepted by DMD)
 2577         cfloat[0] z;
 2578         double[0] z;
 2579         long[0] z;
 2580  */
 2581 
 2582 void test28()
 2583 {
          // Exercises movq with differently typed memory operands: a
          // void-initialised cfloat[4], a static const ubyte[8] as source and a
          // local ubyte[8] as destination. The load from z is overwritten by the
          // load from A, so b must end up equal to A.
          // The version (Windows) guard is commented out, so this runs everywhere.
 2584 //    version (Windows)
 2585 //    {
 2586         cfloat[4] z = void;
 2587         static const ubyte[8] A = [3, 4, 9, 0, 1, 3, 7, 2];
 2588         ubyte[8] b;
 2589
 2590         asm{
 2591                 movq MM0, z;
 2592                 movq MM0, A;
 2593                 movq b, MM0;
 2594         }
 2595
              // Verify that the 8 bytes stored from MM0 match A.
 2596         for(size_t i = 0; i < A.length; i++)
 2597         {
 2598                 if(A[i] != b[i])
 2599                 {
 2600                         assert(0);
 2601                 }
 2602         }
 2603 //    }
 2604 }
 2605 
 2606 /****************************************************/
 2607 /+
 2608 shared int[5] bar29 = [3, 4, 5, 6, 7];
 2609 
 2610 void test29()
 2611 {
 2612     int* x;
 2613     asm
 2614     {
 2615         push offsetof bar29;
 2616         pop EAX;
 2617         mov x, EAX;
 2618     }
 2619     assert(*x == 3);
 2620 
 2621     asm
 2622     {
 2623         mov EAX, offsetof bar29;
 2624         mov x, EAX;
 2625     }
 2626     assert(*x == 3);
 2627 }
 2628 +/
 2629 /****************************************************/
 2630 
 2631 const int CONST_OFFSET30 = 10;  // module-level constant used as a displacement in foo30
 2632
 2633 void foo30()
 2634 {
              // Uses a module-level const int inside an address expression.
              // If executed, the second mov would read memory at RDX + 10.
 2635         asm
 2636         {
 2637                 mov EDX, 10;
 2638                 mov EAX, [RDX + CONST_OFFSET30];
 2639         }
 2640 }
 2641 
 2642 void test30()
 2643 {
          // Intentionally empty: foo30 is not called from here.
 2644 }
 2645 
 2646 /****************************************************/
 2647 
 2648 void test31()
 2649 {
          // Checks that conditional jumps to labels inside the asm block get
          // short encodings with the right displacement: the forward `je L2`
          // is expected as 74 04 and the backward `jne L3` as 75 FC.
          // The sequence itself is skipped by `call L1` and never executed.
 2650     ubyte *p;
 2651     static ubyte[] data =
 2652     [
 2653         0xF7, 0xD8,             // neg  EAX
 2654         0x74, 0x04,             // je   L8
 2655         0xF7, 0xD8,             // neg  EAX
 2656         0x75, 0xFC,             // jne  L4
 2657         0xFF, 0xC0,             // inc  EAX
 2658     ];
 2659     int i;
 2660
 2661     asm
 2662     {
 2663         call    L1                      ;
 2664
 2665         neg     EAX;
 2666         je      L2;
 2667     L3:
 2668         neg     EAX;
 2669         jne     L3;
 2670     L2:
 2671         inc     EAX;
 2672
 2673 L1:                                     ;
 2674         pop     RBX                     ;
 2675         mov     p[RBP],RBX              ;
 2676     }
          // p points at the first `neg EAX`; compare with the expected bytes.
 2677     for (i = 0; i < data.length; i++)
 2678     {
 2679         assert(p[i] == data[i]);
 2680     }
 2681 }
 2682 
 2683 /****************************************************/
 2684 
 2685 void infiniteAsmLoops()
 2686 {
          // Collection of endless loops whose bodies are asm blocks. The
          // comments below record which spellings crashed DMD 0.162 and which
          // compiled; the crashing variants are kept commented out.
          // Everything after the first `for (;;)` is unreachable at run time.
 2687
 2688     /* This crashes DMD 0.162: */
 2689     for (;;) asm { inc EAX; }
 2690
 2691     /* It doesn't seem to matter what you use. These all crash: */
 2692     //for (;;) asm { mov EAX, EBX; }
 2693     //for (;;) asm { xor EAX, EAX; }
 2694     //for (;;) asm { push 0; pop RAX; }
 2695     //for (;;) asm { jmp infiniteAsmLoops; }
 2696
 2697     /* This is a workaround: */
 2698     for (bool a = true; a;) asm { hlt; }                    // compiles
 2699     /* But this isn't: */
 2700     //for (const bool a = true; a;) asm{ hlt; }             // crashes DMD
 2701
 2702     /* It's not restricted to for-statements: */
 2703     //while(1) asm { hlt; }                                 // crashes DMD
 2704     /* This compiles: */
 2705     {
 2706         bool a = true;
 2707         while(a) asm { hlt; }
 2708     }
 2709     /* But again, this doesn't: */
 2710     /*
 2711     {
 2712         const bool a = true;    // note the const
 2713         while(a) asm { hlt; }
 2714     }
 2715     //*/
 2716
 2717     //do { asm { hlt; } } while (1);                          // crashes DMD
 2718     /* This, of course, compiles: */
 2719     {
 2720         bool a = true;
 2721         do asm { hlt; } while (a);
 2722     }
 2723     /* But predictably, this doesn't: */
 2724     /*
 2725     {
 2726         const bool a = true;
 2727         do asm { hlt; } while (a);
 2728     }
 2729     //**/
 2730
 2731     /* Not even hand-coding the loop works: */
 2732     /*
 2733     {
 2734 label:
 2735         asm { hlt; }   // commenting out this line to make it compile
 2736         goto label;
 2737     }
 2738     //*/
 2739     /* Unless you go all the way: (i.e. this compiles) */
 2740     asm
 2741     {
 2742 L1:
 2743         hlt;
 2744         jmp L1;
 2745     }
 2746
 2747     /* or like this (also compiles): */
 2748     static void test()
 2749     {
 2750         asm { naked; hlt; jmp test; }
 2751     }
 2752     test();
 2753
 2754
 2755     /* Wait... it gets weirder: */
 2756
 2757     /* This also doesn't compile: */
 2758     /*
 2759     for (;;)
 2760     {
 2761         printf("\n");
 2762         asm { hlt; }
 2763     }
 2764     //*/
 2765     /* But this does: */
 2766     //*
 2767     for (;;)
 2768     {
 2769         asm { hlt; }
 2770         printf("\n");
 2771     }
 2772     //*/
 2773     /* The same loop that doesn't compile above
 2774      * /does/ compile after previous one:
 2775      */
 2776     //*
 2777     for (;;)
 2778     {
 2779         printf("\n");
 2780         asm { hlt; }
 2781     }
 2782     //*/
 2783
 2784
 2785     /* Note: this one is at the end because it seems to also trigger the
 2786      * "now it works" event of the loop above.
 2787      */
 2788     /* There has to be /something/ in that asm block: */
 2789     for (;;) asm {}                                         // compiles
 2790 }
 2791 
 2792 void test32()
 2793 {
          // Intentionally empty: infiniteAsmLoops is not called from here.
 2794 }
 2795 
 2796 /****************************************************/
 2797 
 2798 void test33()
 2799 {
          // Uses a local variable and an alias of that variable as asm
          // operands. Nothing is asserted about the loaded values.
 2800     int x = 1;
 2801
 2802     alias x y;
 2803
 2804     asm
 2805     {
 2806         mov EAX, x;
 2807         mov EAX, y;
 2808     }
 2809 }
 2810 
 2811 /****************************************************/
 2812 
 2813 int test34()
 2814 {
          // Jumps from an asm block to a label defined in D code. The jmp
          // skips `return 0`, so the function returns 1 when the jump is taken.
 2815     asm{
 2816        jmp label;
 2817     }
 2818
 2819     return 0;
 2820  label:
 2821     return 1;
 2822 }
 2823 
 2824 /****************************************************/
 2825 /+
 2826 void foo35() { printf("hello\n"); }
 2827 
 2828 void test35()
 2829 {
 2830     void function() p;
 2831     ulong q;
 2832 
 2833     asm
 2834     {
 2835         mov ECX, foo35          ;
 2836         mov q, ECX              ;
 2837         lea EDX, foo35          ;
 2838         mov p, EDX              ;
 2839     }
 2840     assert(p == &foo35);
 2841     assert(q == *cast(ulong *)p);
 2842 }
 2843 
 2844 /****************************************************/
 2845 
 2846 void func36()
 2847 {
 2848 }
 2849 
 2850 int test36()
 2851 {
 2852   void*  a = &func36;
 2853   ulong* b = cast(ulong*) a;
 2854   ulong  f = *b;
 2855   ulong  g;
 2856 
 2857   asm{
 2858      mov RAX, func36;
 2859      mov g, RAX;
 2860   }
 2861 
 2862   if(f != g){
 2863      assert(0);
 2864   }
 2865 }
 2866 +/
 2867 /****************************************************/
 2868 
 2869 void a37(X...)(X expr)
 2870 {
          // Variadic template whose first tuple element is used as an asm
          // operand, once indexed directly (expr[0]) and once through an alias.
          // The value is loaded onto the FPU stack and stored straight back.
 2871     alias expr[0] var1;
 2872     asm {
 2873         fld double ptr expr[0];
 2874         fstp double ptr var1;
 2875     }
 2876 }
 2877 
 2878 void test37()
 2879 {
         // Instantiates a37 with a single floating-point literal argument.
 2880    a37(3.6);
 2881 }
 2882 
 2883 /****************************************************/
 2884 
 2885 int f38(X...)(X x)
 2886 {
          // Loads the second tuple element into EAX with an explicit `int ptr`
          // size override. There is no return statement; test38 expects the
          // value left in EAX to be the function result.
 2887     asm {
 2888         mov EAX, int ptr x[1];
 2889     }
 2890 }
 2891 
 2892 int g38(X...)(X x)
 2893 {
          // Same as f38 but without the `int ptr` size override on x[1].
          // The result is whatever the asm block leaves in EAX.
 2894     asm {
 2895         mov EAX, x[1];
 2896     }
 2897 }
 2898 
 2899 void test38()
 2900 {
          // Both variants must return the second argument (index 1).
 2901     assert(456 == f38(123, 456));
 2902     assert(456 == g38(123, 456));
 2903 }
 2904 
 2905 /****************************************************/
 2906 
 2907 void test39()
 2908 {
          // Uses a const byte as the operand of the `db` data directive.
          // The goto jumps over the asm block, so the emitted byte is never
          // executed as an instruction.
 2909     const byte z = 35;
 2910     goto end;
 2911     asm { db z; }
 2912     end: ;
 2913 }
 2914 
 2915 /****************************************************/
 2916 
 2917 void test40()
 2918 {
          // Uses a const string as the operand of the `ds` data directive.
          // `jmp L1` skips over the emitted data inside the asm block.
          // The trailing `end` label is not referenced in this function.
 2919     printf("");
 2920     const string s = "abcdefghi";
 2921     asm
 2922     {   jmp L1;
 2923         ds s;
 2924     L1:;
 2925     }
 2926     end: ;
 2927 }
 2928 
 2929 /****************************************************/
 2930 
 2931 void test41()
 2932 {
          // Checks base + scaled-index addressing with the scale written as a
          // `N*REG` factor. Per the table, [RSI+RAX] and [RSI+1*RAX] must give
          // identical bytes, and scales 2, 4 and 8 change only the last byte.
          // The movapd instructions are skipped by `call L1` and never run.
 2933     ubyte *p;
 2934     static ubyte[] data =
 2935     [
 2936         0x66,0x0F,0x28,0x0C,0x06,       // movapd       XMM1,[RAX][RSI]
 2937         0x66,0x0F,0x28,0x0C,0x06,       // movapd       XMM1,[RAX][RSI]
 2938         0x66,0x0F,0x28,0x0C,0x46,       // movapd       XMM1,[RAX*2][RSI]
 2939         0x66,0x0F,0x28,0x0C,0x86,       // movapd       XMM1,[RAX*4][RSI]
 2940         0x66,0x0F,0x28,0x0C,0xC6,       // movapd       XMM1,[RAX*8][RSI]
 2941     ];
 2942     int i;
 2943
 2944     asm
 2945     {
 2946         call    L1                      ;
 2947
 2948         movapd XMM1, [RSI+RAX];
 2949         movapd XMM1, [RSI+1*RAX];
 2950         movapd XMM1, [RSI+2*RAX];
 2951         movapd XMM1, [RSI+4*RAX];
 2952         movapd XMM1, [RSI+8*RAX];
 2953
 2954 L1:                                     ;
 2955         pop     RBX                     ;
 2956         mov     p[RBP],RBX              ;
 2957     }
          // Compare the assembled bytes with the expected table.
 2958     for (i = 0; i < data.length; i++)
 2959     {
 2960         assert(p[i] == data[i]);
 2961     }
 2962 }
 2963 
 2964 
 2965 /****************************************************/
 2966 
 2967 enum
 2968 {
          // Anonymous enum member used as an asm operand in test42.
 2969     enumeration42 = 1,
 2970 }
 2971 
 2972 void test42()
 2973 {
          // Uses a member of an anonymous enum as the source operand of mov.
          // Nothing is asserted about the resulting value.
 2974     asm
 2975     {
 2976         mov EAX, enumeration42;
 2977     }
 2978 }
 2979 
 2980 /****************************************************/
 2981 
 2982 void foo43()
 2983 {
          // lea with constant sub-expressions in the address: products that
          // evaluate to zero on either side of the register, a plain zero
          // displacement, and a non-zero constant product (7*7).
 2984     asm {lea EAX, [0*4+EAX]; }
 2985     asm {lea EAX, [4*0+EAX]; }
 2986     asm {lea EAX, [EAX+4*0]; }
 2987     asm {lea EAX, [0+EAX]; }
 2988     asm {lea EAX, [7*7+EAX]; }
 2989 }
 2990 
 2991 void test43()
 2992 {
          // Intentionally empty: foo43 is not called from here.
 2993 }
 2994 
 2995 /****************************************************/
 2996 
 2997 enum n1 = 42;       // manifest constant; used as an asm operand in retN1
 2998 enum { n2 = 42 }    // anonymous enum member; used as an asm operand in retN2
 2999 
 3000 uint retN1() {
          // Loads the manifest constant n1 into EAX; there is no return
          // statement, and test44 expects the EAX value (42) as the result.
          // NOTE(review): the trailing comment below seems to record an old
          // wrong code generation (a stack load instead of an immediate).
 3001     asm {
 3002         mov EAX,n1; // No! - mov EAX,-4[EBP]
 3003     }
 3004 }
 3005 
 3006 uint retN2() {
          // Loads the anonymous enum member n2 into EAX; there is no return
          // statement, and test44 expects the EAX value (42) as the result.
 3007     asm {
 3008         mov EAX,n2; // OK - mov EAX,02Ah
 3009     }
 3010 }
 3011 
 3012 void test44()
 3013 {
          // Both spellings of the constant must come back as 42.
 3014     assert(retN1() == 42);
 3015     assert(retN2() == 42);
 3016 }
 3017 
 3018 /****************************************************/
 3019 
 3020 void test45()
 3021 {
          // Checks the encodings of the fcmovXX family, written in the
          // two-operand `ST, ST(n)` form, for eight conditions and several
          // stack registers.
          // The instructions are skipped by `call L1` and never executed.
 3022     ubyte *p;
 3023     static ubyte[] data =
 3024     [
 3025         0xDA, 0xC0,       // fcmovb     ST(0)
 3026         0xDA, 0xC1,       // fcmovb
 3027         0xDA, 0xCA,       // fcmove     ST(2)
 3028         0xDA, 0xD3,       // fcmovbe    ST(3)
 3029         0xDA, 0xDC,       // fcmovu     ST(4)
 3030         0xDB, 0xC5,       // fcmovnb    ST(5)
 3031         0xDB, 0xCE,       // fcmovne    ST(6)
 3032         0xDB, 0xD7,       // fcmovnbe   ST(7)
 3033         0xDB, 0xD9,       // fcmovnu
 3034     ];
 3035     int i;
 3036
 3037     asm
 3038     {
 3039         call    L1                      ;
 3040
 3041         fcmovb   ST, ST(0);
 3042         fcmovb   ST, ST(1);
 3043         fcmove   ST, ST(2);
 3044         fcmovbe  ST, ST(3);
 3045         fcmovu   ST, ST(4);
 3046         fcmovnb  ST, ST(5);
 3047         fcmovne  ST, ST(6);
 3048         fcmovnbe ST, ST(7);
 3049         fcmovnu  ST, ST(1);
 3050
 3051 L1:                                     ;
 3052         pop     RBX                     ;
 3053         mov     p[RBP],RBX              ;
 3054     }
          // Compare the assembled bytes with the expected table.
 3055     for (i = 0; i < data.length; i++)
 3056     {
 3057         assert(p[i] == data[i]);
 3058     }
 3059 }
 3060 
 3061 /****************************************************/
 3062 
 3063 void test46()
 3064 {
          // Checks the encodings of dppd/dpps (with an immediate operand),
          // movmskpd/movmskps with a general-register destination, and
          // pmovmskb from both an MM and an XMM source.
          // The instructions are skipped by `call L1` and never executed.
 3065     ubyte *p;
 3066     static ubyte[] data =
 3067     [
 3068         0x66, 0x0F, 0x3A, 0x41, 0xCA, 0x08,     // dppd XMM1,XMM2,8
 3069         0x66, 0x0F, 0x3A, 0x40, 0xDC, 0x07,     // dpps XMM3,XMM4,7
 3070         0x66, 0x0F, 0x50, 0xF3,                 // movmskpd ESI,XMM3
 3071         0x66, 0x0F, 0x50, 0xC7,                 // movmskpd EAX,XMM7
 3072         0x0F, 0x50, 0xC7,                       // movmskps EAX,XMM7
 3073         0x0F, 0xD7, 0xC7,                       // pmovmskb EAX,MM7
 3074         0x66, 0x0F, 0xD7, 0xC7,                 // pmovmskb EAX,XMM7
 3075     ];
 3076     int i;
 3077
 3078     asm
 3079     {
 3080         call    L1                      ;
 3081
 3082         dppd    XMM1,XMM2,8             ;
 3083         dpps    XMM3,XMM4,7             ;
 3084         movmskpd ESI,XMM3               ;
 3085         movmskpd EAX,XMM7               ;
 3086         movmskps EAX,XMM7               ;
 3087         pmovmskb EAX,MM7                ;
 3088         pmovmskb EAX,XMM7               ;
 3089
 3090 L1:                                     ;
 3091         pop     RBX                     ;
 3092         mov     p[RBP],RBX              ;
 3093     }
          // Compare the assembled bytes with the expected table.
 3094     for (i = 0; i < data.length; i++)
 3095     {
 3096         assert(p[i] == data[i]);
 3097     }
 3098 }
 3099 
 3100 /****************************************************/
 3101 /+
 3102 struct Foo47
 3103 {
 3104     float x,y;
 3105 }
 3106 
 3107 void bar47(Foo47 f)
 3108 {
 3109   int i;
 3110   asm
 3111   {
 3112     mov EAX, offsetof f;
 3113     mov i, EAX;
 3114   }
 3115   printf("%d\n",i);
 3116   assert(i == 8);
 3117 }
 3118 
 3119 void test47()
 3120 {
 3121     Foo47 f;
 3122     bar47(f);
 3123 }
 3124 +/
 3125 /****************************************************/
 3126 
 3127 void func48(void delegate () callback)
 3128 {
          // Helper for test48: simply invokes the delegate it is handed.
 3129     callback();
 3130 }
 3131 
 3132 void test48()
 3133 {
          // Passes func48 a delegate literal whose whole body is an inline asm
          // block (`mov EAX,EAX`); func48 then calls it.
 3134     func48(() { asm{ mov EAX,EAX; } });
 3135 }
 3136 
 3137 /****************************************************/
 3138 
 3139 void test49()
 3140 {
          // Checks the encodings of register-to-register ADD for every
          // destination/source pairing of the eight 8-bit registers
          // (AL,BL,CL,DL,AH,BH,CH,DH), the eight 16-bit registers (AX..SP) and
          // the eight 32-bit registers (EAX..ESP).  `data` lists the expected
          // bytes in the same order as the instructions in the asm block.
 3141     ubyte *p;
 3142     static ubyte[] data =
 3143     [
 3144         0x00, 0xC0,             // add  AL,AL
 3145         0x00, 0xD8,             // add  AL,BL
 3146         0x00, 0xC8,             // add  AL,CL
 3147         0x00, 0xD0,             // add  AL,DL
 3148         0x00, 0xE0,             // add  AL,AH
 3149         0x00, 0xF8,             // add  AL,BH
 3150         0x00, 0xE8,             // add  AL,CH
 3151         0x00, 0xF0,             // add  AL,DH
 3152         0x00, 0xC4,             // add  AH,AL
 3153         0x00, 0xDC,             // add  AH,BL
 3154         0x00, 0xCC,             // add  AH,CL
 3155         0x00, 0xD4,             // add  AH,DL
 3156         0x00, 0xE4,             // add  AH,AH
 3157         0x00, 0xFC,             // add  AH,BH
 3158         0x00, 0xEC,             // add  AH,CH
 3159         0x00, 0xF4,             // add  AH,DH
 3160         0x00, 0xC3,             // add  BL,AL
 3161         0x00, 0xDB,             // add  BL,BL
 3162         0x00, 0xCB,             // add  BL,CL
 3163         0x00, 0xD3,             // add  BL,DL
 3164         0x00, 0xE3,             // add  BL,AH
 3165         0x00, 0xFB,             // add  BL,BH
 3166         0x00, 0xEB,             // add  BL,CH
 3167         0x00, 0xF3,             // add  BL,DH
 3168         0x00, 0xC7,             // add  BH,AL
 3169         0x00, 0xDF,             // add  BH,BL
 3170         0x00, 0xCF,             // add  BH,CL
 3171         0x00, 0xD7,             // add  BH,DL
 3172         0x00, 0xE7,             // add  BH,AH
 3173         0x00, 0xFF,             // add  BH,BH
 3174         0x00, 0xEF,             // add  BH,CH
 3175         0x00, 0xF7,             // add  BH,DH
 3176         0x00, 0xC1,             // add  CL,AL
 3177         0x00, 0xD9,             // add  CL,BL
 3178         0x00, 0xC9,             // add  CL,CL
 3179         0x00, 0xD1,             // add  CL,DL
 3180         0x00, 0xE1,             // add  CL,AH
 3181         0x00, 0xF9,             // add  CL,BH
 3182         0x00, 0xE9,             // add  CL,CH
 3183         0x00, 0xF1,             // add  CL,DH
 3184         0x00, 0xC5,             // add  CH,AL
 3185         0x00, 0xDD,             // add  CH,BL
 3186         0x00, 0xCD,             // add  CH,CL
 3187         0x00, 0xD5,             // add  CH,DL
 3188         0x00, 0xE5,             // add  CH,AH
 3189         0x00, 0xFD,             // add  CH,BH
 3190         0x00, 0xED,             // add  CH,CH
 3191         0x00, 0xF5,             // add  CH,DH
 3192         0x00, 0xC2,             // add  DL,AL
 3193         0x00, 0xDA,             // add  DL,BL
 3194         0x00, 0xCA,             // add  DL,CL
 3195         0x00, 0xD2,             // add  DL,DL
 3196         0x00, 0xE2,             // add  DL,AH
 3197         0x00, 0xFA,             // add  DL,BH
 3198         0x00, 0xEA,             // add  DL,CH
 3199         0x00, 0xF2,             // add  DL,DH
 3200         0x00, 0xC6,             // add  DH,AL
 3201         0x00, 0xDE,             // add  DH,BL
 3202         0x00, 0xCE,             // add  DH,CL
 3203         0x00, 0xD6,             // add  DH,DL
 3204         0x00, 0xE6,             // add  DH,AH
 3205         0x00, 0xFE,             // add  DH,BH
 3206         0x00, 0xEE,             // add  DH,CH
 3207         0x00, 0xF6,             // add  DH,DH
 3208         0x66, 0x01, 0xC0,       // add  AX,AX
 3209         0x66, 0x01, 0xD8,       // add  AX,BX
 3210         0x66, 0x01, 0xC8,       // add  AX,CX
 3211         0x66, 0x01, 0xD0,       // add  AX,DX
 3212         0x66, 0x01, 0xF0,       // add  AX,SI
 3213         0x66, 0x01, 0xF8,       // add  AX,DI
 3214         0x66, 0x01, 0xE8,       // add  AX,BP
 3215         0x66, 0x01, 0xE0,       // add  AX,SP
 3216         0x66, 0x01, 0xC3,       // add  BX,AX
 3217         0x66, 0x01, 0xDB,       // add  BX,BX
 3218         0x66, 0x01, 0xCB,       // add  BX,CX
 3219         0x66, 0x01, 0xD3,       // add  BX,DX
 3220         0x66, 0x01, 0xF3,       // add  BX,SI
 3221         0x66, 0x01, 0xFB,       // add  BX,DI
 3222         0x66, 0x01, 0xEB,       // add  BX,BP
 3223         0x66, 0x01, 0xE3,       // add  BX,SP
 3224         0x66, 0x01, 0xC1,       // add  CX,AX
 3225         0x66, 0x01, 0xD9,       // add  CX,BX
 3226         0x66, 0x01, 0xC9,       // add  CX,CX
 3227         0x66, 0x01, 0xD1,       // add  CX,DX
 3228         0x66, 0x01, 0xF1,       // add  CX,SI
 3229         0x66, 0x01, 0xF9,       // add  CX,DI
 3230         0x66, 0x01, 0xE9,       // add  CX,BP
 3231         0x66, 0x01, 0xE1,       // add  CX,SP
 3232         0x66, 0x01, 0xC2,       // add  DX,AX
 3233         0x66, 0x01, 0xDA,       // add  DX,BX
 3234         0x66, 0x01, 0xCA,       // add  DX,CX
 3235         0x66, 0x01, 0xD2,       // add  DX,DX
 3236         0x66, 0x01, 0xF2,       // add  DX,SI
 3237         0x66, 0x01, 0xFA,       // add  DX,DI
 3238         0x66, 0x01, 0xEA,       // add  DX,BP
 3239         0x66, 0x01, 0xE2,       // add  DX,SP
 3240         0x66, 0x01, 0xC6,       // add  SI,AX
 3241         0x66, 0x01, 0xDE,       // add  SI,BX
 3242         0x66, 0x01, 0xCE,       // add  SI,CX
 3243         0x66, 0x01, 0xD6,       // add  SI,DX
 3244         0x66, 0x01, 0xF6,       // add  SI,SI
 3245         0x66, 0x01, 0xFE,       // add  SI,DI
 3246         0x66, 0x01, 0xEE,       // add  SI,BP
 3247         0x66, 0x01, 0xE6,       // add  SI,SP
 3248         0x66, 0x01, 0xC7,       // add  DI,AX
 3249         0x66, 0x01, 0xDF,       // add  DI,BX
 3250         0x66, 0x01, 0xCF,       // add  DI,CX
 3251         0x66, 0x01, 0xD7,       // add  DI,DX
 3252         0x66, 0x01, 0xF7,       // add  DI,SI
 3253         0x66, 0x01, 0xFF,       // add  DI,DI
 3254         0x66, 0x01, 0xEF,       // add  DI,BP
 3255         0x66, 0x01, 0xE7,       // add  DI,SP
 3256         0x66, 0x01, 0xC5,       // add  BP,AX
 3257         0x66, 0x01, 0xDD,       // add  BP,BX
 3258         0x66, 0x01, 0xCD,       // add  BP,CX
 3259         0x66, 0x01, 0xD5,       // add  BP,DX
 3260         0x66, 0x01, 0xF5,       // add  BP,SI
 3261         0x66, 0x01, 0xFD,       // add  BP,DI
 3262         0x66, 0x01, 0xED,       // add  BP,BP
 3263         0x66, 0x01, 0xE5,       // add  BP,SP
 3264         0x66, 0x01, 0xC4,       // add  SP,AX
 3265         0x66, 0x01, 0xDC,       // add  SP,BX
 3266         0x66, 0x01, 0xCC,       // add  SP,CX
 3267         0x66, 0x01, 0xD4,       // add  SP,DX
 3268         0x66, 0x01, 0xF4,       // add  SP,SI
 3269         0x66, 0x01, 0xFC,       // add  SP,DI
 3270         0x66, 0x01, 0xEC,       // add  SP,BP
 3271         0x66, 0x01, 0xE4,       // add  SP,SP
 3272         0x01, 0xC0,             // add  EAX,EAX
 3273         0x01, 0xD8,             // add  EAX,EBX
 3274         0x01, 0xC8,             // add  EAX,ECX
 3275         0x01, 0xD0,             // add  EAX,EDX
 3276         0x01, 0xF0,             // add  EAX,ESI
 3277         0x01, 0xF8,             // add  EAX,EDI
 3278         0x01, 0xE8,             // add  EAX,EBP
 3279         0x01, 0xE0,             // add  EAX,ESP
 3280         0x01, 0xC3,             // add  EBX,EAX
 3281         0x01, 0xDB,             // add  EBX,EBX
 3282         0x01, 0xCB,             // add  EBX,ECX
 3283         0x01, 0xD3,             // add  EBX,EDX
 3284         0x01, 0xF3,             // add  EBX,ESI
 3285         0x01, 0xFB,             // add  EBX,EDI
 3286         0x01, 0xEB,             // add  EBX,EBP
 3287         0x01, 0xE3,             // add  EBX,ESP
 3288         0x01, 0xC1,             // add  ECX,EAX
 3289         0x01, 0xD9,             // add  ECX,EBX
 3290         0x01, 0xC9,             // add  ECX,ECX
 3291         0x01, 0xD1,             // add  ECX,EDX
 3292         0x01, 0xF1,             // add  ECX,ESI
 3293         0x01, 0xF9,             // add  ECX,EDI
 3294         0x01, 0xE9,             // add  ECX,EBP
 3295         0x01, 0xE1,             // add  ECX,ESP
 3296         0x01, 0xC2,             // add  EDX,EAX
 3297         0x01, 0xDA,             // add  EDX,EBX
 3298         0x01, 0xCA,             // add  EDX,ECX
 3299         0x01, 0xD2,             // add  EDX,EDX
 3300         0x01, 0xF2,             // add  EDX,ESI
 3301         0x01, 0xFA,             // add  EDX,EDI
 3302         0x01, 0xEA,             // add  EDX,EBP
 3303         0x01, 0xE2,             // add  EDX,ESP
 3304         0x01, 0xC6,             // add  ESI,EAX
 3305         0x01, 0xDE,             // add  ESI,EBX
 3306         0x01, 0xCE,             // add  ESI,ECX
 3307         0x01, 0xD6,             // add  ESI,EDX
 3308         0x01, 0xF6,             // add  ESI,ESI
 3309         0x01, 0xFE,             // add  ESI,EDI
 3310         0x01, 0xEE,             // add  ESI,EBP
 3311         0x01, 0xE6,             // add  ESI,ESP
 3312         0x01, 0xC7,             // add  EDI,EAX
 3313         0x01, 0xDF,             // add  EDI,EBX
 3314         0x01, 0xCF,             // add  EDI,ECX
 3315         0x01, 0xD7,             // add  EDI,EDX
 3316         0x01, 0xF7,             // add  EDI,ESI
 3317         0x01, 0xFF,             // add  EDI,EDI
 3318         0x01, 0xEF,             // add  EDI,EBP
 3319         0x01, 0xE7,             // add  EDI,ESP
 3320         0x01, 0xC5,             // add  EBP,EAX
 3321         0x01, 0xDD,             // add  EBP,EBX
 3322         0x01, 0xCD,             // add  EBP,ECX
 3323         0x01, 0xD5,             // add  EBP,EDX
 3324         0x01, 0xF5,             // add  EBP,ESI
 3325         0x01, 0xFD,             // add  EBP,EDI
 3326         0x01, 0xED,             // add  EBP,EBP
 3327         0x01, 0xE5,             // add  EBP,ESP
 3328         0x01, 0xC4,             // add  ESP,EAX
 3329         0x01, 0xDC,             // add  ESP,EBX
 3330         0x01, 0xCC,             // add  ESP,ECX
 3331         0x01, 0xD4,             // add  ESP,EDX
 3332         0x01, 0xF4,             // add  ESP,ESI
 3333         0x01, 0xFC,             // add  ESP,EDI
 3334         0x01, 0xEC,             // add  ESP,EBP
 3335         0x01, 0xE4,             // add  ESP,ESP
 3336     ];
 3337     int i;
 3338
          // `call L1` pushes the address of the first ADD and jumps past the whole
          // sequence, so none of the ADDs is executed.  At L1 the pushed address
          // is popped into RBX and stored in `p`.
 3339     asm
 3340     {
 3341         call    L1                      ;
 3342
 3343         add     AL,AL   ;
 3344         add     AL,BL   ;
 3345         add     AL,CL   ;
 3346         add     AL,DL   ;
 3347
 3348         add     AL,AH   ;
 3349         add     AL,BH   ;
 3350         add     AL,CH   ;
 3351         add     AL,DH   ;
 3352
 3353         add     AH,AL   ;
 3354         add     AH,BL   ;
 3355         add     AH,CL   ;
 3356         add     AH,DL   ;
 3357
 3358         add     AH,AH   ;
 3359         add     AH,BH   ;
 3360         add     AH,CH   ;
 3361         add     AH,DH   ;
 3362
 3363         add     BL,AL   ;
 3364         add     BL,BL   ;
 3365         add     BL,CL   ;
 3366         add     BL,DL   ;
 3367
 3368         add     BL,AH   ;
 3369         add     BL,BH   ;
 3370         add     BL,CH   ;
 3371         add     BL,DH   ;
 3372
 3373         add     BH,AL   ;
 3374         add     BH,BL   ;
 3375         add     BH,CL   ;
 3376         add     BH,DL   ;
 3377
 3378         add     BH,AH   ;
 3379         add     BH,BH   ;
 3380         add     BH,CH   ;
 3381         add     BH,DH   ;
 3382
 3383         add     CL,AL   ;
 3384         add     CL,BL   ;
 3385         add     CL,CL   ;
 3386         add     CL,DL   ;
 3387
 3388         add     CL,AH   ;
 3389         add     CL,BH   ;
 3390         add     CL,CH   ;
 3391         add     CL,DH   ;
 3392
 3393         add     CH,AL   ;
 3394         add     CH,BL   ;
 3395         add     CH,CL   ;
 3396         add     CH,DL   ;
 3397
 3398         add     CH,AH   ;
 3399         add     CH,BH   ;
 3400         add     CH,CH   ;
 3401         add     CH,DH   ;
 3402
 3403         add     DL,AL   ;
 3404         add     DL,BL   ;
 3405         add     DL,CL   ;
 3406         add     DL,DL   ;
 3407
 3408         add     DL,AH   ;
 3409         add     DL,BH   ;
 3410         add     DL,CH   ;
 3411         add     DL,DH   ;
 3412
 3413         add     DH,AL   ;
 3414         add     DH,BL   ;
 3415         add     DH,CL   ;
 3416         add     DH,DL   ;
 3417
 3418         add     DH,AH   ;
 3419         add     DH,BH   ;
 3420         add     DH,CH   ;
 3421         add     DH,DH   ;
 3422
 3423         add     AX,AX   ;
 3424         add     AX,BX   ;
 3425         add     AX,CX   ;
 3426         add     AX,DX   ;
 3427         add     AX,SI   ;
 3428         add     AX,DI   ;
 3429         add     AX,BP   ;
 3430         add     AX,SP   ;
 3431
 3432         add     BX,AX   ;
 3433         add     BX,BX   ;
 3434         add     BX,CX   ;
 3435         add     BX,DX   ;
 3436         add     BX,SI   ;
 3437         add     BX,DI   ;
 3438         add     BX,BP   ;
 3439         add     BX,SP   ;
 3440
 3441         add     CX,AX   ;
 3442         add     CX,BX   ;
 3443         add     CX,CX   ;
 3444         add     CX,DX   ;
 3445         add     CX,SI   ;
 3446         add     CX,DI   ;
 3447         add     CX,BP   ;
 3448         add     CX,SP   ;
 3449
 3450         add     DX,AX   ;
 3451         add     DX,BX   ;
 3452         add     DX,CX   ;
 3453         add     DX,DX   ;
 3454         add     DX,SI   ;
 3455         add     DX,DI   ;
 3456         add     DX,BP   ;
 3457         add     DX,SP   ;
 3458
 3459         add     SI,AX   ;
 3460         add     SI,BX   ;
 3461         add     SI,CX   ;
 3462         add     SI,DX   ;
 3463         add     SI,SI   ;
 3464         add     SI,DI   ;
 3465         add     SI,BP   ;
 3466         add     SI,SP   ;
 3467
 3468         add     DI,AX   ;
 3469         add     DI,BX   ;
 3470         add     DI,CX   ;
 3471         add     DI,DX   ;
 3472         add     DI,SI   ;
 3473         add     DI,DI   ;
 3474         add     DI,BP   ;
 3475         add     DI,SP   ;
 3476
 3477         add     BP,AX   ;
 3478         add     BP,BX   ;
 3479         add     BP,CX   ;
 3480         add     BP,DX   ;
 3481         add     BP,SI   ;
 3482         add     BP,DI   ;
 3483         add     BP,BP   ;
 3484         add     BP,SP   ;
 3485
 3486         add     SP,AX   ;
 3487         add     SP,BX   ;
 3488         add     SP,CX   ;
 3489         add     SP,DX   ;
 3490         add     SP,SI   ;
 3491         add     SP,DI   ;
 3492         add     SP,BP   ;
 3493         add     SP,SP   ;
 3494
 3495         add     EAX,EAX ;
 3496         add     EAX,EBX ;
 3497         add     EAX,ECX ;
 3498         add     EAX,EDX ;
 3499         add     EAX,ESI ;
 3500         add     EAX,EDI ;
 3501         add     EAX,EBP ;
 3502         add     EAX,ESP ;
 3503
 3504         add     EBX,EAX ;
 3505         add     EBX,EBX ;
 3506         add     EBX,ECX ;
 3507         add     EBX,EDX ;
 3508         add     EBX,ESI ;
 3509         add     EBX,EDI ;
 3510         add     EBX,EBP ;
 3511         add     EBX,ESP ;
 3512
 3513         add     ECX,EAX ;
 3514         add     ECX,EBX ;
 3515         add     ECX,ECX ;
 3516         add     ECX,EDX ;
 3517         add     ECX,ESI ;
 3518         add     ECX,EDI ;
 3519         add     ECX,EBP ;
 3520         add     ECX,ESP ;
 3521
 3522         add     EDX,EAX ;
 3523         add     EDX,EBX ;
 3524         add     EDX,ECX ;
 3525         add     EDX,EDX ;
 3526         add     EDX,ESI ;
 3527         add     EDX,EDI ;
 3528         add     EDX,EBP ;
 3529         add     EDX,ESP ;
 3530
 3531         add     ESI,EAX ;
 3532         add     ESI,EBX ;
 3533         add     ESI,ECX ;
 3534         add     ESI,EDX ;
 3535         add     ESI,ESI ;
 3536         add     ESI,EDI ;
 3537         add     ESI,EBP ;
 3538         add     ESI,ESP ;
 3539
 3540         add     EDI,EAX ;
 3541         add     EDI,EBX ;
 3542         add     EDI,ECX ;
 3543         add     EDI,EDX ;
 3544         add     EDI,ESI ;
 3545         add     EDI,EDI ;
 3546         add     EDI,EBP ;
 3547         add     EDI,ESP ;
 3548
 3549         add     EBP,EAX ;
 3550         add     EBP,EBX ;
 3551         add     EBP,ECX ;
 3552         add     EBP,EDX ;
 3553         add     EBP,ESI ;
 3554         add     EBP,EDI ;
 3555         add     EBP,EBP ;
 3556         add     EBP,ESP ;
 3557
 3558         add     ESP,EAX ;
 3559         add     ESP,EBX ;
 3560         add     ESP,ECX ;
 3561         add     ESP,EDX ;
 3562         add     ESP,ESI ;
 3563         add     ESP,EDI ;
 3564         add     ESP,EBP ;
 3565         add     ESP,ESP ;
 3566
 3567 L1:                                     ;
 3568         pop     RBX                     ;
 3569         mov     p[RBP],RBX              ;
 3570     }
          // Compare the assembled bytes, one by one, with the expected encodings.
 3571     for (i = 0; i < data.length; i++)
 3572     {
 3573         assert(p[i] == data[i]);
 3574     }
 3575 }
 3576 
 3577 
 3578 /****************************************************/
 3579 
 3580 void test50()
 3581 {
          // Checks the encodings of a mixed bag of general-purpose instructions:
          // flag and string operations, mul/div forms, port I/O, interrupts,
          // short conditional jumps and loops back to label L10, shifts and
          // rotates, and the rep/lock prefixes.  `data` lists the expected bytes
          // in the same order as the instructions in the asm block.  Entries that
          // are commented out in `data` (daa, das, into, jcxz, lds, les) are
          // either omitted from the asm as well or replaced there by nops of the
          // same length.
 3582     ubyte *p;
 3583     static ubyte[] data =
 3584     [
 3585         0x66, 0x98,     // cbw
 3586         0xF8,           // clc
 3587         0xFC,           // cld
 3588         0xFA,           // cli
 3589         0xF5,           // cmc
 3590         0xA6,           // cmpsb
 3591         0x66, 0xA7,     // cmpsw
 3592         0xA7,           // cmpsd
 3593         0x66, 0x99,     // cwd
 3594 //      0x27,           // daa
 3595 //      0x2F,           // das
 3596         0xFF, 0xC8,     // dec  EAX
 3597         0xF6, 0xF1,     // div  CL
 3598         0x66, 0xF7, 0xF3,  // div       BX
 3599         0xF7, 0xF2,     // div  EDX
 3600         0xF4,           // hlt
 3601         0xF6, 0xFB,     // idiv BL
 3602         0x66, 0xF7, 0xFA,  // idiv      DX
 3603         0xF7, 0xFE,     // idiv ESI
 3604         0xF6, 0xEB,     // imul BL
 3605         0x66, 0xF7, 0xEA,  // imul      DX
 3606         0xF7, 0xEE,     // imul ESI
 3607         0xEC,           // in   AL,DX
 3608         0x66, 0xED,     // in   AX,DX
 3609         0xFF, 0xC3,     // inc  EBX
 3610         0xCC,           // int  3
 3611         0xCD, 0x67,     // int  067h
 3612 //      0xCE,           // into
 3613         0x66, 0xCF,     // iret
 3614         0x48, 0xCF,     // iretq
 3615         0x90, 0x90,     // nop;nop - put instructions above this or L10 changes
 3616                         //           address, which changes all the jump
 3617                         //           instructions and breaks the test case.
 3618         0x77, 0xFC,     // ja   L10
 3619         0x77, 0xFA,     // ja   L10
 3620         0x73, 0xF8,     // jae  L10
 3621         0x73, 0xF6,     // jae  L10
 3622         0x73, 0xF4,     // jae  L10
 3623         0x72, 0xF2,     // jb   L10
 3624         0x72, 0xF0,     // jb   L10
 3625         0x76, 0xEE,     // jbe  L10
 3626         0x76, 0xEC,     // jbe  L10
 3627         0x72, 0xEA,     // jb   L10
 3628 //      0x67, 0xE3, 0xE7,  // jcxz      L10
 3629         0x90, 0x90, 0x90,  // nop;nop;nop
 3630         0x74, 0xE5,     // je   L10
 3631         0x74, 0xE3,     // je   L10
 3632         0x7F, 0xE1,     // jg   L10
 3633         0x7F, 0xDF,     // jg   L10
 3634         0x7D, 0xDD,     // jge  L10
 3635         0x7D, 0xDB,     // jge  L10
 3636         0x7C, 0xD9,     // jl   L10
 3637         0x7C, 0xD7,     // jl   L10
 3638         0x7E, 0xD5,     // jle  L10
 3639         0x7E, 0xD3,     // jle  L10
 3640         0xEB, 0xD1,     // jmp short    L10
 3641         0x75, 0xCF,     // jne  L10
 3642         0x75, 0xCD,     // jne  L10
 3643         0x71, 0xCB,     // jno  L10
 3644         0x79, 0xC9,     // jns  L10
 3645         0x7B, 0xC7,     // jnp  L10
 3646         0x7B, 0xC5,     // jnp  L10
 3647         0x70, 0xC3,     // jo   L10
 3648         0x7A, 0xC1,     // jp   L10
 3649         0x7A, 0xBF,     // jp   L10
 3650         0x78, 0xBD,     // js   L10
 3651         0x9F,           // lahf
 3652 //      0xC5, 0x30,     // lds  ESI,[EAX]
 3653         0x90, 0x90,     // nop;nop
 3654         0x8B, 0xFB,     // mov  EDI,EBX  (expected output for `lea EDI,[EBX]` below)
 3655 //      0xC4, 0x29,     // les  EBP,[ECX]
 3656         0x90, 0x90,     // nop;nop
 3657         0xF0,           // lock
 3658         0xAC,           // lodsb
 3659         0x66, 0xAD,     // lodsw
 3660         0xAD,           // lodsd
 3661         0xE2, 0xAF,     // loop L10
 3662         0xE1, 0xAD,     // loope        L10
 3663         0xE1, 0xAB,     // loope        L10
 3664         0xE0, 0xA9,     // loopne       L10
 3665         0xE0, 0xA7,     // loopne       L10
 3666         0xA4,           // movsb
 3667         0x66, 0xA5,     // movsw
 3668         0xA5,           // movsd
 3669         0xF6, 0xE4,     // mul  AH
 3670         0x66, 0xF7, 0xE1,  // mul       CX
 3671         0xF7, 0xE5,     // mul  EBP
 3672         0x90,           // nop
 3673         0xF7, 0xD7,     // not  EDI
 3674         0x66, 0xE7, 0x44,  // out       044h,AX
 3675         0xEE,           // out  DX,AL
 3676         0x66, 0x9D,     // popf
 3677         0x66, 0x9C,     // pushf
 3678         0xD1, 0xDB,     // rcr  EBX,1
 3679         0xF3,           // rep
 3680         0xF3,           // rep
 3681         0xF2,           // repne
 3682         0xF3,           // rep
 3683         0xF2,           // repne
 3684         0xC3,           // ret
 3685         0xC2, 0x04, 0x00,  // ret  4
 3686         0xD1, 0xC1,     // rol  ECX,1
 3687         0xD1, 0xCA,     // ror  EDX,1
 3688         0x9E,           // sahf
 3689         0xD1, 0xE5,     // shl  EBP,1
 3690         0xD1, 0xE4,     // shl  ESP,1
 3691         0xD1, 0xFF,     // sar  EDI,1
 3692         0xAE,           // scasb
 3693         0x66, 0xAF,     // scasw
 3694         0xAF,           // scasd
 3695         0xD1, 0xEE,     // shr  ESI,1
 3696         0xFD,           // std
 3697         0xF9,           // stc
 3698         0xFB,           // sti
 3699         0xAA,           // stosb
 3700         0x66, 0xAB,     // stosw
 3701         0xAB,           // stosd
 3702         0x9B,           // wait
 3703         0x91,           // xchg EAX,ECX
 3704         0xD7,           // xlat
 3705     ];
 3706     int i;
 3707
          // `call L1` pushes the address of the first instruction under test and
          // jumps past the whole sequence, so none of it is executed.  At L1 the
          // pushed address is popped into RBX and stored in `p`.
 3708     asm
 3709     {
 3710         call    L1                      ;
 3711
 3712         cbw     ;
 3713         clc     ;
 3714         cld     ;
 3715         cli     ;
 3716         cmc     ;
 3717         cmpsb   ;
 3718         cmpsw   ;
 3719         cmpsd   ;
 3720         cwd     ;
 3721         //daa   ;
 3722         //das   ;
 3723         dec     EAX     ;
 3724         div     CL      ;
 3725         div     BX      ;
 3726         div     EDX     ;
 3727         hlt             ;
 3728         idiv    BL      ;
 3729         idiv    DX      ;
 3730         idiv    ESI     ;
 3731         imul    BL      ;
 3732         imul    DX      ;
 3733         imul    ESI     ;
 3734         in      AL,DX   ;
 3735         in      AX,DX   ;
 3736         inc     EBX     ;
 3737         int     3       ;
 3738         int     0x67    ;
 3739         //into          ;
 3740         iret            ;
 3741         iretq           ;
 3742 L10:    nop; nop;         // put instructions above this or L10 changes
 3743                           // address, which changes all the jump instructions
 3744                           // and breaks the test case.
 3745         ja      L10     ;
 3746         jnbe    L10     ;
 3747         jae     L10     ;
 3748         jnb     L10     ;
 3749         jnc     L10     ;
 3750         jb      L10     ;
 3751         jnae    L10     ;
 3752         jbe     L10     ;
 3753         jna     L10     ;
 3754         jc      L10     ;
 3755         nop;nop;nop;    // jcxz L10;
 3756         je      L10     ;
 3757         jz      L10     ;
 3758         jg      L10     ;
 3759         jnle    L10     ;
 3760         jge     L10     ;
 3761         jnl     L10     ;
 3762         jl      L10     ;
 3763         jnge    L10     ;
 3764         jle     L10     ;
 3765         jng     L10     ;
 3766         jmp     short L10       ;
 3767         jne     L10     ;
 3768         jnz     L10     ;
 3769         jno     L10     ;
 3770         jns     L10     ;
 3771         jnp     L10     ;
 3772         jpo     L10     ;
 3773         jo      L10     ;
 3774         jp      L10     ;
 3775         jpe     L10     ;
 3776         js      L10     ;
 3777         lahf            ;
 3778         nop;nop;        //lds   ESI,[EAX];
 3779         lea     EDI,[EBX];
 3780         nop;nop;        //les   EBP,[ECX];
 3781         lock    ;
 3782         lodsb   ;
 3783         lodsw   ;
 3784         lodsd   ;
 3785         loop    L10     ;
 3786         loope   L10     ;
 3787         loopz   L10     ;
 3788         loopnz  L10     ;
 3789         loopne  L10     ;
 3790         movsb   ;
 3791         movsw   ;
 3792         movsd   ;
 3793         mul     AH      ;
 3794         mul     CX      ;
 3795         mul     EBP     ;
 3796         nop     ;
 3797         not     EDI     ;
 3798         out     0x44,AX ;
 3799         out     DX,AL   ;
 3800         popf    ;
 3801         pushf   ;
 3802         rcr     EBX,1   ;
 3803         rep     ;
 3804         repe    ;
 3805         repne   ;
 3806         repz    ;
 3807         repnz   ;
 3808         ret     ;
 3809         ret     4       ;
 3810         rol     ECX,1   ;
 3811         ror     EDX,1   ;
 3812         sahf    ;
 3813         sal     EBP,1   ;
 3814         shl     ESP,1   ;
 3815         sar     EDI,1   ;
 3816         scasb   ;
 3817         scasw   ;
 3818         scasd   ;
 3819         shr     ESI,1   ;
 3820         std     ;
 3821         stc     ;
 3822         sti     ;
 3823         stosb   ;
 3824         stosw   ;
 3825         stosd   ;
 3826         wait    ;
 3827         xchg    EAX,ECX ;
 3828         xlat    ;
 3829
 3830 L1:                                     ;
 3831         pop     RBX                     ;
 3832         mov     p[RBP],RBX              ;
 3833     }
          // Compare the assembled bytes, one by one, with the expected encodings.
 3834     for (i = 0; i < data.length; i++)
 3835     {
 3836         assert(p[i] == data[i]);
 3837     }
 3838 }
 3839 
 3840 
 3841 /****************************************************/
 3842 
 3843 class Test51
 3844 {
          // The method body consists solely of an inline asm statement that
          // loads `this` into RAX; the parameter `n` is not used.
 3845     void test(int n)
 3846     { asm {
 3847         mov RAX, this;
 3848         }
 3849     }
 3850 }
 3851 
 3852 /****************************************************/
 3853 
 3854 void test52()
          // Checks the encodings of NEG for 8/16/32/64-bit registers (including
          // the REX-prefixed R8, R11 and R12 forms), for the local variable `x`,
          // and for memory operands addressed through EBX/RBX.
          // NOTE: the expected bytes for `neg x` contain x's RBP-relative
          // displacement, so that entry presumably changes if the locals of this
          // function are added to, removed or reordered.
 3855 {   int x;
 3856     ubyte* p;
 3857     static ubyte[] data =
 3858     [
 3859         0xF6, 0xD8,                     // neg  AL
 3860 0x66,   0xF7, 0xD8,                     // neg  AX
 3861         0xF7, 0xD8,                     // neg  EAX
 3862         0x48, 0xF7, 0xD8,               // neg  RAX
 3863         0xF6, 0xDC,                     // neg  AH
 3864         0x41, 0xF6, 0xDC,               // neg  R12B
 3865 0x66,   0x41, 0xF7, 0xDC,               // neg  R12W
 3866         0x41, 0xF7, 0xDC,               // neg  R12D
 3867         0x49, 0xF7, 0xDB,               // neg  R11
 3868 //      0xF6, 0x1D, 0x00, 0x00, 0x00, 0x00,     // neg  byte ptr _D6iasm641bg@PC32[RIP]
 3869 //0x66,         0xF7, 0x1D, 0x00, 0x00, 0x00, 0x00,     // neg  word ptr _D6iasm641ws@PC32[RIP]
 3870 //      0xF7, 0x1D, 0x00, 0x00, 0x00, 0x00,     // neg  dword ptr _D6iasm641ii@PC32[RIP]
 3871 //      0x48, 0xF7, 0x1D, 0x00, 0x00, 0x00, 0x00,       // neg  qword ptr _D6iasm641ll@PC32[RIP]
 3872         0xF7, 0x5D, 0xD0,               // neg  dword ptr -030h[RBP]  (local x; disp8 0xD0 = -030h)
 3873         0xF6, 0x1B,                     // neg  byte ptr [RBX]  (expected output for `neg [EBX]`)
 3874         0xF6, 0x1B,                     // neg  byte ptr [RBX]
 3875         0x49, 0xF7, 0xD8,               // neg  R8
 3876     ];
 3877
          // `call L1` pushes the address of the first NEG and jumps past the whole
          // sequence, so none of it is executed.  At L1 the pushed address is
          // popped into RAX and stored in `p`.
 3878     asm
 3879     {
 3880         call    L1      ;
 3881
 3882         neg     AL      ;
 3883         neg     AX      ;
 3884         neg     EAX     ;
 3885         neg     RAX     ;
 3886         neg     AH      ;
 3887         neg     R12B    ;
 3888         neg     R12W    ;
 3889         neg     R12D    ;
 3890         neg     R11     ;
 3891 //      neg     b       ;
 3892 //      neg     w       ;
 3893 //      neg     i       ;
 3894 //      neg     l       ;
 3895         neg     x       ;
 3896         neg     [EBX]   ;
 3897         neg     [RBX]   ;
 3898         neg     R8      ;
 3899
 3900 L1:     pop     RAX     ;
 3901         mov     p[RBP],RAX ;
 3902     }
 3903
          // Compare the assembled bytes, one by one, with the expected encodings.
 3904     foreach (ref i, b; data)
 3905     {
 3906         //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b);
 3907         assert(p[i] == b);
 3908     }
 3909 }
 3910 
 3911 /****************************************************/
 3912 
 3913 void test53()
 3914 {   int x;
          // Encoding test for LEA with base+index (SIB) addressing: every
          // instruction in the asm block below must assemble to the matching
          // row of `data`.  `x` is not referenced anywhere in this function.
 3915     ubyte* p;
          // Expected machine code, one row per instruction, in the same order
          // as the asm block.  Decoding the SIB bytes shows that the row
          // comments name the index register first and the base register second.
 3916     static ubyte[] data =
 3917     [
 3918         0x48, 0x8D, 0x04, 0x00,         // lea  RAX,[RAX][RAX]
 3919         0x48, 0x8D, 0x04, 0x08,         // lea  RAX,[RCX][RAX]
 3920         0x48, 0x8D, 0x04, 0x10,         // lea  RAX,[RDX][RAX]
 3921         0x48, 0x8D, 0x04, 0x18,         // lea  RAX,[RBX][RAX]
 3922         0x48, 0x8D, 0x04, 0x28,         // lea  RAX,[RBP][RAX]
 3923         0x48, 0x8D, 0x04, 0x30,         // lea  RAX,[RSI][RAX]
 3924         0x48, 0x8D, 0x04, 0x38,         // lea  RAX,[RDI][RAX]
 3925         0x4A, 0x8D, 0x04, 0x00,         // lea  RAX,[R8][RAX]
 3926         0x4A, 0x8D, 0x04, 0x08,         // lea  RAX,[R9][RAX]
 3927         0x4A, 0x8D, 0x04, 0x10,         // lea  RAX,[R10][RAX]
 3928         0x4A, 0x8D, 0x04, 0x18,         // lea  RAX,[R11][RAX]
 3929         0x4A, 0x8D, 0x04, 0x20,         // lea  RAX,[R12][RAX]
 3930         0x4A, 0x8D, 0x04, 0x28,         // lea  RAX,[R13][RAX]
 3931         0x4A, 0x8D, 0x04, 0x30,         // lea  RAX,[R14][RAX]
 3932         0x4A, 0x8D, 0x04, 0x38,         // lea  RAX,[R15][RAX]
 3933         0x48, 0x8D, 0x04, 0x00,         // lea  RAX,[RAX][RAX]
 3934         0x48, 0x8D, 0x04, 0x01,         // lea  RAX,[RAX][RCX]
 3935         0x48, 0x8D, 0x04, 0x02,         // lea  RAX,[RAX][RDX]
 3936         0x48, 0x8D, 0x04, 0x03,         // lea  RAX,[RAX][RBX]
 3937         0x48, 0x8D, 0x04, 0x04,         // lea  RAX,[RAX][RSP]
 3938         0x48, 0x8D, 0x44, 0x05, 0x00,   // lea  RAX,0[RAX][RBP]
 3939         0x48, 0x8D, 0x04, 0x06,         // lea  RAX,[RAX][RSI]
 3940         0x48, 0x8D, 0x04, 0x07,         // lea  RAX,[RAX][RDI]
 3941         0x49, 0x8D, 0x04, 0x00,         // lea  RAX,[RAX][R8]
 3942         0x49, 0x8D, 0x04, 0x01,         // lea  RAX,[RAX][R9]
 3943         0x49, 0x8D, 0x04, 0x02,         // lea  RAX,[RAX][R10]
 3944         0x49, 0x8D, 0x04, 0x03,         // lea  RAX,[RAX][R11]
 3945         0x49, 0x8D, 0x04, 0x04,         // lea  RAX,[RAX][R12]
 3946         0x49, 0x8D, 0x44, 0x05, 0x00,   // lea  RAX,0[RAX][R13]
 3947         0x49, 0x8D, 0x04, 0x06,         // lea  RAX,[RAX][R14]
 3948         0x49, 0x8D, 0x04, 0x07,         // lea  RAX,[RAX][R15]
 3949         0x4B, 0x8D, 0x04, 0x24,         // lea  RAX,[R12][R12]
 3950         0x4B, 0x8D, 0x44, 0x25, 0x00,   // lea  RAX,0[R12][R13]
 3951         0x4B, 0x8D, 0x04, 0x26,         // lea  RAX,[R12][R14]
 3952         0x4B, 0x8D, 0x04, 0x2C,         // lea  RAX,[R13][R12]
 3953         0x4B, 0x8D, 0x44, 0x2D, 0x00,   // lea  RAX,0[R13][R13]
 3954         0x4B, 0x8D, 0x04, 0x2E,         // lea  RAX,[R13][R14]
 3955         0x4B, 0x8D, 0x04, 0x34,         // lea  RAX,[R14][R12]
 3956         0x4B, 0x8D, 0x44, 0x35, 0x00,   // lea  RAX,0[R14][R13]
 3957         0x4B, 0x8D, 0x04, 0x36,         // lea  RAX,[R14][R14]
 3958         0x48, 0x8D, 0x44, 0x01, 0x12,                           // lea  RAX,012h[RAX][RCX]
 3959         0x48, 0x8D, 0x84, 0x01, 0x34, 0x12, 0x00, 0x00,         // lea  RAX,01234h[RAX][RCX]
 3960         0x48, 0x8D, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12,         // lea  RAX,012345678h[RAX][RCX]
 3961         0x48, 0x8D, 0x44, 0x05, 0x12,                           // lea  RAX,012h[RAX][RBP]
 3962         0x48, 0x8D, 0x84, 0x05, 0x34, 0x12, 0x00, 0x00,         // lea  RAX,01234h[RAX][RBP]
 3963         0x48, 0x8D, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12,         // lea  RAX,012345678h[RAX][RBP]
 3964         0x49, 0x8D, 0x44, 0x05, 0x12,                           // lea  RAX,012h[RAX][R13]
 3965         0x49, 0x8D, 0x84, 0x05, 0x34, 0x12, 0x00, 0x00,         // lea  RAX,01234h[RAX][R13]
 3966         0x49, 0x8D, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12,         // lea  RAX,012345678h[RAX][R13]
 3967 
 3968         0x48, 0x8D, 0x04, 0x24,         // lea  RAX,[RSP]
 3969         0x49, 0x8D, 0x04, 0x24,         // lea  RAX,[R12]
 3970     ];
 3971 
 3972     asm
 3973     {
              // The call pushes the address of the instruction that follows it
              // and jumps straight to L1, so the lea instructions below are
              // assembled but never executed; L1 pops that address into p.
 3974         call    L1      ;
 3975 
 3976         // Right: RAX stays on the left, the right-hand register varies
 3977         lea RAX, [RAX+RAX];
 3978         lea RAX, [RAX+RCX];
 3979         lea RAX, [RAX+RDX];
 3980         lea RAX, [RAX+RBX];
 3981         //lea RAX, [RAX+RSP]; RSP can't be on the right
 3982         lea RAX, [RAX+RBP];
 3983         lea RAX, [RAX+RSI];
 3984         lea RAX, [RAX+RDI];
 3985         lea RAX, [RAX+R8];
 3986         lea RAX, [RAX+R9];
 3987         lea RAX, [RAX+R10];
 3988         lea RAX, [RAX+R11];
 3989         lea RAX, [RAX+R12];
 3990         lea RAX, [RAX+R13];
 3991         lea RAX, [RAX+R14];
 3992         lea RAX, [RAX+R15];
 3993         // Left: RAX stays on the right, the left-hand register varies
 3994         lea RAX, [RAX+RAX];
 3995         lea RAX, [RCX+RAX];
 3996         lea RAX, [RDX+RAX];
 3997         lea RAX, [RBX+RAX];
 3998         lea RAX, [RSP+RAX];
 3999         lea RAX, [RBP+RAX]; // expected bytes use the disp8 form with a zero displacement
 4000         lea RAX, [RSI+RAX];
 4001         lea RAX, [RDI+RAX];
 4002         lea RAX, [R8+RAX];
 4003         lea RAX, [R9+RAX];
 4004         lea RAX, [R10+RAX];
 4005         lea RAX, [R11+RAX];
 4006         lea RAX, [R12+RAX];
 4007         lea RAX, [R13+RAX]; // expected bytes use the disp8 form with a zero displacement
 4008         lea RAX, [R14+RAX];
 4009         lea RAX, [R15+RAX];
 4010         // Right and Left: both operands taken from R12/R13/R14
 4011         lea RAX, [R12+R12];
 4012         lea RAX, [R13+R12];
 4013         lea RAX, [R14+R12];
 4014         lea RAX, [R12+R13];
 4015         lea RAX, [R13+R13];
 4016         lea RAX, [R14+R13];
 4017         lea RAX, [R12+R14];
 4018         lea RAX, [R13+R14];
 4019         lea RAX, [R14+R14];
 4020 
 4021         // Disp8/32 checks: 0x12 is expected as one byte, the larger values as four
 4022         lea RAX, [RCX+RAX+0x12];
 4023         lea RAX, [RCX+RAX+0x1234];
 4024         lea RAX, [RCX+RAX+0x1234_5678];
 4025         lea RAX, [RBP+RAX+0x12];
 4026         lea RAX, [RBP+RAX+0x1234];
 4027         lea RAX, [RBP+RAX+0x1234_5678];
 4028         lea RAX, [R13+RAX+0x12];
 4029         lea RAX, [R13+RAX+0x1234];
 4030         lea RAX, [R13+RAX+0x1234_5678];
 4031 
              // Base-only forms whose expected encoding still carries a SIB byte (0x24)
 4032         lea RAX, [RSP];
 4033         lea RAX, [R12];
 4034 
 4035 L1:     pop     RAX     ;
 4036         mov     p[RBP],RAX ;
 4037     }
 4038 
          // Compare the assembled bytes starting at p with the expected table.
 4039     foreach (ref i, b; data)
 4040     {
 4041         //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b);
 4042         assert(p[i] == b);
 4043     }
 4044 }
 4045 
 4046 /****************************************************/
 4047 
 4048 void test54()
 4049 {   int x;
          // Encoding test for dec, inc, shld and shrd: every instruction in
          // the asm block below must assemble to the matching row of `data`.
          // `x` is not referenced anywhere in this function.
 4050     ubyte* p;
          // Expected machine code, one row per instruction, in asm-block order.
 4051     static ubyte[] data =
 4052     [
 4053               0xFE, 0xC8,                // dec    AL
 4054               0xFE, 0xCC,                // dec    AH
 4055         0x66, 0xFF, 0xC8,                // dec    AX
 4056               0xFF, 0xC8,                // dec    EAX
 4057         0x48, 0xFF, 0xC8,                // dec    RAX
 4058         0x49, 0xFF, 0xCA,                // dec    R10
 4059 
 4060               0xFE, 0xC0,                // inc    AL
 4061               0xFE, 0xC4,                // inc    AH
 4062         0x66, 0xFF, 0xC0,                // inc    AX
 4063               0xFF, 0xC0,                // inc    EAX
 4064         0x48, 0xFF, 0xC0,                // inc    RAX
 4065         0x49, 0xFF, 0xC2,                // inc    R10
 4066 
 4067   0x66, 0x44, 0x0F, 0xA4, 0xC0, 0x04,    // shld    AX, R8W, 4
 4068   0x66, 0x44, 0x0F, 0xA5, 0xC0,          // shld    AX, R8W, CL
 4069         0x44, 0x0F, 0xA4, 0xC0, 0x04,    // shld   EAX, R8D, 4
 4070         0x44, 0x0F, 0xA5, 0xC0,          // shld   EAX, R8D, CL
 4071         0x4C, 0x0F, 0xA4, 0xC0, 0x04,    // shld   RAX, R8 , 4
 4072         0x4C, 0x0F, 0xA5, 0xC0,          // shld   RAX, R8 , CL
 4073 
 4074   0x66, 0x44, 0x0F, 0xAC, 0xC0, 0x04,    // shrd    AX, R8W, 4
 4075   0x66, 0x44, 0x0F, 0xAD, 0xC0,          // shrd    AX, R8W, CL
 4076         0x44, 0x0F, 0xAC, 0xC0, 0x04,    // shrd   EAX, R8D, 4
 4077         0x44, 0x0F, 0xAD, 0xC0,          // shrd   EAX, R8D, CL
 4078         0x4C, 0x0F, 0xAC, 0xC0, 0x04,    // shrd   RAX, R8 , 4
 4079         0x4C, 0x0F, 0xAD, 0xC0           // shrd   RAX, R8 , CL
 4080     ];
 4081 
 4082     asm
 4083     {
              // The call pushes the address of the next instruction and jumps
              // to L1, so the instructions in between are assembled but never
              // executed; L1 pops that address into p.
 4084         call  L1;
 4085 
              // dec on 8-bit (AL, AH), 16-, 32- and 64-bit registers
 4086         dec   AL;
 4087         dec   AH;
 4088         dec   AX;
 4089         dec   EAX;
 4090         dec   RAX;
 4091         dec   R10;
 4092 
              // inc on the same set of registers
 4093         inc   AL;
 4094         inc   AH;
 4095         inc   AX;
 4096         inc   EAX;
 4097         inc   RAX;
 4098         inc   R10;
 4099 
              // shld at 16/32/64 bits, with an immediate count and with CL
 4100         shld   AX, R8W, 4;
 4101         shld   AX, R8W, CL;
 4102         shld  EAX, R8D, 4;
 4103         shld  EAX, R8D, CL;
 4104         shld  RAX, R8 , 4;
 4105         shld  RAX, R8 , CL;
 4106 
              // shrd at 16/32/64 bits, with an immediate count and with CL
 4107         shrd   AX, R8W, 4;
 4108         shrd   AX, R8W, CL;
 4109         shrd  EAX, R8D, 4;
 4110         shrd  EAX, R8D, CL;
 4111         shrd  RAX, R8 , 4;
 4112         shrd  RAX, R8 , CL;
 4113 
 4114 L1:     pop     RAX;
 4115         mov     p[RBP],RAX;
 4116     }
 4117 
          // Compare the assembled bytes starting at p with the expected table.
 4118     foreach (ref i, b; data)
 4119     {
 4120         //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b);
 4121         assert(p[i] == b);
 4122     }
 4123 }
 4124 
 4125 /****************************************************/
 4126 
 4127 void test55()
 4128 {   int x;
          // Encoding test for conditional and unconditional jumps: checks the
          // opcode and displacement bytes emitted for jumps to labels at known
          // distances.  `x` is not referenced anywhere in this function.
 4129     ubyte* p;
          // Eight 0x90 bytes; used with `dq` below as fixed-size padding.
 4130     enum NOP = 0x9090_9090_9090_9090;
          // Expected bytes for the seven jumps only (32 bytes in total); the
          // padding after them is not compared.  The jumps to Lb are expected
          // in the 2-byte rel8 form, the others with a 4-byte displacement.
 4131     static ubyte[] data =
 4132     [
 4133         0x0F, 0x87, 0xFF, 0xFF, 0, 0,    //    ja    $ + 0xFFFF
 4134         0x72, 0x18,                      //    jb    Lb
 4135         0x0F, 0x82, 0x92, 0x00, 0, 0,    //    jc    Lc
 4136         0x0F, 0x84, 0x0C, 0x01, 0, 0,    //    je    Le
 4137         0xEB, 0x0A,                      //    jmp   Lb
 4138         0xE9, 0x85, 0x00, 0x00, 0,       //    jmp   Lc
 4139         0xE9, 0x00, 0x01, 0x00, 0,       //    jmp   Le
 4140     ];
 4141 
 4142     asm
 4143     {
              // The call pushes the address of the next instruction and jumps
              // to L1, so nothing between here and L1 is executed; L1 pops
              // that address into p.
 4144         call  L1;
 4145 
 4146         ja  $+0x0_FFFF;
 4147         jb  Lb;
 4148         jc  Lc;
 4149         je  Le;
 4150         jmp Lb;
 4151         jmp Lc;
 4152         jmp Le;
 4153 
              // Each dq line emits four NOP qwords (32 bytes); the trailing
              // numbers are the running byte count of padding since Lb.
 4154     Lb: dq NOP,NOP,NOP,NOP;    //  32
 4155         dq NOP,NOP,NOP,NOP;    //  64
 4156         dq NOP,NOP,NOP,NOP;    //  96
 4157         dq NOP,NOP,NOP,NOP;    // 128
 4158     Lc: dq NOP,NOP,NOP,NOP;    // 160
 4159         dq NOP,NOP,NOP,NOP;    // 192
 4160         dq NOP,NOP,NOP,NOP;    // 224
 4161         dq NOP,NOP,NOP,NOP;    // 256
 4162     Le: nop;
 4163 
 4164 L1:     pop     RAX;
 4165         mov     p[RBP],RAX;
 4166     }
 4167 
          // Compare the assembled bytes starting at p with the expected table.
 4168     foreach (ref i, b; data)
 4169     {
 4170         //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b);
 4171         assert(p[i] == b);
 4172     }
 4173 }
 4174 
 4175 /****************************************************/
 4176 
 4177 void test56()
 4178 {   int x;
          // Calls foo56(), whose naked asm jumps into bar56(), and checks that
          // the value bar56 leaves in EAX (42) comes back as foo56's result.
 4179 
 4180     x = foo56();
 4181 
 4182     assert(x == 42);
 4183 }
 4184 
 4185 int foo56()
 4186 {
          // Naked function used by test56: jumps (does not call) to bar56.
 4187     asm
 4188     {   naked;
 4189         xor  EAX,EAX;  // result is zero, so ZF is set
 4190         jz   bar56;    // therefore always taken; control continues in bar56
 4191         ret;           // only reached if the jump above were not taken
 4192     }
 4193 }
 4194 void bar56()
 4195 {
          // Naked jump target of foo56.  Declared void, yet it loads 42 into
          // EAX before returning; test56 asserts that foo56() yields 42.
 4196     asm
 4197     {   naked;
 4198         mov EAX, 42;
 4199         ret;
 4200     }
 4201 }
 4202 
 4203 /****************************************************/
 4204 
 4205 /* ======================= SSSE3 ======================= */
 4206 
 4207 void test57()
 4208 {
          // Encoding test for the SSSE3 instructions: each mnemonic is
          // assembled in four forms (MM reg/reg, XMM reg/reg, MM with the m64
          // local, XMM with the m128 local) and compared with `data`.
 4209     ubyte* p;
 4210     M64  m64;
 4211     M128 m128;
          // Expected machine code, one row per instruction, in asm-block order.
          // The memory-operand rows hard-code the RBP displacements -0x38 (m64)
          // and -0x30 (m128).
          // NOTE(review): those offsets presumably depend on the local
          // declarations above staying as they are -- confirm before editing.
 4212     static ubyte[] data =
 4213     [
 4214         0x0F, 0x3A, 0x0F, 0xCA,       0x03,    // palignr   MM1,  MM2, 3
 4215   0x66, 0x0F, 0x3A, 0x0F, 0xCA,       0x03,    // palignr  XMM1, XMM2, 3
 4216         0x0F, 0x3A, 0x0F, 0x5D, 0xC8, 0x03,    // palignr   MM3, -0x38[RBP], 3
 4217   0x66, 0x0F, 0x3A, 0x0F, 0x5D, 0xD0, 0x03,    // palignr  XMM3, -0x30[RBP], 3
 4218         0x0F, 0x38, 0x02, 0xCA,                // phaddd    MM1,  MM2
 4219   0x66, 0x0F, 0x38, 0x02, 0xCA,                // phaddd   XMM1, XMM2
 4220         0x0F, 0x38, 0x02, 0x5D, 0xC8,          // phaddd    MM3, -0x38[RBP]
 4221   0x66, 0x0F, 0x38, 0x02, 0x5D, 0xD0,          // phaddd   XMM3, -0x30[RBP]
 4222         0x0F, 0x38, 0x01, 0xCA,                // phaddw    MM1,  MM2
 4223   0x66, 0x0F, 0x38, 0x01, 0xCA,                // phaddw   XMM1, XMM2
 4224         0x0F, 0x38, 0x01, 0x5D, 0xC8,          // phaddw    MM3, -0x38[RBP]
 4225   0x66, 0x0F, 0x38, 0x01, 0x5D, 0xD0,          // phaddw   XMM3, -0x30[RBP]
 4226         0x0F, 0x38, 0x03, 0xCA,                // phaddsw   MM1,  MM2
 4227   0x66, 0x0F, 0x38, 0x03, 0xCA,                // phaddsw  XMM1, XMM2
 4228         0x0F, 0x38, 0x03, 0x5D, 0xC8,          // phaddsw   MM3, -0x38[RBP]
 4229   0x66, 0x0F, 0x38, 0x03, 0x5D, 0xD0,          // phaddsw  XMM3, -0x30[RBP]
 4230         0x0F, 0x38, 0x06, 0xCA,                // phsubd    MM1,  MM2
 4231   0x66, 0x0F, 0x38, 0x06, 0xCA,                // phsubd   XMM1, XMM2
 4232         0x0F, 0x38, 0x06, 0x5D, 0xC8,          // phsubd    MM3, -0x38[RBP]
 4233   0x66, 0x0F, 0x38, 0x06, 0x5D, 0xD0,          // phsubd   XMM3, -0x30[RBP]
 4234         0x0F, 0x38, 0x05, 0xCA,                // phsubw    MM1,  MM2
 4235   0x66, 0x0F, 0x38, 0x05, 0xCA,                // phsubw   XMM1, XMM2
 4236         0x0F, 0x38, 0x05, 0x5D, 0xC8,          // phsubw    MM3, -0x38[RBP]
 4237   0x66, 0x0F, 0x38, 0x05, 0x5D, 0xD0,          // phsubw   XMM3, -0x30[RBP]
 4238         0x0F, 0x38, 0x07, 0xCA,                // phsubsw   MM1,  MM2
 4239   0x66, 0x0F, 0x38, 0x07, 0xCA,                // phsubsw  XMM1, XMM2
 4240         0x0F, 0x38, 0x07, 0x5D, 0xC8,          // phsubsw   MM3, -0x38[RBP]
 4241   0x66, 0x0F, 0x38, 0x07, 0x5D, 0xD0,          // phsubsw  XMM3, -0x30[RBP]
 4242         0x0F, 0x38, 0x04, 0xCA,                // pmaddubsw  MM1,  MM2
 4243   0x66, 0x0F, 0x38, 0x04, 0xCA,                // pmaddubsw XMM1, XMM2
 4244         0x0F, 0x38, 0x04, 0x5D, 0xC8,          // pmaddubsw  MM3, -0x38[RBP]
 4245   0x66, 0x0F, 0x38, 0x04, 0x5D, 0xD0,          // pmaddubsw XMM3, -0x30[RBP]
 4246         0x0F, 0x38, 0x0B, 0xCA,                // pmulhrsw  MM1,  MM2
 4247   0x66, 0x0F, 0x38, 0x0B, 0xCA,                // pmulhrsw XMM1, XMM2
 4248         0x0F, 0x38, 0x0B, 0x5D, 0xC8,          // pmulhrsw  MM3, -0x38[RBP]
 4249   0x66, 0x0F, 0x38, 0x0B, 0x5D, 0xD0,          // pmulhrsw XMM3, -0x30[RBP]
 4250         0x0F, 0x38, 0x00, 0xCA,                // pshufb    MM1,  MM2
 4251   0x66, 0x0F, 0x38, 0x00, 0xCA,                // pshufb   XMM1, XMM2
 4252         0x0F, 0x38, 0x00, 0x5D, 0xC8,          // pshufb    MM3, -0x38[RBP]
 4253   0x66, 0x0F, 0x38, 0x00, 0x5D, 0xD0,          // pshufb   XMM3, -0x30[RBP]
 4254         0x0F, 0x38, 0x1C, 0xCA,                // pabsb     MM1,  MM2
 4255   0x66, 0x0F, 0x38, 0x1C, 0xCA,                // pabsb    XMM1, XMM2
 4256         0x0F, 0x38, 0x1C, 0x5D, 0xC8,          // pabsb     MM3, -0x38[RBP]
 4257   0x66, 0x0F, 0x38, 0x1C, 0x5D, 0xD0,          // pabsb    XMM3, -0x30[RBP]
 4258         0x0F, 0x38, 0x1E, 0xCA,                // pabsd     MM1,  MM2
 4259   0x66, 0x0F, 0x38, 0x1E, 0xCA,                // pabsd    XMM1, XMM2
 4260         0x0F, 0x38, 0x1E, 0x5D, 0xC8,          // pabsd     MM3, -0x38[RBP]
 4261   0x66, 0x0F, 0x38, 0x1E, 0x5D, 0xD0,          // pabsd    XMM3, -0x30[RBP]
 4262         0x0F, 0x38, 0x1D, 0xCA,                // pabsw     MM1,  MM2
 4263   0x66, 0x0F, 0x38, 0x1D, 0xCA,                // pabsw    XMM1, XMM2
 4264         0x0F, 0x38, 0x1D, 0x5D, 0xC8,          // pabsw     MM3, -0x38[RBP]
 4265   0x66, 0x0F, 0x38, 0x1D, 0x5D, 0xD0,          // pabsw    XMM3, -0x30[RBP]
 4266         0x0F, 0x38, 0x08, 0xCA,                // psignb    MM1,  MM2
 4267   0x66, 0x0F, 0x38, 0x08, 0xCA,                // psignb   XMM1, XMM2
 4268         0x0F, 0x38, 0x08, 0x5D, 0xC8,          // psignb    MM3, -0x38[RBP]
 4269   0x66, 0x0F, 0x38, 0x08, 0x5D, 0xD0,          // psignb   XMM3, -0x30[RBP]
 4270         0x0F, 0x38, 0x0A, 0xCA,                // psignd    MM1,  MM2
 4271   0x66, 0x0F, 0x38, 0x0A, 0xCA,                // psignd   XMM1, XMM2
 4272         0x0F, 0x38, 0x0A, 0x5D, 0xC8,          // psignd    MM3, -0x38[RBP]
 4273   0x66, 0x0F, 0x38, 0x0A, 0x5D, 0xD0,          // psignd   XMM3, -0x30[RBP]
 4274         0x0F, 0x38, 0x09, 0xCA,                // psignw    MM1,  MM2
 4275   0x66, 0x0F, 0x38, 0x09, 0xCA,                // psignw   XMM1, XMM2
 4276         0x0F, 0x38, 0x09, 0x5D, 0xC8,          // psignw    MM3, -0x38[RBP]
 4277   0x66, 0x0F, 0x38, 0x09, 0x5D, 0xD0,          // psignw   XMM3, -0x30[RBP]
 4278     ];
 4279 
 4280     asm
 4281     {
              // The call pushes the address of the next instruction and jumps
              // to L1, so the instructions in between are assembled but never
              // executed; L1 pops that address into p.
 4282         call  L1;
 4283 
 4284         palignr     MM1,  MM2, 3;
 4285         palignr    XMM1, XMM2, 3;
 4286         palignr     MM3, m64 , 3;
 4287         palignr    XMM3, m128, 3;
 4288 
 4289         phaddd      MM1,  MM2;
 4290         phaddd     XMM1, XMM2;
 4291         phaddd      MM3,  m64;
 4292         phaddd     XMM3, m128;
 4293 
 4294         phaddw      MM1,  MM2;
 4295         phaddw     XMM1, XMM2;
 4296         phaddw      MM3,  m64;
 4297         phaddw     XMM3, m128;
 4298 
 4299         phaddsw     MM1,  MM2;
 4300         phaddsw    XMM1, XMM2;
 4301         phaddsw     MM3,  m64;
 4302         phaddsw    XMM3, m128;
 4303 
 4304         phsubd      MM1,  MM2;
 4305         phsubd     XMM1, XMM2;
 4306         phsubd      MM3,  m64;
 4307         phsubd     XMM3, m128;
 4308 
 4309         phsubw      MM1,  MM2;
 4310         phsubw     XMM1, XMM2;
 4311         phsubw      MM3,  m64;
 4312         phsubw     XMM3, m128;
 4313 
 4314         phsubsw     MM1,  MM2;
 4315         phsubsw    XMM1, XMM2;
 4316         phsubsw     MM3,  m64;
 4317         phsubsw    XMM3, m128;
 4318 
 4319         pmaddubsw   MM1,  MM2;
 4320         pmaddubsw  XMM1, XMM2;
 4321         pmaddubsw   MM3,  m64;
 4322         pmaddubsw  XMM3, m128;
 4323 
 4324         pmulhrsw    MM1,  MM2;
 4325         pmulhrsw   XMM1, XMM2;
 4326         pmulhrsw    MM3,  m64;
 4327         pmulhrsw   XMM3, m128;
 4328 
 4329         pshufb      MM1,  MM2;
 4330         pshufb     XMM1, XMM2;
 4331         pshufb      MM3,  m64;
 4332         pshufb     XMM3, m128;
 4333 
 4334         pabsb       MM1,  MM2;
 4335         pabsb      XMM1, XMM2;
 4336         pabsb       MM3,  m64;
 4337         pabsb      XMM3, m128;
 4338 
 4339         pabsd       MM1,  MM2;
 4340         pabsd      XMM1, XMM2;
 4341         pabsd       MM3,  m64;
 4342         pabsd      XMM3, m128;
 4343 
 4344         pabsw       MM1,  MM2;
 4345         pabsw      XMM1, XMM2;
 4346         pabsw       MM3,  m64;
 4347         pabsw      XMM3, m128;
 4348 
 4349         psignb      MM1,  MM2;
 4350         psignb     XMM1, XMM2;
 4351         psignb      MM3,  m64;
 4352         psignb     XMM3, m128;
 4353 
 4354         psignd      MM1,  MM2;
 4355         psignd     XMM1, XMM2;
 4356         psignd      MM3,  m64;
 4357         psignd     XMM3, m128;
 4358 
 4359         psignw      MM1,  MM2;
 4360         psignw     XMM1, XMM2;
 4361         psignw      MM3,  m64;
 4362         psignw     XMM3, m128;
 4363 
 4364 L1:     pop     RAX;
 4365         mov     p[RBP],RAX;
 4366     }
 4367 
          // Compare the assembled bytes starting at p with the expected table.
 4368     foreach (ref i, b; data)
 4369     {
 4370         //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b);
 4371         assert(p[i] == b);
 4372     }
 4373 }
 4374 
 4375 /****************************************************/
 4376 
 4377 /* ======================= SSE4.1 ======================= */
 4378 
 4379 void test58()
 4380 {
 4381     ubyte* p;
 4382     byte   m8;
 4383     short m16;
 4384     int   m32;
 4385     M64   m64;
 4386     M128 m128;
 4387     static ubyte[] data =
 4388     [
 4389   0x66,       0x0F, 0x3A, 0x0D, 0xCA,        3,// blendpd  XMM1,XMM2,0x3
 4390   0x66,       0x0F, 0x3A, 0x0D, 0x5D, 0xD0,  3,// blendpd  XMM3,XMMWORD PTR [RBP-0x30],0x3
 4391   0x66,       0x0F, 0x3A, 0x0C, 0xCA,        3,// blendps  XMM1,XMM2,0x3
 4392   0x66,       0x0F, 0x3A, 0x0C, 0x5D, 0xD0,  3,// blendps  XMM3,XMMWORD PTR [RBP-0x30],0x3
 4393   0x66,       0x0F, 0x38, 0x15, 0xCA,          // blendvpd XMM1,XMM2,XMM0
 4394   0x66,       0x0F, 0x38, 0x15, 0x5D, 0xD0,    // blendvpd XMM3,XMMWORD PTR [RBP-0x30],XMM0
 4395   0x66,       0x0F, 0x38, 0x14, 0xCA,          // blendvps XMM1,XMM2,XMM0
 4396   0x66,       0x0F, 0x38, 0x14, 0x5D, 0xD0,    // blendvps XMM3,XMMWORD PTR [RBP-0x30],XMM0
 4397   0x66,       0x0F, 0x3A, 0x41, 0xCA,        3,// dppd     XMM1,XMM2,0x3
 4398   0x66,       0x0F, 0x3A, 0x41, 0x5D, 0xD0,  3,// dppd     XMM3,XMMWORD PTR [RBP-0x30],0x3
 4399   0x66,       0x0F, 0x3A, 0x40, 0xCA,        3,// dpps     XMM1,XMM2,0x3
 4400   0x66,       0x0F, 0x3A, 0x40, 0x5D, 0xD0,  3,// dpps     XMM3,XMMWORD PTR [RBP-0x30],0x3
 4401   0x66,       0x0F, 0x3A, 0x17, 0xD2,        3,// extractps EDX,XMM2,0x3
 4402   0x66,       0x0F, 0x3A, 0x17, 0x55, 0xBC,  3,// extractps DWORD PTR [RBP-0x44],XMM2,0x3
 4403   0x66,       0x0F, 0x3A, 0x21, 0xCA,        3,// insertps XMM1,XMM2,0x3
 4404   0x66,       0x0F, 0x3A, 0x21, 0x5D, 0xBC,  3,// insertps XMM3,DWORD PTR [RBP-0x44],0x3
 4405   0x66,       0x0F, 0x38, 0x2A, 0x4D, 0xD0,    // movntdqa XMM1,XMMWORD PTR [RBP-0x30]
 4406   0x66,       0x0F, 0x3A, 0x42, 0xCA,        3,// mpsadbw  XMM1,XMM2,0x3
 4407   0x66,       0x0F, 0x3A, 0x42, 0x5D, 0xD0,  3,// mpsadbw  XMM3,XMMWORD PTR [RBP-0x30],0x3
 4408   0x66,       0x0F, 0x38, 0x2B, 0xCA,          // packusdw XMM1,XMM2
 4409   0x66,       0x0F, 0x38, 0x2B, 0x5D, 0xD0,    // packusdw XMM3,XMMWORD PTR [RBP-0x30]
 4410   0x66,       0x0F, 0x38, 0x10, 0xCA,          // pblendvb XMM1,XMM2,XMM0
 4411   0x66,       0x0F, 0x38, 0x10, 0x5D, 0xD0,    // pblendvb XMM3,XMMWORD PTR [RBP-0x30],XMM0
 4412   0x66,       0x0F, 0x3A, 0x0E, 0xCA,        3,// pblendw  XMM1,XMM2,0x3
 4413   0x66,       0x0F, 0x3A, 0x0E, 0x5D, 0xD0,  3,// pblendw  XMM3,XMMWORD PTR [RBP-0x30],0x3
 4414   0x66,       0x0F, 0x38, 0x29, 0xCA,          // pcmpeqq  XMM1,XMM2
 4415   0x66,       0x0F, 0x38, 0x29, 0x5D, 0xD0,    // pcmpeqq  XMM3,XMMWORD PTR [RBP-0x30]
 4416   0x66,       0x0F, 0x3A, 0x14, 0xD0,        3,// pextrb EAX,XMM2,0x3
 4417   0x66,       0x0F, 0x3A, 0x14, 0xD3,        3,// pextrb EBX,XMM2,0x3
 4418   0x66,       0x0F, 0x3A, 0x14, 0xD1,        3,// pextrb ECX,XMM2,0x3
 4419   0x66,       0x0F, 0x3A, 0x14, 0xD2,        3,// pextrb EDX,XMM2,0x3
 4420   0x66,       0x0F, 0x3A, 0x14, 0xD0,        3,// pextrb EAX,XMM2,0x3
 4421   0x66,       0x0F, 0x3A, 0x14, 0xD3,        3,// pextrb EBX,XMM2,0x3
 4422   0x66, 0x41, 0x0F, 0x3A, 0x14, 0xD0,        3,// pextrb R8D,XMM2,0x3
 4423   0x66, 0x41, 0x0F, 0x3A, 0x14, 0xD2,        3,// pextrb R10D,XMM2,0x3
 4424   0x66,       0x0F, 0x3A, 0x14, 0x5D, 0xB8,  3,// pextrb BYTE PTR [RBP-0x48],XMM3,0x3
 4425   0x66,       0x0F, 0x3A, 0x16, 0xD0,        3,// pextrd EAX,XMM2,0x3
 4426   0x66,       0x0F, 0x3A, 0x16, 0xD3,        3,// pextrd EBX,XMM2,0x3
 4427   0x66,       0x0F, 0x3A, 0x16, 0xD1,        3,// pextrd ECX,XMM2,0x3
 4428   0x66,       0x0F, 0x3A, 0x16, 0xD2,        3,// pextrd EDX,XMM2,0x3
 4429   0x66,       0x0F, 0x3A, 0x16, 0x5D, 0xBC,  3,// pextrd DWORD PTR [RBP-0x44],XMM3,0x3
 4430   0x66, 0x48, 0x0F, 0x3A, 0x16, 0xD0,        3,// pextrq RAX,XMM2,0x3
 4431   0x66, 0x48, 0x0F, 0x3A, 0x16, 0xD3,        3,// pextrq RBX,XMM2,0x3
 4432   0x66, 0x48, 0x0F, 0x3A, 0x16, 0xD1,        3,// pextrq RCX,XMM2,0x3
 4433   0x66, 0x48, 0x0F, 0x3A, 0x16, 0xD2,        3,// pextrq RDX,XMM2,0x3
 4434   0x66, 0x48, 0x0F, 0x3A, 0x16, 0x5D, 0xC0,  3,// pextrq QWORD PTR [RBP-0x40],XMM3,0x3
 4435   0x66,       0x0F, 0xC5, 0xC2,              3,// pextrw EAX,XMM2,0x3
 4436   0x66,       0x0F, 0xC5, 0xDA,              3,// pextrw EBX,XMM2,0x3
 4437   0x66,       0x0F, 0xC5, 0xCA,              3,// pextrw ECX,XMM2,0x3
 4438   0x66,       0x0F, 0xC5, 0xD2,              3,// pextrw EDX,XMM2,0x3
 4439   0x66,       0x0F, 0xC5, 0xC2,              3,// pextrw EAX,XMM2,0x3
 4440   0x66,       0x0F, 0xC5, 0xDA,              3,// pextrw EBX,XMM2,0x3
 4441   0x66, 0x44, 0x0F, 0xC5, 0xC2,              3,// pextrw R8D,XMM2,0x3
 4442   0x66, 0x44, 0x0F, 0xC5, 0xD2,              3,// pextrw R10D,XMM2,0x3
 4443   0x66,       0x0F, 0x3A, 0x15, 0x5D, 0xBA,  3,// pextrw WORD PTR [RBP-0x46],XMM3,0x3
 4444   0x66,       0x0F, 0x38, 0x41, 0xCA,          // phminposuw XMM1,XMM2
 4445   0x66,       0x0F, 0x38, 0x41, 0x5D, 0xD0,    // phminposuw XMM3,XMMWORD PTR [RBP-0x30]
 4446   0x66,       0x0F, 0x3A, 0x20, 0xC8,        3,// pinsrb  XMM1,EAX,0x3
 4447   0x66,       0x0F, 0x3A, 0x20, 0xCB,        3,// pinsrb  XMM1,EBX,0x3
 4448   0x66,       0x0F, 0x3A, 0x20, 0xC9,        3,// pinsrb  XMM1,ECX,0x3
 4449   0x66,       0x0F, 0x3A, 0x20, 0xCA,        3,// pinsrb  XMM1,EDX,0x3
 4450   0x66,       0x0F, 0x3A, 0x20, 0x5D, 0xB8,  3,// pinsrb  XMM3,BYTE PTR [RBP-0x48],0x3
 4451   0x66,       0x0F, 0x3A, 0x22, 0xC8,        3,// pinsrd  XMM1,EAX,0x3
 4452   0x66,       0x0F, 0x3A, 0x22, 0xCB,        3,// pinsrd  XMM1,EBX,0x3
 4453   0x66,       0x0F, 0x3A, 0x22, 0xC9,        3,// pinsrd  XMM1,ECX,0x3
 4454   0x66,       0x0F, 0x3A, 0x22, 0xCA,        3,// pinsrd  XMM1,EDX,0x3
 4455   0x66,       0x0F, 0x3A, 0x22, 0x5D, 0xBC,  3,// pinsrd  XMM3,DWORD PTR [RBP-0x44],0x3
 4456   0x66, 0x48, 0x0F, 0x3A, 0x22, 0xC8,        3,// pinsrq  XMM1,RAX,0x3
 4457   0x66, 0x48, 0x0F, 0x3A, 0x22, 0xCB,        3,// pinsrq  XMM1,RBX,0x3
 4458   0x66, 0x48, 0x0F, 0x3A, 0x22, 0xC9,        3,// pinsrq  XMM1,RCX,0x3
 4459   0x66, 0x48, 0x0F, 0x3A, 0x22, 0xCA,        3,// pinsrq  XMM1,RDX,0x3
 4460   0x66, 0x48, 0x0F, 0x3A, 0x22, 0x5D, 0xC0,  3,// pinsrq  XMM3,QWORD PTR [RBP-0x40],0x3
 4461   0x66,       0x0F, 0x38, 0x3C, 0xCA,          // pmaxsb  XMM1,XMM2
 4462   0x66,       0x0F, 0x38, 0x3C, 0x5D, 0xD0,    // pmaxsb  XMM3,XMMWORD PTR [RBP-0x30]
 4463   0x66,       0x0F, 0x38, 0x3D, 0xCA,          // pmaxsd  XMM1,XMM2
 4464   0x66,       0x0F, 0x38, 0x3D, 0x5D, 0xD0,    // pmaxsd  XMM3,XMMWORD PTR [RBP-0x30]
 4465   0x66,       0x0F, 0x38, 0x3F, 0xCA,          // pmaxud  XMM1,XMM2
 4466   0x66,       0x0F, 0x38, 0x3F, 0x5D, 0xD0,    // pmaxud  XMM3,XMMWORD PTR [RBP-0x30]
 4467   0x66,       0x0F, 0x38, 0x3E, 0xCA,          // pmaxuw  XMM1,XMM2
 4468   0x66,       0x0F, 0x38, 0x3E, 0x5D, 0xD0,    // pmaxuw  XMM3,XMMWORD PTR [RBP-0x30]
 4469   0x66,       0x0F, 0x38, 0x38, 0xCA,          // pminsb  XMM1,XMM2
 4470   0x66,       0x0F, 0x38, 0x38, 0x5D, 0xD0,    // pminsb  XMM3,XMMWORD PTR [RBP-0x30]
 4471   0x66,       0x0F, 0x38, 0x39, 0xCA,          // pminsd  XMM1,XMM2
 4472   0x66,       0x0F, 0x38, 0x39, 0x5D, 0xD0,    // pminsd  XMM3,XMMWORD PTR [RBP-0x30]
 4473   0x66,       0x0F, 0x38, 0x3B, 0xCA,          // pminud  XMM1,XMM2
 4474   0x66,       0x0F, 0x38, 0x3B, 0x5D, 0xD0,    // pminud  XMM3,XMMWORD PTR [RBP-0x30]
 4475   0x66,       0x0F, 0x38, 0x3A, 0xCA,          // pminuw  XMM1,XMM2
 4476   0x66,       0x0F, 0x38, 0x3A, 0x5D, 0xD0,    // pminuw  XMM3,XMMWORD PTR [RBP-0x30]
 4477   0x66,       0x0F, 0x38, 0x20, 0xCA,          // pmovsxbw XMM1,XMM2
 4478   0x66,       0x0F, 0x38, 0x20, 0x5D, 0xC0,    // pmovsxbw XMM3,QWORD PTR [RBP-0x40]
 4479   0x66,       0x0F, 0x38, 0x21, 0xCA,          // pmovsxbd XMM1,XMM2
 4480   0x66,       0x0F, 0x38, 0x21, 0x5D, 0xBC,    // pmovsxbd XMM3,DWORD PTR [RBP-0x44]
 4481   0x66,       0x0F, 0x38, 0x22, 0xCA,          // pmovsxbq XMM1,XMM2
 4482   0x66,       0x0F, 0x38, 0x22, 0x5D, 0xBA,    // pmovsxbq XMM3,WORD PTR [RBP-0x46]
 4483   0x66,       0x0F, 0x38, 0x23, 0xCA,          // pmovsxwd XMM1,XMM2
 4484   0x66,       0x0F, 0x38, 0x23, 0x5D, 0xC0,    // pmovsxwd XMM3,QWORD PTR [RBP-0x40]
 4485   0x66,       0x0F, 0x38, 0x24, 0xCA,          // pmovsxwq XMM1,XMM2
 4486   0x66,       0x0F, 0x38, 0x24, 0x5D, 0xBC,    // pmovsxwq XMM3,DWORD PTR [RBP-0x44]
 4487   0x66,       0x0F, 0x38, 0x25, 0xCA,          // pmovsxdq XMM1,XMM2
 4488   0x66,       0x0F, 0x38, 0x25, 0x5D, 0xC0,    // pmovsxdq XMM3,QWORD PTR [RBP-0x40]
 4489   0x66,       0x0F, 0x38, 0x30, 0xCA,          // pmovzxbw XMM1,XMM2
 4490   0x66,       0x0F, 0x38, 0x30, 0x5D, 0xC0,    // pmovzxbw XMM3,QWORD PTR [RBP-0x40]
 4491   0x66,       0x0F, 0x38, 0x31, 0xCA,          // pmovzxbd XMM1,XMM2
 4492   0x66,       0x0F, 0x38, 0x31, 0x5D, 0xBC,    // pmovzxbd XMM3,DWORD PTR [RBP-0x44]
 4493   0x66,       0x0F, 0x38, 0x32, 0xCA,          // pmovzxbq XMM1,XMM2
 4494   0x66,       0x0F, 0x38, 0x32, 0x5D, 0xBA,    // pmovzxbq XMM3,WORD PTR [RBP-0x46]
 4495   0x66,       0x0F, 0x38, 0x33, 0xCA,          // pmovzxwd XMM1,XMM2
 4496   0x66,       0x0F, 0x38, 0x33, 0x5D, 0xC0,    // pmovzxwd XMM3,QWORD PTR [RBP-0x40]
 4497   0x66,       0x0F, 0x38, 0x34, 0xCA,          // pmovzxwq XMM1,XMM2
 4498   0x66,       0x0F, 0x38, 0x34, 0x5D, 0xBC,    // pmovzxwq XMM3,DWORD PTR [RBP-0x44]
 4499   0x66,       0x0F, 0x38, 0x35, 0xCA,          // pmovzxdq XMM1,XMM2
 4500   0x66,       0x0F, 0x38, 0x35, 0x5D, 0xC0,    // pmovzxdq XMM3,QWORD PTR [RBP-0x40]
 4501   0x66,       0x0F, 0x38, 0x28, 0xCA,          // pmuldq  XMM1,XMM2
 4502   0x66,       0x0F, 0x38, 0x28, 0x5D, 0xD0,    // pmuldq  XMM3,XMMWORD PTR [RBP-0x30]
 4503   0x66,       0x0F, 0x38, 0x40, 0xCA,          // pmulld  XMM1,XMM2
 4504   0x66,       0x0F, 0x38, 0x40, 0x5D, 0xD0,    // pmulld  XMM3,XMMWORD PTR [RBP-0x30]
 4505   0x66,       0x0F, 0x38, 0x17, 0xCA,          // ptest   XMM1,XMM2
 4506   0x66,       0x0F, 0x38, 0x17, 0x5D, 0xD0,    // ptest   XMM3,XMMWORD PTR [RBP-0x30]
 4507   0x66,       0x0F, 0x3A, 0x09, 0xCA,        3,// roundpd XMM1,XMM2,0x3
 4508   0x66,       0x0F, 0x3A, 0x09, 0x5D, 0xD0,  3,// roundpd XMM3,XMMWORD PTR [RBP-0x30],0x3
 4509   0x66,       0x0F, 0x3A, 0x08, 0xCA,        3,// roundps XMM1,XMM2,0x3
 4510   0x66,       0x0F, 0x3A, 0x08, 0x5D, 0xD0,  3,// roundps XMM3,XMMWORD PTR [RBP-0x30],0x3
 4511   0x66,       0x0F, 0x3A, 0x0B, 0xCA,