"Fossies" - the Fresh Open Source Software Archive

Member "AutoHotkey_L-1.1.33.09/source/lib_pcre/pcre/sljit/sljitNativeX86_common.c" (8 May 2021, 79831 Bytes) of package /windows/misc/AutoHotkey_L-1.1.33.09.zip:


    1 /*
    2  *    Stack-less Just-In-Time compiler
    3  *
    4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without modification, are
    7  * permitted provided that the following conditions are met:
    8  *
    9  *   1. Redistributions of source code must retain the above copyright notice, this list of
   10  *      conditions and the following disclaimer.
   11  *
   12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
   13  *      of conditions and the following disclaimer in the documentation and/or other materials
   14  *      provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
   17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
   19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  */
   26 
   27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
   28 {
   29     return "x86" SLJIT_CPUINFO;
   30 }
   31 
   32 /*
   33    32b register indexes:
   34      0 - EAX
   35      1 - ECX
   36      2 - EDX
   37      3 - EBX
   38      4 - none
   39      5 - EBP
   40      6 - ESI
   41      7 - EDI
   42 */
   43 
   44 /*
   45    64b register indexes:
   46      0 - RAX
   47      1 - RCX
   48      2 - RDX
   49      3 - RBX
   50      4 - none
   51      5 - RBP
   52      6 - RSI
   53      7 - RDI
   54      8 - R8   - From R8 on, a REX prefix is required
   55      9 - R9
   56     10 - R10
   57     11 - R11
   58     12 - R12
   59     13 - R13
   60     14 - R14
   61     15 - R15
   62 */
   63 
   64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   65 
   66 /* Last register + 1. */
   67 #define TMP_REGISTER    (SLJIT_NO_REGISTERS + 1)
   68 
   69 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
   70   0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
   71 };
   72 
   73 #define CHECK_EXTRA_REGS(p, w, do) \
   74     if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
   75         w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
   76         p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
   77         do; \
   78     } \
   79     else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
   80         w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_w); \
   81         p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
   82         do; \
   83     }
   84 
   85 #else /* SLJIT_CONFIG_X86_32 */
   86 
   87 /* Last register + 1. */
   88 #define TMP_REGISTER    (SLJIT_NO_REGISTERS + 1)
   89 #define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
   90 #define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
   91 
   92 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   93    Note: avoid using r12 and r13 for memory addressing;
   94    therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
   95 #ifndef _WIN64
   96 /* 1st argument is passed in rdi, 2nd in rsi, 3rd in rdx. */
   97 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
   98   0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
   99 };
  100 /* low-map. reg_map & 0x7. */
  101 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
  102   0, 0, 6, 1, 0, 3,  3, 7,  6,  5,  4,  4, 2, 7, 1
  103 };
  104 #else
  105 /* 1st argument is passed in rcx, 2nd in rdx, 3rd in r8. */
  106 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
  107   0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
  108 };
  109 /* low-map. reg_map & 0x7. */
  110 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
  111   0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  4,  7, 2,  0, 1
  112 };
  113 #endif
  114 
  115 #define REX_W       0x48
  116 #define REX_R       0x44
  117 #define REX_X       0x42
  118 #define REX_B       0x41
  119 #define REX     0x40
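
      /* Illustrative sketch, not part of sljit: the REX bits above extend the 3-bit register
         fields of the ModRM/SIB bytes. A register whose reg_map value is >= 8 contributes its
         low three bits through reg_lmap and its fourth bit through REX_R / REX_X / REX_B,
         while REX_W selects 64-bit operand size. One way to compose the prefix (kept out of
         the build on purpose): */
      #if 0
      static sljit_ub compose_rex_example(int rex_w, sljit_ub reg, sljit_ub index, sljit_ub base)
      {
          sljit_ub rex = 0;
          if (rex_w)
              rex |= REX_W;    /* 64-bit operand size */
          if (reg >= 8)
              rex |= REX_R;    /* extends the ModRM reg field */
          if (index >= 8)
              rex |= REX_X;    /* extends the SIB index field */
          if (base >= 8)
              rex |= REX_B;    /* extends the ModRM rm / SIB base field */
          return rex;          /* 0 means no REX prefix is needed */
      }
      #endif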
  120 
  121 typedef unsigned int sljit_uhw;
  122 typedef int sljit_hw;
  123 
  124 #define IS_HALFWORD(x)      ((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
  125 #define NOT_HALFWORD(x)     ((x) > 0x7fffffffll || (x) < -0x80000000ll)
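
      /* Illustrative sketch, not part of sljit: IS_HALFWORD() tests whether a 64-bit value
         survives sign-extension from a 32-bit immediate, which is what decides between an
         imm32 encoding and a full 64-bit load (emit_load_imm64) in emit_mov() below. */
      #if 0
      static void halfword_examples(void)
      {
          SLJIT_ASSERT(IS_HALFWORD(0x7fffffffll));    /* fits: can be encoded as imm32 */
          SLJIT_ASSERT(IS_HALFWORD(-0x80000000ll));   /* fits: sign-extends correctly */
          SLJIT_ASSERT(NOT_HALFWORD(0x80000000ll));   /* would sign-extend to a negative value */
          SLJIT_ASSERT(NOT_HALFWORD(0x123456789ll));  /* needs a full 64-bit immediate load */
      }
      #endif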
  126 
  127 #define CHECK_EXTRA_REGS(p, w, do)
  128 
  129 #endif /* SLJIT_CONFIG_X86_32 */
  130 
  131 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
  132 #define TMP_FREG    (SLJIT_FLOAT_REG4 + 1)
  133 #endif
  134 
  135 /* Size flags for emit_x86_instruction: */
  136 #define EX86_BIN_INS        0x0010
  137 #define EX86_SHIFT_INS      0x0020
  138 #define EX86_REX        0x0040
  139 #define EX86_NO_REXW        0x0080
  140 #define EX86_BYTE_ARG       0x0100
  141 #define EX86_HALF_ARG       0x0200
  142 #define EX86_PREF_66        0x0400
  143 
  144 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
  145 #define EX86_PREF_F2        0x0800
  146 #define EX86_SSE2       0x1000
  147 #endif
  148 
  149 #define INC_SIZE(s)         (*buf++ = (s), compiler->size += (s))
  150 #define INC_CSIZE(s)            (*code++ = (s), compiler->size += (s))
  151 
  152 #define PUSH_REG(r)         (*buf++ = (0x50 + (r)))
  153 #define POP_REG(r)          (*buf++ = (0x58 + (r)))
  154 #define RET()               (*buf++ = (0xc3))
  155 #define RETN(n)             (*buf++ = (0xc2), *buf++ = n, *buf++ = 0)
  156 /* r32, r/m32 */
  157 #define MOV_RM(mod, reg, rm)        (*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
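
      /* Worked example (illustrative, not part of sljit): MOV_RM() emits opcode 0x8b
         (mov r32, r/m32) followed by a hand-built ModRM byte: mod in bits 7-6, reg in bits 5-3,
         rm in bits 2-0. With mod == 0x3 the rm field names a register directly, so
         MOV_RM(0x3, 2, 1) produces "8b d1", i.e. mov edx, ecx (2 = edx, 1 = ecx in the hardware
         numbering used by reg_map). Memory operands need a displacement and/or SIB byte and go
         through emit_x86_instruction() instead. */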
  158 
  159 static sljit_ub get_jump_code(int type)
  160 {
  161     switch (type) {
  162     case SLJIT_C_EQUAL:
  163     case SLJIT_C_FLOAT_EQUAL:
  164         return 0x84;
  165 
  166     case SLJIT_C_NOT_EQUAL:
  167     case SLJIT_C_FLOAT_NOT_EQUAL:
  168         return 0x85;
  169 
  170     case SLJIT_C_LESS:
  171     case SLJIT_C_FLOAT_LESS:
  172         return 0x82;
  173 
  174     case SLJIT_C_GREATER_EQUAL:
  175     case SLJIT_C_FLOAT_GREATER_EQUAL:
  176         return 0x83;
  177 
  178     case SLJIT_C_GREATER:
  179     case SLJIT_C_FLOAT_GREATER:
  180         return 0x87;
  181 
  182     case SLJIT_C_LESS_EQUAL:
  183     case SLJIT_C_FLOAT_LESS_EQUAL:
  184         return 0x86;
  185 
  186     case SLJIT_C_SIG_LESS:
  187         return 0x8c;
  188 
  189     case SLJIT_C_SIG_GREATER_EQUAL:
  190         return 0x8d;
  191 
  192     case SLJIT_C_SIG_GREATER:
  193         return 0x8f;
  194 
  195     case SLJIT_C_SIG_LESS_EQUAL:
  196         return 0x8e;
  197 
  198     case SLJIT_C_OVERFLOW:
  199     case SLJIT_C_MUL_OVERFLOW:
  200         return 0x80;
  201 
  202     case SLJIT_C_NOT_OVERFLOW:
  203     case SLJIT_C_MUL_NOT_OVERFLOW:
  204         return 0x81;
  205 
  206     case SLJIT_C_FLOAT_NAN:
  207         return 0x8a;
  208 
  209     case SLJIT_C_FLOAT_NOT_NAN:
  210         return 0x8b;
  211     }
  212     return 0;
  213 }
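
      /* Worked example (illustrative): the values returned above are the second byte of the
         two-byte near form of the conditional jumps (0x0f 0x8?, rel32). The short rel8 form uses
         the same condition code in a single 0x7? opcode, which is why generate_near_jump_code()
         below simply subtracts 0x10:
             SLJIT_C_EQUAL    -> 0x84: near "je" = 0f 84 xx xx xx xx, short "je" = 74 xx
             SLJIT_C_SIG_LESS -> 0x8c: near "jl" = 0f 8c xx xx xx xx, short "jl" = 7c xx */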
  214 
  215 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);
  216 
  217 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  218 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
  219 #endif
  220 
  221 static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
  222 {
  223     int short_jump;
  224     sljit_uw label_addr;
  225 
  226     if (jump->flags & JUMP_LABEL)
  227         label_addr = (sljit_uw)(code + jump->u.label->size);
  228     else
  229         label_addr = jump->u.target;
  230     short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;
  231 
  232 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  233     if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
  234         return generate_far_jump_code(jump, code_ptr, type);
  235 #endif
  236 
  237     if (type == SLJIT_JUMP) {
  238         if (short_jump)
  239             *code_ptr++ = 0xeb;
  240         else
  241             *code_ptr++ = 0xe9;
  242         jump->addr++;
  243     }
  244     else if (type >= SLJIT_FAST_CALL) {
  245         short_jump = 0;
  246         *code_ptr++ = 0xe8;
  247         jump->addr++;
  248     }
  249     else if (short_jump) {
  250         *code_ptr++ = get_jump_code(type) - 0x10;
  251         jump->addr++;
  252     }
  253     else {
  254         *code_ptr++ = 0x0f;
  255         *code_ptr++ = get_jump_code(type);
  256         jump->addr += 2;
  257     }
  258 
  259     if (short_jump) {
  260         jump->flags |= PATCH_MB;
  261         code_ptr += sizeof(sljit_b);
  262     } else {
  263         jump->flags |= PATCH_MW;
  264 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  265         code_ptr += sizeof(sljit_w);
  266 #else
  267         code_ptr += sizeof(sljit_hw);
  268 #endif
  269     }
  270 
  271     return code_ptr;
  272 }
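
      /* Worked example (illustrative): the short_jump test above measures the displacement from
         the end of the would-be two-byte instruction (opcode + rel8), hence "jump->addr + 2".
         A jump encoded at address 0x1000 can therefore reach targets in [0x1002 - 128, 0x1002 + 127]
         with the rel8 form; anything further away falls back to the rel32 form (or, on x86-64,
         to generate_far_jump_code). */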
  273 
  274 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
  275 {
  276     struct sljit_memory_fragment *buf;
  277     sljit_ub *code;
  278     sljit_ub *code_ptr;
  279     sljit_ub *buf_ptr;
  280     sljit_ub *buf_end;
  281     sljit_ub len;
  282 
  283     struct sljit_label *label;
  284     struct sljit_jump *jump;
  285     struct sljit_const *const_;
  286 
  287     CHECK_ERROR_PTR();
  288     check_sljit_generate_code(compiler);
  289     reverse_buf(compiler);
  290 
  291     /* Second code generation pass. */
  292     code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
  293     PTR_FAIL_WITH_EXEC_IF(code);
  294     buf = compiler->buf;
  295 
  296     code_ptr = code;
  297     label = compiler->labels;
  298     jump = compiler->jumps;
  299     const_ = compiler->consts;
  300     do {
  301         buf_ptr = buf->memory;
  302         buf_end = buf_ptr + buf->used_size;
  303         do {
  304             len = *buf_ptr++;
  305             if (len > 0) {
  306                 /* The code is already generated. */
  307                 SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
  308                 code_ptr += len;
  309                 buf_ptr += len;
  310             }
  311             else {
  312                 if (*buf_ptr >= 4) {
  313                     jump->addr = (sljit_uw)code_ptr;
  314                     if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
  315                         code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
  316                     else
  317                         code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
  318                     jump = jump->next;
  319                 }
  320                 else if (*buf_ptr == 0) {
  321                     label->addr = (sljit_uw)code_ptr;
  322                     label->size = code_ptr - code;
  323                     label = label->next;
  324                 }
  325                 else if (*buf_ptr == 1) {
  326                     const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
  327                     const_ = const_->next;
  328                 }
  329                 else {
  330 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  331                     *code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
  332                     buf_ptr++;
  333                     *(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
  334                     code_ptr += sizeof(sljit_w);
  335                     buf_ptr += sizeof(sljit_w) - 1;
  336 #else
  337                     code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
  338                     buf_ptr += sizeof(sljit_w);
  339 #endif
  340                 }
  341                 buf_ptr++;
  342             }
  343         } while (buf_ptr < buf_end);
  344         SLJIT_ASSERT(buf_ptr == buf_end);
  345         buf = buf->next;
  346     } while (buf);
  347 
  348     SLJIT_ASSERT(!label);
  349     SLJIT_ASSERT(!jump);
  350     SLJIT_ASSERT(!const_);
  351 
  352     jump = compiler->jumps;
  353     while (jump) {
  354         if (jump->flags & PATCH_MB) {
  355             SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
  356             *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
  357         } else if (jump->flags & PATCH_MW) {
  358             if (jump->flags & JUMP_LABEL) {
  359 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  360                 *(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
  361 #else
  362                 SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
  363                 *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
  364 #endif
  365             }
  366             else {
  367 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  368                 *(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
  369 #else
  370                 SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
  371                 *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
  372 #endif
  373             }
  374         }
  375 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  376         else if (jump->flags & PATCH_MD)
  377             *(sljit_w*)jump->addr = jump->u.label->addr;
  378 #endif
  379 
  380         jump = jump->next;
  381     }
  382 
  383     /* Some space may be wasted because of short jumps. */
  384     SLJIT_ASSERT(code_ptr <= code + compiler->size);
  385     compiler->error = SLJIT_ERR_COMPILED;
  386     compiler->executable_size = compiler->size;
  387     return (void*)code;
  388 }
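
      /* Minimal usage sketch of the two-pass generator above (illustrative, not part of sljit).
         The helpers declared outside this file -- sljit_create_compiler, sljit_emit_enter,
         sljit_emit_return, sljit_free_compiler, sljit_free_code -- and their argument orders are
         assumptions based on this sljit revision; treat this as an outline, not a verified example. */
      #if 0
      typedef sljit_w (SLJIT_CALL *jitted_func)(sljit_w a);

      static jitted_func build_identity(void)
      {
          struct sljit_compiler *compiler = sljit_create_compiler();  /* assumed signature */
          void *code;

          /* 1 argument, 1 temporary, 1 saved register, no locals (assumed argument order). */
          sljit_emit_enter(compiler, 1, 1, 1, 0);
          /* Return the first argument unchanged. */
          sljit_emit_return(compiler, SLJIT_MOV, SLJIT_SAVED_REG1, 0);

          code = sljit_generate_code(compiler);  /* the second pass implemented above */
          sljit_free_compiler(compiler);
          return (jitted_func)code;              /* release later with sljit_free_code(code) */
      }
      #endif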
  389 
  390 /* --------------------------------------------------------------------- */
  391 /*  Operators                                                            */
  392 /* --------------------------------------------------------------------- */
  393 
  394 static int emit_cum_binary(struct sljit_compiler *compiler,
  395     sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
  396     int dst, sljit_w dstw,
  397     int src1, sljit_w src1w,
  398     int src2, sljit_w src2w);
  399 
  400 static int emit_non_cum_binary(struct sljit_compiler *compiler,
  401     sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
  402     int dst, sljit_w dstw,
  403     int src1, sljit_w src1w,
  404     int src2, sljit_w src2w);
  405 
  406 static int emit_mov(struct sljit_compiler *compiler,
  407     int dst, sljit_w dstw,
  408     int src, sljit_w srcw);
  409 
  410 static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
  411 {
  412     sljit_ub *buf;
  413 
  414 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  415     buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
  416     FAIL_IF(!buf);
  417     INC_SIZE(5);
  418     *buf++ = 0x9c; /* pushfd */
  419 #else
  420     buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
  421     FAIL_IF(!buf);
  422     INC_SIZE(6);
  423     *buf++ = 0x9c; /* pushfq */
  424     *buf++ = 0x48;
  425 #endif
  426     *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
  427     *buf++ = 0x64;
  428     *buf++ = 0x24;
  429     *buf++ = sizeof(sljit_w);
  430     compiler->flags_saved = 1;
  431     return SLJIT_SUCCESS;
  432 }
  433 
  434 static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
  435 {
  436     sljit_ub *buf;
  437 
  438 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  439     buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
  440     FAIL_IF(!buf);
  441     INC_SIZE(5);
  442 #else
  443     buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
  444     FAIL_IF(!buf);
  445     INC_SIZE(6);
  446     *buf++ = 0x48;
  447 #endif
  448     *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
  449     *buf++ = 0x64;
  450     *buf++ = 0x24;
  451     *buf++ = (sljit_ub)-(int)sizeof(sljit_w);
  452     *buf++ = 0x9d; /* popfd / popfq */
  453     compiler->flags_saved = keep_flags;
  454     return SLJIT_SUCCESS;
  455 }
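
      /* Worked example (illustrative): on x86-32 the two helpers above emit
             9c              pushfd
             8d 64 24 04     lea esp, [esp + 4]
         which parks EFLAGS just below the stack pointer without changing esp, and
             8d 64 24 fc     lea esp, [esp - 4]
             9d              popfd
         to bring the flags back. lea is used rather than add/sub because it does not modify the
         very flags being preserved; the x86-64 variant uses pushfq/popfq and a REX.W-prefixed lea
         with a displacement of sizeof(sljit_w) == 8. */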
  456 
  457 #ifdef _WIN32
  458 #include <malloc.h>
  459 
  460 static void SLJIT_CALL sljit_touch_stack(sljit_w local_size)
  461 {
  462     /* Workaround for calling _chkstk. */
  463     alloca(local_size);
  464 }
  465 #endif
  466 
  467 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  468 #include "sljitNativeX86_32.c"
  469 #else
  470 #include "sljitNativeX86_64.c"
  471 #endif
  472 
  473 static int emit_mov(struct sljit_compiler *compiler,
  474     int dst, sljit_w dstw,
  475     int src, sljit_w srcw)
  476 {
  477     sljit_ub* code;
  478 
  479     if (dst == SLJIT_UNUSED) {
  480         /* No destination; no need to set up flags. */
  481         if (src & SLJIT_MEM) {
  482             code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
  483             FAIL_IF(!code);
  484             *code = 0x8b;
  485         }
  486         return SLJIT_SUCCESS;
  487     }
  488     if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
  489         code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
  490         FAIL_IF(!code);
  491         *code = 0x89;
  492         return SLJIT_SUCCESS;
  493     }
  494     if (src & SLJIT_IMM) {
  495         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
  496 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  497             return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
  498 #else
  499             if (!compiler->mode32) {
  500                 if (NOT_HALFWORD(srcw))
  501                     return emit_load_imm64(compiler, dst, srcw);
  502             }
  503             else
  504                 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
  505 #endif
  506         }
  507 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  508         if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
  509             FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
  510             code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
  511             FAIL_IF(!code);
  512             *code = 0x89;
  513             return SLJIT_SUCCESS;
  514         }
  515 #endif
  516         code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
  517         FAIL_IF(!code);
  518         *code = 0xc7;
  519         return SLJIT_SUCCESS;
  520     }
  521     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
  522         code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
  523         FAIL_IF(!code);
  524         *code = 0x8b;
  525         return SLJIT_SUCCESS;
  526     }
  527 
  528     /* Memory to memory move. Requires two instructions. */
  529     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
  530     FAIL_IF(!code);
  531     *code = 0x8b;
  532     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
  533     FAIL_IF(!code);
  534     *code = 0x89;
  535     return SLJIT_SUCCESS;
  536 }
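
      /* Opcode summary for emit_mov() above (illustrative note):
             0x8b    mov r, r/m      register/memory -> register (also memory -> TMP_REGISTER)
             0x89    mov r/m, r      register -> register/memory
             0xb8+r  mov r, imm      immediate -> register (via emit_do_imm / emit_do_imm32)
             0xc7    mov r/m, imm32  immediate -> memory (and imm32 -> register on x86-64)
         There is no single memory-to-memory mov on x86, hence the two-instruction path through
         TMP_REGISTER at the end of the function. */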
  537 
  538 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
  539     FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
  540 
  541 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
  542 {
  543     sljit_ub *buf;
  544 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  545     int size;
  546 #endif
  547 
  548     CHECK_ERROR();
  549     check_sljit_emit_op0(compiler, op);
  550 
  551     switch (GET_OPCODE(op)) {
  552     case SLJIT_BREAKPOINT:
  553         buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  554         FAIL_IF(!buf);
  555         INC_SIZE(1);
  556         *buf = 0xcc;
  557         break;
  558     case SLJIT_NOP:
  559         buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  560         FAIL_IF(!buf);
  561         INC_SIZE(1);
  562         *buf = 0x90;
  563         break;
  564     case SLJIT_UMUL:
  565     case SLJIT_SMUL:
  566     case SLJIT_UDIV:
  567     case SLJIT_SDIV:
  568         compiler->flags_saved = 0;
  569 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  570 #ifdef _WIN64
  571         SLJIT_COMPILE_ASSERT(
  572             reg_map[SLJIT_TEMPORARY_REG1] == 0
  573             && reg_map[SLJIT_TEMPORARY_REG2] == 2
  574             && reg_map[TMP_REGISTER] > 7,
  575             invalid_register_assignment_for_div_mul);
  576 #else
  577         SLJIT_COMPILE_ASSERT(
  578             reg_map[SLJIT_TEMPORARY_REG1] == 0
  579             && reg_map[SLJIT_TEMPORARY_REG2] < 7
  580             && reg_map[TMP_REGISTER] == 2,
  581             invalid_register_assignment_for_div_mul);
  582 #endif
  583         compiler->mode32 = op & SLJIT_INT_OP;
  584 #endif
  585 
  586         op = GET_OPCODE(op);
  587         if (op == SLJIT_UDIV) {
  588 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
  589             EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
  590             buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
  591 #else
  592             buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
  593 #endif
  594             FAIL_IF(!buf);
  595             *buf = 0x33;
  596         }
  597 
  598         if (op == SLJIT_SDIV) {
  599 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
  600             EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
  601 #endif
  602 
  603             /* CDQ instruction */
  604 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  605             buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  606             FAIL_IF(!buf);
  607             INC_SIZE(1);
  608             *buf = 0x99;
  609 #else
  610             if (compiler->mode32) {
  611                 buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  612                 FAIL_IF(!buf);
  613                 INC_SIZE(1);
  614                 *buf = 0x99;
  615             } else {
  616                 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
  617                 FAIL_IF(!buf);
  618                 INC_SIZE(2);
  619                 *buf++ = REX_W;
  620                 *buf = 0x99;
  621             }
  622 #endif
  623         }
  624 
  625 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  626         buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
  627         FAIL_IF(!buf);
  628         INC_SIZE(2);
  629         *buf++ = 0xf7;
  630         *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
  631 #else
  632 #ifdef _WIN64
  633         size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
  634 #else
  635         size = (!compiler->mode32) ? 3 : 2;
  636 #endif
  637         buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
  638         FAIL_IF(!buf);
  639         INC_SIZE(size);
  640 #ifdef _WIN64
  641         if (!compiler->mode32)
  642             *buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
  643         else if (op >= SLJIT_UDIV)
  644             *buf++ = REX_B;
  645         *buf++ = 0xf7;
  646         *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
  647 #else
  648         if (!compiler->mode32)
  649             *buf++ = REX_W;
  650         *buf++ = 0xf7;
  651         *buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
  652 #endif
  653 #endif
  654         switch (op) {
  655         case SLJIT_UMUL:
  656             *buf |= 4 << 3;
  657             break;
  658         case SLJIT_SMUL:
  659             *buf |= 5 << 3;
  660             break;
  661         case SLJIT_UDIV:
  662             *buf |= 6 << 3;
  663             break;
  664         case SLJIT_SDIV:
  665             *buf |= 7 << 3;
  666             break;
  667         }
  668 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
  669         EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
  670 #endif
  671         break;
  672     }
  673 
  674     return SLJIT_SUCCESS;
  675 }
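
      /* Worked example (illustrative): 0xf7 is the x86 "group 3" opcode; the operation is selected
         by the reg field of the ModRM byte, which is exactly what the final switch ORs in:
             /4 (4 << 3)  mul  r/m   unsigned EDX:EAX = EAX * r/m              (SLJIT_UMUL)
             /5 (5 << 3)  imul r/m   signed   EDX:EAX = EAX * r/m              (SLJIT_SMUL)
             /6 (6 << 3)  div  r/m   unsigned EAX = quotient, EDX = remainder  (SLJIT_UDIV)
             /7 (7 << 3)  idiv r/m   signed   EAX = quotient, EDX = remainder  (SLJIT_SDIV)
         The 0xc0 base byte sets mod == 3, i.e. the register-direct form. */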
  676 
  677 #define ENCODE_PREFIX(prefix) \
  678     do { \
  679         code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
  680         FAIL_IF(!code); \
  681         INC_CSIZE(1); \
  682         *code = (prefix); \
  683     } while (0)
  684 
  685 static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
  686     int dst, sljit_w dstw,
  687     int src, sljit_w srcw)
  688 {
  689     sljit_ub* code;
  690     int dst_r;
  691 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  692     int work_r;
  693 #endif
  694 
  695 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  696     compiler->mode32 = 0;
  697 #endif
  698 
  699     if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
  700         return SLJIT_SUCCESS; /* Empty instruction. */
  701 
  702     if (src & SLJIT_IMM) {
  703         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
  704 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  705             return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
  706 #else
  707             return emit_load_imm64(compiler, dst, srcw);
  708 #endif
  709         }
  710         code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
  711         FAIL_IF(!code);
  712         *code = 0xc6;
  713         return SLJIT_SUCCESS;
  714     }
  715 
  716     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
  717 
  718     if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
  719 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  720         if (reg_map[src] >= 4) {
  721             SLJIT_ASSERT(dst_r == TMP_REGISTER);
  722             EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
  723         } else
  724             dst_r = src;
  725 #else
  726         dst_r = src;
  727 #endif
  728     }
  729 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  730     else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
  731         /* src, dst are registers. */
  732         SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
  733         if (reg_map[dst] < 4) {
  734             if (dst != src)
  735                 EMIT_MOV(compiler, dst, 0, src, 0);
  736             code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
  737             FAIL_IF(!code);
  738             *code++ = 0x0f;
  739             *code = sign ? 0xbe : 0xb6;
  740         }
  741         else {
  742             if (dst != src)
  743                 EMIT_MOV(compiler, dst, 0, src, 0);
  744             if (sign) {
  745                 /* shl reg, 24 */
  746                 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  747                 FAIL_IF(!code);
  748                 *code |= 0x4 << 3;
  749                 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  750                 FAIL_IF(!code);
  751                 /* shr/sar reg, 24 */
  752                 *code |= 0x7 << 3;
  753             }
  754             else {
  755                 /* and dst, 0xff */
  756                 code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
  757                 FAIL_IF(!code);
  758                 *(code + 1) |= 0x4 << 3;
  759             }
  760         }
  761         return SLJIT_SUCCESS;
  762     }
  763 #endif
  764     else {
  765         /* src can be a memory address, or a register with reg_map[src] < 4 on x86_32. */
  766         code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
  767         FAIL_IF(!code);
  768         *code++ = 0x0f;
  769         *code = sign ? 0xbe : 0xb6;
  770     }
  771 
  772     if (dst & SLJIT_MEM) {
  773 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  774         if (dst_r == TMP_REGISTER) {
  775             /* Find an unused register whose reg_map value is < 4. */
  776             if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
  777                 if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
  778                     work_r = SLJIT_TEMPORARY_REG3;
  779                 else
  780                     work_r = SLJIT_TEMPORARY_REG2;
  781             }
  782             else {
  783                 if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
  784                     work_r = SLJIT_TEMPORARY_REG1;
  785                 else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
  786                     work_r = SLJIT_TEMPORARY_REG3;
  787                 else
  788                     work_r = SLJIT_TEMPORARY_REG2;
  789             }
  790 
  791             if (work_r == SLJIT_TEMPORARY_REG1) {
  792                 ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
  793             }
  794             else {
  795                 code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
  796                 FAIL_IF(!code);
  797                 *code = 0x87;
  798             }
  799 
  800             code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
  801             FAIL_IF(!code);
  802             *code = 0x88;
  803 
  804             if (work_r == SLJIT_TEMPORARY_REG1) {
  805                 ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
  806             }
  807             else {
  808                 code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
  809                 FAIL_IF(!code);
  810                 *code = 0x87;
  811             }
  812         }
  813         else {
  814             code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
  815             FAIL_IF(!code);
  816             *code = 0x88;
  817         }
  818 #else
  819         code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
  820         FAIL_IF(!code);
  821         *code = 0x88;
  822 #endif
  823     }
  824 
  825     return SLJIT_SUCCESS;
  826 }
  827 
  828 static int emit_mov_half(struct sljit_compiler *compiler, int sign,
  829     int dst, sljit_w dstw,
  830     int src, sljit_w srcw)
  831 {
  832     sljit_ub* code;
  833     int dst_r;
  834 
  835 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  836     compiler->mode32 = 0;
  837 #endif
  838 
  839     if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
  840         return SLJIT_SUCCESS; /* Empty instruction. */
  841 
  842     if (src & SLJIT_IMM) {
  843         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
  844 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  845             return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
  846 #else
  847             return emit_load_imm64(compiler, dst, srcw);
  848 #endif
  849         }
  850         code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
  851         FAIL_IF(!code);
  852         *code = 0xc7;
  853         return SLJIT_SUCCESS;
  854     }
  855 
  856     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
  857 
  858     if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
  859         dst_r = src;
  860     else {
  861         code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
  862         FAIL_IF(!code);
  863         *code++ = 0x0f;
  864         *code = sign ? 0xbf : 0xb7;
  865     }
  866 
  867     if (dst & SLJIT_MEM) {
  868         code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
  869         FAIL_IF(!code);
  870         *code = 0x89;
  871     }
  872 
  873     return SLJIT_SUCCESS;
  874 }
  875 
  876 static int emit_unary(struct sljit_compiler *compiler, int un_index,
  877     int dst, sljit_w dstw,
  878     int src, sljit_w srcw)
  879 {
  880     sljit_ub* code;
  881 
  882     if (dst == SLJIT_UNUSED) {
  883         EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
  884         code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
  885         FAIL_IF(!code);
  886         *code++ = 0xf7;
  887         *code |= (un_index) << 3;
  888         return SLJIT_SUCCESS;
  889     }
  890     if (dst == src && dstw == srcw) {
  891         /* Same input and output */
  892         code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  893         FAIL_IF(!code);
  894         *code++ = 0xf7;
  895         *code |= (un_index) << 3;
  896         return SLJIT_SUCCESS;
  897     }
  898     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
  899         EMIT_MOV(compiler, dst, 0, src, srcw);
  900         code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  901         FAIL_IF(!code);
  902         *code++ = 0xf7;
  903         *code |= (un_index) << 3;
  904         return SLJIT_SUCCESS;
  905     }
  906     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
  907     code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
  908     FAIL_IF(!code);
  909     *code++ = 0xf7;
  910     *code |= (un_index) << 3;
  911     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
  912     return SLJIT_SUCCESS;
  913 }
  914 
  915 static int emit_not_with_flags(struct sljit_compiler *compiler,
  916     int dst, sljit_w dstw,
  917     int src, sljit_w srcw)
  918 {
  919     sljit_ub* code;
  920 
  921     if (dst == SLJIT_UNUSED) {
  922         EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
  923         code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
  924         FAIL_IF(!code);
  925         *code++ = 0xf7;
  926         *code |= 0x2 << 3;
  927         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
  928         FAIL_IF(!code);
  929         *code = 0x0b;
  930         return SLJIT_SUCCESS;
  931     }
  932     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
  933         EMIT_MOV(compiler, dst, 0, src, srcw);
  934         code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  935         FAIL_IF(!code);
  936         *code++ = 0xf7;
  937         *code |= 0x2 << 3;
  938         code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
  939         FAIL_IF(!code);
  940         *code = 0x0b;
  941         return SLJIT_SUCCESS;
  942     }
  943     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
  944     code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
  945     FAIL_IF(!code);
  946     *code++ = 0xf7;
  947     *code |= 0x2 << 3;
  948     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
  949     FAIL_IF(!code);
  950     *code = 0x0b;
  951     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
  952     return SLJIT_SUCCESS;
  953 }
  954 
  955 static int emit_clz(struct sljit_compiler *compiler, int op,
  956     int dst, sljit_w dstw,
  957     int src, sljit_w srcw)
  958 {
  959     sljit_ub* code;
  960     int dst_r;
  961 
  962     SLJIT_UNUSED_ARG(op);
  963     if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
  964         /* Just set the zero flag. */
  965         EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
  966         code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
  967         FAIL_IF(!code);
  968         *code++ = 0xf7;
  969         *code |= 0x2 << 3;
  970 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  971         code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
  972 #else
  973         code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
  974 #endif
  975         FAIL_IF(!code);
  976         *code |= 0x5 << 3;
  977         return SLJIT_SUCCESS;
  978     }
  979 
  980     if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
  981         EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
  982         src = TMP_REGISTER;
  983         srcw = 0;
  984     }
  985 
  986     code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
  987     FAIL_IF(!code);
  988     *code++ = 0x0f;
  989     *code = 0xbd;
  990 
  991 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  992     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
  993         dst_r = dst;
  994     else {
  995         /* Find an unused temporary register. */
  996         if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
  997             dst_r = SLJIT_TEMPORARY_REG1;
  998         else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
  999             dst_r = SLJIT_TEMPORARY_REG2;
 1000         else
 1001             dst_r = SLJIT_TEMPORARY_REG3;
 1002         EMIT_MOV(compiler, dst, dstw, dst_r, 0);
 1003     }
 1004     EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
 1005 #else
 1006     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
 1007     compiler->mode32 = 0;
 1008     EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
 1009     compiler->mode32 = op & SLJIT_INT_OP;
 1010 #endif
 1011 
 1012     code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
 1013     FAIL_IF(!code);
 1014     *code++ = 0x0f;
 1015     *code = 0x45;
 1016 
 1017 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1018     code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
 1019 #else
 1020     code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
 1021 #endif
 1022     FAIL_IF(!code);
 1023     *(code + 1) |= 0x6 << 3;
 1024 
 1025 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1026     if (dst & SLJIT_MEM) {
 1027         code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
 1028         FAIL_IF(!code);
 1029         *code = 0x87;
 1030     }
 1031 #else
 1032     if (dst & SLJIT_MEM)
 1033         EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
 1034 #endif
 1035     return SLJIT_SUCCESS;
 1036 }
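
      /* A minimal plain-C sketch of what the 32-bit path of emit_clz() computes (bsr, then cmovne
         with the preloaded constant 32 + 31, then xor 31). Illustrative only, not part of sljit: */
      #if 0
      static int clz32_reference(unsigned int x)
      {
          int i;
          if (x == 0)
              return (32 + 31) ^ 31;     /* cmovne not taken: 63 ^ 31 == 32 */
          for (i = 31; !(x & (1u << i)); i--)
              ;                          /* i ends up as bsr(x), the index of the highest set bit */
          return i ^ 31;                 /* == 31 - bsr(x), i.e. the leading zero count */
      }
      #endif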
 1037 
 1038 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
 1039     int dst, sljit_w dstw,
 1040     int src, sljit_w srcw)
 1041 {
 1042     sljit_ub* code;
 1043     int update = 0;
 1044 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1045     int dst_is_ereg = 0;
 1046     int src_is_ereg = 0;
 1047 #else
 1048     #define src_is_ereg 0
 1049 #endif
 1050 
 1051     CHECK_ERROR();
 1052     check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
 1053 
 1054 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1055     compiler->mode32 = op & SLJIT_INT_OP;
 1056 #endif
 1057     CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
 1058     CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
 1059 
 1060     if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
 1061         op = GET_OPCODE(op);
 1062 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1063         compiler->mode32 = 0;
 1064 #endif
 1065 
 1066         SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
 1067         if (op >= SLJIT_MOVU) {
 1068             update = 1;
 1069             op -= 7;
 1070         }
 1071 
 1072         if (src & SLJIT_IMM) {
 1073             switch (op) {
 1074             case SLJIT_MOV_UB:
 1075                 srcw = (unsigned char)srcw;
 1076                 break;
 1077             case SLJIT_MOV_SB:
 1078                 srcw = (signed char)srcw;
 1079                 break;
 1080             case SLJIT_MOV_UH:
 1081                 srcw = (unsigned short)srcw;
 1082                 break;
 1083             case SLJIT_MOV_SH:
 1084                 srcw = (signed short)srcw;
 1085                 break;
 1086 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1087             case SLJIT_MOV_UI:
 1088                 srcw = (unsigned int)srcw;
 1089                 break;
 1090             case SLJIT_MOV_SI:
 1091                 srcw = (signed int)srcw;
 1092                 break;
 1093 #endif
 1094             }
 1095 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1096             if (SLJIT_UNLIKELY(dst_is_ereg))
 1097                 return emit_mov(compiler, dst, dstw, src, srcw);
 1098 #endif
 1099         }
 1100 
 1101         if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
 1102             code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
 1103             FAIL_IF(!code);
 1104             *code = 0x8d;
 1105             src &= SLJIT_MEM | 0xf;
 1106             srcw = 0;
 1107         }
 1108 
 1109 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1110         if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
 1111             SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
 1112             dst = TMP_REGISTER;
 1113         }
 1114 #endif
 1115 
 1116         switch (op) {
 1117         case SLJIT_MOV:
 1118 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1119         case SLJIT_MOV_UI:
 1120         case SLJIT_MOV_SI:
 1121 #endif
 1122             FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
 1123             break;
 1124         case SLJIT_MOV_UB:
 1125             FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
 1126             break;
 1127         case SLJIT_MOV_SB:
 1128             FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
 1129             break;
 1130         case SLJIT_MOV_UH:
 1131             FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
 1132             break;
 1133         case SLJIT_MOV_SH:
 1134             FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
 1135             break;
 1136 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1137         case SLJIT_MOV_UI:
 1138             FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
 1139             break;
 1140         case SLJIT_MOV_SI:
 1141             FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
 1142             break;
 1143 #endif
 1144         }
 1145 
 1146 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1147         if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
 1148             return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
 1149 #endif
 1150 
 1151         if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
 1152             code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
 1153             FAIL_IF(!code);
 1154             *code = 0x8d;
 1155         }
 1156         return SLJIT_SUCCESS;
 1157     }
 1158 
 1159     if (SLJIT_UNLIKELY(GET_FLAGS(op)))
 1160         compiler->flags_saved = 0;
 1161 
 1162     switch (GET_OPCODE(op)) {
 1163     case SLJIT_NOT:
 1164         if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
 1165             return emit_not_with_flags(compiler, dst, dstw, src, srcw);
 1166         return emit_unary(compiler, 0x2, dst, dstw, src, srcw);
 1167 
 1168     case SLJIT_NEG:
 1169         if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
 1170             FAIL_IF(emit_save_flags(compiler));
 1171         return emit_unary(compiler, 0x3, dst, dstw, src, srcw);
 1172 
 1173     case SLJIT_CLZ:
 1174         if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
 1175             FAIL_IF(emit_save_flags(compiler));
 1176         return emit_clz(compiler, op, dst, dstw, src, srcw);
 1177     }
 1178 
 1179     return SLJIT_SUCCESS;
 1180 
 1181 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1182     #undef src_is_ereg
 1183 #endif
 1184 }
 1185 
 1186 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1187 
 1188 #define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
 1189     if (IS_HALFWORD(immw) || compiler->mode32) { \
 1190         code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
 1191         FAIL_IF(!code); \
 1192         *(code + 1) |= (_op_imm_); \
 1193     } \
 1194     else { \
 1195         FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
 1196         code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
 1197         FAIL_IF(!code); \
 1198         *code = (_op_mr_); \
 1199     }
 1200 
 1201 #define BINARY_EAX_IMM(_op_eax_imm_, immw) \
 1202     FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))
 1203 
 1204 #else
 1205 
 1206 #define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
 1207     code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
 1208     FAIL_IF(!code); \
 1209     *(code + 1) |= (_op_imm_);
 1210 
 1211 #define BINARY_EAX_IMM(_op_eax_imm_, immw) \
 1212     FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))
 1213 
 1214 #endif
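
      /* Illustrative note: BINARY_IMM relies on the standard x86 "group 1" immediate forms
         (0x83 for imm8, 0x81 for imm32; the choice is presumably made inside emit_x86_instruction
         via EX86_BIN_INS). _op_imm_ is the operation's /digit already shifted into the ModRM reg
         field, e.g. 0x0 << 3 for add or 0x6 << 3 for xor (as seen in emit_clz above).
         BINARY_EAX_IMM uses the shorter accumulator forms such as 0x05 = "add eax, imm32".
         On x86-64, an immediate that is not a halfword is first loaded into TMP_REG2 and the
         register form (_op_mr_) is used instead. */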
 1215 
 1216 static int emit_cum_binary(struct sljit_compiler *compiler,
 1217     sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
 1218     int dst, sljit_w dstw,
 1219     int src1, sljit_w src1w,
 1220     int src2, sljit_w src2w)
 1221 {
 1222     sljit_ub* code;
 1223 
 1224     if (dst == SLJIT_UNUSED) {
 1225         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1226         if (src2 & SLJIT_IMM) {
 1227             BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
 1228         }
 1229         else {
 1230             code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1231             FAIL_IF(!code);
 1232             *code = op_rm;
 1233         }
 1234         return SLJIT_SUCCESS;
 1235     }
 1236 
 1237     if (dst == src1 && dstw == src1w) {
 1238         if (src2 & SLJIT_IMM) {
 1239 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1240             if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1241 #else
 1242             if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
 1243 #endif
 1244                 BINARY_EAX_IMM(op_eax_imm, src2w);
 1245             }
 1246             else {
 1247                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
 1248             }
 1249         }
 1250         else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
 1251             code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
 1252             FAIL_IF(!code);
 1253             *code = op_rm;
 1254         }
 1255         else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
 1256             /* Special exception for sljit_emit_cond_value. */
 1257             code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
 1258             FAIL_IF(!code);
 1259             *code = op_mr;
 1260         }
 1261         else {
 1262             EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
 1263             code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
 1264             FAIL_IF(!code);
 1265             *code = op_mr;
 1266         }
 1267         return SLJIT_SUCCESS;
 1268     }
 1269 
 1270     /* Only for cumulative operations. */
 1271     if (dst == src2 && dstw == src2w) {
 1272         if (src1 & SLJIT_IMM) {
 1273 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1274             if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1275 #else
 1276             if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
 1277 #endif
 1278                 BINARY_EAX_IMM(op_eax_imm, src1w);
 1279             }
 1280             else {
 1281                 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
 1282             }
 1283         }
 1284         else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
 1285             code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
 1286             FAIL_IF(!code);
 1287             *code = op_rm;
 1288         }
 1289         else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
 1290             code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
 1291             FAIL_IF(!code);
 1292             *code = op_mr;
 1293         }
 1294         else {
 1295             EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1296             code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
 1297             FAIL_IF(!code);
 1298             *code = op_mr;
 1299         }
 1300         return SLJIT_SUCCESS;
 1301     }
 1302 
 1303     /* General version. */
 1304     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
 1305         EMIT_MOV(compiler, dst, 0, src1, src1w);
 1306         if (src2 & SLJIT_IMM) {
 1307             BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
 1308         }
 1309         else {
 1310             code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
 1311             FAIL_IF(!code);
 1312             *code = op_rm;
 1313         }
 1314     }
 1315     else {
 1316         /* This version requires fewer memory writes. */
 1317         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1318         if (src2 & SLJIT_IMM) {
 1319             BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
 1320         }
 1321         else {
 1322             code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1323             FAIL_IF(!code);
 1324             *code = op_rm;
 1325         }
 1326         EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 1327     }
 1328 
 1329     return SLJIT_SUCCESS;
 1330 }
 1331 
 1332 static int emit_non_cum_binary(struct sljit_compiler *compiler,
 1333     sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
 1334     int dst, sljit_w dstw,
 1335     int src1, sljit_w src1w,
 1336     int src2, sljit_w src2w)
 1337 {
 1338     sljit_ub* code;
 1339 
 1340     if (dst == SLJIT_UNUSED) {
 1341         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1342         if (src2 & SLJIT_IMM) {
 1343             BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
 1344         }
 1345         else {
 1346             code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1347             FAIL_IF(!code);
 1348             *code = op_rm;
 1349         }
 1350         return SLJIT_SUCCESS;
 1351     }
 1352 
 1353     if (dst == src1 && dstw == src1w) {
 1354         if (src2 & SLJIT_IMM) {
 1355 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1356             if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1357 #else
 1358             if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
 1359 #endif
 1360                 BINARY_EAX_IMM(op_eax_imm, src2w);
 1361             }
 1362             else {
 1363                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
 1364             }
 1365         }
 1366         else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
 1367             code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
 1368             FAIL_IF(!code);
 1369             *code = op_rm;
 1370         }
 1371         else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
 1372             code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
 1373             FAIL_IF(!code);
 1374             *code = op_mr;
 1375         }
 1376         else {
 1377             EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
 1378             code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
 1379             FAIL_IF(!code);
 1380             *code = op_mr;
 1381         }
 1382         return SLJIT_SUCCESS;
 1383     }
 1384 
 1385     /* General version. */
 1386     if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
 1387         EMIT_MOV(compiler, dst, 0, src1, src1w);
 1388         if (src2 & SLJIT_IMM) {
 1389             BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
 1390         }
 1391         else {
 1392             code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
 1393             FAIL_IF(!code);
 1394             *code = op_rm;
 1395         }
 1396     }
 1397     else {
 1398         /* This version requires fewer memory writes. */
 1399         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1400         if (src2 & SLJIT_IMM) {
 1401             BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
 1402         }
 1403         else {
 1404             code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1405             FAIL_IF(!code);
 1406             *code = op_rm;
 1407         }
 1408         EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 1409     }
 1410 
 1411     return SLJIT_SUCCESS;
 1412 }
 1413 
 1414 static int emit_mul(struct sljit_compiler *compiler,
 1415     int dst, sljit_w dstw,
 1416     int src1, sljit_w src1w,
 1417     int src2, sljit_w src2w)
 1418 {
 1419     sljit_ub* code;
 1420     int dst_r;
 1421 
 1422     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
 1423 
 1424     /* Register destination. */
 1425     if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
 1426         code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
 1427         FAIL_IF(!code);
 1428         *code++ = 0x0f;
 1429         *code = 0xaf;
 1430     }
 1431     else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
 1432         code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
 1433         FAIL_IF(!code);
 1434         *code++ = 0x0f;
 1435         *code = 0xaf;
 1436     }
 1437     else if (src1 & SLJIT_IMM) {
 1438         if (src2 & SLJIT_IMM) {
 1439             EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
 1440             src2 = dst_r;
 1441             src2w = 0;
 1442         }
 1443 
 1444         if (src1w <= 127 && src1w >= -128) {
 1445             code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1446             FAIL_IF(!code);
 1447             *code = 0x6b;
 1448             code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
 1449             FAIL_IF(!code);
 1450             INC_CSIZE(1);
 1451             *code = (sljit_b)src1w;
 1452         }
 1453 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1454         else {
 1455             code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1456             FAIL_IF(!code);
 1457             *code = 0x69;
 1458             code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1459             FAIL_IF(!code);
 1460             INC_CSIZE(4);
 1461             *(sljit_w*)code = src1w;
 1462         }
 1463 #else
 1464         else if (IS_HALFWORD(src1w)) {
 1465             code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1466             FAIL_IF(!code);
 1467             *code = 0x69;
 1468             code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1469             FAIL_IF(!code);
 1470             INC_CSIZE(4);
 1471             *(sljit_hw*)code = (sljit_hw)src1w;
 1472         }
 1473         else {
 1474             EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
 1475             if (dst_r != src2)
 1476                 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
 1477             code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1478             FAIL_IF(!code);
 1479             *code++ = 0x0f;
 1480             *code = 0xaf;
 1481         }
 1482 #endif
 1483     }
 1484     else if (src2 & SLJIT_IMM) {
 1485         /* Note: src1 is NOT immediate. */
 1486 
 1487         if (src2w <= 127 && src2w >= -128) {
 1488             code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1489             FAIL_IF(!code);
 1490             *code = 0x6b;
 1491             code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
 1492             FAIL_IF(!code);
 1493             INC_CSIZE(1);
 1494             *code = (sljit_b)src2w;
 1495         }
 1496 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1497         else {
 1498             code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1499             FAIL_IF(!code);
 1500             *code = 0x69;
 1501             code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1502             FAIL_IF(!code);
 1503             INC_CSIZE(4);
 1504             *(sljit_w*)code = src2w;
 1505         }
 1506 #else
 1507         else if (IS_HALFWORD(src2w)) {
 1508             code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1509             FAIL_IF(!code);
 1510             *code = 0x69;
 1511             code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1512             FAIL_IF(!code);
 1513             INC_CSIZE(4);
 1514             *(sljit_hw*)code = (sljit_hw)src2w;
 1515         }
 1516         else {
 1517             EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w); /* src2 holds the immediate in this branch. */
 1518             if (dst_r != src1)
 1519                 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
 1520             code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1521             FAIL_IF(!code);
 1522             *code++ = 0x0f;
 1523             *code = 0xaf;
 1524         }
 1525 #endif
 1526     }
 1527     else {
 1528         /* Neither argument is immediate. */
 1529         if (ADDRESSING_DEPENDS_ON(src2, dst_r))
 1530             dst_r = TMP_REGISTER;
 1531         EMIT_MOV(compiler, dst_r, 0, src1, src1w);
 1532         code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
 1533         FAIL_IF(!code);
 1534         *code++ = 0x0f;
 1535         *code = 0xaf;
 1536     }
 1537 
 1538     if (dst_r == TMP_REGISTER)
 1539         EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 1540 
 1541     return SLJIT_SUCCESS;
 1542 }
 1543 
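      /* Tries to perform an addition as a single LEA (0x8d), which leaves the
         flags untouched. Returns SLJIT_ERR_UNSUPPORTED when no suitable
         addressing form exists, so the caller falls back to a plain ADD/SUB. */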
 1544 static int emit_lea_binary(struct sljit_compiler *compiler,
 1545     int dst, sljit_w dstw,
 1546     int src1, sljit_w src1w,
 1547     int src2, sljit_w src2w)
 1548 {
 1549     sljit_ub* code;
 1550     int dst_r, done = 0;
 1551 
 1552     /* These cases are better handled by the normal code path. */
 1553     if (dst == src1 && dstw == src1w)
 1554         return SLJIT_ERR_UNSUPPORTED;
 1555     if (dst == src2 && dstw == src2w)
 1556         return SLJIT_ERR_UNSUPPORTED;
 1557 
 1558     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
 1559 
 1560     if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
 1561         if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
 1562             /* src1 and src2 cannot both be SLJIT_LOCALS_REG. */
 1563             if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
 1564                 code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
 1565                 FAIL_IF(!code);
 1566                 *code = 0x8d;
 1567                 done = 1;
 1568             }
 1569         }
 1570 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1571         if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1572             code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
 1573 #else
 1574         if (src2 & SLJIT_IMM) {
 1575             code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
 1576 #endif
 1577             FAIL_IF(!code);
 1578             *code = 0x8d;
 1579             done = 1;
 1580         }
 1581     }
 1582     else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
 1583 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1584         if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1585             code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
 1586 #else
 1587         if (src1 & SLJIT_IMM) {
 1588             code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
 1589 #endif
 1590             FAIL_IF(!code);
 1591             *code = 0x8d;
 1592             done = 1;
 1593         }
 1594     }
 1595 
 1596     if (done) {
 1597         if (dst_r == TMP_REGISTER)
 1598             return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
 1599         return SLJIT_SUCCESS;
 1600     }
 1601     return SLJIT_ERR_UNSUPPORTED;
 1602 }
 1603 
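      /* Emits a CMP: 0x3d is the short CMP EAX, imm32 form, 0x3b is
         CMP r, r/m, 0x39 is CMP r/m, r and /7 selects CMP in the group-1
         immediate encodings. */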
 1604 static int emit_cmp_binary(struct sljit_compiler *compiler,
 1605     int src1, sljit_w src1w,
 1606     int src2, sljit_w src2w)
 1607 {
 1608     sljit_ub* code;
 1609 
 1610 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1611     if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1612 #else
 1613     if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
 1614 #endif
 1615         BINARY_EAX_IMM(0x3d, src2w);
 1616         return SLJIT_SUCCESS;
 1617     }
 1618 
 1619     if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
 1620         if (src2 & SLJIT_IMM) {
 1621             BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
 1622         }
 1623         else {
 1624             code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
 1625             FAIL_IF(!code);
 1626             *code = 0x3b;
 1627         }
 1628         return SLJIT_SUCCESS;
 1629     }
 1630 
 1631     if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
 1632         code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
 1633         FAIL_IF(!code);
 1634         *code = 0x39;
 1635         return SLJIT_SUCCESS;
 1636     }
 1637 
 1638     if (src2 & SLJIT_IMM) {
 1639         if (src1 & SLJIT_IMM) {
 1640             EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1641             src1 = TMP_REGISTER;
 1642             src1w = 0;
 1643         }
 1644         BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
 1645     }
 1646     else {
 1647         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1648         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1649         FAIL_IF(!code);
 1650         *code = 0x3b;
 1651     }
 1652     return SLJIT_SUCCESS;
 1653 }
 1654 
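      /* Emits a TEST (used by SLJIT_AND when the destination is unused):
         0xa9 is TEST EAX, imm32, 0xf7 /0 is TEST r/m, imm32 and 0x85 is
         TEST r/m, r. */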
 1655 static int emit_test_binary(struct sljit_compiler *compiler,
 1656     int src1, sljit_w src1w,
 1657     int src2, sljit_w src2w)
 1658 {
 1659     sljit_ub* code;
 1660 
 1661 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1662     if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1663 #else
 1664     if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
 1665 #endif
 1666         BINARY_EAX_IMM(0xa9, src2w);
 1667         return SLJIT_SUCCESS;
 1668     }
 1669 
 1670 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1671     if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1672 #else
 1673     if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
 1674 #endif
 1675         BINARY_EAX_IMM(0xa9, src1w);
 1676         return SLJIT_SUCCESS;
 1677     }
 1678 
 1679     if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
 1680         if (src2 & SLJIT_IMM) {
 1681 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1682             if (IS_HALFWORD(src2w) || compiler->mode32) {
 1683                 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
 1684                 FAIL_IF(!code);
 1685                 *code = 0xf7;
 1686             }
 1687             else {
 1688                 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
 1689                 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
 1690                 FAIL_IF(!code);
 1691                 *code = 0x85;
 1692             }
 1693 #else
 1694             code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
 1695             FAIL_IF(!code);
 1696             *code = 0xf7;
 1697 #endif
 1698         }
 1699         else {
 1700             code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
 1701             FAIL_IF(!code);
 1702             *code = 0x85;
 1703         }
 1704         return SLJIT_SUCCESS;
 1705     }
 1706 
 1707     if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
 1708         if (src1 & SLJIT_IMM) {
 1709 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1710             if (IS_HALFWORD(src1w) || compiler->mode32) {
 1711                 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
 1712                 FAIL_IF(!code);
 1713                 *code = 0xf7;
 1714             }
 1715             else {
 1716                 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
 1717                 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
 1718                 FAIL_IF(!code);
 1719                 *code = 0x85;
 1720             }
 1721 #else
 1722             code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
 1723             FAIL_IF(!code);
 1724             *code = 0xf7;
 1725 #endif
 1726         }
 1727         else {
 1728             code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
 1729             FAIL_IF(!code);
 1730             *code = 0x85;
 1731         }
 1732         return SLJIT_SUCCESS;
 1733     }
 1734 
 1735     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1736     if (src2 & SLJIT_IMM) {
 1737 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1738         if (IS_HALFWORD(src2w) || compiler->mode32) {
 1739             code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
 1740             FAIL_IF(!code);
 1741             *code = 0xf7;
 1742         }
 1743         else {
 1744             FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
 1745             code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
 1746             FAIL_IF(!code);
 1747             *code = 0x85;
 1748         }
 1749 #else
 1750         code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
 1751         FAIL_IF(!code);
 1752         *code = 0xf7;
 1753 #endif
 1754     }
 1755     else {
 1756         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1757         FAIL_IF(!code);
 1758         *code = 0x85;
 1759     }
 1760     return SLJIT_SUCCESS;
 1761 }
 1762 
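      /* Emits a shift. The hardware takes the shift count either as an
         immediate or in CL, so when src2 is neither, the count is moved into
         SLJIT_PREF_SHIFT_REG (ecx), whose previous value is saved and restored
         around the shift. 'mode' is the /reg field of the shift group
         (/4 = SHL, /5 = SHR, /7 = SAR). */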
 1763 static int emit_shift(struct sljit_compiler *compiler,
 1764     sljit_ub mode,
 1765     int dst, sljit_w dstw,
 1766     int src1, sljit_w src1w,
 1767     int src2, sljit_w src2w)
 1768 {
 1769     sljit_ub* code;
 1770 
 1771     if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
 1772         if (dst == src1 && dstw == src1w) {
 1773             code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
 1774             FAIL_IF(!code);
 1775             *code |= mode;
 1776             return SLJIT_SUCCESS;
 1777         }
 1778         if (dst == SLJIT_UNUSED) {
 1779             EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1780             code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
 1781             FAIL_IF(!code);
 1782             *code |= mode;
 1783             return SLJIT_SUCCESS;
 1784         }
 1785         if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
 1786             EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1787             code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1788             FAIL_IF(!code);
 1789             *code |= mode;
 1790             EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1791             return SLJIT_SUCCESS;
 1792         }
 1793         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
 1794             EMIT_MOV(compiler, dst, 0, src1, src1w);
 1795             code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
 1796             FAIL_IF(!code);
 1797             *code |= mode;
 1798             return SLJIT_SUCCESS;
 1799         }
 1800 
 1801         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1802         code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
 1803         FAIL_IF(!code);
 1804         *code |= mode;
 1805         EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 1806         return SLJIT_SUCCESS;
 1807     }
 1808 
 1809     if (dst == SLJIT_PREF_SHIFT_REG) {
 1810         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1811         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 1812         code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1813         FAIL_IF(!code);
 1814         *code |= mode;
 1815         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1816     }
 1817     else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
 1818         if (src1 != dst)
 1819             EMIT_MOV(compiler, dst, 0, src1, src1w);
 1820         EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
 1821         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 1822         code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
 1823         FAIL_IF(!code);
 1824         *code |= mode;
 1825         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1826     }
 1827     else {
 1828         /* This case is really difficult, since ecx itself may be used for
 1829            addressing, and the code must keep working even in that case. */
 1830         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1831 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1832         EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
 1833 #else
 1834         /* [esp - 4] is reserved for eflags. */
 1835         EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
 1836 #endif
 1837         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 1838         code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1839         FAIL_IF(!code);
 1840         *code |= mode;
 1841 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1842         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
 1843 #else
 1844         /* [esp - 4] is reserved for eflags. */
 1845         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
 1846 #endif
 1847         EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 1848     }
 1849 
 1850     return SLJIT_SUCCESS;
 1851 }
 1852 
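      /* Shift that must also produce flags. A constant zero count degenerates
         into a move (or an OR with 0 when flags are requested), and for a
         variable count an explicit CMP with 0 supplies the flags: before the
         shift when the destination is not a register, after it otherwise. */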
 1853 static int emit_shift_with_flags(struct sljit_compiler *compiler,
 1854     sljit_ub mode, int set_flags,
 1855     int dst, sljit_w dstw,
 1856     int src1, sljit_w src1w,
 1857     int src2, sljit_w src2w)
 1858 {
 1859     /* The CPU does not set flags if the shift count is 0. */
 1860     if (src2 & SLJIT_IMM) {
 1861 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1862         if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
 1863             return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 1864 #else
 1865         if ((src2w & 0x1f) != 0)
 1866             return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 1867 #endif
 1868         if (!set_flags)
 1869             return emit_mov(compiler, dst, dstw, src1, src1w);
 1870         /* OR dst, src, 0 */
 1871         return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
 1872             dst, dstw, src1, src1w, SLJIT_IMM, 0);
 1873     }
 1874 
 1875     if (!set_flags)
 1876         return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 1877 
 1878     if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS))
 1879         FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
 1880 
 1881     FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
 1882 
 1883     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
 1884         return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
 1885     return SLJIT_SUCCESS;
 1886 }
 1887 
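      /* Two operand entry point. ADD/SUB without flag requirements are first
         tried as an LEA; SUB and AND with an unused destination become CMP and
         TEST; everything else maps onto the binary helpers above. */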
 1888 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
 1889     int dst, sljit_w dstw,
 1890     int src1, sljit_w src1w,
 1891     int src2, sljit_w src2w)
 1892 {
 1893     CHECK_ERROR();
 1894     check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
 1895 
 1896 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1897     compiler->mode32 = op & SLJIT_INT_OP;
 1898 #endif
 1899     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 1900     CHECK_EXTRA_REGS(src1, src1w, (void)0);
 1901     CHECK_EXTRA_REGS(src2, src2w, (void)0);
 1902 
 1903     if (GET_OPCODE(op) >= SLJIT_MUL) {
 1904         if (SLJIT_UNLIKELY(GET_FLAGS(op)))
 1905             compiler->flags_saved = 0;
 1906         else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
 1907             FAIL_IF(emit_save_flags(compiler));
 1908     }
 1909 
 1910     switch (GET_OPCODE(op)) {
 1911     case SLJIT_ADD:
 1912         if (!GET_FLAGS(op)) {
 1913             if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
 1914                 return compiler->error;
 1915         } 
 1916         else
 1917             compiler->flags_saved = 0;
 1918         if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
 1919             FAIL_IF(emit_save_flags(compiler));
 1920         return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
 1921             dst, dstw, src1, src1w, src2, src2w);
 1922     case SLJIT_ADDC:
 1923         if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
 1924             FAIL_IF(emit_restore_flags(compiler, 1));
 1925         else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
 1926             FAIL_IF(emit_save_flags(compiler));
 1927         if (SLJIT_UNLIKELY(GET_FLAGS(op)))
 1928             compiler->flags_saved = 0;
 1929         return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
 1930             dst, dstw, src1, src1w, src2, src2w);
 1931     case SLJIT_SUB:
 1932         if (!GET_FLAGS(op)) {
 1933             if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
 1934                 return compiler->error;
 1935         }
 1936         else
 1937             compiler->flags_saved = 0;
 1938         if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
 1939             FAIL_IF(emit_save_flags(compiler));
 1940         if (dst == SLJIT_UNUSED)
 1941             return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
 1942         return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
 1943             dst, dstw, src1, src1w, src2, src2w);
 1944     case SLJIT_SUBC:
 1945         if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
 1946             FAIL_IF(emit_restore_flags(compiler, 1));
 1947         else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
 1948             FAIL_IF(emit_save_flags(compiler));
 1949         if (SLJIT_UNLIKELY(GET_FLAGS(op)))
 1950             compiler->flags_saved = 0;
 1951         return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
 1952             dst, dstw, src1, src1w, src2, src2w);
 1953     case SLJIT_MUL:
 1954         return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
 1955     case SLJIT_AND:
 1956         if (dst == SLJIT_UNUSED)
 1957             return emit_test_binary(compiler, src1, src1w, src2, src2w);
 1958         return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
 1959             dst, dstw, src1, src1w, src2, src2w);
 1960     case SLJIT_OR:
 1961         return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
 1962             dst, dstw, src1, src1w, src2, src2w);
 1963     case SLJIT_XOR:
 1964         return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
 1965             dst, dstw, src1, src1w, src2, src2w);
 1966     case SLJIT_SHL:
 1967         return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op),
 1968             dst, dstw, src1, src1w, src2, src2w);
 1969     case SLJIT_LSHR:
 1970         return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op),
 1971             dst, dstw, src1, src1w, src2, src2w);
 1972     case SLJIT_ASHR:
 1973         return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op),
 1974             dst, dstw, src1, src1w, src2, src2w);
 1975     }
 1976 
 1977     return SLJIT_SUCCESS;
 1978 }
 1979 
 1980 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
 1981 {
 1982     check_sljit_get_register_index(reg);
 1983 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1984     if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
 1985             || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
 1986         return -1;
 1987 #endif
 1988     return reg_map[reg];
 1989 }
 1990 
 1991 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
 1992     void *instruction, int size)
 1993 {
 1994     sljit_ub *buf;
 1995 
 1996     CHECK_ERROR();
 1997     check_sljit_emit_op_custom(compiler, instruction, size);
 1998     SLJIT_ASSERT(size > 0 && size < 16);
 1999 
 2000     buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
 2001     FAIL_IF(!buf);
 2002     INC_SIZE(size);
 2003     SLJIT_MEMMOVE(buf, instruction, size);
 2004     return SLJIT_SUCCESS;
 2005 }
 2006 
 2007 /* --------------------------------------------------------------------- */
 2008 /*  Floating point operators                                             */
 2009 /* --------------------------------------------------------------------- */
 2010 
 2011 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2012 static int sse2_available = 0;
 2013 #endif
 2014 
 2015 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
 2016 
 2017 /* Up to 12 bytes for 16 byte alignment + 2 * 16 bytes of data. */
 2018 static sljit_i sse2_data[3 + 4 + 4];
 2019 static sljit_i *sse2_buffer;
 2020 
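      /* init_compiler() aligns sse2_buffer to 16 bytes inside sse2_data: the
         first 16 byte block holds a sign-bit mask (XORPD with it implements
         SLJIT_FNEG), the second holds its complement (ANDPD with it implements
         SLJIT_FABS). */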
 2021 static void init_compiler()
 2022 {
 2023 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2024     int features = 0;
 2025 #endif
 2026 
 2027     sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
 2028     sse2_buffer[0] = 0;
 2029     sse2_buffer[1] = 0x80000000;
 2030     sse2_buffer[4] = 0xffffffff;
 2031     sse2_buffer[5] = 0x7fffffff;
 2032 
 2033 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2034 #ifdef __GNUC__
 2035     /* AT&T syntax. */
 2036     asm (
 2037         "pushl %%ebx\n"
 2038         "movl $0x1, %%eax\n"
 2039         "cpuid\n"
 2040         "popl %%ebx\n"
 2041         "movl %%edx, %0\n"
 2042         : "=g" (features)
 2043         :
 2044         : "%eax", "%ecx", "%edx"
 2045     );
 2046 #elif defined(_MSC_VER) || defined(__BORLANDC__)
 2047     /* Intel syntax. */
 2048     __asm {
 2049         mov eax, 1
 2050         push ebx
 2051         cpuid
 2052         pop ebx
 2053         mov features, edx
 2054     }
 2055 #else
 2056     #error "SLJIT_SSE2_AUTO is not implemented for this C compiler"
 2057 #endif
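          /* Bit 26 of EDX from CPUID leaf 1 reports SSE2 support. */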
 2058     sse2_available = (features >> 26) & 0x1;
 2059 #endif
 2060 }
 2061 
 2062 #endif
 2063 
 2064 SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
 2065 {
 2066     /* Always available. */
 2067     return 1;
 2068 }
 2069 
 2070 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
 2071 
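      /* Scalar double precision arithmetic uses the F2 0F opcode prefix
         (MOVSD, ADDSD, ...); the logic helper below uses the 66 0F prefix
         (ANDPD, XORPD, UCOMISD). */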
 2072 static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
 2073     int xmm1, int xmm2, sljit_w xmm2w)
 2074 {
 2075     sljit_ub *buf;
 2076 
 2077     buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
 2078     FAIL_IF(!buf);
 2079     *buf++ = 0x0f;
 2080     *buf = opcode;
 2081     return SLJIT_SUCCESS;
 2082 }
 2083 
 2084 static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
 2085     int xmm1, int xmm2, sljit_w xmm2w)
 2086 {
 2087     sljit_ub *buf;
 2088 
 2089     buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
 2090     FAIL_IF(!buf);
 2091     *buf++ = 0x0f;
 2092     *buf = opcode;
 2093     return SLJIT_SUCCESS;
 2094 }
 2095 
 2096 static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
 2097     int dst, int src, sljit_w srcw)
 2098 {
 2099     return emit_sse2(compiler, 0x10, dst, src, srcw);
 2100 }
 2101 
 2102 static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
 2103     int dst, sljit_w dstw, int src)
 2104 {
 2105     return emit_sse2(compiler, 0x11, src, dst, dstw);
 2106 }
 2107 
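      /* SSE2 implementation of the floating point ops: FCMP is a UCOMISD
         (0x2e), FMOV a MOVSD load/store, and FNEG/FABS XOR/AND the sign-bit
         masks prepared in sse2_buffer. */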
 2108 #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2109 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
 2110 #else
 2111 static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op,
 2112 #endif
 2113     int dst, sljit_w dstw,
 2114     int src, sljit_w srcw)
 2115 {
 2116     int dst_r;
 2117 
 2118     CHECK_ERROR();
 2119     check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
 2120 
 2121 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2122     compiler->mode32 = 1;
 2123 #endif
 2124 
 2125     if (GET_OPCODE(op) == SLJIT_FCMP) {
 2126         compiler->flags_saved = 0;
 2127         if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
 2128             dst_r = dst;
 2129         else {
 2130             dst_r = TMP_FREG;
 2131             FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
 2132         }
 2133         return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
 2134     }
 2135 
 2136     if (op == SLJIT_FMOV) {
 2137         if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
 2138             return emit_sse2_load(compiler, dst, src, srcw);
 2139         if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
 2140             return emit_sse2_store(compiler, dst, dstw, src);
 2141         FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
 2142         return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
 2143     }
 2144 
 2145     if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
 2146         dst_r = dst;
 2147         if (dst != src)
 2148             FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
 2149     }
 2150     else {
 2151         dst_r = TMP_FREG;
 2152         FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
 2153     }
 2154 
 2155     switch (op) {
 2156     case SLJIT_FNEG:
 2157         FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
 2158         break;
 2159 
 2160     case SLJIT_FABS:
 2161         FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
 2162         break;
 2163     }
 2164 
 2165     if (dst_r == TMP_FREG)
 2166         return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
 2167     return SLJIT_SUCCESS;
 2168 }
 2169 
 2170 #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2171 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
 2172 #else
 2173 static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op,
 2174 #endif
 2175     int dst, sljit_w dstw,
 2176     int src1, sljit_w src1w,
 2177     int src2, sljit_w src2w)
 2178 {
 2179     int dst_r;
 2180 
 2181     CHECK_ERROR();
 2182     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
 2183 
 2184 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2185     compiler->mode32 = 1;
 2186 #endif
 2187 
 2188     if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
 2189         dst_r = dst;
 2190         if (dst == src1)
 2191             ; /* Do nothing here. */
 2192         else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
 2193             /* Swap arguments. */
 2194             src2 = src1;
 2195             src2w = src1w;
 2196         }
 2197         else if (dst != src2)
 2198             FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
 2199         else {
 2200             dst_r = TMP_FREG;
 2201             FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
 2202         }
 2203     }
 2204     else {
 2205         dst_r = TMP_FREG;
 2206         FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
 2207     }
 2208 
 2209     switch (op) {
 2210     case SLJIT_FADD:
 2211         FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w));
 2212         break;
 2213 
 2214     case SLJIT_FSUB:
 2215         FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w));
 2216         break;
 2217 
 2218     case SLJIT_FMUL:
 2219         FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w));
 2220         break;
 2221 
 2222     case SLJIT_FDIV:
 2223         FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w));
 2224         break;
 2225     }
 2226 
 2227     if (dst_r == TMP_FREG)
 2228         return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
 2229     return SLJIT_SUCCESS;
 2230 }
 2231 
 2232 #endif
 2233 
 2234 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2)
 2235 
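      /* x87 fallback, also used at runtime when SSE2 is not detected.
         FLD ST(i) is 0xd9 0xc0+i and FLD m64fp is 0xdd /0; emit_fop and
         emit_fop_regs choose between the ST(i) and m64fp forms of an
         arithmetic instruction. */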
 2236 static int emit_fld(struct sljit_compiler *compiler,
 2237     int src, sljit_w srcw)
 2238 {
 2239     sljit_ub *buf;
 2240 
 2241     if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
 2242         buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
 2243         FAIL_IF(!buf);
 2244         INC_SIZE(2);
 2245         *buf++ = 0xd9;
 2246         *buf = 0xc0 + src - 1;
 2247         return SLJIT_SUCCESS;
 2248     }
 2249 
 2250     buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
 2251     FAIL_IF(!buf);
 2252     *buf = 0xdd;
 2253     return SLJIT_SUCCESS;
 2254 }
 2255 
 2256 static int emit_fop(struct sljit_compiler *compiler,
 2257     sljit_ub st_arg, sljit_ub st_arg2,
 2258     sljit_ub m64fp_arg, sljit_ub m64fp_arg2,
 2259     int src, sljit_w srcw)
 2260 {
 2261     sljit_ub *buf;
 2262 
 2263     if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
 2264         buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
 2265         FAIL_IF(!buf);
 2266         INC_SIZE(2);
 2267         *buf++ = st_arg;
 2268         *buf = st_arg2 + src;
 2269         return SLJIT_SUCCESS;
 2270     }
 2271 
 2272     buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
 2273     FAIL_IF(!buf);
 2274     *buf++ = m64fp_arg;
 2275     *buf |= m64fp_arg2;
 2276     return SLJIT_SUCCESS;
 2277 }
 2278 
 2279 static int emit_fop_regs(struct sljit_compiler *compiler,
 2280     sljit_ub st_arg, sljit_ub st_arg2,
 2281     int src)
 2282 {
 2283     sljit_ub *buf;
 2284 
 2285     buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
 2286     FAIL_IF(!buf);
 2287     INC_SIZE(2);
 2288     *buf++ = st_arg;
 2289     *buf = st_arg2 + src;
 2290     return SLJIT_SUCCESS;
 2291 }
 2292 
 2293 #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2294 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
 2295 #else
 2296 static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op,
 2297 #endif
 2298     int dst, sljit_w dstw,
 2299     int src, sljit_w srcw)
 2300 {
 2301 #if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2302     sljit_ub *buf;
 2303 #endif
 2304 
 2305     CHECK_ERROR();
 2306     check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
 2307 
 2308 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2309     compiler->mode32 = 1;
 2310 #endif
 2311 
 2312     if (GET_OPCODE(op) == SLJIT_FCMP) {
 2313         compiler->flags_saved = 0;
 2314 #if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2315         FAIL_IF(emit_fld(compiler, dst, dstw));
 2316         FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw));
 2317 
 2318         /* Copy flags. */
 2319         EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
 2320         buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
 2321         FAIL_IF(!buf);
 2322         INC_SIZE(3);
 2323         *buf++ = 0xdf;
 2324         *buf++ = 0xe0;
 2325         /* Note: lahf/sahf are not supported by all x86-64 CPUs. */
 2326         *buf++ = 0x9e;
 2327         EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
 2328 #else
 2329         if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
 2330             FAIL_IF(emit_fld(compiler, dst, dstw));
 2331             FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
 2332         } else {
 2333             FAIL_IF(emit_fld(compiler, src, srcw));
 2334             FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw));
 2335             FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
 2336             FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0));
 2337         }
 2338 #endif
 2339         return SLJIT_SUCCESS;
 2340     }
 2341 
 2342     FAIL_IF(emit_fld(compiler, src, srcw));
 2343 
 2344     switch (op) {
 2345     case SLJIT_FNEG:
 2346         FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0));
 2347         break;
 2348     case SLJIT_FABS:
 2349         FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0));
 2350         break;
 2351     }
 2352 
 2353     FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));
 2354 
 2355     return SLJIT_SUCCESS;
 2356 }
 2357 
 2358 #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2359 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
 2360 #else
 2361 static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op,
 2362 #endif
 2363     int dst, sljit_w dstw,
 2364     int src1, sljit_w src1w,
 2365     int src2, sljit_w src2w)
 2366 {
 2367     CHECK_ERROR();
 2368     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
 2369 
 2370 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2371     compiler->mode32 = 1;
 2372 #endif
 2373 
 2374     if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) {
 2375         FAIL_IF(emit_fld(compiler, src2, src2w));
 2376 
 2377         switch (op) {
 2378         case SLJIT_FADD:
 2379             FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1));
 2380             break;
 2381         case SLJIT_FSUB:
 2382             FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1));
 2383             break;
 2384         case SLJIT_FMUL:
 2385             FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1));
 2386             break;
 2387         case SLJIT_FDIV:
 2388             FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1));
 2389             break;
 2390         }
 2391         return SLJIT_SUCCESS;
 2392     }
 2393 
 2394     FAIL_IF(emit_fld(compiler, src1, src1w));
 2395 
 2396     if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) {
 2397         switch (op) {
 2398         case SLJIT_FADD:
 2399             FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2));
 2400             break;
 2401         case SLJIT_FSUB:
 2402             FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2));
 2403             break;
 2404         case SLJIT_FMUL:
 2405             FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2));
 2406             break;
 2407         case SLJIT_FDIV:
 2408             FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2));
 2409             break;
 2410         }
 2411         return SLJIT_SUCCESS;
 2412     }
 2413 
 2414     switch (op) {
 2415     case SLJIT_FADD:
 2416         FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w));
 2417         break;
 2418     case SLJIT_FSUB:
 2419         FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w));
 2420         break;
 2421     case SLJIT_FMUL:
 2422         FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w));
 2423         break;
 2424     case SLJIT_FDIV:
 2425         FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w));
 2426         break;
 2427     }
 2428 
 2429     FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));
 2430 
 2431     return SLJIT_SUCCESS;
 2432 }
 2433 #endif
 2434 
 2435 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2436 
 2437 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
 2438     int dst, sljit_w dstw,
 2439     int src, sljit_w srcw)
 2440 {
 2441     if (sse2_available)
 2442         return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw);
 2443     else
 2444         return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw);
 2445 }
 2446 
 2447 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
 2448     int dst, sljit_w dstw,
 2449     int src1, sljit_w src1w,
 2450     int src2, sljit_w src2w)
 2451 {
 2452     if (sse2_available)
 2453         return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
 2454     else
 2455         return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
 2456 }
 2457 
 2458 #endif
 2459 
 2460 /* --------------------------------------------------------------------- */
 2461 /*  Conditional instructions                                             */
 2462 /* --------------------------------------------------------------------- */
 2463 
 2464 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
 2465 {
 2466     sljit_ub *buf;
 2467     struct sljit_label *label;
 2468 
 2469     CHECK_ERROR_PTR();
 2470     check_sljit_emit_label(compiler);
 2471 
 2472     /* We should restore the flags before the label,
 2473        since the other jumps taken to this label have their own flags as well. */
 2474     if (SLJIT_UNLIKELY(compiler->flags_saved))
 2475         PTR_FAIL_IF(emit_restore_flags(compiler, 0));
 2476 
 2477     if (compiler->last_label && compiler->last_label->size == compiler->size)
 2478         return compiler->last_label;
 2479 
 2480     label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
 2481     PTR_FAIL_IF(!label);
 2482     set_label(label, compiler);
 2483 
 2484     buf = (sljit_ub*)ensure_buf(compiler, 2);
 2485     PTR_FAIL_IF(!buf);
 2486 
 2487     *buf++ = 0;
 2488     *buf++ = 0;
 2489 
 2490     return label;
 2491 }
 2492 
 2493 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
 2494 {
 2495     sljit_ub *buf;
 2496     struct sljit_jump *jump;
 2497 
 2498     CHECK_ERROR_PTR();
 2499     check_sljit_emit_jump(compiler, type);
 2500 
 2501     if (SLJIT_UNLIKELY(compiler->flags_saved)) {
 2502         if ((type & 0xff) <= SLJIT_JUMP)
 2503             PTR_FAIL_IF(emit_restore_flags(compiler, 0));
 2504         compiler->flags_saved = 0;
 2505     }
 2506 
 2507     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2508     PTR_FAIL_IF_NULL(jump);
 2509     set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
 2510     type &= 0xff;
 2511 
 2512     if (type >= SLJIT_CALL1)
 2513         PTR_FAIL_IF(call_with_args(compiler, type));
 2514 
 2515     /* Worst case size. */
 2516 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2517     compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
 2518 #else
 2519     compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
 2520 #endif
 2521 
 2522     buf = (sljit_ub*)ensure_buf(compiler, 2);
 2523     PTR_FAIL_IF_NULL(buf);
 2524 
 2525     *buf++ = 0;
 2526     *buf++ = type + 4;
 2527     return jump;
 2528 }
 2529 
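      /* Indirect jumps and calls: an immediate target is recorded as a jump
         that is patched during code generation, anything else is encoded as
         FF /4 (JMP r/m) or FF /2 (CALL r/m). */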
 2530 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
 2531 {
 2532     sljit_ub *code;
 2533     struct sljit_jump *jump;
 2534 
 2535     CHECK_ERROR();
 2536     check_sljit_emit_ijump(compiler, type, src, srcw);
 2537 
 2538     CHECK_EXTRA_REGS(src, srcw, (void)0);
 2539     if (SLJIT_UNLIKELY(compiler->flags_saved)) {
 2540         if (type <= SLJIT_JUMP)
 2541             FAIL_IF(emit_restore_flags(compiler, 0));
 2542         compiler->flags_saved = 0;
 2543     }
 2544 
 2545     if (type >= SLJIT_CALL1) {
 2546 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2547 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
 2548         if (src == SLJIT_TEMPORARY_REG3) {
 2549             EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
 2550             src = TMP_REGISTER;
 2551         }
 2552         if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
 2553             if (src & 0xf0) {
 2554                 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
 2555                 src = TMP_REGISTER;
 2556             }
 2557             else
 2558                 srcw += sizeof(sljit_w);
 2559         }
 2560 #else
 2561         if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
 2562             if (src & 0xf0) {
 2563                 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
 2564                 src = TMP_REGISTER;
 2565             }
 2566             else
 2567                 srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
 2568         }
 2569 #endif
 2570 #endif
 2571 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
 2572         if (src == SLJIT_TEMPORARY_REG3) {
 2573             EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
 2574             src = TMP_REGISTER;
 2575         }
 2576 #endif
 2577         FAIL_IF(call_with_args(compiler, type));
 2578     }
 2579 
 2580     if (src == SLJIT_IMM) {
 2581         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2582         FAIL_IF_NULL(jump);
 2583         set_jump(jump, compiler, JUMP_ADDR);
 2584         jump->u.target = srcw;
 2585 
 2586         /* Worst case size. */
 2587 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2588         compiler->size += 5;
 2589 #else
 2590         compiler->size += 10 + 3;
 2591 #endif
 2592 
 2593         code = (sljit_ub*)ensure_buf(compiler, 2);
 2594         FAIL_IF_NULL(code);
 2595 
 2596         *code++ = 0;
 2597         *code++ = type + 4;
 2598     }
 2599     else {
 2600 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2601         /* REX_W is not necessary (src is not immediate). */
 2602         compiler->mode32 = 1;
 2603 #endif
 2604         code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
 2605         FAIL_IF(!code);
 2606         *code++ = 0xff;
 2607         *code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
 2608     }
 2609     return SLJIT_SUCCESS;
 2610 }
 2611 
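      /* Materializes a condition into dst: SETcc (0x0f 0x90+cc; the cond_set
         values below are the second opcode byte) writes the condition into a
         byte register, usually followed by MOVZX (0x0f 0xb6) to zero-extend
         it; for op other than SLJIT_MOV the result is then combined with dst
         through sljit_emit_op2. */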
 2612 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
 2613 {
 2614     sljit_ub *buf;
 2615     sljit_ub cond_set = 0;
 2616 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2617     int reg;
 2618 #endif
 2619 
 2620     CHECK_ERROR();
 2621     check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
 2622 
 2623     if (dst == SLJIT_UNUSED)
 2624         return SLJIT_SUCCESS;
 2625 
 2626     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2627     if (SLJIT_UNLIKELY(compiler->flags_saved))
 2628         FAIL_IF(emit_restore_flags(compiler, 0));
 2629 
 2630     switch (type) {
 2631     case SLJIT_C_EQUAL:
 2632     case SLJIT_C_FLOAT_EQUAL:
 2633         cond_set = 0x94;
 2634         break;
 2635 
 2636     case SLJIT_C_NOT_EQUAL:
 2637     case SLJIT_C_FLOAT_NOT_EQUAL:
 2638         cond_set = 0x95;
 2639         break;
 2640 
 2641     case SLJIT_C_LESS:
 2642     case SLJIT_C_FLOAT_LESS:
 2643         cond_set = 0x92;
 2644         break;
 2645 
 2646     case SLJIT_C_GREATER_EQUAL:
 2647     case SLJIT_C_FLOAT_GREATER_EQUAL:
 2648         cond_set = 0x93;
 2649         break;
 2650 
 2651     case SLJIT_C_GREATER:
 2652     case SLJIT_C_FLOAT_GREATER:
 2653         cond_set = 0x97;
 2654         break;
 2655 
 2656     case SLJIT_C_LESS_EQUAL:
 2657     case SLJIT_C_FLOAT_LESS_EQUAL:
 2658         cond_set = 0x96;
 2659         break;
 2660 
 2661     case SLJIT_C_SIG_LESS:
 2662         cond_set = 0x9c;
 2663         break;
 2664 
 2665     case SLJIT_C_SIG_GREATER_EQUAL:
 2666         cond_set = 0x9d;
 2667         break;
 2668 
 2669     case SLJIT_C_SIG_GREATER:
 2670         cond_set = 0x9f;
 2671         break;
 2672 
 2673     case SLJIT_C_SIG_LESS_EQUAL:
 2674         cond_set = 0x9e;
 2675         break;
 2676 
 2677     case SLJIT_C_OVERFLOW:
 2678     case SLJIT_C_MUL_OVERFLOW:
 2679         cond_set = 0x90;
 2680         break;
 2681 
 2682     case SLJIT_C_NOT_OVERFLOW:
 2683     case SLJIT_C_MUL_NOT_OVERFLOW:
 2684         cond_set = 0x91;
 2685         break;
 2686 
 2687     case SLJIT_C_FLOAT_NAN:
 2688         cond_set = 0x9a;
 2689         break;
 2690 
 2691     case SLJIT_C_FLOAT_NOT_NAN:
 2692         cond_set = 0x9b;
 2693         break;
 2694     }
 2695 
 2696 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2697     reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
 2698 
 2699     buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
 2700     FAIL_IF(!buf);
 2701     INC_SIZE(4 + 4);
 2702     /* Set the low byte of the register to the condition flag. */
 2703     *buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
 2704     *buf++ = 0x0f;
 2705     *buf++ = cond_set;
 2706     *buf++ = 0xC0 | reg_lmap[reg];
 2707     *buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
 2708     *buf++ = 0x0f;
 2709     *buf++ = 0xb6;
 2710     *buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];
 2711 
 2712     if (reg == TMP_REGISTER) {
 2713         if (op == SLJIT_MOV) {
 2714             compiler->mode32 = 0;
 2715             EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 2716         }
 2717         else {
 2718 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
 2719             compiler->skip_checks = 1;
 2720 #endif
 2721             return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
 2722         }
 2723     }
 2724 #else
 2725     if (op == SLJIT_MOV) {
 2726         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
 2727             buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
 2728             FAIL_IF(!buf);
 2729             INC_SIZE(3 + 3);
 2730             /* Set low byte to conditional flag. */
 2731             *buf++ = 0x0f;
 2732             *buf++ = cond_set;
 2733             *buf++ = 0xC0 | reg_map[dst];
 2734 
 2735             *buf++ = 0x0f;
 2736             *buf++ = 0xb6;
 2737             *buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
 2738         }
 2739         else {
 2740             EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
 2741 
 2742             buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
 2743             FAIL_IF(!buf);
 2744             INC_SIZE(3 + 3);
 2745             /* Set al to conditional flag. */
 2746             *buf++ = 0x0f;
 2747             *buf++ = cond_set;
 2748             *buf++ = 0xC0;
 2749 
 2750             *buf++ = 0x0f;
 2751             *buf++ = 0xb6;
 2752             if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS)
 2753                 *buf = 0xC0 | (reg_map[dst] << 3);
 2754             else {
 2755                 *buf = 0xC0;
 2756                 EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
 2757             }
 2758 
 2759             EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
 2760         }
 2761     }
 2762     else {
 2763         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
 2764             EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
 2765             buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
 2766             FAIL_IF(!buf);
 2767             INC_SIZE(3);
 2768 
 2769             *buf++ = 0x0f;
 2770             *buf++ = cond_set;
 2771             *buf++ = 0xC0 | reg_map[dst];
 2772         }
 2773         else {
 2774             EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
 2775 
 2776             buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
 2777             FAIL_IF(!buf);
 2778             INC_SIZE(3 + 3 + 1);
 2779             /* Set al to conditional flag. */
 2780             *buf++ = 0x0f;
 2781             *buf++ = cond_set;
 2782             *buf++ = 0xC0;
 2783 
 2784             *buf++ = 0x0f;
 2785             *buf++ = 0xb6;
 2786             *buf++ = 0xC0;
 2787 
 2788             *buf++ = 0x90 + reg_map[TMP_REGISTER];
 2789         }
 2790 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
 2791         compiler->skip_checks = 1;
 2792 #endif
 2793         return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
 2794     }
 2795 #endif
 2796 
 2797     return SLJIT_SUCCESS;
 2798 }
 2799 
 2800 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
 2801 {
 2802     sljit_ub *buf;
 2803     struct sljit_const *const_;
 2804 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2805     int reg;
 2806 #endif
 2807 
 2808     CHECK_ERROR_PTR();
 2809     check_sljit_emit_const(compiler, dst, dstw, init_value);
 2810 
 2811     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2812 
 2813     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
 2814     PTR_FAIL_IF(!const_);
 2815     set_const(const_, compiler);
 2816 
 2817 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2818     compiler->mode32 = 0;
 2819     reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
 2820 
 2821     if (emit_load_imm64(compiler, reg, init_value))
 2822         return NULL;
 2823 #else
 2824     if (dst == SLJIT_UNUSED)
 2825         dst = TMP_REGISTER;
 2826 
 2827     if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
 2828         return NULL;
 2829 #endif
 2830 
 2831     buf = (sljit_ub*)ensure_buf(compiler, 2);
 2832     PTR_FAIL_IF(!buf);
 2833 
 2834     *buf++ = 0;
 2835     *buf++ = 1;
 2836 
 2837 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2838     if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
 2839         if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
 2840             return NULL;
 2841 #endif
 2842 
 2843     return const_;
 2844 }
 2845 
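      /* Runtime patching helpers. On x86-32 the stored word is a rel32
         displacement measured from the end of the 4 byte field; on x86-64 the
         absolute address (the 64 bit immediate of the generated MOV) is
         rewritten in place. */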
 2846 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
 2847 {
 2848 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2849     *(sljit_w*)addr = new_addr - (addr + 4);
 2850 #else
 2851     *(sljit_uw*)addr = new_addr;
 2852 #endif
 2853 }
 2854 
 2855 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
 2856 {
 2857     *(sljit_w*)addr = new_constant;
 2858 }