"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.43/sljit/sljitNativeX86_common.c" (13 Mar 2018, 81524 Bytes) of package /linux/misc/pcre-8.43.tar.bz2:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "sljitNativeX86_common.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 8.41_vs_8.42.

    1 /*
    2  *    Stack-less Just-In-Time compiler
    3  *
    4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without modification, are
    7  * permitted provided that the following conditions are met:
    8  *
    9  *   1. Redistributions of source code must retain the above copyright notice, this list of
   10  *      conditions and the following disclaimer.
   11  *
   12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
   13  *      of conditions and the following disclaimer in the documentation and/or other materials
   14  *      provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
   17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
   19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  */
   26 
   27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
   28 {
   29 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
   30     return "x86" SLJIT_CPUINFO " ABI:fastcall";
   31 #else
   32     return "x86" SLJIT_CPUINFO;
   33 #endif
   34 }
   35 
   36 /*
   37    32b register indexes:
   38      0 - EAX
   39      1 - ECX
   40      2 - EDX
   41      3 - EBX
   42      4 - ESP
   43      5 - EBP
   44      6 - ESI
   45      7 - EDI
   46 */
   47 
   48 /*
   49    64b register indexes:
   50      0 - RAX
   51      1 - RCX
   52      2 - RDX
   53      3 - RBX
   54      4 - RSP
   55      5 - RBP
   56      6 - RSI
   57      7 - RDI
    58      8 - R8   - From here on, a REX prefix is required
   59      9 - R9
   60     10 - R10
   61     11 - R11
   62     12 - R12
   63     13 - R13
   64     14 - R14
   65     15 - R15
   66 */
   67 
   68 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   69 
   70 /* Last register + 1. */
   71 #define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
   72 
   73 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
   74     0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
   75 };
   76 
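       /* On x86-32 there are not enough hardware registers, so SLJIT_R3-SLJIT_S3
          are kept in the stack frame ("virtual" registers, see
          SLJIT_HAS_VIRTUAL_REGISTERS below). This macro rewrites such operands
          into SLJIT_MEM1(SLJIT_SP) accesses at the proper frame offset. */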
   77 #define CHECK_EXTRA_REGS(p, w, do) \
   78     if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
   79         if (p <= compiler->scratches) \
   80             w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
   81         else \
   82             w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
   83         p = SLJIT_MEM1(SLJIT_SP); \
   84         do; \
   85     }
   86 
   87 #else /* SLJIT_CONFIG_X86_32 */
   88 
   89 /* Last register + 1. */
   90 #define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
   91 #define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
   92 
    93 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present";
    94    r13 & 0x7 == 0b101, which with mod == 0 means "disp32 with no base".
    95    Avoid using r12 and r13 for memory addressing; therefore r12 is better used as a higher saved register. */
   96 #ifndef _WIN64
   97 /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
   98 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
   99     0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
  100 };
  101 /* low-map. reg_map & 0x7. */
  102 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  103     0, 0, 6, 7, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 1
  104 };
  105 #else
  106 /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
  107 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  108     0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
  109 };
  110 /* low-map. reg_map & 0x7. */
  111 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  112     0, 0, 2, 0, 1,  3,  4, 5,  5,  6,  7, 7, 6, 3, 4, 1,  2
  113 };
  114 #endif
  115 
  116 /* Args: xmm0-xmm3 */
  117 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
  118     4, 0, 1, 2, 3, 5, 6
  119 };
  120 /* low-map. freg_map & 0x7. */
  121 static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
  122     4, 0, 1, 2, 3, 5, 6
  123 };
  124 
  125 #define REX_W       0x48
  126 #define REX_R       0x44
  127 #define REX_X       0x42
  128 #define REX_B       0x41
  129 #define REX     0x40
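       /* REX prefix bits: W selects a 64-bit operand size; R, X and B supply the
          fourth (high) bit of the ModR/M reg field, the SIB index field and the
          rm/base field respectively, extending the encodings to r8-r15. */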
  130 
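       /* x86-64 instructions accept at most a sign-extended 32-bit (half of the
          64-bit machine word) immediate or displacement; IS_HALFWORD tells
          whether a value fits into that range. */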
  131 #ifndef _WIN64
  132 #define HALFWORD_MAX 0x7fffffffl
  133 #define HALFWORD_MIN -0x80000000l
  134 #else
  135 #define HALFWORD_MAX 0x7fffffffll
  136 #define HALFWORD_MIN -0x80000000ll
  137 #endif
  138 
  139 #define IS_HALFWORD(x)      ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
  140 #define NOT_HALFWORD(x)     ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
  141 
  142 #define CHECK_EXTRA_REGS(p, w, do)
  143 
  144 #endif /* SLJIT_CONFIG_X86_32 */
  145 
  146 #define TMP_FREG    (0)
  147 
  148 /* Size flags for emit_x86_instruction: */
  149 #define EX86_BIN_INS        0x0010
  150 #define EX86_SHIFT_INS      0x0020
  151 #define EX86_REX        0x0040
  152 #define EX86_NO_REXW        0x0080
  153 #define EX86_BYTE_ARG       0x0100
  154 #define EX86_HALF_ARG       0x0200
  155 #define EX86_PREF_66        0x0400
  156 #define EX86_PREF_F2        0x0800
  157 #define EX86_PREF_F3        0x1000
  158 #define EX86_SSE2_OP1       0x2000
  159 #define EX86_SSE2_OP2       0x4000
  160 #define EX86_SSE2       (EX86_SSE2_OP1 | EX86_SSE2_OP2)
  161 
  162 /* --------------------------------------------------------------------- */
   163 /*  Instruction forms                                                    */
  164 /* --------------------------------------------------------------------- */
  165 
  166 #define ADD     (/* BINARY */ 0 << 3)
  167 #define ADD_EAX_i32 0x05
  168 #define ADD_r_rm    0x03
  169 #define ADD_rm_r    0x01
  170 #define ADDSD_x_xm  0x58
  171 #define ADC     (/* BINARY */ 2 << 3)
  172 #define ADC_EAX_i32 0x15
  173 #define ADC_r_rm    0x13
  174 #define ADC_rm_r    0x11
  175 #define AND     (/* BINARY */ 4 << 3)
  176 #define AND_EAX_i32 0x25
  177 #define AND_r_rm    0x23
  178 #define AND_rm_r    0x21
  179 #define ANDPD_x_xm  0x54
  180 #define BSR_r_rm    (/* GROUP_0F */ 0xbd)
  181 #define CALL_i32    0xe8
  182 #define CALL_rm     (/* GROUP_FF */ 2 << 3)
  183 #define CDQ     0x99
  184 #define CMOVE_r_rm  (/* GROUP_0F */ 0x44)
  185 #define CMP     (/* BINARY */ 7 << 3)
  186 #define CMP_EAX_i32 0x3d
  187 #define CMP_r_rm    0x3b
  188 #define CMP_rm_r    0x39
  189 #define CVTPD2PS_x_xm   0x5a
  190 #define CVTSI2SD_x_rm   0x2a
  191 #define CVTTSD2SI_r_xm  0x2c
  192 #define DIV     (/* GROUP_F7 */ 6 << 3)
  193 #define DIVSD_x_xm  0x5e
  194 #define FSTPS       0xd9
  195 #define FSTPD       0xdd
  196 #define INT3        0xcc
  197 #define IDIV        (/* GROUP_F7 */ 7 << 3)
  198 #define IMUL        (/* GROUP_F7 */ 5 << 3)
  199 #define IMUL_r_rm   (/* GROUP_0F */ 0xaf)
  200 #define IMUL_r_rm_i8    0x6b
  201 #define IMUL_r_rm_i32   0x69
  202 #define JE_i8       0x74
  203 #define JNE_i8      0x75
  204 #define JMP_i8      0xeb
  205 #define JMP_i32     0xe9
  206 #define JMP_rm      (/* GROUP_FF */ 4 << 3)
  207 #define LEA_r_m     0x8d
  208 #define MOV_r_rm    0x8b
  209 #define MOV_r_i32   0xb8
  210 #define MOV_rm_r    0x89
  211 #define MOV_rm_i32  0xc7
  212 #define MOV_rm8_i8  0xc6
  213 #define MOV_rm8_r8  0x88
  214 #define MOVSD_x_xm  0x10
  215 #define MOVSD_xm_x  0x11
  216 #define MOVSXD_r_rm 0x63
  217 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
  218 #define MOVSX_r_rm16    (/* GROUP_0F */ 0xbf)
  219 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
  220 #define MOVZX_r_rm16    (/* GROUP_0F */ 0xb7)
  221 #define MUL     (/* GROUP_F7 */ 4 << 3)
  222 #define MULSD_x_xm  0x59
  223 #define NEG_rm      (/* GROUP_F7 */ 3 << 3)
  224 #define NOP     0x90
  225 #define NOT_rm      (/* GROUP_F7 */ 2 << 3)
  226 #define OR      (/* BINARY */ 1 << 3)
  227 #define OR_r_rm     0x0b
  228 #define OR_EAX_i32  0x0d
  229 #define OR_rm_r     0x09
  230 #define OR_rm8_r8   0x08
  231 #define POP_r       0x58
  232 #define POP_rm      0x8f
  233 #define POPF        0x9d
  234 #define PREFETCH    0x18
  235 #define PUSH_i32    0x68
  236 #define PUSH_r      0x50
  237 #define PUSH_rm     (/* GROUP_FF */ 6 << 3)
  238 #define PUSHF       0x9c
  239 #define RET_near    0xc3
  240 #define RET_i16     0xc2
  241 #define SBB     (/* BINARY */ 3 << 3)
  242 #define SBB_EAX_i32 0x1d
  243 #define SBB_r_rm    0x1b
  244 #define SBB_rm_r    0x19
  245 #define SAR     (/* SHIFT */ 7 << 3)
  246 #define SHL     (/* SHIFT */ 4 << 3)
  247 #define SHR     (/* SHIFT */ 5 << 3)
  248 #define SUB     (/* BINARY */ 5 << 3)
  249 #define SUB_EAX_i32 0x2d
  250 #define SUB_r_rm    0x2b
  251 #define SUB_rm_r    0x29
  252 #define SUBSD_x_xm  0x5c
  253 #define TEST_EAX_i32    0xa9
  254 #define TEST_rm_r   0x85
  255 #define UCOMISD_x_xm    0x2e
  256 #define UNPCKLPD_x_xm   0x14
  257 #define XCHG_EAX_r  0x90
  258 #define XCHG_r_rm   0x87
  259 #define XOR     (/* BINARY */ 6 << 3)
  260 #define XOR_EAX_i32 0x35
  261 #define XOR_r_rm    0x33
  262 #define XOR_rm_r    0x31
  263 #define XORPD_x_xm  0x57
  264 
  265 #define GROUP_0F    0x0f
  266 #define GROUP_F7    0xf7
  267 #define GROUP_FF    0xff
  268 #define GROUP_BINARY_81 0x81
  269 #define GROUP_BINARY_83 0x83
  270 #define GROUP_SHIFT_1   0xd1
  271 #define GROUP_SHIFT_N   0xc1
  272 #define GROUP_SHIFT_CL  0xd3
  273 
  274 #define MOD_REG     0xc0
  275 #define MOD_DISP8   0x40
  276 
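       /* Each record in the compiler buffer begins with a length byte followed
          by that many bytes of machine code (see the second pass in
          sljit_generate_code). INC_SIZE emits the length byte and accounts for
          the instruction bytes that follow it. */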
  277 #define INC_SIZE(s)         (*inst++ = (s), compiler->size += (s))
  278 
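       /* Short-form encodings: PUSH_r/POP_r/XCHG_EAX_r place the register number
          in the low three bits of the opcode byte (e.g. push rcx is 0x50 + 1). */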
  279 #define PUSH_REG(r)         (*inst++ = (PUSH_r + (r)))
  280 #define POP_REG(r)          (*inst++ = (POP_r + (r)))
  281 #define RET()               (*inst++ = (RET_near))
  282 #define RET_I16(n)          (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
  283 /* r32, r/m32 */
  284 #define MOV_RM(mod, reg, rm)        (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
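       /* ModR/M byte layout: mod (2 bits) << 6 | reg (3 bits) << 3 | rm (3 bits);
          mod == 3 (MOD_REG) selects a register operand, mod == 1 (MOD_DISP8)
          a base register plus an 8-bit displacement. */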
  285 
   286 /* Multithreading does not affect these static variables, since they store
   287    built-in CPU features. Different threads may overwrite them at the same
   288    time, but every thread detects and stores the same values. */
  289 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  290 static sljit_s32 cpu_has_sse2 = -1;
  291 #endif
  292 static sljit_s32 cpu_has_cmov = -1;
  293 
  294 #ifdef _WIN32_WCE
  295 #include <cmnintrin.h>
  296 #elif defined(_MSC_VER) && _MSC_VER >= 1400
  297 #include <intrin.h>
  298 #endif
  299 
  300 /******************************************************/
  301 /*    Unaligned-store functions                       */
  302 /******************************************************/
  303 
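       /* Dereferencing a misaligned pointer directly would be undefined behavior
          in C, while memcpy is always well defined; compilers typically lower
          these fixed-size copies to a single unaligned store on x86. */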
  304 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
  305 {
  306     SLJIT_MEMCPY(addr, &value, sizeof(value));
  307 }
  308 
  309 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
  310 {
  311     SLJIT_MEMCPY(addr, &value, sizeof(value));
  312 }
  313 
  314 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
  315 {
  316     SLJIT_MEMCPY(addr, &value, sizeof(value));
  317 }
  318 
  319 /******************************************************/
  320 /*    Utility functions                               */
  321 /******************************************************/
  322 
  323 static void get_cpu_features(void)
  324 {
  325     sljit_u32 features;
  326 
  327 #if defined(_MSC_VER) && _MSC_VER >= 1400
  328 
  329     int CPUInfo[4];
  330     __cpuid(CPUInfo, 1);
  331     features = (sljit_u32)CPUInfo[3];
  332 
  333 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
  334 
  335     /* AT&T syntax. */
  336     __asm__ (
  337         "movl $0x1, %%eax\n"
  338 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  339         /* On x86-32, there is no red zone, so this
  340            should work (no need for a local variable). */
  341         "push %%ebx\n"
  342 #endif
  343         "cpuid\n"
  344 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  345         "pop %%ebx\n"
  346 #endif
  347         "movl %%edx, %0\n"
  348         : "=g" (features)
  349         :
  350 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  351         : "%eax", "%ecx", "%edx"
  352 #else
  353         : "%rax", "%rbx", "%rcx", "%rdx"
  354 #endif
  355     );
  356 
  357 #else /* _MSC_VER && _MSC_VER >= 1400 */
  358 
  359     /* Intel syntax. */
  360     __asm {
  361         mov eax, 1
  362         cpuid
  363         mov features, edx
  364     }
  365 
  366 #endif /* _MSC_VER && _MSC_VER >= 1400 */
  367 
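       /* CPUID leaf 1 reports features in EDX: bit 15 = CMOV, bit 26 = SSE2. */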
  368 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  369     cpu_has_sse2 = (features >> 26) & 0x1;
  370 #endif
  371     cpu_has_cmov = (features >> 15) & 0x1;
  372 }
  373 
  374 static sljit_u8 get_jump_code(sljit_s32 type)
  375 {
  376     switch (type) {
  377     case SLJIT_EQUAL:
  378     case SLJIT_EQUAL_F64:
  379         return 0x84 /* je */;
  380 
  381     case SLJIT_NOT_EQUAL:
  382     case SLJIT_NOT_EQUAL_F64:
  383         return 0x85 /* jne */;
  384 
  385     case SLJIT_LESS:
  386     case SLJIT_LESS_F64:
  387         return 0x82 /* jc */;
  388 
  389     case SLJIT_GREATER_EQUAL:
  390     case SLJIT_GREATER_EQUAL_F64:
  391         return 0x83 /* jae */;
  392 
  393     case SLJIT_GREATER:
  394     case SLJIT_GREATER_F64:
  395         return 0x87 /* jnbe */;
  396 
  397     case SLJIT_LESS_EQUAL:
  398     case SLJIT_LESS_EQUAL_F64:
  399         return 0x86 /* jbe */;
  400 
  401     case SLJIT_SIG_LESS:
  402         return 0x8c /* jl */;
  403 
  404     case SLJIT_SIG_GREATER_EQUAL:
  405         return 0x8d /* jnl */;
  406 
  407     case SLJIT_SIG_GREATER:
  408         return 0x8f /* jnle */;
  409 
  410     case SLJIT_SIG_LESS_EQUAL:
  411         return 0x8e /* jle */;
  412 
  413     case SLJIT_OVERFLOW:
  414     case SLJIT_MUL_OVERFLOW:
  415         return 0x80 /* jo */;
  416 
  417     case SLJIT_NOT_OVERFLOW:
  418     case SLJIT_MUL_NOT_OVERFLOW:
  419         return 0x81 /* jno */;
  420 
  421     case SLJIT_UNORDERED_F64:
  422         return 0x8a /* jp */;
  423 
  424     case SLJIT_ORDERED_F64:
  425         return 0x8b /* jpo */;
  426     }
  427     return 0;
  428 }
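       /* The values above are the second opcode byte of the two-byte near form
          (0x0f 0x8x, rel32). The short form of the same condition is the single
          byte 0x7x (rel8), i.e. the near opcode byte minus 0x10; this is used by
          generate_near_jump_code below. */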
  429 
  430 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  431 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
  432 #else
  433 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
  434 #endif
  435 
  436 static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
  437 {
  438     sljit_s32 short_jump;
  439     sljit_uw label_addr;
  440 
  441     if (jump->flags & JUMP_LABEL)
  442         label_addr = (sljit_uw)(code + jump->u.label->size);
  443     else
  444         label_addr = jump->u.target - executable_offset;
  445 
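           /* A short jump is two bytes long (opcode + rel8) and its displacement
              is measured from the end of the instruction, hence jump->addr + 2. */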
  446     short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
  447 
  448 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  449     if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
  450         return generate_far_jump_code(jump, code_ptr, type);
  451 #endif
  452 
  453     if (type == SLJIT_JUMP) {
  454         if (short_jump)
  455             *code_ptr++ = JMP_i8;
  456         else
  457             *code_ptr++ = JMP_i32;
  458         jump->addr++;
  459     }
  460     else if (type >= SLJIT_FAST_CALL) {
  461         short_jump = 0;
  462         *code_ptr++ = CALL_i32;
  463         jump->addr++;
  464     }
  465     else if (short_jump) {
  466         *code_ptr++ = get_jump_code(type) - 0x10;
  467         jump->addr++;
  468     }
  469     else {
  470         *code_ptr++ = GROUP_0F;
  471         *code_ptr++ = get_jump_code(type);
  472         jump->addr += 2;
  473     }
  474 
  475     if (short_jump) {
  476         jump->flags |= PATCH_MB;
  477         code_ptr += sizeof(sljit_s8);
  478     } else {
  479         jump->flags |= PATCH_MW;
  480         code_ptr += sizeof(sljit_s32);
  481     }
  482 
  483     return code_ptr;
  484 }
  485 
  486 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
  487 {
  488     struct sljit_memory_fragment *buf;
  489     sljit_u8 *code;
  490     sljit_u8 *code_ptr;
  491     sljit_u8 *buf_ptr;
  492     sljit_u8 *buf_end;
  493     sljit_u8 len;
  494     sljit_sw executable_offset;
  495     sljit_sw jump_addr;
  496 
  497     struct sljit_label *label;
  498     struct sljit_jump *jump;
  499     struct sljit_const *const_;
  500 
  501     CHECK_ERROR_PTR();
  502     CHECK_PTR(check_sljit_generate_code(compiler));
  503     reverse_buf(compiler);
  504 
  505     /* Second code generation pass. */
  506     code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
  507     PTR_FAIL_WITH_EXEC_IF(code);
  508     buf = compiler->buf;
  509 
  510     code_ptr = code;
  511     label = compiler->labels;
  512     jump = compiler->jumps;
  513     const_ = compiler->consts;
  514     executable_offset = SLJIT_EXEC_OFFSET(code);
  515 
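           /* Buffer record format: a nonzero length byte is followed by that many
              bytes of already generated code; a zero length byte is followed by a
              tag byte: 0 = label, 1 = const, otherwise a jump whose type is
              (tag - 2). */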
  516     do {
  517         buf_ptr = buf->memory;
  518         buf_end = buf_ptr + buf->used_size;
  519         do {
  520             len = *buf_ptr++;
  521             if (len > 0) {
  522                 /* The code is already generated. */
  523                 SLJIT_MEMCPY(code_ptr, buf_ptr, len);
  524                 code_ptr += len;
  525                 buf_ptr += len;
  526             }
  527             else {
  528                 if (*buf_ptr >= 2) {
  529                     jump->addr = (sljit_uw)code_ptr;
  530                     if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
  531                         code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
  532                     else {
  533 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  534                         code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
  535 #else
  536                         code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
  537 #endif
  538                     }
  539                     jump = jump->next;
  540                 }
  541                 else if (*buf_ptr == 0) {
  542                     label->addr = ((sljit_uw)code_ptr) + executable_offset;
  543                     label->size = code_ptr - code;
  544                     label = label->next;
  545                 }
  546                 else { /* *buf_ptr is 1 */
  547                     const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
  548                     const_ = const_->next;
  549                 }
  550                 buf_ptr++;
  551             }
  552         } while (buf_ptr < buf_end);
  553         SLJIT_ASSERT(buf_ptr == buf_end);
  554         buf = buf->next;
  555     } while (buf);
  556 
  557     SLJIT_ASSERT(!label);
  558     SLJIT_ASSERT(!jump);
  559     SLJIT_ASSERT(!const_);
  560 
  561     jump = compiler->jumps;
  562     while (jump) {
  563         jump_addr = jump->addr + executable_offset;
  564 
  565         if (jump->flags & PATCH_MB) {
  566             SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
  567             *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
  568         } else if (jump->flags & PATCH_MW) {
  569             if (jump->flags & JUMP_LABEL) {
  570 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  571                 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
  572 #else
  573                 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
  574                 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
  575 #endif
  576             }
  577             else {
  578 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  579                 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
  580 #else
  581                 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
  582                 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
  583 #endif
  584             }
  585         }
  586 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  587         else if (jump->flags & PATCH_MD)
  588             sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
  589 #endif
  590 
  591         jump = jump->next;
  592     }
  593 
  594     /* Some space may be wasted because of short jumps. */
  595     SLJIT_ASSERT(code_ptr <= code + compiler->size);
  596     compiler->error = SLJIT_ERR_COMPILED;
  597     compiler->executable_offset = executable_offset;
  598     compiler->executable_size = code_ptr - code;
  599     return (void*)(code + executable_offset);
  600 }
  601 
  602 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
  603 {
  604     switch (feature_type) {
  605     case SLJIT_HAS_FPU:
  606 #ifdef SLJIT_IS_FPU_AVAILABLE
  607         return SLJIT_IS_FPU_AVAILABLE;
  608 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  609         if (cpu_has_sse2 == -1)
  610             get_cpu_features();
  611         return cpu_has_sse2;
  612 #else /* SLJIT_DETECT_SSE2 */
  613         return 1;
  614 #endif /* SLJIT_DETECT_SSE2 */
  615 
  616 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  617     case SLJIT_HAS_VIRTUAL_REGISTERS:
  618         return 1;
  619 #endif
  620 
  621     case SLJIT_HAS_CLZ:
  622     case SLJIT_HAS_CMOV:
  623         if (cpu_has_cmov == -1)
  624             get_cpu_features();
  625         return cpu_has_cmov;
  626 
  627     case SLJIT_HAS_SSE2:
  628 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  629         if (cpu_has_sse2 == -1)
  630             get_cpu_features();
  631         return cpu_has_sse2;
  632 #else
  633         return 1;
  634 #endif
  635 
  636     default:
  637         return 0;
  638     }
  639 }
  640 
  641 /* --------------------------------------------------------------------- */
  642 /*  Operators                                                            */
  643 /* --------------------------------------------------------------------- */
  644 
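       /* Packs the four encodings of a binary ALU operation into one 32-bit word:
          bits 24-31: the short "EAX, imm32" form; bits 16-23: the "r, r/m" form;
          bits 8-15: the "r/m, r" form; bits 0-7: the /digit extension used with
          the 0x81/0x83 immediate group. emit_cum_binary unpacks it in this order. */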
  645 #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
  646 
  647 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
  648     sljit_u32 op_types,
  649     sljit_s32 dst, sljit_sw dstw,
  650     sljit_s32 src1, sljit_sw src1w,
  651     sljit_s32 src2, sljit_sw src2w);
  652 
  653 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
  654     sljit_u32 op_types,
  655     sljit_s32 dst, sljit_sw dstw,
  656     sljit_s32 src1, sljit_sw src1w,
  657     sljit_s32 src2, sljit_sw src2w);
  658 
  659 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
  660     sljit_s32 dst, sljit_sw dstw,
  661     sljit_s32 src, sljit_sw srcw);
  662 
  663 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
  664     FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
  665 
  666 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
  667     sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
  668 
  669 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
  670     sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
  671 
  672 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  673 #include "sljitNativeX86_32.c"
  674 #else
  675 #include "sljitNativeX86_64.c"
  676 #endif
  677 
  678 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
  679     sljit_s32 dst, sljit_sw dstw,
  680     sljit_s32 src, sljit_sw srcw)
  681 {
  682     sljit_u8* inst;
  683 
  684     SLJIT_ASSERT(dst != SLJIT_UNUSED);
  685 
  686     if (FAST_IS_REG(src)) {
  687         inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
  688         FAIL_IF(!inst);
  689         *inst = MOV_rm_r;
  690         return SLJIT_SUCCESS;
  691     }
  692     if (src & SLJIT_IMM) {
  693         if (FAST_IS_REG(dst)) {
  694 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  695             return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
  696 #else
  697             if (!compiler->mode32) {
  698                 if (NOT_HALFWORD(srcw))
  699                     return emit_load_imm64(compiler, dst, srcw);
  700             }
  701             else
  702                 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
  703 #endif
  704         }
  705 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  706         if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
   707             /* Immediate to memory move. Only the SLJIT_MOV operation copies
   708                an immediate directly into memory, so TMP_REG1 can be used. */
  709             FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
  710             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  711             FAIL_IF(!inst);
  712             *inst = MOV_rm_r;
  713             return SLJIT_SUCCESS;
  714         }
  715 #endif
  716         inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
  717         FAIL_IF(!inst);
  718         *inst = MOV_rm_i32;
  719         return SLJIT_SUCCESS;
  720     }
  721     if (FAST_IS_REG(dst)) {
  722         inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
  723         FAIL_IF(!inst);
  724         *inst = MOV_r_rm;
  725         return SLJIT_SUCCESS;
  726     }
  727 
   728     /* Memory to memory move. Only the SLJIT_MOV operation copies
   729        data from memory to memory, so TMP_REG1 can be used. */
  730     inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
  731     FAIL_IF(!inst);
  732     *inst = MOV_r_rm;
  733     inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  734     FAIL_IF(!inst);
  735     *inst = MOV_rm_r;
  736     return SLJIT_SUCCESS;
  737 }
  738 
  739 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
  740 {
  741     sljit_u8 *inst;
  742 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  743     sljit_s32 size;
  744 #endif
  745 
  746     CHECK_ERROR();
  747     CHECK(check_sljit_emit_op0(compiler, op));
  748 
  749     switch (GET_OPCODE(op)) {
  750     case SLJIT_BREAKPOINT:
  751         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  752         FAIL_IF(!inst);
  753         INC_SIZE(1);
  754         *inst = INT3;
  755         break;
  756     case SLJIT_NOP:
  757         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  758         FAIL_IF(!inst);
  759         INC_SIZE(1);
  760         *inst = NOP;
  761         break;
  762     case SLJIT_LMUL_UW:
  763     case SLJIT_LMUL_SW:
  764     case SLJIT_DIVMOD_UW:
  765     case SLJIT_DIVMOD_SW:
  766     case SLJIT_DIV_UW:
  767     case SLJIT_DIV_SW:
  768 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  769 #ifdef _WIN64
  770         SLJIT_ASSERT(
  771             reg_map[SLJIT_R0] == 0
  772             && reg_map[SLJIT_R1] == 2
  773             && reg_map[TMP_REG1] > 7);
  774 #else
  775         SLJIT_ASSERT(
  776             reg_map[SLJIT_R0] == 0
  777             && reg_map[SLJIT_R1] < 7
  778             && reg_map[TMP_REG1] == 2);
  779 #endif
  780         compiler->mode32 = op & SLJIT_I32_OP;
  781 #endif
  782         SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
  783 
  784         op = GET_OPCODE(op);
  785         if ((op | 0x2) == SLJIT_DIV_UW) {
  786 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
  787             EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
  788             inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
  789 #else
  790             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
  791 #endif
  792             FAIL_IF(!inst);
  793             *inst = XOR_r_rm;
  794         }
  795 
  796         if ((op | 0x2) == SLJIT_DIV_SW) {
  797 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
  798             EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
  799 #endif
  800 
  801 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  802             inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  803             FAIL_IF(!inst);
  804             INC_SIZE(1);
  805             *inst = CDQ;
  806 #else
  807             if (compiler->mode32) {
  808                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  809                 FAIL_IF(!inst);
  810                 INC_SIZE(1);
  811                 *inst = CDQ;
  812             } else {
  813                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
  814                 FAIL_IF(!inst);
  815                 INC_SIZE(2);
  816                 *inst++ = REX_W;
  817                 *inst = CDQ;
  818             }
  819 #endif
  820         }
  821 
  822 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  823         inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
  824         FAIL_IF(!inst);
  825         INC_SIZE(2);
  826         *inst++ = GROUP_F7;
  827         *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
  828 #else
  829 #ifdef _WIN64
  830         size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
  831 #else
  832         size = (!compiler->mode32) ? 3 : 2;
  833 #endif
  834         inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
  835         FAIL_IF(!inst);
  836         INC_SIZE(size);
  837 #ifdef _WIN64
  838         if (!compiler->mode32)
  839             *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
  840         else if (op >= SLJIT_DIVMOD_UW)
  841             *inst++ = REX_B;
  842         *inst++ = GROUP_F7;
  843         *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
  844 #else
  845         if (!compiler->mode32)
  846             *inst++ = REX_W;
  847         *inst++ = GROUP_F7;
  848         *inst = MOD_REG | reg_map[SLJIT_R1];
  849 #endif
  850 #endif
  851         switch (op) {
  852         case SLJIT_LMUL_UW:
  853             *inst |= MUL;
  854             break;
  855         case SLJIT_LMUL_SW:
  856             *inst |= IMUL;
  857             break;
  858         case SLJIT_DIVMOD_UW:
  859         case SLJIT_DIV_UW:
  860             *inst |= DIV;
  861             break;
  862         case SLJIT_DIVMOD_SW:
  863         case SLJIT_DIV_SW:
  864             *inst |= IDIV;
  865             break;
  866         }
  867 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
  868         if (op <= SLJIT_DIVMOD_SW)
  869             EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
  870 #else
  871         if (op >= SLJIT_DIV_UW)
  872             EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
  873 #endif
  874         break;
  875     }
  876 
  877     return SLJIT_SUCCESS;
  878 }
  879 
  880 #define ENCODE_PREFIX(prefix) \
  881     do { \
  882         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
  883         FAIL_IF(!inst); \
  884         INC_SIZE(1); \
  885         *inst = (prefix); \
  886     } while (0)
  887 
  888 static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
  889     sljit_s32 dst, sljit_sw dstw,
  890     sljit_s32 src, sljit_sw srcw)
  891 {
  892     sljit_u8* inst;
  893     sljit_s32 dst_r;
  894 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  895     sljit_s32 work_r;
  896 #endif
  897 
  898 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  899     compiler->mode32 = 0;
  900 #endif
  901 
  902     if (src & SLJIT_IMM) {
  903         if (FAST_IS_REG(dst)) {
  904 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  905             return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
  906 #else
  907             inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
  908             FAIL_IF(!inst);
  909             *inst = MOV_rm_i32;
  910             return SLJIT_SUCCESS;
  911 #endif
  912         }
  913         inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
  914         FAIL_IF(!inst);
  915         *inst = MOV_rm8_i8;
  916         return SLJIT_SUCCESS;
  917     }
  918 
  919     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
  920 
  921     if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
  922 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  923         if (reg_map[src] >= 4) {
  924             SLJIT_ASSERT(dst_r == TMP_REG1);
  925             EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
  926         } else
  927             dst_r = src;
  928 #else
  929         dst_r = src;
  930 #endif
  931     }
  932 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  933     else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
  934         /* src, dst are registers. */
  935         SLJIT_ASSERT(SLOW_IS_REG(dst));
  936         if (reg_map[dst] < 4) {
  937             if (dst != src)
  938                 EMIT_MOV(compiler, dst, 0, src, 0);
  939             inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
  940             FAIL_IF(!inst);
  941             *inst++ = GROUP_0F;
  942             *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
  943         }
  944         else {
  945             if (dst != src)
  946                 EMIT_MOV(compiler, dst, 0, src, 0);
  947             if (sign) {
  948                 /* shl reg, 24 */
  949                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  950                 FAIL_IF(!inst);
  951                 *inst |= SHL;
  952                 /* sar reg, 24 */
  953                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  954                 FAIL_IF(!inst);
  955                 *inst |= SAR;
  956             }
  957             else {
  958                 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
  959                 FAIL_IF(!inst);
  960                 *(inst + 1) |= AND;
  961             }
  962         }
  963         return SLJIT_SUCCESS;
  964     }
  965 #endif
  966     else {
   967         /* src is a memory operand or a register (on x86-32 only one with reg_map[src] < 4 reaches this point). */
  968         inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
  969         FAIL_IF(!inst);
  970         *inst++ = GROUP_0F;
  971         *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
  972     }
  973 
  974     if (dst & SLJIT_MEM) {
  975 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  976         if (dst_r == TMP_REG1) {
   977             /* Find an unused register whose reg_map value is < 4. */
  978             if ((dst & REG_MASK) == SLJIT_R0) {
  979                 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
  980                     work_r = SLJIT_R2;
  981                 else
  982                     work_r = SLJIT_R1;
  983             }
  984             else {
  985                 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
  986                     work_r = SLJIT_R0;
  987                 else if ((dst & REG_MASK) == SLJIT_R1)
  988                     work_r = SLJIT_R2;
  989                 else
  990                     work_r = SLJIT_R1;
  991             }
  992 
  993             if (work_r == SLJIT_R0) {
  994                 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
  995             }
  996             else {
  997                 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
  998                 FAIL_IF(!inst);
  999                 *inst = XCHG_r_rm;
 1000             }
 1001 
 1002             inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
 1003             FAIL_IF(!inst);
 1004             *inst = MOV_rm8_r8;
 1005 
 1006             if (work_r == SLJIT_R0) {
 1007                 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
 1008             }
 1009             else {
 1010                 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
 1011                 FAIL_IF(!inst);
 1012                 *inst = XCHG_r_rm;
 1013             }
 1014         }
 1015         else {
 1016             inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
 1017             FAIL_IF(!inst);
 1018             *inst = MOV_rm8_r8;
 1019         }
 1020 #else
 1021         inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
 1022         FAIL_IF(!inst);
 1023         *inst = MOV_rm8_r8;
 1024 #endif
 1025     }
 1026 
 1027     return SLJIT_SUCCESS;
 1028 }
 1029 
 1030 static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
 1031     sljit_s32 src, sljit_sw srcw)
 1032 {
 1033     sljit_u8* inst;
 1034 
 1035 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1036     compiler->mode32 = 1;
 1037 #endif
 1038 
 1039     inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
 1040     FAIL_IF(!inst);
 1041     *inst++ = GROUP_0F;
 1042     *inst++ = PREFETCH;
 1043 
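        /* 0x0f 0x18 is the prefetch group; the ModR/M reg field selects the hint:
           /1 = prefetcht0, /2 = prefetcht1, /3 = prefetcht2. */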
 1044     if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
 1045         *inst |= (3 << 3);
 1046     else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
 1047         *inst |= (2 << 3);
 1048     else
 1049         *inst |= (1 << 3);
 1050 
 1051     return SLJIT_SUCCESS;
 1052 }
 1053 
 1054 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
 1055     sljit_s32 dst, sljit_sw dstw,
 1056     sljit_s32 src, sljit_sw srcw)
 1057 {
 1058     sljit_u8* inst;
 1059     sljit_s32 dst_r;
 1060 
 1061 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1062     compiler->mode32 = 0;
 1063 #endif
 1064 
 1065     if (src & SLJIT_IMM) {
 1066         if (FAST_IS_REG(dst)) {
 1067 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1068             return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
 1069 #else
 1070             inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
 1071             FAIL_IF(!inst);
 1072             *inst = MOV_rm_i32;
 1073             return SLJIT_SUCCESS;
 1074 #endif
 1075         }
 1076         inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
 1077         FAIL_IF(!inst);
 1078         *inst = MOV_rm_i32;
 1079         return SLJIT_SUCCESS;
 1080     }
 1081 
 1082     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 1083 
 1084     if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
 1085         dst_r = src;
 1086     else {
 1087         inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
 1088         FAIL_IF(!inst);
 1089         *inst++ = GROUP_0F;
 1090         *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
 1091     }
 1092 
 1093     if (dst & SLJIT_MEM) {
 1094         inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
 1095         FAIL_IF(!inst);
 1096         *inst = MOV_rm_r;
 1097     }
 1098 
 1099     return SLJIT_SUCCESS;
 1100 }
 1101 
 1102 static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
 1103     sljit_s32 dst, sljit_sw dstw,
 1104     sljit_s32 src, sljit_sw srcw)
 1105 {
 1106     sljit_u8* inst;
 1107 
 1108     if (dst == src && dstw == srcw) {
 1109         /* Same input and output */
 1110         inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
 1111         FAIL_IF(!inst);
 1112         *inst++ = GROUP_F7;
 1113         *inst |= opcode;
 1114         return SLJIT_SUCCESS;
 1115     }
 1116 
 1117     if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
 1118         dst = TMP_REG1;
 1119 
 1120     if (FAST_IS_REG(dst)) {
 1121         EMIT_MOV(compiler, dst, 0, src, srcw);
 1122         inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
 1123         FAIL_IF(!inst);
 1124         *inst++ = GROUP_F7;
 1125         *inst |= opcode;
 1126         return SLJIT_SUCCESS;
 1127     }
 1128 
 1129     EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
 1130     inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
 1131     FAIL_IF(!inst);
 1132     *inst++ = GROUP_F7;
 1133     *inst |= opcode;
 1134     EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1135     return SLJIT_SUCCESS;
 1136 }
 1137 
 1138 static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
 1139     sljit_s32 dst, sljit_sw dstw,
 1140     sljit_s32 src, sljit_sw srcw)
 1141 {
 1142     sljit_u8* inst;
 1143 
 1144     if (dst == SLJIT_UNUSED)
 1145         dst = TMP_REG1;
 1146 
 1147     if (FAST_IS_REG(dst)) {
 1148         EMIT_MOV(compiler, dst, 0, src, srcw);
 1149         inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
 1150         FAIL_IF(!inst);
 1151         *inst++ = GROUP_F7;
 1152         *inst |= NOT_rm;
 1153         inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
 1154         FAIL_IF(!inst);
 1155         *inst = OR_r_rm;
 1156         return SLJIT_SUCCESS;
 1157     }
 1158 
 1159     EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
 1160     inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
 1161     FAIL_IF(!inst);
 1162     *inst++ = GROUP_F7;
 1163     *inst |= NOT_rm;
 1164     inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
 1165     FAIL_IF(!inst);
 1166     *inst = OR_r_rm;
 1167     EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1168     return SLJIT_SUCCESS;
 1169 }
 1170 
 1171 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1172 static const sljit_sw emit_clz_arg = 32 + 31;
 1173 #endif
 1174 
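       /* CLZ via BSR: for a nonzero input BSR yields the index of the highest set
          bit, and XOR-ing that index with 31 (or 63 in 64-bit mode) gives the
          leading zero count. BSR leaves its destination undefined when the input
          is zero, so 32 + 31 (or 64 + 63) is loaded through CMOVE or the generic
          cmov fallback; the final XOR then produces 32 (or 64). */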
 1175 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
 1176     sljit_s32 dst, sljit_sw dstw,
 1177     sljit_s32 src, sljit_sw srcw)
 1178 {
 1179     sljit_u8* inst;
 1180     sljit_s32 dst_r;
 1181 
 1182     SLJIT_UNUSED_ARG(op_flags);
 1183 
 1184     if (cpu_has_cmov == -1)
 1185         get_cpu_features();
 1186 
 1187     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 1188 
 1189     inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
 1190     FAIL_IF(!inst);
 1191     *inst++ = GROUP_0F;
 1192     *inst = BSR_r_rm;
 1193 
 1194 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1195     if (cpu_has_cmov) {
 1196         if (dst_r != TMP_REG1) {
 1197             EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
 1198             inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
 1199         }
 1200         else
 1201             inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
 1202 
 1203         FAIL_IF(!inst);
 1204         *inst++ = GROUP_0F;
 1205         *inst = CMOVE_r_rm;
 1206     }
 1207     else
 1208         FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
 1209 
 1210     inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
 1211 #else
 1212     if (cpu_has_cmov) {
 1213         EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
 1214 
 1215         inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1216         FAIL_IF(!inst);
 1217         *inst++ = GROUP_0F;
 1218         *inst = CMOVE_r_rm;
 1219     }
 1220     else
 1221         FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
 1222 
 1223     inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
 1224 #endif
 1225 
 1226     FAIL_IF(!inst);
 1227     *(inst + 1) |= XOR;
 1228 
 1229     if (dst & SLJIT_MEM)
 1230         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1231     return SLJIT_SUCCESS;
 1232 }
 1233 
 1234 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
 1235     sljit_s32 dst, sljit_sw dstw,
 1236     sljit_s32 src, sljit_sw srcw)
 1237 {
 1238     sljit_s32 op_flags = GET_ALL_FLAGS(op);
 1239 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1240     sljit_s32 dst_is_ereg = 0;
 1241 #endif
 1242 
 1243     CHECK_ERROR();
 1244     CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
 1245     ADJUST_LOCAL_OFFSET(dst, dstw);
 1246     ADJUST_LOCAL_OFFSET(src, srcw);
 1247 
 1248     CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
 1249     CHECK_EXTRA_REGS(src, srcw, (void)0);
 1250 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1251     compiler->mode32 = op_flags & SLJIT_I32_OP;
 1252 #endif
 1253 
 1254     if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
 1255         if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
 1256             return emit_prefetch(compiler, op, src, srcw);
 1257         return SLJIT_SUCCESS;
 1258     }
 1259 
 1260     op = GET_OPCODE(op);
 1261 
 1262     if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
 1263 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1264         compiler->mode32 = 0;
 1265 #endif
 1266 
 1267         if (FAST_IS_REG(src) && src == dst) {
 1268             if (!TYPE_CAST_NEEDED(op))
 1269                 return SLJIT_SUCCESS;
 1270         }
 1271 
 1272         if (op_flags & SLJIT_I32_OP) {
 1273 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1274             if (src & SLJIT_MEM) {
 1275                 if (op == SLJIT_MOV_S32)
 1276                     op = SLJIT_MOV_U32;
 1277             }
 1278             else if (src & SLJIT_IMM) {
 1279                 if (op == SLJIT_MOV_U32)
 1280                     op = SLJIT_MOV_S32;
 1281             }
 1282 #endif
 1283         }
 1284 
 1285         if (src & SLJIT_IMM) {
 1286             switch (op) {
 1287             case SLJIT_MOV_U8:
 1288                 srcw = (sljit_u8)srcw;
 1289                 break;
 1290             case SLJIT_MOV_S8:
 1291                 srcw = (sljit_s8)srcw;
 1292                 break;
 1293             case SLJIT_MOV_U16:
 1294                 srcw = (sljit_u16)srcw;
 1295                 break;
 1296             case SLJIT_MOV_S16:
 1297                 srcw = (sljit_s16)srcw;
 1298                 break;
 1299 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1300             case SLJIT_MOV_U32:
 1301                 srcw = (sljit_u32)srcw;
 1302                 break;
 1303             case SLJIT_MOV_S32:
 1304                 srcw = (sljit_s32)srcw;
 1305                 break;
 1306 #endif
 1307             }
 1308 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1309             if (SLJIT_UNLIKELY(dst_is_ereg))
 1310                 return emit_mov(compiler, dst, dstw, src, srcw);
 1311 #endif
 1312         }
 1313 
 1314 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1315         if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
 1316             SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
 1317             dst = TMP_REG1;
 1318         }
 1319 #endif
 1320 
 1321         switch (op) {
 1322         case SLJIT_MOV:
 1323         case SLJIT_MOV_P:
 1324 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1325         case SLJIT_MOV_U32:
 1326         case SLJIT_MOV_S32:
 1327 #endif
 1328             FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
 1329             break;
 1330         case SLJIT_MOV_U8:
 1331             FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
 1332             break;
 1333         case SLJIT_MOV_S8:
 1334             FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
 1335             break;
 1336         case SLJIT_MOV_U16:
 1337             FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
 1338             break;
 1339         case SLJIT_MOV_S16:
 1340             FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
 1341             break;
 1342 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1343         case SLJIT_MOV_U32:
 1344             FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
 1345             break;
 1346         case SLJIT_MOV_S32:
 1347             FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
 1348             break;
 1349 #endif
 1350         }
 1351 
 1352 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1353         if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
 1354             return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
 1355 #endif
 1356         return SLJIT_SUCCESS;
 1357     }
 1358 
 1359     switch (op) {
 1360     case SLJIT_NOT:
 1361         if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
 1362             return emit_not_with_flags(compiler, dst, dstw, src, srcw);
 1363         return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
 1364 
 1365     case SLJIT_NEG:
 1366         return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
 1367 
 1368     case SLJIT_CLZ:
 1369         return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
 1370     }
 1371 
 1372     return SLJIT_SUCCESS;
 1373 }
 1374 
 1375 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1376 
 1377 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
 1378     if (IS_HALFWORD(immw) || compiler->mode32) { \
 1379         inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
 1380         FAIL_IF(!inst); \
 1381         *(inst + 1) |= (op_imm); \
 1382     } \
 1383     else { \
 1384         FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
 1385         inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
 1386         FAIL_IF(!inst); \
 1387         *inst = (op_mr); \
 1388     }
 1389 
 1390 #define BINARY_EAX_IMM(op_eax_imm, immw) \
 1391     FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
 1392 
 1393 #else
 1394 
 1395 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
 1396     inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
 1397     FAIL_IF(!inst); \
 1398     *(inst + 1) |= (op_imm);
 1399 
 1400 #define BINARY_EAX_IMM(op_eax_imm, immw) \
 1401     FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
 1402 
 1403 #endif
 1404 
 1405 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
 1406     sljit_u32 op_types,
 1407     sljit_s32 dst, sljit_sw dstw,
 1408     sljit_s32 src1, sljit_sw src1w,
 1409     sljit_s32 src2, sljit_sw src2w)
 1410 {
 1411     sljit_u8* inst;
 1412     sljit_u8 op_eax_imm = (op_types >> 24);
 1413     sljit_u8 op_rm = (op_types >> 16) & 0xff;
 1414     sljit_u8 op_mr = (op_types >> 8) & 0xff;
 1415     sljit_u8 op_imm = op_types & 0xff;
 1416 
 1417     if (dst == SLJIT_UNUSED) {
 1418         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1419         if (src2 & SLJIT_IMM) {
 1420             BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
 1421         }
 1422         else {
 1423             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1424             FAIL_IF(!inst);
 1425             *inst = op_rm;
 1426         }
 1427         return SLJIT_SUCCESS;
 1428     }
 1429 
 1430     if (dst == src1 && dstw == src1w) {
 1431         if (src2 & SLJIT_IMM) {
 1432 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1433             if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1434 #else
 1435             if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
 1436 #endif
 1437                 BINARY_EAX_IMM(op_eax_imm, src2w);
 1438             }
 1439             else {
 1440                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
 1441             }
 1442         }
 1443         else if (FAST_IS_REG(dst)) {
 1444             inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
 1445             FAIL_IF(!inst);
 1446             *inst = op_rm;
 1447         }
 1448         else if (FAST_IS_REG(src2)) {
 1449             /* Special exception for sljit_emit_op_flags. */
 1450             inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
 1451             FAIL_IF(!inst);
 1452             *inst = op_mr;
 1453         }
 1454         else {
 1455             EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
 1456             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
 1457             FAIL_IF(!inst);
 1458             *inst = op_mr;
 1459         }
 1460         return SLJIT_SUCCESS;
 1461     }
 1462 
 1463     /* Only for cumulative operations. */
 1464     if (dst == src2 && dstw == src2w) {
 1465         if (src1 & SLJIT_IMM) {
 1466 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1467             if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1468 #else
 1469             if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
 1470 #endif
 1471                 BINARY_EAX_IMM(op_eax_imm, src1w);
 1472             }
 1473             else {
 1474                 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
 1475             }
 1476         }
 1477         else if (FAST_IS_REG(dst)) {
 1478             inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
 1479             FAIL_IF(!inst);
 1480             *inst = op_rm;
 1481         }
 1482         else if (FAST_IS_REG(src1)) {
 1483             inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
 1484             FAIL_IF(!inst);
 1485             *inst = op_mr;
 1486         }
 1487         else {
 1488             EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1489             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
 1490             FAIL_IF(!inst);
 1491             *inst = op_mr;
 1492         }
 1493         return SLJIT_SUCCESS;
 1494     }
 1495 
 1496     /* General version. */
 1497     if (FAST_IS_REG(dst)) {
 1498         EMIT_MOV(compiler, dst, 0, src1, src1w);
 1499         if (src2 & SLJIT_IMM) {
 1500             BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
 1501         }
 1502         else {
 1503             inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
 1504             FAIL_IF(!inst);
 1505             *inst = op_rm;
 1506         }
 1507     }
 1508     else {
1509         /* This version performs fewer memory writes. */
 1510         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1511         if (src2 & SLJIT_IMM) {
 1512             BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
 1513         }
 1514         else {
 1515             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1516             FAIL_IF(!inst);
 1517             *inst = op_rm;
 1518         }
 1519         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1520     }
 1521 
 1522     return SLJIT_SUCCESS;
 1523 }
 1524 
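     /* Editorial note: op_types packs the four opcode bytes of one binary
        operation, matching the unpacking at the top of these helpers:
        (op_eax_imm << 24) | (op_rm << 16) | (op_mr << 8) | op_imm.
        As a sketch, assuming the standard one-byte x86 encodings, ADD would
        be packed from:

            0x05  ADD EAX, imm32       (op_eax_imm)
            0x03  ADD r, r/m           (op_rm)
            0x01  ADD r/m, r           (op_mr)
            0x00  group-1 /0 selector  (op_imm, consumed by BINARY_IMM)

        The BINARY_OPCODE macro used by sljit_emit_op2 below presumably
        builds these packed constants. */
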
 1525 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
 1526     sljit_u32 op_types,
 1527     sljit_s32 dst, sljit_sw dstw,
 1528     sljit_s32 src1, sljit_sw src1w,
 1529     sljit_s32 src2, sljit_sw src2w)
 1530 {
 1531     sljit_u8* inst;
 1532     sljit_u8 op_eax_imm = (op_types >> 24);
 1533     sljit_u8 op_rm = (op_types >> 16) & 0xff;
 1534     sljit_u8 op_mr = (op_types >> 8) & 0xff;
 1535     sljit_u8 op_imm = op_types & 0xff;
 1536 
 1537     if (dst == SLJIT_UNUSED) {
 1538         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1539         if (src2 & SLJIT_IMM) {
 1540             BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
 1541         }
 1542         else {
 1543             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1544             FAIL_IF(!inst);
 1545             *inst = op_rm;
 1546         }
 1547         return SLJIT_SUCCESS;
 1548     }
 1549 
 1550     if (dst == src1 && dstw == src1w) {
 1551         if (src2 & SLJIT_IMM) {
 1552 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1553             if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1554 #else
 1555             if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
 1556 #endif
 1557                 BINARY_EAX_IMM(op_eax_imm, src2w);
 1558             }
 1559             else {
 1560                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
 1561             }
 1562         }
 1563         else if (FAST_IS_REG(dst)) {
 1564             inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
 1565             FAIL_IF(!inst);
 1566             *inst = op_rm;
 1567         }
 1568         else if (FAST_IS_REG(src2)) {
 1569             inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
 1570             FAIL_IF(!inst);
 1571             *inst = op_mr;
 1572         }
 1573         else {
 1574             EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
 1575             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
 1576             FAIL_IF(!inst);
 1577             *inst = op_mr;
 1578         }
 1579         return SLJIT_SUCCESS;
 1580     }
 1581 
 1582     /* General version. */
 1583     if (FAST_IS_REG(dst) && dst != src2) {
 1584         EMIT_MOV(compiler, dst, 0, src1, src1w);
 1585         if (src2 & SLJIT_IMM) {
 1586             BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
 1587         }
 1588         else {
 1589             inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
 1590             FAIL_IF(!inst);
 1591             *inst = op_rm;
 1592         }
 1593     }
 1594     else {
1595         /* This version performs fewer memory writes. */
 1596         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1597         if (src2 & SLJIT_IMM) {
 1598             BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
 1599         }
 1600         else {
 1601             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1602             FAIL_IF(!inst);
 1603             *inst = op_rm;
 1604         }
 1605         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1606     }
 1607 
 1608     return SLJIT_SUCCESS;
 1609 }
 1610 
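     /* Editorial note: unlike emit_cum_binary above, this helper has no
        dst == src2 fast path, because SUB and SBB are not commutative:
        "dst = src1 - dst" cannot be expressed as one x86 instruction. That
        operand order falls through to the general path below, which builds
        the result in TMP_REG1 first, roughly:

            mov  tmp1, src1
            sub  tmp1, src2
            mov  dst, tmp1                                                  */
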
 1611 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
 1612     sljit_s32 dst, sljit_sw dstw,
 1613     sljit_s32 src1, sljit_sw src1w,
 1614     sljit_s32 src2, sljit_sw src2w)
 1615 {
 1616     sljit_u8* inst;
 1617     sljit_s32 dst_r;
 1618 
 1619     dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
 1620 
 1621     /* Register destination. */
 1622     if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
 1623         inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
 1624         FAIL_IF(!inst);
 1625         *inst++ = GROUP_0F;
 1626         *inst = IMUL_r_rm;
 1627     }
 1628     else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
 1629         inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
 1630         FAIL_IF(!inst);
 1631         *inst++ = GROUP_0F;
 1632         *inst = IMUL_r_rm;
 1633     }
 1634     else if (src1 & SLJIT_IMM) {
 1635         if (src2 & SLJIT_IMM) {
 1636             EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
 1637             src2 = dst_r;
 1638             src2w = 0;
 1639         }
 1640 
 1641         if (src1w <= 127 && src1w >= -128) {
 1642             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1643             FAIL_IF(!inst);
 1644             *inst = IMUL_r_rm_i8;
 1645             inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
 1646             FAIL_IF(!inst);
 1647             INC_SIZE(1);
 1648             *inst = (sljit_s8)src1w;
 1649         }
 1650 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1651         else {
 1652             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1653             FAIL_IF(!inst);
 1654             *inst = IMUL_r_rm_i32;
 1655             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 1656             FAIL_IF(!inst);
 1657             INC_SIZE(4);
 1658             sljit_unaligned_store_sw(inst, src1w);
 1659         }
 1660 #else
 1661         else if (IS_HALFWORD(src1w)) {
 1662             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1663             FAIL_IF(!inst);
 1664             *inst = IMUL_r_rm_i32;
 1665             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 1666             FAIL_IF(!inst);
 1667             INC_SIZE(4);
 1668             sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
 1669         }
 1670         else {
 1671             if (dst_r != src2)
 1672                 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
 1673             FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
 1674             inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1675             FAIL_IF(!inst);
 1676             *inst++ = GROUP_0F;
 1677             *inst = IMUL_r_rm;
 1678         }
 1679 #endif
 1680     }
 1681     else if (src2 & SLJIT_IMM) {
 1682         /* Note: src1 is NOT immediate. */
 1683 
 1684         if (src2w <= 127 && src2w >= -128) {
 1685             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1686             FAIL_IF(!inst);
 1687             *inst = IMUL_r_rm_i8;
 1688             inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
 1689             FAIL_IF(!inst);
 1690             INC_SIZE(1);
 1691             *inst = (sljit_s8)src2w;
 1692         }
 1693 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1694         else {
 1695             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1696             FAIL_IF(!inst);
 1697             *inst = IMUL_r_rm_i32;
 1698             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 1699             FAIL_IF(!inst);
 1700             INC_SIZE(4);
 1701             sljit_unaligned_store_sw(inst, src2w);
 1702         }
 1703 #else
 1704         else if (IS_HALFWORD(src2w)) {
 1705             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1706             FAIL_IF(!inst);
 1707             *inst = IMUL_r_rm_i32;
 1708             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 1709             FAIL_IF(!inst);
 1710             INC_SIZE(4);
 1711             sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
 1712         }
 1713         else {
 1714             if (dst_r != src1)
 1715                 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
 1716             FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
 1717             inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1718             FAIL_IF(!inst);
 1719             *inst++ = GROUP_0F;
 1720             *inst = IMUL_r_rm;
 1721         }
 1722 #endif
 1723     }
 1724     else {
 1725         /* Neither argument is immediate. */
 1726         if (ADDRESSING_DEPENDS_ON(src2, dst_r))
 1727             dst_r = TMP_REG1;
 1728         EMIT_MOV(compiler, dst_r, 0, src1, src1w);
 1729         inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
 1730         FAIL_IF(!inst);
 1731         *inst++ = GROUP_0F;
 1732         *inst = IMUL_r_rm;
 1733     }
 1734 
 1735     if (dst & SLJIT_MEM)
 1736         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1737 
 1738     return SLJIT_SUCCESS;
 1739 }
 1740 
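     /* Editorial note: emit_mul above chooses between the three IMUL forms:
        0F AF (imul r, r/m), 6B ib (imul r, r/m, imm8) and 69 id
        (imul r, r/m, imm32). On x86-64, an immediate that does not fit in
        32 bits is first loaded into TMP_REG2, since no IMUL form accepts a
        64-bit immediate. */
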
 1741 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
 1742     sljit_s32 dst, sljit_sw dstw,
 1743     sljit_s32 src1, sljit_sw src1w,
 1744     sljit_s32 src2, sljit_sw src2w)
 1745 {
 1746     sljit_u8* inst;
 1747     sljit_s32 dst_r, done = 0;
 1748 
1749     /* These cases are better left to the normal code path. */
 1750     if (dst == src1 && dstw == src1w)
 1751         return SLJIT_ERR_UNSUPPORTED;
 1752     if (dst == src2 && dstw == src2w)
 1753         return SLJIT_ERR_UNSUPPORTED;
 1754 
 1755     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 1756 
 1757     if (FAST_IS_REG(src1)) {
 1758         if (FAST_IS_REG(src2)) {
 1759             inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
 1760             FAIL_IF(!inst);
 1761             *inst = LEA_r_m;
 1762             done = 1;
 1763         }
 1764 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1765         if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1766             inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
 1767 #else
 1768         if (src2 & SLJIT_IMM) {
 1769             inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
 1770 #endif
 1771             FAIL_IF(!inst);
 1772             *inst = LEA_r_m;
 1773             done = 1;
 1774         }
 1775     }
 1776     else if (FAST_IS_REG(src2)) {
 1777 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1778         if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1779             inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
 1780 #else
 1781         if (src1 & SLJIT_IMM) {
 1782             inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
 1783 #endif
 1784             FAIL_IF(!inst);
 1785             *inst = LEA_r_m;
 1786             done = 1;
 1787         }
 1788     }
 1789 
 1790     if (done) {
 1791         if (dst_r == TMP_REG1)
 1792             return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 1793         return SLJIT_SUCCESS;
 1794     }
 1795     return SLJIT_ERR_UNSUPPORTED;
 1796 }
 1797 
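     /* Editorial note: emit_lea_binary is the flag-free addition helper tried
        first for SLJIT_ADD in sljit_emit_op2 below. It encodes the sum as a
        single address computation, e.g. (sketch):

            lea  dst, [src1 + src2]       ; register + register
            lea  dst, [src1 + imm]        ; register + immediate

        LEA never modifies EFLAGS, which is why it is only usable when the
        caller does not request flags. */
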
 1798 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
 1799     sljit_s32 src1, sljit_sw src1w,
 1800     sljit_s32 src2, sljit_sw src2w)
 1801 {
 1802     sljit_u8* inst;
 1803 
 1804 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1805     if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1806 #else
 1807     if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
 1808 #endif
 1809         BINARY_EAX_IMM(CMP_EAX_i32, src2w);
 1810         return SLJIT_SUCCESS;
 1811     }
 1812 
 1813     if (FAST_IS_REG(src1)) {
 1814         if (src2 & SLJIT_IMM) {
 1815             BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
 1816         }
 1817         else {
 1818             inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
 1819             FAIL_IF(!inst);
 1820             *inst = CMP_r_rm;
 1821         }
 1822         return SLJIT_SUCCESS;
 1823     }
 1824 
 1825     if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
 1826         inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
 1827         FAIL_IF(!inst);
 1828         *inst = CMP_rm_r;
 1829         return SLJIT_SUCCESS;
 1830     }
 1831 
 1832     if (src2 & SLJIT_IMM) {
 1833         if (src1 & SLJIT_IMM) {
 1834             EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1835             src1 = TMP_REG1;
 1836             src1w = 0;
 1837         }
 1838         BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
 1839     }
 1840     else {
 1841         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1842         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1843         FAIL_IF(!inst);
 1844         *inst = CMP_r_rm;
 1845     }
 1846     return SLJIT_SUCCESS;
 1847 }
 1848 
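     /* Editorial note: the SLJIT_R0 fast path above uses the short
        CMP EAX, imm32 encoding (opcode 3D, no ModRM byte). It is only
        selected when the immediate does not fit in a signed byte, since
        otherwise the sign-extended "83 /7 ib" form is smaller still; the
        opcode bytes here are assumptions based on the standard encodings. */
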
 1849 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
 1850     sljit_s32 src1, sljit_sw src1w,
 1851     sljit_s32 src2, sljit_sw src2w)
 1852 {
 1853     sljit_u8* inst;
 1854 
 1855 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1856     if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1857 #else
 1858     if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
 1859 #endif
 1860         BINARY_EAX_IMM(TEST_EAX_i32, src2w);
 1861         return SLJIT_SUCCESS;
 1862     }
 1863 
 1864 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1865     if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1866 #else
 1867     if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
 1868 #endif
 1869         BINARY_EAX_IMM(TEST_EAX_i32, src1w);
 1870         return SLJIT_SUCCESS;
 1871     }
 1872 
 1873     if (!(src1 & SLJIT_IMM)) {
 1874         if (src2 & SLJIT_IMM) {
 1875 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1876             if (IS_HALFWORD(src2w) || compiler->mode32) {
 1877                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
 1878                 FAIL_IF(!inst);
 1879                 *inst = GROUP_F7;
 1880             }
 1881             else {
 1882                 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
 1883                 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
 1884                 FAIL_IF(!inst);
 1885                 *inst = TEST_rm_r;
 1886             }
 1887 #else
 1888             inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
 1889             FAIL_IF(!inst);
 1890             *inst = GROUP_F7;
 1891 #endif
 1892             return SLJIT_SUCCESS;
 1893         }
 1894         else if (FAST_IS_REG(src1)) {
 1895             inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
 1896             FAIL_IF(!inst);
 1897             *inst = TEST_rm_r;
 1898             return SLJIT_SUCCESS;
 1899         }
 1900     }
 1901 
 1902     if (!(src2 & SLJIT_IMM)) {
 1903         if (src1 & SLJIT_IMM) {
 1904 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1905             if (IS_HALFWORD(src1w) || compiler->mode32) {
 1906                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
 1907                 FAIL_IF(!inst);
 1908                 *inst = GROUP_F7;
 1909             }
 1910             else {
 1911                 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
 1912                 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1913                 FAIL_IF(!inst);
 1914                 *inst = TEST_rm_r;
 1915             }
 1916 #else
 1917             inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
 1918             FAIL_IF(!inst);
 1919             *inst = GROUP_F7;
 1920 #endif
 1921             return SLJIT_SUCCESS;
 1922         }
 1923         else if (FAST_IS_REG(src2)) {
 1924             inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
 1925             FAIL_IF(!inst);
 1926             *inst = TEST_rm_r;
 1927             return SLJIT_SUCCESS;
 1928         }
 1929     }
 1930 
 1931     EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1932     if (src2 & SLJIT_IMM) {
 1933 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1934         if (IS_HALFWORD(src2w) || compiler->mode32) {
 1935             inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
 1936             FAIL_IF(!inst);
 1937             *inst = GROUP_F7;
 1938         }
 1939         else {
 1940             FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
 1941             inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
 1942             FAIL_IF(!inst);
 1943             *inst = TEST_rm_r;
 1944         }
 1945 #else
 1946         inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
 1947         FAIL_IF(!inst);
 1948         *inst = GROUP_F7;
 1949 #endif
 1950     }
 1951     else {
 1952         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1953         FAIL_IF(!inst);
 1954         *inst = TEST_rm_r;
 1955     }
 1956     return SLJIT_SUCCESS;
 1957 }
 1958 
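     /* Editorial note: TEST only sets flags from src1 & src2 and is therefore
        symmetric, so the helper above freely swaps operand order to find an
        encodable form before falling back to a move through TMP_REG1. */
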
 1959 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
 1960     sljit_u8 mode,
 1961     sljit_s32 dst, sljit_sw dstw,
 1962     sljit_s32 src1, sljit_sw src1w,
 1963     sljit_s32 src2, sljit_sw src2w)
 1964 {
 1965     sljit_u8* inst;
 1966 
 1967     if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
 1968         if (dst == src1 && dstw == src1w) {
 1969             inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
 1970             FAIL_IF(!inst);
 1971             *inst |= mode;
 1972             return SLJIT_SUCCESS;
 1973         }
 1974         if (dst == SLJIT_UNUSED) {
 1975             EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1976             inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
 1977             FAIL_IF(!inst);
 1978             *inst |= mode;
 1979             return SLJIT_SUCCESS;
 1980         }
 1981         if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
 1982             EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1983             inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 1984             FAIL_IF(!inst);
 1985             *inst |= mode;
 1986             EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 1987             return SLJIT_SUCCESS;
 1988         }
 1989         if (FAST_IS_REG(dst)) {
 1990             EMIT_MOV(compiler, dst, 0, src1, src1w);
 1991             inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
 1992             FAIL_IF(!inst);
 1993             *inst |= mode;
 1994             return SLJIT_SUCCESS;
 1995         }
 1996 
 1997         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1998         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
 1999         FAIL_IF(!inst);
 2000         *inst |= mode;
 2001         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 2002         return SLJIT_SUCCESS;
 2003     }
 2004 
 2005     if (dst == SLJIT_PREF_SHIFT_REG) {
 2006         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 2007         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 2008         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2009         FAIL_IF(!inst);
 2010         *inst |= mode;
 2011         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2012     }
 2013     else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
 2014         if (src1 != dst)
 2015             EMIT_MOV(compiler, dst, 0, src1, src1w);
 2016         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
 2017         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 2018         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
 2019         FAIL_IF(!inst);
 2020         *inst |= mode;
 2021         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2022     }
 2023     else {
2024         /* This case is complex, since ecx itself may be used for
2025            addressing, and that use must still be supported. */
 2026         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 2027 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2028         EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
 2029         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 2030         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2031         FAIL_IF(!inst);
 2032         *inst |= mode;
 2033         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
 2034 #else
 2035         EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
 2036         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 2037         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2038         FAIL_IF(!inst);
 2039         *inst |= mode;
 2040         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
 2041 #endif
 2042         if (dst != SLJIT_UNUSED)
 2043             return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 2044     }
 2045 
 2046     return SLJIT_SUCCESS;
 2047 }
 2048 
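     /* Editorial note: variable x86 shifts take their count only in CL, so
        every register-count path above funnels the count through
        SLJIT_PREF_SHIFT_REG (ecx), preserving the previous ecx value around
        the shift when it may be live, roughly:

            mov  tmp2, ecx
            mov  ecx, src2
            shl  tmp1, cl
            mov  ecx, tmp2

        (On x86-32, the old ecx value is spilled to the stack instead.) */
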
 2049 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
 2050     sljit_u8 mode, sljit_s32 set_flags,
 2051     sljit_s32 dst, sljit_sw dstw,
 2052     sljit_s32 src1, sljit_sw src1w,
 2053     sljit_s32 src2, sljit_sw src2w)
 2054 {
 2055     /* The CPU does not set flags if the shift count is 0. */
 2056     if (src2 & SLJIT_IMM) {
 2057 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2058         if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
 2059             return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 2060 #else
 2061         if ((src2w & 0x1f) != 0)
 2062             return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 2063 #endif
 2064         if (!set_flags)
 2065             return emit_mov(compiler, dst, dstw, src1, src1w);
 2066         /* OR dst, src, 0 */
 2067         return emit_cum_binary(compiler, BINARY_OPCODE(OR),
 2068             dst, dstw, src1, src1w, SLJIT_IMM, 0);
 2069     }
 2070 
 2071     if (!set_flags)
 2072         return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 2073 
 2074     if (!FAST_IS_REG(dst))
 2075         FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
 2076 
 2077     FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
 2078 
 2079     if (FAST_IS_REG(dst))
 2080         return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0);
 2081     return SLJIT_SUCCESS;
 2082 }
 2083 
 2084 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
 2085     sljit_s32 dst, sljit_sw dstw,
 2086     sljit_s32 src1, sljit_sw src1w,
 2087     sljit_s32 src2, sljit_sw src2w)
 2088 {
 2089     CHECK_ERROR();
 2090     CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
 2091     ADJUST_LOCAL_OFFSET(dst, dstw);
 2092     ADJUST_LOCAL_OFFSET(src1, src1w);
 2093     ADJUST_LOCAL_OFFSET(src2, src2w);
 2094 
 2095     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2096     CHECK_EXTRA_REGS(src1, src1w, (void)0);
 2097     CHECK_EXTRA_REGS(src2, src2w, (void)0);
 2098 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2099     compiler->mode32 = op & SLJIT_I32_OP;
 2100 #endif
 2101 
 2102     if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
 2103         return SLJIT_SUCCESS;
 2104 
 2105     switch (GET_OPCODE(op)) {
 2106     case SLJIT_ADD:
 2107         if (!HAS_FLAGS(op)) {
 2108             if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
 2109                 return compiler->error;
 2110         }
 2111         return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
 2112             dst, dstw, src1, src1w, src2, src2w);
 2113     case SLJIT_ADDC:
 2114         return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
 2115             dst, dstw, src1, src1w, src2, src2w);
 2116     case SLJIT_SUB:
 2117         if (!HAS_FLAGS(op)) {
 2118             if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
 2119                 return compiler->error;
 2120         }
 2121 
 2122         if (dst == SLJIT_UNUSED)
 2123             return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
 2124         return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
 2125             dst, dstw, src1, src1w, src2, src2w);
 2126     case SLJIT_SUBC:
 2127         return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
 2128             dst, dstw, src1, src1w, src2, src2w);
 2129     case SLJIT_MUL:
 2130         return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
 2131     case SLJIT_AND:
 2132         if (dst == SLJIT_UNUSED)
 2133             return emit_test_binary(compiler, src1, src1w, src2, src2w);
 2134         return emit_cum_binary(compiler, BINARY_OPCODE(AND),
 2135             dst, dstw, src1, src1w, src2, src2w);
 2136     case SLJIT_OR:
 2137         return emit_cum_binary(compiler, BINARY_OPCODE(OR),
 2138             dst, dstw, src1, src1w, src2, src2w);
 2139     case SLJIT_XOR:
 2140         return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
 2141             dst, dstw, src1, src1w, src2, src2w);
 2142     case SLJIT_SHL:
 2143         return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
 2144             dst, dstw, src1, src1w, src2, src2w);
 2145     case SLJIT_LSHR:
 2146         return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
 2147             dst, dstw, src1, src1w, src2, src2w);
 2148     case SLJIT_ASHR:
 2149         return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
 2150             dst, dstw, src1, src1w, src2, src2w);
 2151     }
 2152 
 2153     return SLJIT_SUCCESS;
 2154 }
 2155 
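     /* Illustrative usage (not part of the original file): a front end
        wanting "R0 = R0 + 42" without flags would call

            sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0,
                SLJIT_R0, 0, SLJIT_IMM, 42);

        which the dispatcher above offers to emit_lea_binary first, falling
        back to emit_cum_binary whenever the LEA helper reports
        SLJIT_ERR_UNSUPPORTED (as it does here, since dst == src1). */
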
 2156 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
 2157 {
 2158     CHECK_REG_INDEX(check_sljit_get_register_index(reg));
 2159 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2160     if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
 2161         return -1;
 2162 #endif
 2163     return reg_map[reg];
 2164 }
 2165 
 2166 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
 2167 {
 2168     CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
 2169 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2170     return reg;
 2171 #else
 2172     return freg_map[reg];
 2173 #endif
 2174 }
 2175 
 2176 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
 2177     void *instruction, sljit_s32 size)
 2178 {
 2179     sljit_u8 *inst;
 2180 
 2181     CHECK_ERROR();
 2182     CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
 2183 
 2184     inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
 2185     FAIL_IF(!inst);
 2186     INC_SIZE(size);
 2187     SLJIT_MEMCPY(inst, instruction, size);
 2188     return SLJIT_SUCCESS;
 2189 }
 2190 
 2191 /* --------------------------------------------------------------------- */
 2192 /*  Floating point operators                                             */
 2193 /* --------------------------------------------------------------------- */
 2194 
2195 /* Alignment padding (3 words) + 4 constants of 16 bytes each. */
 2196 static sljit_s32 sse2_data[3 + (4 * 4)];
 2197 static sljit_s32 *sse2_buffer;
 2198 
 2199 static void init_compiler(void)
 2200 {
 2201     /* Align to 16 bytes. */
 2202     sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
 2203 
2204     /* Single precision constants (each constant is 16 bytes long). */
 2205     sse2_buffer[0] = 0x80000000;
 2206     sse2_buffer[4] = 0x7fffffff;
2207     /* Double precision constants (each constant is 16 bytes long). */
 2208     sse2_buffer[8] = 0;
 2209     sse2_buffer[9] = 0x80000000;
 2210     sse2_buffer[12] = 0xffffffff;
 2211     sse2_buffer[13] = 0x7fffffff;
 2212 }
 2213 
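     /* Editorial note: the constants initialized above are the usual scalar
        sign-bit masks: XORPD with the 0x80000000 pattern flips the sign
        (SLJIT_NEG_F64) and ANDPD with the 0x7fffffff pattern clears it
        (SLJIT_ABS_F64). Each constant occupies its own 16-byte slot so it
        can serve as an aligned SSE2 memory operand; only the low lane
        matters for these scalar operations. */
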
 2214 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
 2215     sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
 2216 {
 2217     sljit_u8 *inst;
 2218 
 2219     inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
 2220     FAIL_IF(!inst);
 2221     *inst++ = GROUP_0F;
 2222     *inst = opcode;
 2223     return SLJIT_SUCCESS;
 2224 }
 2225 
 2226 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
 2227     sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
 2228 {
 2229     sljit_u8 *inst;
 2230 
 2231     inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
 2232     FAIL_IF(!inst);
 2233     *inst++ = GROUP_0F;
 2234     *inst = opcode;
 2235     return SLJIT_SUCCESS;
 2236 }
 2237 
 2238 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
 2239     sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
 2240 {
 2241     return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
 2242 }
 2243 
 2244 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
 2245     sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
 2246 {
 2247     return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
 2248 }
 2249 
 2250 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
 2251     sljit_s32 dst, sljit_sw dstw,
 2252     sljit_s32 src, sljit_sw srcw)
 2253 {
 2254     sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 2255     sljit_u8 *inst;
 2256 
 2257 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2258     if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
 2259         compiler->mode32 = 0;
 2260 #endif
 2261 
 2262     inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
 2263     FAIL_IF(!inst);
 2264     *inst++ = GROUP_0F;
 2265     *inst = CVTTSD2SI_r_xm;
 2266 
 2267     if (dst & SLJIT_MEM)
 2268         return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 2269     return SLJIT_SUCCESS;
 2270 }
 2271 
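     /* Editorial note: CVTTSD2SI / CVTTSS2SI (the F2/F3-prefixed 0F 2C pair
        emitted above) truncate toward zero, matching C cast semantics rather
        than the current MXCSR rounding mode. */
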
 2272 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
 2273     sljit_s32 dst, sljit_sw dstw,
 2274     sljit_s32 src, sljit_sw srcw)
 2275 {
 2276     sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
 2277     sljit_u8 *inst;
 2278 
 2279 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2280     if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
 2281         compiler->mode32 = 0;
 2282 #endif
 2283 
 2284     if (src & SLJIT_IMM) {
 2285 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2286         if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
 2287             srcw = (sljit_s32)srcw;
 2288 #endif
 2289         EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
 2290         src = TMP_REG1;
 2291         srcw = 0;
 2292     }
 2293 
 2294     inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
 2295     FAIL_IF(!inst);
 2296     *inst++ = GROUP_0F;
 2297     *inst = CVTSI2SD_x_rm;
 2298 
 2299 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2300     compiler->mode32 = 1;
 2301 #endif
 2302     if (dst_r == TMP_FREG)
 2303         return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
 2304     return SLJIT_SUCCESS;
 2305 }
 2306 
 2307 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
 2308     sljit_s32 src1, sljit_sw src1w,
 2309     sljit_s32 src2, sljit_sw src2w)
 2310 {
 2311     if (!FAST_IS_REG(src1)) {
 2312         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
 2313         src1 = TMP_FREG;
 2314     }
 2315 
 2316     return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
 2317 }
 2318 
 2319 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
 2320     sljit_s32 dst, sljit_sw dstw,
 2321     sljit_s32 src, sljit_sw srcw)
 2322 {
 2323     sljit_s32 dst_r;
 2324 
 2325 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2326     compiler->mode32 = 1;
 2327 #endif
 2328 
 2329     CHECK_ERROR();
 2330     SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
 2331 
 2332     if (GET_OPCODE(op) == SLJIT_MOV_F64) {
 2333         if (FAST_IS_REG(dst))
 2334             return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
 2335         if (FAST_IS_REG(src))
 2336             return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
 2337         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
 2338         return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
 2339     }
 2340 
 2341     if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
 2342         dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
 2343         if (FAST_IS_REG(src)) {
2344             /* We overwrite the high bits of the source. From the SLJIT
2345                point of view, this is not an issue.
2346                Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
 2347             FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
 2348         }
 2349         else {
 2350             FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
 2351             src = TMP_FREG;
 2352         }
 2353 
 2354         FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
 2355         if (dst_r == TMP_FREG)
 2356             return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
 2357         return SLJIT_SUCCESS;
 2358     }
 2359 
 2360     if (FAST_IS_REG(dst)) {
 2361         dst_r = dst;
 2362         if (dst != src)
 2363             FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
 2364     }
 2365     else {
 2366         dst_r = TMP_FREG;
 2367         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
 2368     }
 2369 
 2370     switch (GET_OPCODE(op)) {
 2371     case SLJIT_NEG_F64:
 2372         FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
 2373         break;
 2374 
 2375     case SLJIT_ABS_F64:
 2376         FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
 2377         break;
 2378     }
 2379 
 2380     if (dst_r == TMP_FREG)
 2381         return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
 2382     return SLJIT_SUCCESS;
 2383 }
 2384 
 2385 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
 2386     sljit_s32 dst, sljit_sw dstw,
 2387     sljit_s32 src1, sljit_sw src1w,
 2388     sljit_s32 src2, sljit_sw src2w)
 2389 {
 2390     sljit_s32 dst_r;
 2391 
 2392     CHECK_ERROR();
 2393     CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
 2394     ADJUST_LOCAL_OFFSET(dst, dstw);
 2395     ADJUST_LOCAL_OFFSET(src1, src1w);
 2396     ADJUST_LOCAL_OFFSET(src2, src2w);
 2397 
 2398 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2399     compiler->mode32 = 1;
 2400 #endif
 2401 
 2402     if (FAST_IS_REG(dst)) {
 2403         dst_r = dst;
 2404         if (dst == src1)
 2405             ; /* Do nothing here. */
 2406         else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
 2407             /* Swap arguments. */
 2408             src2 = src1;
 2409             src2w = src1w;
 2410         }
 2411         else if (dst != src2)
 2412             FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
 2413         else {
 2414             dst_r = TMP_FREG;
 2415             FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
 2416         }
 2417     }
 2418     else {
 2419         dst_r = TMP_FREG;
 2420         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
 2421     }
 2422 
 2423     switch (GET_OPCODE(op)) {
 2424     case SLJIT_ADD_F64:
 2425         FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
 2426         break;
 2427 
 2428     case SLJIT_SUB_F64:
 2429         FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
 2430         break;
 2431 
 2432     case SLJIT_MUL_F64:
 2433         FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
 2434         break;
 2435 
 2436     case SLJIT_DIV_F64:
 2437         FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
 2438         break;
 2439     }
 2440 
 2441     if (dst_r == TMP_FREG)
 2442         return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
 2443     return SLJIT_SUCCESS;
 2444 }
 2445 
 2446 /* --------------------------------------------------------------------- */
 2447 /*  Conditional instructions                                             */
 2448 /* --------------------------------------------------------------------- */
 2449 
 2450 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
 2451 {
 2452     sljit_u8 *inst;
 2453     struct sljit_label *label;
 2454 
 2455     CHECK_ERROR_PTR();
 2456     CHECK_PTR(check_sljit_emit_label(compiler));
 2457 
 2458     if (compiler->last_label && compiler->last_label->size == compiler->size)
 2459         return compiler->last_label;
 2460 
 2461     label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
 2462     PTR_FAIL_IF(!label);
 2463     set_label(label, compiler);
 2464 
 2465     inst = (sljit_u8*)ensure_buf(compiler, 2);
 2466     PTR_FAIL_IF(!inst);
 2467 
 2468     *inst++ = 0;
 2469     *inst++ = 0;
 2470 
 2471     return label;
 2472 }
 2473 
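     /* Editorial note (inferred from the emitters in this file): the two
        bytes written above are a record for the final code-generation pass
        rather than machine code: a zero length byte followed by a kind byte,
        0 for a label, 1 for a const (see sljit_emit_const below), and
        type + 2 for jumps. */
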
 2474 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
 2475 {
 2476     sljit_u8 *inst;
 2477     struct sljit_jump *jump;
 2478 
 2479     CHECK_ERROR_PTR();
 2480     CHECK_PTR(check_sljit_emit_jump(compiler, type));
 2481 
 2482     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2483     PTR_FAIL_IF_NULL(jump);
 2484     set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
 2485     type &= 0xff;
 2486 
 2487     /* Worst case size. */
 2488 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2489     compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
 2490 #else
 2491     compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
 2492 #endif
 2493 
 2494     inst = (sljit_u8*)ensure_buf(compiler, 2);
 2495     PTR_FAIL_IF_NULL(inst);
 2496 
 2497     *inst++ = 0;
 2498     *inst++ = type + 2;
 2499     return jump;
 2500 }
 2501 
 2502 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
 2503 {
 2504     sljit_u8 *inst;
 2505     struct sljit_jump *jump;
 2506 
 2507     CHECK_ERROR();
 2508     CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
 2509     ADJUST_LOCAL_OFFSET(src, srcw);
 2510 
 2511     CHECK_EXTRA_REGS(src, srcw, (void)0);
 2512 
 2513     if (src == SLJIT_IMM) {
 2514         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2515         FAIL_IF_NULL(jump);
 2516         set_jump(jump, compiler, JUMP_ADDR);
 2517         jump->u.target = srcw;
 2518 
 2519         /* Worst case size. */
 2520 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2521         compiler->size += 5;
 2522 #else
 2523         compiler->size += 10 + 3;
 2524 #endif
 2525 
 2526         inst = (sljit_u8*)ensure_buf(compiler, 2);
 2527         FAIL_IF_NULL(inst);
 2528 
 2529         *inst++ = 0;
 2530         *inst++ = type + 2;
 2531     }
 2532     else {
 2533 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2534         /* REX_W is not necessary (src is not immediate). */
 2535         compiler->mode32 = 1;
 2536 #endif
 2537         inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
 2538         FAIL_IF(!inst);
 2539         *inst++ = GROUP_FF;
 2540         *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
 2541     }
 2542     return SLJIT_SUCCESS;
 2543 }
 2544 
 2545 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
 2546     sljit_s32 dst, sljit_sw dstw,
 2547     sljit_s32 type)
 2548 {
 2549     sljit_u8 *inst;
 2550     sljit_u8 cond_set = 0;
 2551 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2552     sljit_s32 reg;
 2553 #endif
 2554     /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
 2555     sljit_s32 dst_save = dst;
 2556     sljit_sw dstw_save = dstw;
 2557 
 2558     CHECK_ERROR();
 2559     CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
 2560 
 2561     ADJUST_LOCAL_OFFSET(dst, dstw);
 2562     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2563 
 2564     type &= 0xff;
 2565     /* setcc = jcc + 0x10. */
 2566     cond_set = get_jump_code(type) + 0x10;
 2567 
 2568 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2569     if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
 2570         inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
 2571         FAIL_IF(!inst);
 2572         INC_SIZE(4 + 3);
 2573         /* Set low register to conditional flag. */
 2574         *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
 2575         *inst++ = GROUP_0F;
 2576         *inst++ = cond_set;
 2577         *inst++ = MOD_REG | reg_lmap[TMP_REG1];
 2578         *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
 2579         *inst++ = OR_rm8_r8;
 2580         *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
 2581         return SLJIT_SUCCESS;
 2582     }
 2583 
 2584     reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
 2585 
 2586     inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
 2587     FAIL_IF(!inst);
 2588     INC_SIZE(4 + 4);
 2589     /* Set low register to conditional flag. */
 2590     *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
 2591     *inst++ = GROUP_0F;
 2592     *inst++ = cond_set;
 2593     *inst++ = MOD_REG | reg_lmap[reg];
 2594     *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
 2595     /* The movzx instruction does not affect flags. */
 2596     *inst++ = GROUP_0F;
 2597     *inst++ = MOVZX_r_rm8;
 2598     *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
 2599 
 2600     if (reg != TMP_REG1)
 2601         return SLJIT_SUCCESS;
 2602 
 2603     if (GET_OPCODE(op) < SLJIT_ADD) {
 2604         compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
 2605         return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 2606     }
 2607 
 2608 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2609         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2610     compiler->skip_checks = 1;
 2611 #endif
 2612     return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
 2613 
 2614 #else
 2615     /* The SLJIT_CONFIG_X86_32 code path starts here. */
 2616     if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
 2617         if (reg_map[dst] <= 4) {
 2618             /* Low byte is accessible. */
 2619             inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
 2620             FAIL_IF(!inst);
 2621             INC_SIZE(3 + 3);
 2622             /* Set low byte to conditional flag. */
 2623             *inst++ = GROUP_0F;
 2624             *inst++ = cond_set;
 2625             *inst++ = MOD_REG | reg_map[dst];
 2626 
 2627             *inst++ = GROUP_0F;
 2628             *inst++ = MOVZX_r_rm8;
 2629             *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
 2630             return SLJIT_SUCCESS;
 2631         }
 2632 
 2633         /* Low byte is not accessible. */
 2634         if (cpu_has_cmov == -1)
 2635             get_cpu_features();
 2636 
 2637         if (cpu_has_cmov) {
 2638             EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2639             /* An "xor reg, reg" operation would overwrite the flags. */
 2640             EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
 2641 
 2642             inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
 2643             FAIL_IF(!inst);
 2644             INC_SIZE(3);
 2645 
 2646             *inst++ = GROUP_0F;
 2647             /* cmovcc = setcc - 0x50. */
 2648             *inst++ = cond_set - 0x50;
 2649             *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
 2650             return SLJIT_SUCCESS;
 2651         }
 2652 
 2653         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
 2654         FAIL_IF(!inst);
 2655         INC_SIZE(1 + 3 + 3 + 1);
 2656         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2657         /* Set al to conditional flag. */
 2658         *inst++ = GROUP_0F;
 2659         *inst++ = cond_set;
 2660         *inst++ = MOD_REG | 0 /* eax */;
 2661 
 2662         *inst++ = GROUP_0F;
 2663         *inst++ = MOVZX_r_rm8;
 2664         *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
 2665         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2666         return SLJIT_SUCCESS;
 2667     }
 2668 
 2669     if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
 2670         SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
 2671 
 2672         if (dst != SLJIT_R0) {
 2673             inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
 2674             FAIL_IF(!inst);
 2675             INC_SIZE(1 + 3 + 2 + 1);
 2676             /* Set low register to conditional flag. */
 2677             *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2678             *inst++ = GROUP_0F;
 2679             *inst++ = cond_set;
 2680             *inst++ = MOD_REG | 0 /* eax */;
 2681             *inst++ = OR_rm8_r8;
 2682             *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
 2683             *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2684         }
 2685         else {
 2686             inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
 2687             FAIL_IF(!inst);
 2688             INC_SIZE(2 + 3 + 2 + 2);
 2689             /* Set low register to conditional flag. */
 2690             *inst++ = XCHG_r_rm;
 2691             *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
 2692             *inst++ = GROUP_0F;
 2693             *inst++ = cond_set;
 2694             *inst++ = MOD_REG | 1 /* ecx */;
 2695             *inst++ = OR_rm8_r8;
 2696             *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
 2697             *inst++ = XCHG_r_rm;
 2698             *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
 2699         }
 2700         return SLJIT_SUCCESS;
 2701     }
 2702 
2703     /* Set TMP_REG1 to the condition bit. */
 2704     inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
 2705     FAIL_IF(!inst);
 2706     INC_SIZE(1 + 3 + 3 + 1);
 2707     *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2708     /* Set al to conditional flag. */
 2709     *inst++ = GROUP_0F;
 2710     *inst++ = cond_set;
 2711     *inst++ = MOD_REG | 0 /* eax */;
 2712 
 2713     *inst++ = GROUP_0F;
 2714     *inst++ = MOVZX_r_rm8;
 2715     *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
 2716 
 2717     *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2718 
 2719     if (GET_OPCODE(op) < SLJIT_ADD)
 2720         return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 2721 
 2722 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2723         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2724     compiler->skip_checks = 1;
 2725 #endif
 2726     return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
 2727 #endif /* SLJIT_CONFIG_X86_64 */
 2728 }
 2729 
 2730 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
 2731     sljit_s32 dst_reg,
 2732     sljit_s32 src, sljit_sw srcw)
 2733 {
 2734     sljit_u8* inst;
 2735 
 2736     CHECK_ERROR();
 2737     CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
 2738 
 2739 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2740     dst_reg &= ~SLJIT_I32_OP;
 2741 
 2742     if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
 2743         return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
 2744 #else
 2745     if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
 2746         return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
 2747 #endif
 2748 
 2749     /* ADJUST_LOCAL_OFFSET is not needed. */
 2750     CHECK_EXTRA_REGS(src, srcw, (void)0);
 2751 
 2752 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2753     compiler->mode32 = dst_reg & SLJIT_I32_OP;
 2754     dst_reg &= ~SLJIT_I32_OP;
 2755 #endif
 2756 
 2757     if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
 2758         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
 2759         src = TMP_REG1;
 2760         srcw = 0;
 2761     }
 2762 
 2763     inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
 2764     FAIL_IF(!inst);
 2765     *inst++ = GROUP_0F;
 2766     *inst = get_jump_code(type & 0xff) - 0x40;
 2767     return SLJIT_SUCCESS;
 2768 }
 2769 
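     /* Editorial note: the conditional instruction families share their
        condition nibble: jcc near is 0F 80+cc, setcc is 0F 90+cc
        (jcc + 0x10) and cmovcc is 0F 40+cc (jcc - 0x40), which is why the
        result of get_jump_code() is adjusted by small constants here and in
        sljit_emit_op_flags above. */
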
 2770 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
 2771 {
 2772     CHECK_ERROR();
 2773     CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
 2774     ADJUST_LOCAL_OFFSET(dst, dstw);
 2775 
 2776     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2777 
 2778 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2779     compiler->mode32 = 0;
 2780 #endif
 2781 
 2782     ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
 2783 
 2784 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2785     if (NOT_HALFWORD(offset)) {
 2786         FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
 2787 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
 2788         SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
 2789         return compiler->error;
 2790 #else
 2791         return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
 2792 #endif
 2793     }
 2794 #endif
 2795 
 2796     if (offset != 0)
 2797         return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
 2798     return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
 2799 }
 2800 
 2801 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 2802 {
 2803     sljit_u8 *inst;
 2804     struct sljit_const *const_;
 2805 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2806     sljit_s32 reg;
 2807 #endif
 2808 
 2809     CHECK_ERROR_PTR();
 2810     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
 2811     ADJUST_LOCAL_OFFSET(dst, dstw);
 2812 
 2813     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2814 
 2815     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
 2816     PTR_FAIL_IF(!const_);
 2817     set_const(const_, compiler);
 2818 
 2819 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2820     compiler->mode32 = 0;
 2821     reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
 2822 
 2823     if (emit_load_imm64(compiler, reg, init_value))
 2824         return NULL;
 2825 #else
 2826     if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
 2827         return NULL;
 2828 #endif
 2829 
 2830     inst = (sljit_u8*)ensure_buf(compiler, 2);
 2831     PTR_FAIL_IF(!inst);
 2832 
 2833     *inst++ = 0;
 2834     *inst++ = 1;
 2835 
 2836 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2837     if (dst & SLJIT_MEM)
 2838         if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
 2839             return NULL;
 2840 #endif
 2841 
 2842     return const_;
 2843 }
 2844 
 2845 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 2846 {
 2847     SLJIT_UNUSED_ARG(executable_offset);
 2848 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2849     sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
 2850 #else
 2851     sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
 2852 #endif
 2853 }
 2854 
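     /* Editorial note: on x86-32 the patched word is a rel32 displacement
        measured from the end of the 4-byte field, hence the
        "new_target - (addr + 4)" adjustment above; on x86-64 an absolute
        64-bit target is stored instead. */
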
 2855 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 2856 {
 2857     SLJIT_UNUSED_ARG(executable_offset);
 2858     sljit_unaligned_store_sw((void*)addr, new_constant);
 2859 }