"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.44/sljit/sljitNativeX86_common.c" (19 Nov 2019, 83951 Bytes) of package /linux/misc/pcre-8.44.tar.bz2:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "sljitNativeX86_common.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 8.43_vs_8.44.

    1 /*
    2  *    Stack-less Just-In-Time compiler
    3  *
    4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without modification, are
    7  * permitted provided that the following conditions are met:
    8  *
    9  *   1. Redistributions of source code must retain the above copyright notice, this list of
   10  *      conditions and the following disclaimer.
   11  *
   12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
   13  *      of conditions and the following disclaimer in the documentation and/or other materials
   14  *      provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
   17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
   19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  */
   26 
   27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
   28 {
   29 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
   30     return "x86" SLJIT_CPUINFO " ABI:fastcall";
   31 #else
   32     return "x86" SLJIT_CPUINFO;
   33 #endif
   34 }
   35 
   36 /*
   37    32b register indexes:
   38      0 - EAX
   39      1 - ECX
   40      2 - EDX
   41      3 - EBX
   42      4 - ESP
   43      5 - EBP
   44      6 - ESI
   45      7 - EDI
   46 */
   47 
   48 /*
   49    64b register indexes:
   50      0 - RAX
   51      1 - RCX
   52      2 - RDX
   53      3 - RBX
   54      4 - RSP
   55      5 - RBP
   56      6 - RSI
   57      7 - RDI
    58      8 - R8   - from R8 upwards a REX prefix is required
   59      9 - R9
   60     10 - R10
   61     11 - R11
   62     12 - R12
   63     13 - R13
   64     14 - R14
   65     15 - R15
   66 */
   67 
   68 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   69 
   70 /* Last register + 1. */
   71 #define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
   72 
   73 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
   74     0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
   75 };
   76 
   77 #define CHECK_EXTRA_REGS(p, w, do) \
   78     if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
   79         if (p <= compiler->scratches) \
   80             w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
   81         else \
   82             w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
   83         p = SLJIT_MEM1(SLJIT_SP); \
   84         do; \
   85     }
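       /* Operands in the SLJIT_R3 .. SLJIT_S3 range (the entries mapped to 0 in
          reg_map above) have no physical register on x86-32 and live in the stack
          frame. CHECK_EXTRA_REGS rewrites such an operand into an SLJIT_MEM1(SLJIT_SP)
          access at the offset computed from saveds_offset / locals_offset, so later
          code can treat it as an ordinary memory operand. */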
   86 
   87 #else /* SLJIT_CONFIG_X86_32 */
   88 
   89 /* Last register + 1. */
   90 #define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
   91 #define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
   92 
    93 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present";
    94    avoid using r12 and r13 for memory addressing, therefore r12 is
    95    better used as a higher saved register. */
   96 #ifndef _WIN64
   97 /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
   98 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
   99     0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
  100 };
  101 /* low-map. reg_map & 0x7. */
  102 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  103     0, 0, 6, 7, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 1
  104 };
  105 #else
  106 /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
  107 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  108     0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
  109 };
  110 /* low-map. reg_map & 0x7. */
  111 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  112     0, 0, 2, 0, 1,  3,  4, 5,  5,  6,  7, 7, 6, 3, 4, 1,  2
  113 };
  114 #endif
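       /* reg_map holds the full hardware register number (0-15) while reg_lmap holds
          only its low three bits. The low bits go into the ModRM / SIB fields; the
          fourth bit, when set, must instead be emitted as the REX.R, REX.X or REX.B
          bit of the prefix (see the REX defines below). */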
  115 
  116 /* Args: xmm0-xmm3 */
  117 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
  118     4, 0, 1, 2, 3, 5, 6
  119 };
  120 /* low-map. freg_map & 0x7. */
  121 static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
  122     4, 0, 1, 2, 3, 5, 6
  123 };
  124 
  125 #define REX_W       0x48
  126 #define REX_R       0x44
  127 #define REX_X       0x42
  128 #define REX_B       0x41
  129 #define REX     0x40
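       /* REX is the 0100WRXB prefix byte: W selects a 64 bit operand size, R extends
          the ModRM reg field, X the SIB index field and B the ModRM rm / SIB base
          field. For example, "mov rax, r8" is encoded as 0x49 0x8B 0xC0:
          REX_W | REX_B, MOV_r_rm, then MOD_REG with reg = 0 (rax) and rm = 0 (the
          low bits of r8). */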
  130 
  131 #ifndef _WIN64
  132 #define HALFWORD_MAX 0x7fffffffl
  133 #define HALFWORD_MIN -0x80000000l
  134 #else
  135 #define HALFWORD_MAX 0x7fffffffll
  136 #define HALFWORD_MIN -0x80000000ll
  137 #endif
  138 
  139 #define IS_HALFWORD(x)      ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
  140 #define NOT_HALFWORD(x)     ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
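       /* Most x86-64 instructions can only encode a sign-extended 32 bit immediate.
          IS_HALFWORD checks that range; a NOT_HALFWORD value must first be loaded
          into a temporary register with a 64 bit move (emit_load_imm64). */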
  141 
  142 #define CHECK_EXTRA_REGS(p, w, do)
  143 
  144 #endif /* SLJIT_CONFIG_X86_32 */
  145 
  146 #define TMP_FREG    (0)
  147 
  148 /* Size flags for emit_x86_instruction: */
  149 #define EX86_BIN_INS        0x0010
  150 #define EX86_SHIFT_INS      0x0020
  151 #define EX86_REX        0x0040
  152 #define EX86_NO_REXW        0x0080
  153 #define EX86_BYTE_ARG       0x0100
  154 #define EX86_HALF_ARG       0x0200
  155 #define EX86_PREF_66        0x0400
  156 #define EX86_PREF_F2        0x0800
  157 #define EX86_PREF_F3        0x1000
  158 #define EX86_SSE2_OP1       0x2000
  159 #define EX86_SSE2_OP2       0x4000
  160 #define EX86_SSE2       (EX86_SSE2_OP1 | EX86_SSE2_OP2)
  161 
  162 /* --------------------------------------------------------------------- */
   163 /*  Instruction forms                                                    */
  164 /* --------------------------------------------------------------------- */
  165 
  166 #define ADD     (/* BINARY */ 0 << 3)
  167 #define ADD_EAX_i32 0x05
  168 #define ADD_r_rm    0x03
  169 #define ADD_rm_r    0x01
  170 #define ADDSD_x_xm  0x58
  171 #define ADC     (/* BINARY */ 2 << 3)
  172 #define ADC_EAX_i32 0x15
  173 #define ADC_r_rm    0x13
  174 #define ADC_rm_r    0x11
  175 #define AND     (/* BINARY */ 4 << 3)
  176 #define AND_EAX_i32 0x25
  177 #define AND_r_rm    0x23
  178 #define AND_rm_r    0x21
  179 #define ANDPD_x_xm  0x54
  180 #define BSR_r_rm    (/* GROUP_0F */ 0xbd)
  181 #define CALL_i32    0xe8
  182 #define CALL_rm     (/* GROUP_FF */ 2 << 3)
  183 #define CDQ     0x99
  184 #define CMOVE_r_rm  (/* GROUP_0F */ 0x44)
  185 #define CMP     (/* BINARY */ 7 << 3)
  186 #define CMP_EAX_i32 0x3d
  187 #define CMP_r_rm    0x3b
  188 #define CMP_rm_r    0x39
  189 #define CVTPD2PS_x_xm   0x5a
  190 #define CVTSI2SD_x_rm   0x2a
  191 #define CVTTSD2SI_r_xm  0x2c
  192 #define DIV     (/* GROUP_F7 */ 6 << 3)
  193 #define DIVSD_x_xm  0x5e
  194 #define FSTPS       0xd9
  195 #define FSTPD       0xdd
  196 #define INT3        0xcc
  197 #define IDIV        (/* GROUP_F7 */ 7 << 3)
  198 #define IMUL        (/* GROUP_F7 */ 5 << 3)
  199 #define IMUL_r_rm   (/* GROUP_0F */ 0xaf)
  200 #define IMUL_r_rm_i8    0x6b
  201 #define IMUL_r_rm_i32   0x69
  202 #define JE_i8       0x74
  203 #define JNE_i8      0x75
  204 #define JMP_i8      0xeb
  205 #define JMP_i32     0xe9
  206 #define JMP_rm      (/* GROUP_FF */ 4 << 3)
  207 #define LEA_r_m     0x8d
  208 #define MOV_r_rm    0x8b
  209 #define MOV_r_i32   0xb8
  210 #define MOV_rm_r    0x89
  211 #define MOV_rm_i32  0xc7
  212 #define MOV_rm8_i8  0xc6
  213 #define MOV_rm8_r8  0x88
  214 #define MOVSD_x_xm  0x10
  215 #define MOVSD_xm_x  0x11
  216 #define MOVSXD_r_rm 0x63
  217 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
  218 #define MOVSX_r_rm16    (/* GROUP_0F */ 0xbf)
  219 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
  220 #define MOVZX_r_rm16    (/* GROUP_0F */ 0xb7)
  221 #define MUL     (/* GROUP_F7 */ 4 << 3)
  222 #define MULSD_x_xm  0x59
  223 #define NEG_rm      (/* GROUP_F7 */ 3 << 3)
  224 #define NOP     0x90
  225 #define NOT_rm      (/* GROUP_F7 */ 2 << 3)
  226 #define OR      (/* BINARY */ 1 << 3)
  227 #define OR_r_rm     0x0b
  228 #define OR_EAX_i32  0x0d
  229 #define OR_rm_r     0x09
  230 #define OR_rm8_r8   0x08
  231 #define POP_r       0x58
  232 #define POP_rm      0x8f
  233 #define POPF        0x9d
  234 #define PREFETCH    0x18
  235 #define PUSH_i32    0x68
  236 #define PUSH_r      0x50
  237 #define PUSH_rm     (/* GROUP_FF */ 6 << 3)
  238 #define PUSHF       0x9c
  239 #define RET_near    0xc3
  240 #define RET_i16     0xc2
  241 #define SBB     (/* BINARY */ 3 << 3)
  242 #define SBB_EAX_i32 0x1d
  243 #define SBB_r_rm    0x1b
  244 #define SBB_rm_r    0x19
  245 #define SAR     (/* SHIFT */ 7 << 3)
  246 #define SHL     (/* SHIFT */ 4 << 3)
  247 #define SHR     (/* SHIFT */ 5 << 3)
  248 #define SUB     (/* BINARY */ 5 << 3)
  249 #define SUB_EAX_i32 0x2d
  250 #define SUB_r_rm    0x2b
  251 #define SUB_rm_r    0x29
  252 #define SUBSD_x_xm  0x5c
  253 #define TEST_EAX_i32    0xa9
  254 #define TEST_rm_r   0x85
  255 #define UCOMISD_x_xm    0x2e
  256 #define UNPCKLPD_x_xm   0x14
  257 #define XCHG_EAX_r  0x90
  258 #define XCHG_r_rm   0x87
  259 #define XOR     (/* BINARY */ 6 << 3)
  260 #define XOR_EAX_i32 0x35
  261 #define XOR_r_rm    0x33
  262 #define XOR_rm_r    0x31
  263 #define XORPD_x_xm  0x57
  264 
  265 #define GROUP_0F    0x0f
  266 #define GROUP_F7    0xf7
  267 #define GROUP_FF    0xff
  268 #define GROUP_BINARY_81 0x81
  269 #define GROUP_BINARY_83 0x83
  270 #define GROUP_SHIFT_1   0xd1
  271 #define GROUP_SHIFT_N   0xc1
  272 #define GROUP_SHIFT_CL  0xd3
  273 
  274 #define MOD_REG     0xc0
  275 #define MOD_DISP8   0x40
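       /* ModRM byte layout: mod (2 bits) | reg (3 bits) | rm (3 bits). MOD_REG
          (mod == 11) selects a register operand, MOD_DISP8 (mod == 01) a memory
          operand with an 8 bit displacement. */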
  276 
  277 #define INC_SIZE(s)         (*inst++ = (s), compiler->size += (s))
  278 
  279 #define PUSH_REG(r)         (*inst++ = (PUSH_r + (r)))
  280 #define POP_REG(r)          (*inst++ = (POP_r + (r)))
  281 #define RET()               (*inst++ = (RET_near))
  282 #define RET_I16(n)          (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
  283 /* r32, r/m32 */
  284 #define MOV_RM(mod, reg, rm)        (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
  285 
   286 /* Multithreading does not affect these static variables, since they store
   287    built-in CPU features. Therefore they can safely be overwritten by different
   288    threads if they detect the CPU features at the same time. */
  289 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  290 static sljit_s32 cpu_has_sse2 = -1;
  291 #endif
  292 static sljit_s32 cpu_has_cmov = -1;
  293 
  294 #ifdef _WIN32_WCE
  295 #include <cmnintrin.h>
  296 #elif defined(_MSC_VER) && _MSC_VER >= 1400
  297 #include <intrin.h>
  298 #endif
  299 
  300 /******************************************************/
  301 /*    Unaligned-store functions                       */
  302 /******************************************************/
  303 
  304 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
  305 {
  306     SLJIT_MEMCPY(addr, &value, sizeof(value));
  307 }
  308 
  309 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
  310 {
  311     SLJIT_MEMCPY(addr, &value, sizeof(value));
  312 }
  313 
  314 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
  315 {
  316     SLJIT_MEMCPY(addr, &value, sizeof(value));
  317 }
  318 
  319 /******************************************************/
  320 /*    Utility functions                               */
  321 /******************************************************/
  322 
  323 static void get_cpu_features(void)
  324 {
  325     sljit_u32 features;
  326 
  327 #if defined(_MSC_VER) && _MSC_VER >= 1400
  328 
  329     int CPUInfo[4];
  330     __cpuid(CPUInfo, 1);
  331     features = (sljit_u32)CPUInfo[3];
  332 
  333 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
  334 
  335     /* AT&T syntax. */
  336     __asm__ (
  337         "movl $0x1, %%eax\n"
  338 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  339         /* On x86-32, there is no red zone, so this
  340            should work (no need for a local variable). */
  341         "push %%ebx\n"
  342 #endif
  343         "cpuid\n"
  344 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  345         "pop %%ebx\n"
  346 #endif
  347         "movl %%edx, %0\n"
  348         : "=g" (features)
  349         :
  350 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  351         : "%eax", "%ecx", "%edx"
  352 #else
  353         : "%rax", "%rbx", "%rcx", "%rdx"
  354 #endif
  355     );
  356 
  357 #else /* _MSC_VER && _MSC_VER >= 1400 */
  358 
  359     /* Intel syntax. */
  360     __asm {
  361         mov eax, 1
  362         cpuid
  363         mov features, edx
  364     }
  365 
  366 #endif /* _MSC_VER && _MSC_VER >= 1400 */
  367 
  368 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  369     cpu_has_sse2 = (features >> 26) & 0x1;
  370 #endif
  371     cpu_has_cmov = (features >> 15) & 0x1;
  372 }
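       /* CPUID leaf 1 reports the feature flags in EDX: bit 15 is CMOV and bit 26
          is SSE2, which is what the two shifts above extract. */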
  373 
  374 static sljit_u8 get_jump_code(sljit_s32 type)
  375 {
  376     switch (type) {
  377     case SLJIT_EQUAL:
  378     case SLJIT_EQUAL_F64:
  379         return 0x84 /* je */;
  380 
  381     case SLJIT_NOT_EQUAL:
  382     case SLJIT_NOT_EQUAL_F64:
  383         return 0x85 /* jne */;
  384 
  385     case SLJIT_LESS:
  386     case SLJIT_LESS_F64:
  387         return 0x82 /* jc */;
  388 
  389     case SLJIT_GREATER_EQUAL:
  390     case SLJIT_GREATER_EQUAL_F64:
  391         return 0x83 /* jae */;
  392 
  393     case SLJIT_GREATER:
  394     case SLJIT_GREATER_F64:
  395         return 0x87 /* jnbe */;
  396 
  397     case SLJIT_LESS_EQUAL:
  398     case SLJIT_LESS_EQUAL_F64:
  399         return 0x86 /* jbe */;
  400 
  401     case SLJIT_SIG_LESS:
  402         return 0x8c /* jl */;
  403 
  404     case SLJIT_SIG_GREATER_EQUAL:
  405         return 0x8d /* jnl */;
  406 
  407     case SLJIT_SIG_GREATER:
  408         return 0x8f /* jnle */;
  409 
  410     case SLJIT_SIG_LESS_EQUAL:
  411         return 0x8e /* jle */;
  412 
  413     case SLJIT_OVERFLOW:
  414     case SLJIT_MUL_OVERFLOW:
  415         return 0x80 /* jo */;
  416 
  417     case SLJIT_NOT_OVERFLOW:
  418     case SLJIT_MUL_NOT_OVERFLOW:
  419         return 0x81 /* jno */;
  420 
  421     case SLJIT_UNORDERED_F64:
  422         return 0x8a /* jp */;
  423 
  424     case SLJIT_ORDERED_F64:
  425         return 0x8b /* jpo */;
  426     }
  427     return 0;
  428 }
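       /* The returned value is the second opcode byte of the near conditional jump
          form (0x0f 0x8x followed by a 32 bit displacement). The short form is a
          single 0x7x opcode with an 8 bit displacement, which is why
          generate_near_jump_code below subtracts 0x10 when a short jump fits. */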
  429 
  430 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  431 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
  432 #else
  433 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
  434 static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
  435 #endif
  436 
  437 static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
  438 {
  439     sljit_s32 type = jump->flags >> TYPE_SHIFT;
  440     sljit_s32 short_jump;
  441     sljit_uw label_addr;
  442 
  443     if (jump->flags & JUMP_LABEL)
  444         label_addr = (sljit_uw)(code + jump->u.label->size);
  445     else
  446         label_addr = jump->u.target - executable_offset;
  447 
  448     short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
  449 
  450 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  451     if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
  452         return generate_far_jump_code(jump, code_ptr);
  453 #endif
  454 
  455     if (type == SLJIT_JUMP) {
  456         if (short_jump)
  457             *code_ptr++ = JMP_i8;
  458         else
  459             *code_ptr++ = JMP_i32;
  460         jump->addr++;
  461     }
  462     else if (type >= SLJIT_FAST_CALL) {
  463         short_jump = 0;
  464         *code_ptr++ = CALL_i32;
  465         jump->addr++;
  466     }
  467     else if (short_jump) {
  468         *code_ptr++ = get_jump_code(type) - 0x10;
  469         jump->addr++;
  470     }
  471     else {
  472         *code_ptr++ = GROUP_0F;
  473         *code_ptr++ = get_jump_code(type);
  474         jump->addr += 2;
  475     }
  476 
  477     if (short_jump) {
  478         jump->flags |= PATCH_MB;
  479         code_ptr += sizeof(sljit_s8);
  480     } else {
  481         jump->flags |= PATCH_MW;
  482         code_ptr += sizeof(sljit_s32);
  483     }
  484 
  485     return code_ptr;
  486 }
  487 
  488 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
  489 {
  490     struct sljit_memory_fragment *buf;
  491     sljit_u8 *code;
  492     sljit_u8 *code_ptr;
  493     sljit_u8 *buf_ptr;
  494     sljit_u8 *buf_end;
  495     sljit_u8 len;
  496     sljit_sw executable_offset;
  497     sljit_sw jump_addr;
  498 
  499     struct sljit_label *label;
  500     struct sljit_jump *jump;
  501     struct sljit_const *const_;
  502     struct sljit_put_label *put_label;
  503 
  504     CHECK_ERROR_PTR();
  505     CHECK_PTR(check_sljit_generate_code(compiler));
  506     reverse_buf(compiler);
  507 
  508     /* Second code generation pass. */
  509     code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
  510     PTR_FAIL_WITH_EXEC_IF(code);
  511     buf = compiler->buf;
  512 
  513     code_ptr = code;
  514     label = compiler->labels;
  515     jump = compiler->jumps;
  516     const_ = compiler->consts;
  517     put_label = compiler->put_labels;
  518     executable_offset = SLJIT_EXEC_OFFSET(code);
  519 
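           /* The compiler buffers contain a byte stream: a non-zero length byte
              followed by that many bytes of already generated machine code, or a
              zero length byte followed by a marker (0 = label, 1 = jump, 2 = const,
              3 = put_label), which the switch below decodes. */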
  520     do {
  521         buf_ptr = buf->memory;
  522         buf_end = buf_ptr + buf->used_size;
  523         do {
  524             len = *buf_ptr++;
  525             if (len > 0) {
  526                 /* The code is already generated. */
  527                 SLJIT_MEMCPY(code_ptr, buf_ptr, len);
  528                 code_ptr += len;
  529                 buf_ptr += len;
  530             }
  531             else {
  532                 switch (*buf_ptr) {
  533                 case 0:
  534                     label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
  535                     label->size = code_ptr - code;
  536                     label = label->next;
  537                     break;
  538                 case 1:
  539                     jump->addr = (sljit_uw)code_ptr;
  540                     if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
  541                         code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
  542                     else {
  543 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  544                         code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
  545 #else
  546                         code_ptr = generate_far_jump_code(jump, code_ptr);
  547 #endif
  548                     }
  549                     jump = jump->next;
  550                     break;
  551                 case 2:
  552                     const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
  553                     const_ = const_->next;
  554                     break;
  555                 default:
  556                     SLJIT_ASSERT(*buf_ptr == 3);
  557                     SLJIT_ASSERT(put_label->label);
  558                     put_label->addr = (sljit_uw)code_ptr;
  559 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  560                     code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
  561 #endif
  562                     put_label = put_label->next;
  563                     break;
  564                 }
  565                 buf_ptr++;
  566             }
  567         } while (buf_ptr < buf_end);
  568         SLJIT_ASSERT(buf_ptr == buf_end);
  569         buf = buf->next;
  570     } while (buf);
  571 
  572     SLJIT_ASSERT(!label);
  573     SLJIT_ASSERT(!jump);
  574     SLJIT_ASSERT(!const_);
  575     SLJIT_ASSERT(!put_label);
  576     SLJIT_ASSERT(code_ptr <= code + compiler->size);
  577 
  578     jump = compiler->jumps;
  579     while (jump) {
  580         jump_addr = jump->addr + executable_offset;
  581 
  582         if (jump->flags & PATCH_MB) {
  583             SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
  584             *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
  585         } else if (jump->flags & PATCH_MW) {
  586             if (jump->flags & JUMP_LABEL) {
  587 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  588                 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
  589 #else
  590                 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
  591                 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
  592 #endif
  593             }
  594             else {
  595 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  596                 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
  597 #else
  598                 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
  599                 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
  600 #endif
  601             }
  602         }
  603 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  604         else if (jump->flags & PATCH_MD)
  605             sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
  606 #endif
  607 
  608         jump = jump->next;
  609     }
  610 
  611     put_label = compiler->put_labels;
  612     while (put_label) {
  613 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  614         sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
  615 #else
  616         if (put_label->flags & PATCH_MD) {
  617             SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
  618             sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
  619         }
  620         else {
  621             SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
  622             sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
  623         }
  624 #endif
  625 
  626         put_label = put_label->next;
  627     }
  628 
  629     compiler->error = SLJIT_ERR_COMPILED;
  630     compiler->executable_offset = executable_offset;
  631     compiler->executable_size = code_ptr - code;
  632     return (void*)(code + executable_offset);
  633 }
  634 
  635 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
  636 {
  637     switch (feature_type) {
  638     case SLJIT_HAS_FPU:
  639 #ifdef SLJIT_IS_FPU_AVAILABLE
  640         return SLJIT_IS_FPU_AVAILABLE;
  641 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  642         if (cpu_has_sse2 == -1)
  643             get_cpu_features();
  644         return cpu_has_sse2;
  645 #else /* SLJIT_DETECT_SSE2 */
  646         return 1;
  647 #endif /* SLJIT_DETECT_SSE2 */
  648 
  649 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  650     case SLJIT_HAS_VIRTUAL_REGISTERS:
  651         return 1;
  652 #endif
  653 
  654     case SLJIT_HAS_CLZ:
  655     case SLJIT_HAS_CMOV:
  656         if (cpu_has_cmov == -1)
  657             get_cpu_features();
  658         return cpu_has_cmov;
  659 
  660     case SLJIT_HAS_SSE2:
  661 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  662         if (cpu_has_sse2 == -1)
  663             get_cpu_features();
  664         return cpu_has_sse2;
  665 #else
  666         return 1;
  667 #endif
  668 
  669     default:
  670         return 0;
  671     }
  672 }
  673 
  674 /* --------------------------------------------------------------------- */
  675 /*  Operators                                                            */
  676 /* --------------------------------------------------------------------- */
  677 
  678 #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
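       /* BINARY_OPCODE packs the four encodings of a binary ALU operation into one
          32 bit value: the EAX/immediate form in bits 31-24, the reg, r/m form in
          bits 23-16, the r/m, reg form in bits 15-8 and the /digit extension used
          with the immediate group opcodes in bits 7-0. BINARY_OPCODE(ADD), for
          example, is 0x05030100. */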
  679 
  680 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
  681     sljit_u32 op_types,
  682     sljit_s32 dst, sljit_sw dstw,
  683     sljit_s32 src1, sljit_sw src1w,
  684     sljit_s32 src2, sljit_sw src2w);
  685 
  686 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
  687     sljit_u32 op_types,
  688     sljit_s32 dst, sljit_sw dstw,
  689     sljit_s32 src1, sljit_sw src1w,
  690     sljit_s32 src2, sljit_sw src2w);
  691 
  692 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
  693     sljit_s32 dst, sljit_sw dstw,
  694     sljit_s32 src, sljit_sw srcw);
  695 
  696 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
  697     FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
  698 
  699 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
  700     sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
  701 
  702 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
  703     sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
  704 
  705 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  706 #include "sljitNativeX86_32.c"
  707 #else
  708 #include "sljitNativeX86_64.c"
  709 #endif
  710 
  711 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
  712     sljit_s32 dst, sljit_sw dstw,
  713     sljit_s32 src, sljit_sw srcw)
  714 {
  715     sljit_u8* inst;
  716 
  717     SLJIT_ASSERT(dst != SLJIT_UNUSED);
  718 
  719     if (FAST_IS_REG(src)) {
  720         inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
  721         FAIL_IF(!inst);
  722         *inst = MOV_rm_r;
  723         return SLJIT_SUCCESS;
  724     }
  725     if (src & SLJIT_IMM) {
  726         if (FAST_IS_REG(dst)) {
  727 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  728             return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
  729 #else
  730             if (!compiler->mode32) {
  731                 if (NOT_HALFWORD(srcw))
  732                     return emit_load_imm64(compiler, dst, srcw);
  733             }
  734             else
  735                 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
  736 #endif
  737         }
  738 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  739         if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
   740             /* Immediate-to-memory move. Only the SLJIT_MOV operation copies
   741                an immediate directly into memory, so TMP_REG1 can be used. */
  742             FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
  743             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  744             FAIL_IF(!inst);
  745             *inst = MOV_rm_r;
  746             return SLJIT_SUCCESS;
  747         }
  748 #endif
  749         inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
  750         FAIL_IF(!inst);
  751         *inst = MOV_rm_i32;
  752         return SLJIT_SUCCESS;
  753     }
  754     if (FAST_IS_REG(dst)) {
  755         inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
  756         FAIL_IF(!inst);
  757         *inst = MOV_r_rm;
  758         return SLJIT_SUCCESS;
  759     }
  760 
   761     /* Memory-to-memory move. Only the SLJIT_MOV operation copies
   762        data from memory to memory, so TMP_REG1 can be used. */
  763     inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
  764     FAIL_IF(!inst);
  765     *inst = MOV_r_rm;
  766     inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  767     FAIL_IF(!inst);
  768     *inst = MOV_rm_r;
  769     return SLJIT_SUCCESS;
  770 }
  771 
  772 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
  773 {
  774     sljit_u8 *inst;
  775 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  776     sljit_s32 size;
  777 #endif
  778 
  779     CHECK_ERROR();
  780     CHECK(check_sljit_emit_op0(compiler, op));
  781 
  782     switch (GET_OPCODE(op)) {
  783     case SLJIT_BREAKPOINT:
  784         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  785         FAIL_IF(!inst);
  786         INC_SIZE(1);
  787         *inst = INT3;
  788         break;
  789     case SLJIT_NOP:
  790         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  791         FAIL_IF(!inst);
  792         INC_SIZE(1);
  793         *inst = NOP;
  794         break;
  795     case SLJIT_LMUL_UW:
  796     case SLJIT_LMUL_SW:
  797     case SLJIT_DIVMOD_UW:
  798     case SLJIT_DIVMOD_SW:
  799     case SLJIT_DIV_UW:
  800     case SLJIT_DIV_SW:
  801 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  802 #ifdef _WIN64
  803         SLJIT_ASSERT(
  804             reg_map[SLJIT_R0] == 0
  805             && reg_map[SLJIT_R1] == 2
  806             && reg_map[TMP_REG1] > 7);
  807 #else
  808         SLJIT_ASSERT(
  809             reg_map[SLJIT_R0] == 0
  810             && reg_map[SLJIT_R1] < 7
  811             && reg_map[TMP_REG1] == 2);
  812 #endif
  813         compiler->mode32 = op & SLJIT_I32_OP;
  814 #endif
  815         SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
  816 
  817         op = GET_OPCODE(op);
  818         if ((op | 0x2) == SLJIT_DIV_UW) {
  819 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
  820             EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
  821             inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
  822 #else
  823             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
  824 #endif
  825             FAIL_IF(!inst);
  826             *inst = XOR_r_rm;
  827         }
  828 
  829         if ((op | 0x2) == SLJIT_DIV_SW) {
  830 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
  831             EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
  832 #endif
  833 
  834 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  835             inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  836             FAIL_IF(!inst);
  837             INC_SIZE(1);
  838             *inst = CDQ;
  839 #else
  840             if (compiler->mode32) {
  841                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  842                 FAIL_IF(!inst);
  843                 INC_SIZE(1);
  844                 *inst = CDQ;
  845             } else {
  846                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
  847                 FAIL_IF(!inst);
  848                 INC_SIZE(2);
  849                 *inst++ = REX_W;
  850                 *inst = CDQ;
  851             }
  852 #endif
  853         }
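               /* CDQ (0x99) sign extends EAX into EDX:EAX; with a REX.W prefix the
                  same opcode is CQO, which sign extends RAX into RDX:RAX, as
                  required before a signed 64 bit IDIV. */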
  854 
  855 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  856         inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
  857         FAIL_IF(!inst);
  858         INC_SIZE(2);
  859         *inst++ = GROUP_F7;
  860         *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
  861 #else
  862 #ifdef _WIN64
  863         size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
  864 #else
  865         size = (!compiler->mode32) ? 3 : 2;
  866 #endif
  867         inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
  868         FAIL_IF(!inst);
  869         INC_SIZE(size);
  870 #ifdef _WIN64
  871         if (!compiler->mode32)
  872             *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
  873         else if (op >= SLJIT_DIVMOD_UW)
  874             *inst++ = REX_B;
  875         *inst++ = GROUP_F7;
  876         *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
  877 #else
  878         if (!compiler->mode32)
  879             *inst++ = REX_W;
  880         *inst++ = GROUP_F7;
  881         *inst = MOD_REG | reg_map[SLJIT_R1];
  882 #endif
  883 #endif
  884         switch (op) {
  885         case SLJIT_LMUL_UW:
  886             *inst |= MUL;
  887             break;
  888         case SLJIT_LMUL_SW:
  889             *inst |= IMUL;
  890             break;
  891         case SLJIT_DIVMOD_UW:
  892         case SLJIT_DIV_UW:
  893             *inst |= DIV;
  894             break;
  895         case SLJIT_DIVMOD_SW:
  896         case SLJIT_DIV_SW:
  897             *inst |= IDIV;
  898             break;
  899         }
  900 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
  901         if (op <= SLJIT_DIVMOD_SW)
  902             EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
  903 #else
  904         if (op >= SLJIT_DIV_UW)
  905             EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
  906 #endif
  907         break;
  908     }
  909 
  910     return SLJIT_SUCCESS;
  911 }
  912 
  913 #define ENCODE_PREFIX(prefix) \
  914     do { \
  915         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
  916         FAIL_IF(!inst); \
  917         INC_SIZE(1); \
  918         *inst = (prefix); \
  919     } while (0)
  920 
  921 static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
  922     sljit_s32 dst, sljit_sw dstw,
  923     sljit_s32 src, sljit_sw srcw)
  924 {
  925     sljit_u8* inst;
  926     sljit_s32 dst_r;
  927 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  928     sljit_s32 work_r;
  929 #endif
  930 
  931 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  932     compiler->mode32 = 0;
  933 #endif
  934 
  935     if (src & SLJIT_IMM) {
  936         if (FAST_IS_REG(dst)) {
  937 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  938             return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
  939 #else
  940             inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
  941             FAIL_IF(!inst);
  942             *inst = MOV_rm_i32;
  943             return SLJIT_SUCCESS;
  944 #endif
  945         }
  946         inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
  947         FAIL_IF(!inst);
  948         *inst = MOV_rm8_i8;
  949         return SLJIT_SUCCESS;
  950     }
  951 
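           /* On x86-32 only the registers with reg_map values 0-3 (eax, ecx, edx,
              ebx) have byte sized forms, which is why the code below checks
              reg_map[...] >= 4 and, when needed, moves the value through such a
              register first. */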
  952     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
  953 
  954     if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
  955 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  956         if (reg_map[src] >= 4) {
  957             SLJIT_ASSERT(dst_r == TMP_REG1);
  958             EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
  959         } else
  960             dst_r = src;
  961 #else
  962         dst_r = src;
  963 #endif
  964     }
  965 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  966     else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
  967         /* src, dst are registers. */
  968         SLJIT_ASSERT(SLOW_IS_REG(dst));
  969         if (reg_map[dst] < 4) {
  970             if (dst != src)
  971                 EMIT_MOV(compiler, dst, 0, src, 0);
  972             inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
  973             FAIL_IF(!inst);
  974             *inst++ = GROUP_0F;
  975             *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
  976         }
  977         else {
  978             if (dst != src)
  979                 EMIT_MOV(compiler, dst, 0, src, 0);
  980             if (sign) {
  981                 /* shl reg, 24 */
  982                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  983                 FAIL_IF(!inst);
  984                 *inst |= SHL;
  985                 /* sar reg, 24 */
  986                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  987                 FAIL_IF(!inst);
  988                 *inst |= SAR;
  989             }
  990             else {
  991                 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
  992                 FAIL_IF(!inst);
  993                 *(inst + 1) |= AND;
  994             }
  995         }
  996         return SLJIT_SUCCESS;
  997     }
  998 #endif
  999     else {
  1000         /* src is either a memory operand or a register with reg_map[src] < 4 on x86-32. */
 1001         inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
 1002         FAIL_IF(!inst);
 1003         *inst++ = GROUP_0F;
 1004         *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
 1005     }
 1006 
 1007     if (dst & SLJIT_MEM) {
 1008 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1009         if (dst_r == TMP_REG1) {
  1010             /* Find an unused register whose reg_map value is < 4. */
 1011             if ((dst & REG_MASK) == SLJIT_R0) {
 1012                 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
 1013                     work_r = SLJIT_R2;
 1014                 else
 1015                     work_r = SLJIT_R1;
 1016             }
 1017             else {
 1018                 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
 1019                     work_r = SLJIT_R0;
 1020                 else if ((dst & REG_MASK) == SLJIT_R1)
 1021                     work_r = SLJIT_R2;
 1022                 else
 1023                     work_r = SLJIT_R1;
 1024             }
 1025 
 1026             if (work_r == SLJIT_R0) {
 1027                 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
 1028             }
 1029             else {
 1030                 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
 1031                 FAIL_IF(!inst);
 1032                 *inst = XCHG_r_rm;
 1033             }
 1034 
 1035             inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
 1036             FAIL_IF(!inst);
 1037             *inst = MOV_rm8_r8;
 1038 
 1039             if (work_r == SLJIT_R0) {
 1040                 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
 1041             }
 1042             else {
 1043                 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
 1044                 FAIL_IF(!inst);
 1045                 *inst = XCHG_r_rm;
 1046             }
 1047         }
 1048         else {
 1049             inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
 1050             FAIL_IF(!inst);
 1051             *inst = MOV_rm8_r8;
 1052         }
 1053 #else
 1054         inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
 1055         FAIL_IF(!inst);
 1056         *inst = MOV_rm8_r8;
 1057 #endif
 1058     }
 1059 
 1060     return SLJIT_SUCCESS;
 1061 }
 1062 
 1063 static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
 1064     sljit_s32 src, sljit_sw srcw)
 1065 {
 1066     sljit_u8* inst;
 1067 
 1068 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1069     compiler->mode32 = 1;
 1070 #endif
 1071 
 1072     inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
 1073     FAIL_IF(!inst);
 1074     *inst++ = GROUP_0F;
 1075     *inst++ = PREFETCH;
 1076 
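           /* The ModRM reg field of the 0x0f 0x18 opcode selects the prefetch hint:
              /1 is prefetcht0, /2 is prefetcht1 and /3 is prefetcht2 (/0 would be
              prefetchnta, which is not used here). */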
 1077     if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
 1078         *inst |= (3 << 3);
 1079     else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
 1080         *inst |= (2 << 3);
 1081     else
 1082         *inst |= (1 << 3);
 1083 
 1084     return SLJIT_SUCCESS;
 1085 }
 1086 
 1087 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
 1088     sljit_s32 dst, sljit_sw dstw,
 1089     sljit_s32 src, sljit_sw srcw)
 1090 {
 1091     sljit_u8* inst;
 1092     sljit_s32 dst_r;
 1093 
 1094 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1095     compiler->mode32 = 0;
 1096 #endif
 1097 
 1098     if (src & SLJIT_IMM) {
 1099         if (FAST_IS_REG(dst)) {
 1100 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1101             return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
 1102 #else
 1103             inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
 1104             FAIL_IF(!inst);
 1105             *inst = MOV_rm_i32;
 1106             return SLJIT_SUCCESS;
 1107 #endif
 1108         }
 1109         inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
 1110         FAIL_IF(!inst);
 1111         *inst = MOV_rm_i32;
 1112         return SLJIT_SUCCESS;
 1113     }
 1114 
 1115     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 1116 
 1117     if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
 1118         dst_r = src;
 1119     else {
 1120         inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
 1121         FAIL_IF(!inst);
 1122         *inst++ = GROUP_0F;
 1123         *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
 1124     }
 1125 
 1126     if (dst & SLJIT_MEM) {
 1127         inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
 1128         FAIL_IF(!inst);
 1129         *inst = MOV_rm_r;
 1130     }
 1131 
 1132     return SLJIT_SUCCESS;
 1133 }
 1134 
 1135 static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
 1136     sljit_s32 dst, sljit_sw dstw,
 1137     sljit_s32 src, sljit_sw srcw)
 1138 {
 1139     sljit_u8* inst;
 1140 
 1141     if (dst == src && dstw == srcw) {
 1142         /* Same input and output */
 1143         inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
 1144         FAIL_IF(!inst);
 1145         *inst++ = GROUP_F7;
 1146         *inst |= opcode;
 1147         return SLJIT_SUCCESS;
 1148     }
 1149 
 1150     if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
 1151         dst = TMP_REG1;
 1152 
 1153     if (FAST_IS_REG(dst)) {
 1154         EMIT_MOV(compiler, dst, 0, src, srcw);
 1155         inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
 1156         FAIL_IF(!inst);
 1157         *inst++ = GROUP_F7;
 1158         *inst |= opcode;
 1159         return SLJIT_SUCCESS;
 1160     }
 1161 
 1162     EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
 1163     inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
 1164     FAIL_IF(!inst);
 1165     *inst++ = GROUP_F7;
 1166     *inst |= opcode;
 1167     EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1168     return SLJIT_SUCCESS;
 1169 }
 1170 
 1171 static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
 1172     sljit_s32 dst, sljit_sw dstw,
 1173     sljit_s32 src, sljit_sw srcw)
 1174 {
 1175     sljit_u8* inst;
 1176 
 1177     if (dst == SLJIT_UNUSED)
 1178         dst = TMP_REG1;
 1179 
 1180     if (FAST_IS_REG(dst)) {
 1181         EMIT_MOV(compiler, dst, 0, src, srcw);
 1182         inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
 1183         FAIL_IF(!inst);
 1184         *inst++ = GROUP_F7;
 1185         *inst |= NOT_rm;
 1186         inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
 1187         FAIL_IF(!inst);
 1188         *inst = OR_r_rm;
 1189         return SLJIT_SUCCESS;
 1190     }
 1191 
 1192     EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
 1193     inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
 1194     FAIL_IF(!inst);
 1195     *inst++ = GROUP_F7;
 1196     *inst |= NOT_rm;
 1197     inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
 1198     FAIL_IF(!inst);
 1199     *inst = OR_r_rm;
 1200     EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1201     return SLJIT_SUCCESS;
 1202 }
 1203 
 1204 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1205 static const sljit_sw emit_clz_arg = 32 + 31;
 1206 #endif
 1207 
 1208 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
 1209     sljit_s32 dst, sljit_sw dstw,
 1210     sljit_s32 src, sljit_sw srcw)
 1211 {
 1212     sljit_u8* inst;
 1213     sljit_s32 dst_r;
 1214 
 1215     SLJIT_UNUSED_ARG(op_flags);
 1216 
 1217     if (cpu_has_cmov == -1)
 1218         get_cpu_features();
 1219 
 1220     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 1221 
 1222     inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
 1223     FAIL_IF(!inst);
 1224     *inst++ = GROUP_0F;
 1225     *inst = BSR_r_rm;
 1226 
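           /* BSR returns the index of the highest set bit, so for a non-zero input
              clz(x) == (width - 1) - bsr(x) == bsr(x) ^ (width - 1), which is the
              final XOR with 31 (or 63) below. For a zero input BSR leaves the
              destination undefined and sets ZF, so the CMOV (or generic cmov) path
              loads 32 + 31 (or 64 + 63), which the XOR then turns into the full
              width. */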
 1227 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1228     if (cpu_has_cmov) {
 1229         if (dst_r != TMP_REG1) {
 1230             EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
 1231             inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
 1232         }
 1233         else
 1234             inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
 1235 
 1236         FAIL_IF(!inst);
 1237         *inst++ = GROUP_0F;
 1238         *inst = CMOVE_r_rm;
 1239     }
 1240     else
 1241         FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
 1242 
 1243     inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
 1244 #else
 1245     if (cpu_has_cmov) {
 1246         EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
 1247 
 1248         inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1249         FAIL_IF(!inst);
 1250         *inst++ = GROUP_0F;
 1251         *inst = CMOVE_r_rm;
 1252     }
 1253     else
 1254         FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
 1255 
 1256     inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
 1257 #endif
 1258 
 1259     FAIL_IF(!inst);
 1260     *(inst + 1) |= XOR;
 1261 
 1262     if (dst & SLJIT_MEM)
 1263         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1264     return SLJIT_SUCCESS;
 1265 }
 1266 
 1267 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
 1268     sljit_s32 dst, sljit_sw dstw,
 1269     sljit_s32 src, sljit_sw srcw)
 1270 {
 1271     sljit_s32 op_flags = GET_ALL_FLAGS(op);
 1272 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1273     sljit_s32 dst_is_ereg = 0;
 1274 #endif
 1275 
 1276     CHECK_ERROR();
 1277     CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
 1278     ADJUST_LOCAL_OFFSET(dst, dstw);
 1279     ADJUST_LOCAL_OFFSET(src, srcw);
 1280 
 1281     CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
 1282     CHECK_EXTRA_REGS(src, srcw, (void)0);
 1283 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1284     compiler->mode32 = op_flags & SLJIT_I32_OP;
 1285 #endif
 1286 
 1287     if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
 1288         if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
 1289             return emit_prefetch(compiler, op, src, srcw);
 1290         return SLJIT_SUCCESS;
 1291     }
 1292 
 1293     op = GET_OPCODE(op);
 1294 
 1295     if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
 1296 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1297         compiler->mode32 = 0;
 1298 #endif
 1299 
 1300         if (FAST_IS_REG(src) && src == dst) {
 1301             if (!TYPE_CAST_NEEDED(op))
 1302                 return SLJIT_SUCCESS;
 1303         }
 1304 
 1305         if (op_flags & SLJIT_I32_OP) {
 1306 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1307             if (src & SLJIT_MEM) {
 1308                 if (op == SLJIT_MOV_S32)
 1309                     op = SLJIT_MOV_U32;
 1310             }
 1311             else if (src & SLJIT_IMM) {
 1312                 if (op == SLJIT_MOV_U32)
 1313                     op = SLJIT_MOV_S32;
 1314             }
 1315 #endif
 1316         }
 1317 
 1318         if (src & SLJIT_IMM) {
 1319             switch (op) {
 1320             case SLJIT_MOV_U8:
 1321                 srcw = (sljit_u8)srcw;
 1322                 break;
 1323             case SLJIT_MOV_S8:
 1324                 srcw = (sljit_s8)srcw;
 1325                 break;
 1326             case SLJIT_MOV_U16:
 1327                 srcw = (sljit_u16)srcw;
 1328                 break;
 1329             case SLJIT_MOV_S16:
 1330                 srcw = (sljit_s16)srcw;
 1331                 break;
 1332 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1333             case SLJIT_MOV_U32:
 1334                 srcw = (sljit_u32)srcw;
 1335                 break;
 1336             case SLJIT_MOV_S32:
 1337                 srcw = (sljit_s32)srcw;
 1338                 break;
 1339 #endif
 1340             }
 1341 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1342             if (SLJIT_UNLIKELY(dst_is_ereg))
 1343                 return emit_mov(compiler, dst, dstw, src, srcw);
 1344 #endif
 1345         }
 1346 
 1347 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1348         if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
 1349             SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
 1350             dst = TMP_REG1;
 1351         }
 1352 #endif
 1353 
 1354         switch (op) {
 1355         case SLJIT_MOV:
 1356         case SLJIT_MOV_P:
 1357 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1358         case SLJIT_MOV_U32:
 1359         case SLJIT_MOV_S32:
 1360 #endif
 1361             FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
 1362             break;
 1363         case SLJIT_MOV_U8:
 1364             FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
 1365             break;
 1366         case SLJIT_MOV_S8:
 1367             FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
 1368             break;
 1369         case SLJIT_MOV_U16:
 1370             FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
 1371             break;
 1372         case SLJIT_MOV_S16:
 1373             FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
 1374             break;
 1375 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1376         case SLJIT_MOV_U32:
 1377             FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
 1378             break;
 1379         case SLJIT_MOV_S32:
 1380             FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
 1381             break;
 1382 #endif
 1383         }
 1384 
 1385 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1386         if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
 1387             return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
 1388 #endif
 1389         return SLJIT_SUCCESS;
 1390     }
 1391 
 1392     switch (op) {
 1393     case SLJIT_NOT:
 1394         if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
 1395             return emit_not_with_flags(compiler, dst, dstw, src, srcw);
 1396         return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
 1397 
 1398     case SLJIT_NEG:
 1399         return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
 1400 
 1401     case SLJIT_CLZ:
 1402         return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
 1403     }
 1404 
 1405     return SLJIT_SUCCESS;
 1406 }
 1407 
 1408 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1409 
 1410 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
 1411     if (IS_HALFWORD(immw) || compiler->mode32) { \
 1412         inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
 1413         FAIL_IF(!inst); \
 1414         *(inst + 1) |= (op_imm); \
 1415     } \
 1416     else { \
 1417         FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
 1418         inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
 1419         FAIL_IF(!inst); \
 1420         *inst = (op_mr); \
 1421     }
 1422 
 1423 #define BINARY_EAX_IMM(op_eax_imm, immw) \
 1424     FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
 1425 
 1426 #else
 1427 
 1428 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
 1429     inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
 1430     FAIL_IF(!inst); \
 1431     *(inst + 1) |= (op_imm);
 1432 
 1433 #define BINARY_EAX_IMM(op_eax_imm, immw) \
 1434     FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
 1435 
 1436 #endif
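       /* BINARY_IMM emits an instruction from the immediate group (0x81, or 0x83 for
          a sign extended 8 bit immediate) and ors the /digit selecting the operation
          into the ModRM byte produced by emit_x86_instruction. On x86-64 an immediate
          outside the sign extended 32 bit range cannot be encoded at all, so it is
          first loaded into a free temporary register and the r/m, reg form of the
          opcode is used instead. */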
 1437 
 1438 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
 1439     sljit_u32 op_types,
 1440     sljit_s32 dst, sljit_sw dstw,
 1441     sljit_s32 src1, sljit_sw src1w,
 1442     sljit_s32 src2, sljit_sw src2w)
 1443 {
 1444     sljit_u8* inst;
 1445     sljit_u8 op_eax_imm = (op_types >> 24);
 1446     sljit_u8 op_rm = (op_types >> 16) & 0xff;
 1447     sljit_u8 op_mr = (op_types >> 8) & 0xff;
 1448     sljit_u8 op_imm = op_types & 0xff;
 1449 
 1450     if (dst == SLJIT_UNUSED) {
 1451         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1452         if (src2 & SLJIT_IMM) {
 1453             BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
 1454         }
 1455         else {
 1456             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1457             FAIL_IF(!inst);
 1458             *inst = op_rm;
 1459         }
 1460         return SLJIT_SUCCESS;
 1461     }
 1462 
 1463     if (dst == src1 && dstw == src1w) {
 1464         if (src2 & SLJIT_IMM) {
 1465 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1466             if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1467 #else
 1468             if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
 1469 #endif
 1470                 BINARY_EAX_IMM(op_eax_imm, src2w);
 1471             }
 1472             else {
 1473                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
 1474             }
 1475         }
 1476         else if (FAST_IS_REG(dst)) {
 1477             inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
 1478             FAIL_IF(!inst);
 1479             *inst = op_rm;
 1480         }
 1481         else if (FAST_IS_REG(src2)) {
 1482             /* Special exception for sljit_emit_op_flags. */
 1483             inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
 1484             FAIL_IF(!inst);
 1485             *inst = op_mr;
 1486         }
 1487         else {
 1488             EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
 1489             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
 1490             FAIL_IF(!inst);
 1491             *inst = op_mr;
 1492         }
 1493         return SLJIT_SUCCESS;
 1494     }
 1495 
 1496     /* Only for cumulative operations. */
 1497     if (dst == src2 && dstw == src2w) {
 1498         if (src1 & SLJIT_IMM) {
 1499 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1500             if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1501 #else
 1502             if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
 1503 #endif
 1504                 BINARY_EAX_IMM(op_eax_imm, src1w);
 1505             }
 1506             else {
 1507                 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
 1508             }
 1509         }
 1510         else if (FAST_IS_REG(dst)) {
 1511             inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
 1512             FAIL_IF(!inst);
 1513             *inst = op_rm;
 1514         }
 1515         else if (FAST_IS_REG(src1)) {
 1516             inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
 1517             FAIL_IF(!inst);
 1518             *inst = op_mr;
 1519         }
 1520         else {
 1521             EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1522             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
 1523             FAIL_IF(!inst);
 1524             *inst = op_mr;
 1525         }
 1526         return SLJIT_SUCCESS;
 1527     }
 1528 
 1529     /* General version. */
 1530     if (FAST_IS_REG(dst)) {
 1531         EMIT_MOV(compiler, dst, 0, src1, src1w);
 1532         if (src2 & SLJIT_IMM) {
 1533             BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
 1534         }
 1535         else {
 1536             inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
 1537             FAIL_IF(!inst);
 1538             *inst = op_rm;
 1539         }
 1540     }
 1541     else {
 1542         /* This version requires fewer memory writes. */
 1543         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1544         if (src2 & SLJIT_IMM) {
 1545             BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
 1546         }
 1547         else {
 1548             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1549             FAIL_IF(!inst);
 1550             *inst = op_rm;
 1551         }
 1552         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1553     }
 1554 
 1555     return SLJIT_SUCCESS;
 1556 }
 1557 
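      /* Same structure as emit_cum_binary, but for the non-commutative
         operations (SUB, SBB): the operands cannot be swapped, so there is
         no dst == src2 shortcut and the general version below only reuses
         dst directly when it does not alias src2. */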
 1558 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
 1559     sljit_u32 op_types,
 1560     sljit_s32 dst, sljit_sw dstw,
 1561     sljit_s32 src1, sljit_sw src1w,
 1562     sljit_s32 src2, sljit_sw src2w)
 1563 {
 1564     sljit_u8* inst;
 1565     sljit_u8 op_eax_imm = (op_types >> 24);
 1566     sljit_u8 op_rm = (op_types >> 16) & 0xff;
 1567     sljit_u8 op_mr = (op_types >> 8) & 0xff;
 1568     sljit_u8 op_imm = op_types & 0xff;
 1569 
 1570     if (dst == SLJIT_UNUSED) {
 1571         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1572         if (src2 & SLJIT_IMM) {
 1573             BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
 1574         }
 1575         else {
 1576             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1577             FAIL_IF(!inst);
 1578             *inst = op_rm;
 1579         }
 1580         return SLJIT_SUCCESS;
 1581     }
 1582 
 1583     if (dst == src1 && dstw == src1w) {
 1584         if (src2 & SLJIT_IMM) {
 1585 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1586             if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1587 #else
 1588             if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
 1589 #endif
 1590                 BINARY_EAX_IMM(op_eax_imm, src2w);
 1591             }
 1592             else {
 1593                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
 1594             }
 1595         }
 1596         else if (FAST_IS_REG(dst)) {
 1597             inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
 1598             FAIL_IF(!inst);
 1599             *inst = op_rm;
 1600         }
 1601         else if (FAST_IS_REG(src2)) {
 1602             inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
 1603             FAIL_IF(!inst);
 1604             *inst = op_mr;
 1605         }
 1606         else {
 1607             EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
 1608             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
 1609             FAIL_IF(!inst);
 1610             *inst = op_mr;
 1611         }
 1612         return SLJIT_SUCCESS;
 1613     }
 1614 
 1615     /* General version. */
 1616     if (FAST_IS_REG(dst) && dst != src2) {
 1617         EMIT_MOV(compiler, dst, 0, src1, src1w);
 1618         if (src2 & SLJIT_IMM) {
 1619             BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
 1620         }
 1621         else {
 1622             inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
 1623             FAIL_IF(!inst);
 1624             *inst = op_rm;
 1625         }
 1626     }
 1627     else {
 1628         /* This version requires fewer memory writes. */
 1629         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1630         if (src2 & SLJIT_IMM) {
 1631             BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
 1632         }
 1633         else {
 1634             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1635             FAIL_IF(!inst);
 1636             *inst = op_rm;
 1637         }
 1638         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1639     }
 1640 
 1641     return SLJIT_SUCCESS;
 1642 }
 1643 
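      /* Multiplication is emitted with the two-operand IMUL forms:
         IMUL r, r/m (0F AF), IMUL r, r/m, imm8 (6B) and IMUL r, r/m, imm32
         (69). On x86-64 an immediate that does not fit in 32 bits is first
         loaded into TMP_REG2. */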
 1644 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
 1645     sljit_s32 dst, sljit_sw dstw,
 1646     sljit_s32 src1, sljit_sw src1w,
 1647     sljit_s32 src2, sljit_sw src2w)
 1648 {
 1649     sljit_u8* inst;
 1650     sljit_s32 dst_r;
 1651 
 1652     dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
 1653 
 1654     /* Register destination. */
 1655     if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
 1656         inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
 1657         FAIL_IF(!inst);
 1658         *inst++ = GROUP_0F;
 1659         *inst = IMUL_r_rm;
 1660     }
 1661     else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
 1662         inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
 1663         FAIL_IF(!inst);
 1664         *inst++ = GROUP_0F;
 1665         *inst = IMUL_r_rm;
 1666     }
 1667     else if (src1 & SLJIT_IMM) {
 1668         if (src2 & SLJIT_IMM) {
 1669             EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
 1670             src2 = dst_r;
 1671             src2w = 0;
 1672         }
 1673 
 1674         if (src1w <= 127 && src1w >= -128) {
 1675             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1676             FAIL_IF(!inst);
 1677             *inst = IMUL_r_rm_i8;
 1678             inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
 1679             FAIL_IF(!inst);
 1680             INC_SIZE(1);
 1681             *inst = (sljit_s8)src1w;
 1682         }
 1683 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1684         else {
 1685             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1686             FAIL_IF(!inst);
 1687             *inst = IMUL_r_rm_i32;
 1688             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 1689             FAIL_IF(!inst);
 1690             INC_SIZE(4);
 1691             sljit_unaligned_store_sw(inst, src1w);
 1692         }
 1693 #else
 1694         else if (IS_HALFWORD(src1w)) {
 1695             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1696             FAIL_IF(!inst);
 1697             *inst = IMUL_r_rm_i32;
 1698             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 1699             FAIL_IF(!inst);
 1700             INC_SIZE(4);
 1701             sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
 1702         }
 1703         else {
 1704             if (dst_r != src2)
 1705                 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
 1706             FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
 1707             inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1708             FAIL_IF(!inst);
 1709             *inst++ = GROUP_0F;
 1710             *inst = IMUL_r_rm;
 1711         }
 1712 #endif
 1713     }
 1714     else if (src2 & SLJIT_IMM) {
 1715         /* Note: src1 is NOT immediate. */
 1716 
 1717         if (src2w <= 127 && src2w >= -128) {
 1718             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1719             FAIL_IF(!inst);
 1720             *inst = IMUL_r_rm_i8;
 1721             inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
 1722             FAIL_IF(!inst);
 1723             INC_SIZE(1);
 1724             *inst = (sljit_s8)src2w;
 1725         }
 1726 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1727         else {
 1728             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1729             FAIL_IF(!inst);
 1730             *inst = IMUL_r_rm_i32;
 1731             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 1732             FAIL_IF(!inst);
 1733             INC_SIZE(4);
 1734             sljit_unaligned_store_sw(inst, src2w);
 1735         }
 1736 #else
 1737         else if (IS_HALFWORD(src2w)) {
 1738             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1739             FAIL_IF(!inst);
 1740             *inst = IMUL_r_rm_i32;
 1741             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 1742             FAIL_IF(!inst);
 1743             INC_SIZE(4);
 1744             sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
 1745         }
 1746         else {
 1747             if (dst_r != src1)
 1748                 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
 1749             FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
 1750             inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1751             FAIL_IF(!inst);
 1752             *inst++ = GROUP_0F;
 1753             *inst = IMUL_r_rm;
 1754         }
 1755 #endif
 1756     }
 1757     else {
 1758         /* Neither argument is immediate. */
 1759         if (ADDRESSING_DEPENDS_ON(src2, dst_r))
 1760             dst_r = TMP_REG1;
 1761         EMIT_MOV(compiler, dst_r, 0, src1, src1w);
 1762         inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
 1763         FAIL_IF(!inst);
 1764         *inst++ = GROUP_0F;
 1765         *inst = IMUL_r_rm;
 1766     }
 1767 
 1768     if (dst & SLJIT_MEM)
 1769         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 1770 
 1771     return SLJIT_SUCCESS;
 1772 }
 1773 
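      /* Flag-less additions are attempted with LEA, which can add two
         registers or a register and an immediate in one instruction without
         clobbering the flags. SLJIT_ERR_UNSUPPORTED tells the caller to fall
         back to the ordinary ADD/SUB path. */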
 1774 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
 1775     sljit_s32 dst, sljit_sw dstw,
 1776     sljit_s32 src1, sljit_sw src1w,
 1777     sljit_s32 src2, sljit_sw src2w)
 1778 {
 1779     sljit_u8* inst;
 1780     sljit_s32 dst_r, done = 0;
 1781 
 1782     /* These cases are better left to be handled the normal way. */
 1783     if (dst == src1 && dstw == src1w)
 1784         return SLJIT_ERR_UNSUPPORTED;
 1785     if (dst == src2 && dstw == src2w)
 1786         return SLJIT_ERR_UNSUPPORTED;
 1787 
 1788     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 1789 
 1790     if (FAST_IS_REG(src1)) {
 1791         if (FAST_IS_REG(src2)) {
 1792             inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
 1793             FAIL_IF(!inst);
 1794             *inst = LEA_r_m;
 1795             done = 1;
 1796         }
 1797 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1798         if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1799             inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
 1800 #else
 1801         if (src2 & SLJIT_IMM) {
 1802             inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
 1803 #endif
 1804             FAIL_IF(!inst);
 1805             *inst = LEA_r_m;
 1806             done = 1;
 1807         }
 1808     }
 1809     else if (FAST_IS_REG(src2)) {
 1810 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1811         if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1812             inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
 1813 #else
 1814         if (src1 & SLJIT_IMM) {
 1815             inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
 1816 #endif
 1817             FAIL_IF(!inst);
 1818             *inst = LEA_r_m;
 1819             done = 1;
 1820         }
 1821     }
 1822 
 1823     if (done) {
 1824         if (dst_r == TMP_REG1)
 1825             return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 1826         return SLJIT_SUCCESS;
 1827     }
 1828     return SLJIT_ERR_UNSUPPORTED;
 1829 }
 1830 
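      /* Compare without storing the result. The short CMP EAX, imm32 form is
         used when the first operand is SLJIT_R0 and the immediate does not
         fit in a sign-extended byte (for byte-sized immediates the imm8
         group encoding is smaller anyway). */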
 1831 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
 1832     sljit_s32 src1, sljit_sw src1w,
 1833     sljit_s32 src2, sljit_sw src2w)
 1834 {
 1835     sljit_u8* inst;
 1836 
 1837 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1838     if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1839 #else
 1840     if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
 1841 #endif
 1842         BINARY_EAX_IMM(CMP_EAX_i32, src2w);
 1843         return SLJIT_SUCCESS;
 1844     }
 1845 
 1846     if (FAST_IS_REG(src1)) {
 1847         if (src2 & SLJIT_IMM) {
 1848             BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
 1849         }
 1850         else {
 1851             inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
 1852             FAIL_IF(!inst);
 1853             *inst = CMP_r_rm;
 1854         }
 1855         return SLJIT_SUCCESS;
 1856     }
 1857 
 1858     if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
 1859         inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
 1860         FAIL_IF(!inst);
 1861         *inst = CMP_rm_r;
 1862         return SLJIT_SUCCESS;
 1863     }
 1864 
 1865     if (src2 & SLJIT_IMM) {
 1866         if (src1 & SLJIT_IMM) {
 1867             EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1868             src1 = TMP_REG1;
 1869             src1w = 0;
 1870         }
 1871         BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
 1872     }
 1873     else {
 1874         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1875         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1876         FAIL_IF(!inst);
 1877         *inst = CMP_r_rm;
 1878     }
 1879     return SLJIT_SUCCESS;
 1880 }
 1881 
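      /* AND without storing the result is mapped to TEST. TEST is
         commutative and only has a reg/mem, reg direction, so either operand
         may supply the register; immediates use the F7 group encoding. */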
 1882 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
 1883     sljit_s32 src1, sljit_sw src1w,
 1884     sljit_s32 src2, sljit_sw src2w)
 1885 {
 1886     sljit_u8* inst;
 1887 
 1888 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1889     if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1890 #else
 1891     if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
 1892 #endif
 1893         BINARY_EAX_IMM(TEST_EAX_i32, src2w);
 1894         return SLJIT_SUCCESS;
 1895     }
 1896 
 1897 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1898     if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1899 #else
 1900     if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
 1901 #endif
 1902         BINARY_EAX_IMM(TEST_EAX_i32, src1w);
 1903         return SLJIT_SUCCESS;
 1904     }
 1905 
 1906     if (!(src1 & SLJIT_IMM)) {
 1907         if (src2 & SLJIT_IMM) {
 1908 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1909             if (IS_HALFWORD(src2w) || compiler->mode32) {
 1910                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
 1911                 FAIL_IF(!inst);
 1912                 *inst = GROUP_F7;
 1913             }
 1914             else {
 1915                 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
 1916                 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
 1917                 FAIL_IF(!inst);
 1918                 *inst = TEST_rm_r;
 1919             }
 1920 #else
 1921             inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
 1922             FAIL_IF(!inst);
 1923             *inst = GROUP_F7;
 1924 #endif
 1925             return SLJIT_SUCCESS;
 1926         }
 1927         else if (FAST_IS_REG(src1)) {
 1928             inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
 1929             FAIL_IF(!inst);
 1930             *inst = TEST_rm_r;
 1931             return SLJIT_SUCCESS;
 1932         }
 1933     }
 1934 
 1935     if (!(src2 & SLJIT_IMM)) {
 1936         if (src1 & SLJIT_IMM) {
 1937 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1938             if (IS_HALFWORD(src1w) || compiler->mode32) {
 1939                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
 1940                 FAIL_IF(!inst);
 1941                 *inst = GROUP_F7;
 1942             }
 1943             else {
 1944                 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
 1945                 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1946                 FAIL_IF(!inst);
 1947                 *inst = TEST_rm_r;
 1948             }
 1949 #else
 1950             inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
 1951             FAIL_IF(!inst);
 1952             *inst = GROUP_F7;
 1953 #endif
 1954             return SLJIT_SUCCESS;
 1955         }
 1956         else if (FAST_IS_REG(src2)) {
 1957             inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
 1958             FAIL_IF(!inst);
 1959             *inst = TEST_rm_r;
 1960             return SLJIT_SUCCESS;
 1961         }
 1962     }
 1963 
 1964     EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 1965     if (src2 & SLJIT_IMM) {
 1966 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1967         if (IS_HALFWORD(src2w) || compiler->mode32) {
 1968             inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
 1969             FAIL_IF(!inst);
 1970             *inst = GROUP_F7;
 1971         }
 1972         else {
 1973             FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
 1974             inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
 1975             FAIL_IF(!inst);
 1976             *inst = TEST_rm_r;
 1977         }
 1978 #else
 1979         inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
 1980         FAIL_IF(!inst);
 1981         *inst = GROUP_F7;
 1982 #endif
 1983     }
 1984     else {
 1985         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
 1986         FAIL_IF(!inst);
 1987         *inst = TEST_rm_r;
 1988     }
 1989     return SLJIT_SUCCESS;
 1990 }
 1991 
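      /* Variable shift counts must be in CL, so when src2 is neither an
         immediate nor SLJIT_PREF_SHIFT_REG (ecx) the count is moved into ecx
         and the previous ecx value is saved and restored around the shift. */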
 1992 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
 1993     sljit_u8 mode,
 1994     sljit_s32 dst, sljit_sw dstw,
 1995     sljit_s32 src1, sljit_sw src1w,
 1996     sljit_s32 src2, sljit_sw src2w)
 1997 {
 1998     sljit_u8* inst;
 1999 
 2000     if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
 2001         if (dst == src1 && dstw == src1w) {
 2002             inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
 2003             FAIL_IF(!inst);
 2004             *inst |= mode;
 2005             return SLJIT_SUCCESS;
 2006         }
 2007         if (dst == SLJIT_UNUSED) {
 2008             EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 2009             inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
 2010             FAIL_IF(!inst);
 2011             *inst |= mode;
 2012             return SLJIT_SUCCESS;
 2013         }
 2014         if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
 2015             EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 2016             inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2017             FAIL_IF(!inst);
 2018             *inst |= mode;
 2019             EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2020             return SLJIT_SUCCESS;
 2021         }
 2022         if (FAST_IS_REG(dst)) {
 2023             EMIT_MOV(compiler, dst, 0, src1, src1w);
 2024             inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
 2025             FAIL_IF(!inst);
 2026             *inst |= mode;
 2027             return SLJIT_SUCCESS;
 2028         }
 2029 
 2030         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 2031         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
 2032         FAIL_IF(!inst);
 2033         *inst |= mode;
 2034         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 2035         return SLJIT_SUCCESS;
 2036     }
 2037 
 2038     if (dst == SLJIT_PREF_SHIFT_REG) {
 2039         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 2040         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 2041         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2042         FAIL_IF(!inst);
 2043         *inst |= mode;
 2044         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2045     }
 2046     else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
 2047         if (src1 != dst)
 2048             EMIT_MOV(compiler, dst, 0, src1, src1w);
 2049         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
 2050         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 2051         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
 2052         FAIL_IF(!inst);
 2053         *inst |= mode;
 2054         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2055     }
 2056     else {
 2057         /* This case is complex since ecx itself may be used for
 2058            addressing, and this case must be supported as well. */
 2059         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 2060 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2061         EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
 2062         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 2063         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2064         FAIL_IF(!inst);
 2065         *inst |= mode;
 2066         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
 2067 #else
 2068         EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
 2069         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 2070         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 2071         FAIL_IF(!inst);
 2072         *inst |= mode;
 2073         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
 2074 #endif
 2075         if (dst != SLJIT_UNUSED)
 2076             return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 2077     }
 2078 
 2079     return SLJIT_SUCCESS;
 2080 }
 2081 
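      /* The hardware masks the shift count to 5 bits (6 bits for 64-bit
         operands) and leaves the flags unchanged when the masked count is
         zero, hence the extra instructions below whenever valid flags are
         requested. */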
 2082 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
 2083     sljit_u8 mode, sljit_s32 set_flags,
 2084     sljit_s32 dst, sljit_sw dstw,
 2085     sljit_s32 src1, sljit_sw src1w,
 2086     sljit_s32 src2, sljit_sw src2w)
 2087 {
 2088     /* The CPU does not set flags if the shift count is 0. */
 2089     if (src2 & SLJIT_IMM) {
 2090 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2091         if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
 2092             return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 2093 #else
 2094         if ((src2w & 0x1f) != 0)
 2095             return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 2096 #endif
 2097         if (!set_flags)
 2098             return emit_mov(compiler, dst, dstw, src1, src1w);
 2099         /* OR dst, src, 0 */
 2100         return emit_cum_binary(compiler, BINARY_OPCODE(OR),
 2101             dst, dstw, src1, src1w, SLJIT_IMM, 0);
 2102     }
 2103 
 2104     if (!set_flags)
 2105         return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 2106 
 2107     if (!FAST_IS_REG(dst))
 2108         FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
 2109 
 2110     FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
 2111 
 2112     if (FAST_IS_REG(dst))
 2113         return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0);
 2114     return SLJIT_SUCCESS;
 2115 }
 2116 
 2117 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
 2118     sljit_s32 dst, sljit_sw dstw,
 2119     sljit_s32 src1, sljit_sw src1w,
 2120     sljit_s32 src2, sljit_sw src2w)
 2121 {
 2122     CHECK_ERROR();
 2123     CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
 2124     ADJUST_LOCAL_OFFSET(dst, dstw);
 2125     ADJUST_LOCAL_OFFSET(src1, src1w);
 2126     ADJUST_LOCAL_OFFSET(src2, src2w);
 2127 
 2128     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2129     CHECK_EXTRA_REGS(src1, src1w, (void)0);
 2130     CHECK_EXTRA_REGS(src2, src2w, (void)0);
 2131 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2132     compiler->mode32 = op & SLJIT_I32_OP;
 2133 #endif
 2134 
 2135     if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
 2136         return SLJIT_SUCCESS;
 2137 
 2138     switch (GET_OPCODE(op)) {
 2139     case SLJIT_ADD:
 2140         if (!HAS_FLAGS(op)) {
 2141             if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
 2142                 return compiler->error;
 2143         }
 2144         return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
 2145             dst, dstw, src1, src1w, src2, src2w);
 2146     case SLJIT_ADDC:
 2147         return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
 2148             dst, dstw, src1, src1w, src2, src2w);
 2149     case SLJIT_SUB:
 2150         if (!HAS_FLAGS(op)) {
 2151             if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
 2152                 return compiler->error;
 2153         }
 2154 
 2155         if (dst == SLJIT_UNUSED)
 2156             return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
 2157         return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
 2158             dst, dstw, src1, src1w, src2, src2w);
 2159     case SLJIT_SUBC:
 2160         return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
 2161             dst, dstw, src1, src1w, src2, src2w);
 2162     case SLJIT_MUL:
 2163         return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
 2164     case SLJIT_AND:
 2165         if (dst == SLJIT_UNUSED)
 2166             return emit_test_binary(compiler, src1, src1w, src2, src2w);
 2167         return emit_cum_binary(compiler, BINARY_OPCODE(AND),
 2168             dst, dstw, src1, src1w, src2, src2w);
 2169     case SLJIT_OR:
 2170         return emit_cum_binary(compiler, BINARY_OPCODE(OR),
 2171             dst, dstw, src1, src1w, src2, src2w);
 2172     case SLJIT_XOR:
 2173         return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
 2174             dst, dstw, src1, src1w, src2, src2w);
 2175     case SLJIT_SHL:
 2176         return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
 2177             dst, dstw, src1, src1w, src2, src2w);
 2178     case SLJIT_LSHR:
 2179         return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
 2180             dst, dstw, src1, src1w, src2, src2w);
 2181     case SLJIT_ASHR:
 2182         return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
 2183             dst, dstw, src1, src1w, src2, src2w);
 2184     }
 2185 
 2186     return SLJIT_SUCCESS;
 2187 }
 2188 
 2189 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
 2190 {
 2191     CHECK_REG_INDEX(check_sljit_get_register_index(reg));
 2192 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2193     if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
 2194         return -1;
 2195 #endif
 2196     return reg_map[reg];
 2197 }
 2198 
 2199 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
 2200 {
 2201     CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
 2202 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2203     return reg;
 2204 #else
 2205     return freg_map[reg];
 2206 #endif
 2207 }
 2208 
 2209 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
 2210     void *instruction, sljit_s32 size)
 2211 {
 2212     sljit_u8 *inst;
 2213 
 2214     CHECK_ERROR();
 2215     CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
 2216 
 2217     inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
 2218     FAIL_IF(!inst);
 2219     INC_SIZE(size);
 2220     SLJIT_MEMCPY(inst, instruction, size);
 2221     return SLJIT_SUCCESS;
 2222 }
 2223 
 2224 /* --------------------------------------------------------------------- */
 2225 /*  Floating point operators                                             */
 2226 /* --------------------------------------------------------------------- */
 2227 
 2228 /* 3 words of alignment padding + 4 constants of 16 bytes each. */
 2229 static sljit_s32 sse2_data[3 + (4 * 4)];
 2230 static sljit_s32 *sse2_buffer;
 2231 
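      /* Layout of the 16-byte aligned sse2_buffer filled in by init_compiler:
         offset  0: 0x80000000         - float sign bit mask (single NEG)
         offset 16: 0x7fffffff         - float abs mask (single ABS)
         offset 32: 0x8000000000000000 - double sign bit mask (double NEG)
         offset 48: 0x7fffffffffffffff - double abs mask (double ABS)
         sljit_emit_fop1 applies them with XORPD/ANDPD. */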
 2232 static void init_compiler(void)
 2233 {
 2234     /* Align to 16 bytes. */
 2235     sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
 2236 
 2237     /* Single precision constants (each constant is 16 bytes long). */
 2238     sse2_buffer[0] = 0x80000000;
 2239     sse2_buffer[4] = 0x7fffffff;
 2240     /* Double precision constants (each constant is 16 bytes long). */
 2241     sse2_buffer[8] = 0;
 2242     sse2_buffer[9] = 0x80000000;
 2243     sse2_buffer[12] = 0xffffffff;
 2244     sse2_buffer[13] = 0x7fffffff;
 2245 }
 2246 
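      /* Scalar SSE2 helpers: the F3 prefix selects the single precision form
         of the 0F opcode (MOVSS, ADDSS, ...) and F2 the double precision
         form (MOVSD, ADDSD, ...); emit_sse2_logic uses the 66 prefix for
         packed-double instructions such as ANDPD, XORPD and UCOMISD. */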
 2247 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
 2248     sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
 2249 {
 2250     sljit_u8 *inst;
 2251 
 2252     inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
 2253     FAIL_IF(!inst);
 2254     *inst++ = GROUP_0F;
 2255     *inst = opcode;
 2256     return SLJIT_SUCCESS;
 2257 }
 2258 
 2259 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
 2260     sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
 2261 {
 2262     sljit_u8 *inst;
 2263 
 2264     inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
 2265     FAIL_IF(!inst);
 2266     *inst++ = GROUP_0F;
 2267     *inst = opcode;
 2268     return SLJIT_SUCCESS;
 2269 }
 2270 
 2271 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
 2272     sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
 2273 {
 2274     return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
 2275 }
 2276 
 2277 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
 2278     sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
 2279 {
 2280     return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
 2281 }
 2282 
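      /* Float to integer conversion uses CVTTSS2SI/CVTTSD2SI, which truncate
         toward zero. For a word-sized (64-bit) result mode32 is cleared so
         that the instruction is emitted with a REX.W prefix. */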
 2283 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
 2284     sljit_s32 dst, sljit_sw dstw,
 2285     sljit_s32 src, sljit_sw srcw)
 2286 {
 2287     sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 2288     sljit_u8 *inst;
 2289 
 2290 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2291     if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
 2292         compiler->mode32 = 0;
 2293 #endif
 2294 
 2295     inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
 2296     FAIL_IF(!inst);
 2297     *inst++ = GROUP_0F;
 2298     *inst = CVTTSD2SI_r_xm;
 2299 
 2300     if (dst & SLJIT_MEM)
 2301         return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 2302     return SLJIT_SUCCESS;
 2303 }
 2304 
 2305 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
 2306     sljit_s32 dst, sljit_sw dstw,
 2307     sljit_s32 src, sljit_sw srcw)
 2308 {
 2309     sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
 2310     sljit_u8 *inst;
 2311 
 2312 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2313     if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
 2314         compiler->mode32 = 0;
 2315 #endif
 2316 
 2317     if (src & SLJIT_IMM) {
 2318 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2319         if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
 2320             srcw = (sljit_s32)srcw;
 2321 #endif
 2322         EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
 2323         src = TMP_REG1;
 2324         srcw = 0;
 2325     }
 2326 
 2327     inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
 2328     FAIL_IF(!inst);
 2329     *inst++ = GROUP_0F;
 2330     *inst = CVTSI2SD_x_rm;
 2331 
 2332 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2333     compiler->mode32 = 1;
 2334 #endif
 2335     if (dst_r == TMP_FREG)
 2336         return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
 2337     return SLJIT_SUCCESS;
 2338 }
 2339 
 2340 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
 2341     sljit_s32 src1, sljit_sw src1w,
 2342     sljit_s32 src2, sljit_sw src2w)
 2343 {
 2344     if (!FAST_IS_REG(src1)) {
 2345         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
 2346         src1 = TMP_FREG;
 2347     }
 2348 
 2349     return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
 2350 }
 2351 
 2352 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
 2353     sljit_s32 dst, sljit_sw dstw,
 2354     sljit_s32 src, sljit_sw srcw)
 2355 {
 2356     sljit_s32 dst_r;
 2357 
 2358 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2359     compiler->mode32 = 1;
 2360 #endif
 2361 
 2362     CHECK_ERROR();
 2363     SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
 2364 
 2365     if (GET_OPCODE(op) == SLJIT_MOV_F64) {
 2366         if (FAST_IS_REG(dst))
 2367             return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
 2368         if (FAST_IS_REG(src))
 2369             return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
 2370         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
 2371         return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
 2372     }
 2373 
 2374     if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
 2375         dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
 2376         if (FAST_IS_REG(src)) {
 2377             /* We overwrite the high bits of the source. From SLJIT's point of view,
 2378                this is not an issue.
 2379                Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
 2380             FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
 2381         }
 2382         else {
 2383             FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
 2384             src = TMP_FREG;
 2385         }
 2386 
 2387         FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
 2388         if (dst_r == TMP_FREG)
 2389             return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
 2390         return SLJIT_SUCCESS;
 2391     }
 2392 
 2393     if (FAST_IS_REG(dst)) {
 2394         dst_r = dst;
 2395         if (dst != src)
 2396             FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
 2397     }
 2398     else {
 2399         dst_r = TMP_FREG;
 2400         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
 2401     }
 2402 
 2403     switch (GET_OPCODE(op)) {
 2404     case SLJIT_NEG_F64:
 2405         FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
 2406         break;
 2407 
 2408     case SLJIT_ABS_F64:
 2409         FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
 2410         break;
 2411     }
 2412 
 2413     if (dst_r == TMP_FREG)
 2414         return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
 2415     return SLJIT_SUCCESS;
 2416 }
 2417 
 2418 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
 2419     sljit_s32 dst, sljit_sw dstw,
 2420     sljit_s32 src1, sljit_sw src1w,
 2421     sljit_s32 src2, sljit_sw src2w)
 2422 {
 2423     sljit_s32 dst_r;
 2424 
 2425     CHECK_ERROR();
 2426     CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
 2427     ADJUST_LOCAL_OFFSET(dst, dstw);
 2428     ADJUST_LOCAL_OFFSET(src1, src1w);
 2429     ADJUST_LOCAL_OFFSET(src2, src2w);
 2430 
 2431 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2432     compiler->mode32 = 1;
 2433 #endif
 2434 
 2435     if (FAST_IS_REG(dst)) {
 2436         dst_r = dst;
 2437         if (dst == src1)
 2438             ; /* Do nothing here. */
 2439         else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
 2440             /* Swap arguments. */
 2441             src2 = src1;
 2442             src2w = src1w;
 2443         }
 2444         else if (dst != src2)
 2445             FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
 2446         else {
 2447             dst_r = TMP_FREG;
 2448             FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
 2449         }
 2450     }
 2451     else {
 2452         dst_r = TMP_FREG;
 2453         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
 2454     }
 2455 
 2456     switch (GET_OPCODE(op)) {
 2457     case SLJIT_ADD_F64:
 2458         FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
 2459         break;
 2460 
 2461     case SLJIT_SUB_F64:
 2462         FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
 2463         break;
 2464 
 2465     case SLJIT_MUL_F64:
 2466         FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
 2467         break;
 2468 
 2469     case SLJIT_DIV_F64:
 2470         FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
 2471         break;
 2472     }
 2473 
 2474     if (dst_r == TMP_FREG)
 2475         return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
 2476     return SLJIT_SUCCESS;
 2477 }
 2478 
 2479 /* --------------------------------------------------------------------- */
 2480 /*  Conditional instructions                                             */
 2481 /* --------------------------------------------------------------------- */
 2482 
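      /* The two-byte (0, n) pairs written into the instruction stream below
         are not machine code: the zero byte appears to mark a record that the
         code generator (not shown in this part of the file) resolves later,
         with n selecting the kind (0 label, 1 jump, 2 const, 3 put_label). */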
 2483 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
 2484 {
 2485     sljit_u8 *inst;
 2486     struct sljit_label *label;
 2487 
 2488     CHECK_ERROR_PTR();
 2489     CHECK_PTR(check_sljit_emit_label(compiler));
 2490 
 2491     if (compiler->last_label && compiler->last_label->size == compiler->size)
 2492         return compiler->last_label;
 2493 
 2494     label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
 2495     PTR_FAIL_IF(!label);
 2496     set_label(label, compiler);
 2497 
 2498     inst = (sljit_u8*)ensure_buf(compiler, 2);
 2499     PTR_FAIL_IF(!inst);
 2500 
 2501     *inst++ = 0;
 2502     *inst++ = 0;
 2503 
 2504     return label;
 2505 }
 2506 
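      /* The sizes reserved below are worst cases: on x86-32 a near jump is
         5 bytes (6 with the 0F conditional prefix); on x86-64 room is also
         kept for the long, rewritable form, roughly a 10 byte mov reg, imm64
         plus a 3 byte indirect jump, preceded by a 2 byte conditional skip
         when needed. */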
 2507 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
 2508 {
 2509     sljit_u8 *inst;
 2510     struct sljit_jump *jump;
 2511 
 2512     CHECK_ERROR_PTR();
 2513     CHECK_PTR(check_sljit_emit_jump(compiler, type));
 2514 
 2515     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2516     PTR_FAIL_IF_NULL(jump);
 2517     set_jump(jump, compiler, (type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT));
 2518     type &= 0xff;
 2519 
 2520     /* Worst case size. */
 2521 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2522     compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
 2523 #else
 2524     compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
 2525 #endif
 2526 
 2527     inst = (sljit_u8*)ensure_buf(compiler, 2);
 2528     PTR_FAIL_IF_NULL(inst);
 2529 
 2530     *inst++ = 0;
 2531     *inst++ = 1;
 2532     return jump;
 2533 }
 2534 
 2535 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
 2536 {
 2537     sljit_u8 *inst;
 2538     struct sljit_jump *jump;
 2539 
 2540     CHECK_ERROR();
 2541     CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
 2542     ADJUST_LOCAL_OFFSET(src, srcw);
 2543 
 2544     CHECK_EXTRA_REGS(src, srcw, (void)0);
 2545 
 2546     if (src == SLJIT_IMM) {
 2547         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2548         FAIL_IF_NULL(jump);
 2549         set_jump(jump, compiler, JUMP_ADDR | (type << TYPE_SHIFT));
 2550         jump->u.target = srcw;
 2551 
 2552         /* Worst case size. */
 2553 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2554         compiler->size += 5;
 2555 #else
 2556         compiler->size += 10 + 3;
 2557 #endif
 2558 
 2559         inst = (sljit_u8*)ensure_buf(compiler, 2);
 2560         FAIL_IF_NULL(inst);
 2561 
 2562         *inst++ = 0;
 2563         *inst++ = 1;
 2564     }
 2565     else {
 2566 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2567         /* REX_W is not necessary (src is not immediate). */
 2568         compiler->mode32 = 1;
 2569 #endif
 2570         inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
 2571         FAIL_IF(!inst);
 2572         *inst++ = GROUP_FF;
 2573         *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
 2574     }
 2575     return SLJIT_SUCCESS;
 2576 }
 2577 
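      /* A condition flag is materialized as 0/1 with SETcc (whose opcode is
         the long Jcc opcode + 0x10, see below) followed by a MOVZX zero
         extension. On x86-32 only registers with an addressable low byte
         (eax, ecx, edx, ebx) can be set directly; otherwise the value is
         produced in AL (saving EAX with XCHG) or via CMOV when available. */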
 2578 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
 2579     sljit_s32 dst, sljit_sw dstw,
 2580     sljit_s32 type)
 2581 {
 2582     sljit_u8 *inst;
 2583     sljit_u8 cond_set = 0;
 2584 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2585     sljit_s32 reg;
 2586 #endif
 2587     /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
 2588     sljit_s32 dst_save = dst;
 2589     sljit_sw dstw_save = dstw;
 2590 
 2591     CHECK_ERROR();
 2592     CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
 2593 
 2594     ADJUST_LOCAL_OFFSET(dst, dstw);
 2595     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2596 
 2597     type &= 0xff;
 2598     /* setcc = jcc + 0x10. */
 2599     cond_set = get_jump_code(type) + 0x10;
 2600 
 2601 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2602     if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
 2603         inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
 2604         FAIL_IF(!inst);
 2605         INC_SIZE(4 + 3);
 2606         /* Set low register to conditional flag. */
 2607         *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
 2608         *inst++ = GROUP_0F;
 2609         *inst++ = cond_set;
 2610         *inst++ = MOD_REG | reg_lmap[TMP_REG1];
 2611         *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
 2612         *inst++ = OR_rm8_r8;
 2613         *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
 2614         return SLJIT_SUCCESS;
 2615     }
 2616 
 2617     reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
 2618 
 2619     inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
 2620     FAIL_IF(!inst);
 2621     INC_SIZE(4 + 4);
 2622     /* Set low register to conditional flag. */
 2623     *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
 2624     *inst++ = GROUP_0F;
 2625     *inst++ = cond_set;
 2626     *inst++ = MOD_REG | reg_lmap[reg];
 2627     *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
 2628     /* The movzx instruction does not affect flags. */
 2629     *inst++ = GROUP_0F;
 2630     *inst++ = MOVZX_r_rm8;
 2631     *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
 2632 
 2633     if (reg != TMP_REG1)
 2634         return SLJIT_SUCCESS;
 2635 
 2636     if (GET_OPCODE(op) < SLJIT_ADD) {
 2637         compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
 2638         return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 2639     }
 2640 
 2641 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2642         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2643     compiler->skip_checks = 1;
 2644 #endif
 2645     return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
 2646 
 2647 #else
 2648     /* The SLJIT_CONFIG_X86_32 code path starts here. */
 2649     if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
 2650         if (reg_map[dst] <= 4) {
 2651             /* Low byte is accessible. */
 2652             inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
 2653             FAIL_IF(!inst);
 2654             INC_SIZE(3 + 3);
 2655             /* Set low byte to conditional flag. */
 2656             *inst++ = GROUP_0F;
 2657             *inst++ = cond_set;
 2658             *inst++ = MOD_REG | reg_map[dst];
 2659 
 2660             *inst++ = GROUP_0F;
 2661             *inst++ = MOVZX_r_rm8;
 2662             *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
 2663             return SLJIT_SUCCESS;
 2664         }
 2665 
 2666         /* Low byte is not accessible. */
 2667         if (cpu_has_cmov == -1)
 2668             get_cpu_features();
 2669 
 2670         if (cpu_has_cmov) {
 2671             EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
 2672             /* A xor reg, reg operation would overwrite the flags. */
 2673             EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
 2674 
 2675             inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
 2676             FAIL_IF(!inst);
 2677             INC_SIZE(3);
 2678 
 2679             *inst++ = GROUP_0F;
 2680             /* cmovcc = setcc - 0x50. */
 2681             *inst++ = cond_set - 0x50;
 2682             *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
 2683             return SLJIT_SUCCESS;
 2684         }
 2685 
 2686         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
 2687         FAIL_IF(!inst);
 2688         INC_SIZE(1 + 3 + 3 + 1);
 2689         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2690         /* Set al to conditional flag. */
 2691         *inst++ = GROUP_0F;
 2692         *inst++ = cond_set;
 2693         *inst++ = MOD_REG | 0 /* eax */;
 2694 
 2695         *inst++ = GROUP_0F;
 2696         *inst++ = MOVZX_r_rm8;
 2697         *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
 2698         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2699         return SLJIT_SUCCESS;
 2700     }
 2701 
 2702     if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
 2703         SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
 2704 
 2705         if (dst != SLJIT_R0) {
 2706             inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
 2707             FAIL_IF(!inst);
 2708             INC_SIZE(1 + 3 + 2 + 1);
 2709             /* Set low register to conditional flag. */
 2710             *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2711             *inst++ = GROUP_0F;
 2712             *inst++ = cond_set;
 2713             *inst++ = MOD_REG | 0 /* eax */;
 2714             *inst++ = OR_rm8_r8;
 2715             *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
 2716             *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2717         }
 2718         else {
 2719             inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
 2720             FAIL_IF(!inst);
 2721             INC_SIZE(2 + 3 + 2 + 2);
 2722             /* Set low register to conditional flag. */
 2723             *inst++ = XCHG_r_rm;
 2724             *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
 2725             *inst++ = GROUP_0F;
 2726             *inst++ = cond_set;
 2727             *inst++ = MOD_REG | 1 /* ecx */;
 2728             *inst++ = OR_rm8_r8;
 2729             *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
 2730             *inst++ = XCHG_r_rm;
 2731             *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
 2732         }
 2733         return SLJIT_SUCCESS;
 2734     }
 2735 
 2736     /* Set TMP_REG1 to the bit. */
 2737     inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
 2738     FAIL_IF(!inst);
 2739     INC_SIZE(1 + 3 + 3 + 1);
 2740     *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2741     /* Set al to conditional flag. */
 2742     *inst++ = GROUP_0F;
 2743     *inst++ = cond_set;
 2744     *inst++ = MOD_REG | 0 /* eax */;
 2745 
 2746     *inst++ = GROUP_0F;
 2747     *inst++ = MOVZX_r_rm8;
 2748     *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
 2749 
 2750     *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
 2751 
 2752     if (GET_OPCODE(op) < SLJIT_ADD)
 2753         return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 2754 
 2755 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2756         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2757     compiler->skip_checks = 1;
 2758 #endif
 2759     return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
 2760 #endif /* SLJIT_CONFIG_X86_64 */
 2761 }
 2762 
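      /* CMOVcc shares its condition encoding with Jcc: the opcode byte is
         the long-form Jcc byte minus 0x40. CMOV has no immediate form, so
         immediates are loaded into TMP_REG1 first; the generic fallback is
         used when the CPU lacks CMOV or, on x86-32, when the destination is
         one of the memory-mapped virtual registers. */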
 2763 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
 2764     sljit_s32 dst_reg,
 2765     sljit_s32 src, sljit_sw srcw)
 2766 {
 2767     sljit_u8* inst;
 2768 
 2769     CHECK_ERROR();
 2770     CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
 2771 
 2772 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2773     dst_reg &= ~SLJIT_I32_OP;
 2774 
 2775     if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
 2776         return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
 2777 #else
 2778     if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
 2779         return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
 2780 #endif
 2781 
 2782     /* ADJUST_LOCAL_OFFSET is not needed. */
 2783     CHECK_EXTRA_REGS(src, srcw, (void)0);
 2784 
 2785 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2786     compiler->mode32 = dst_reg & SLJIT_I32_OP;
 2787     dst_reg &= ~SLJIT_I32_OP;
 2788 #endif
 2789 
 2790     if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
 2791         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
 2792         src = TMP_REG1;
 2793         srcw = 0;
 2794     }
 2795 
 2796     inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
 2797     FAIL_IF(!inst);
 2798     *inst++ = GROUP_0F;
 2799     *inst = get_jump_code(type & 0xff) - 0x40;
 2800     return SLJIT_SUCCESS;
 2801 }
 2802 
 2803 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
 2804 {
 2805     CHECK_ERROR();
 2806     CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
 2807     ADJUST_LOCAL_OFFSET(dst, dstw);
 2808 
 2809     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2810 
 2811 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2812     compiler->mode32 = 0;
 2813 #endif
 2814 
 2815     ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
 2816 
 2817 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2818     if (NOT_HALFWORD(offset)) {
 2819         FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
 2820 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
 2821         SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
 2822         return compiler->error;
 2823 #else
 2824         return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
 2825 #endif
 2826     }
 2827 #endif
 2828 
 2829     if (offset != 0)
 2830         return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
 2831     return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
 2832 }
 2833 
 2834 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 2835 {
 2836     sljit_u8 *inst;
 2837     struct sljit_const *const_;
 2838 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2839     sljit_s32 reg;
 2840 #endif
 2841 
 2842     CHECK_ERROR_PTR();
 2843     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
 2844     ADJUST_LOCAL_OFFSET(dst, dstw);
 2845 
 2846     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2847 
 2848     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
 2849     PTR_FAIL_IF(!const_);
 2850     set_const(const_, compiler);
 2851 
 2852 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2853     compiler->mode32 = 0;
 2854     reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
 2855 
 2856     if (emit_load_imm64(compiler, reg, init_value))
 2857         return NULL;
 2858 #else
 2859     if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
 2860         return NULL;
 2861 #endif
 2862 
 2863     inst = (sljit_u8*)ensure_buf(compiler, 2);
 2864     PTR_FAIL_IF(!inst);
 2865 
 2866     *inst++ = 0;
 2867     *inst++ = 2;
 2868 
 2869 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2870     if (dst & SLJIT_MEM)
 2871         if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
 2872             return NULL;
 2873 #endif
 2874 
 2875     return const_;
 2876 }
 2877 
 2878 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
 2879 {
 2880     struct sljit_put_label *put_label;
 2881     sljit_u8 *inst;
 2882 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2883     sljit_s32 reg;
 2884     sljit_uw start_size;
 2885 #endif
 2886 
 2887     CHECK_ERROR_PTR();
 2888     CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
 2889     ADJUST_LOCAL_OFFSET(dst, dstw);
 2890 
 2891     CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2892 
 2893     put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
 2894     PTR_FAIL_IF(!put_label);
 2895     set_put_label(put_label, compiler, 0);
 2896 
 2897 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2898     compiler->mode32 = 0;
 2899     reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
 2900 
 2901     if (emit_load_imm64(compiler, reg, 0))
 2902         return NULL;
 2903 #else
 2904     if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
 2905         return NULL;
 2906 #endif
 2907 
 2908 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2909     if (dst & SLJIT_MEM) {
 2910         start_size = compiler->size;
 2911         if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
 2912             return NULL;
 2913         put_label->flags = compiler->size - start_size;
 2914     }
 2915 #endif
 2916 
 2917     inst = (sljit_u8*)ensure_buf(compiler, 2);
 2918     PTR_FAIL_IF(!inst);
 2919 
 2920     *inst++ = 0;
 2921     *inst++ = 3;
 2922 
 2923     return put_label;
 2924 }
 2925 
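      /* Jump patching: on x86-32 the stored value is a rel32 displacement
         relative to the end of the 4 byte field (hence the addr + 4 and the
         executable_offset adjustment); on x86-64 it is the absolute target
         written into the mov reg, imm64 used by rewritable jumps. */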
 2926 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 2927 {
 2928     SLJIT_UNUSED_ARG(executable_offset);
 2929 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2930     sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
 2931 #else
 2932     sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
 2933 #endif
 2934 }
 2935 
 2936 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 2937 {
 2938     SLJIT_UNUSED_ARG(executable_offset);
 2939     sljit_unaligned_store_sw((void*)addr, new_constant);
 2940 }