"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.44/sljit/sljitNativeX86_64.c" (19 Nov 2019, 25528 Bytes) of package /linux/misc/pcre-8.44.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "sljitNativeX86_64.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 8.43_vs_8.44.

    1 /*
    2  *    Stack-less Just-In-Time compiler
    3  *
    4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without modification, are
    7  * permitted provided that the following conditions are met:
    8  *
    9  *   1. Redistributions of source code must retain the above copyright notice, this list of
   10  *      conditions and the following disclaimer.
   11  *
   12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
   13  *      of conditions and the following disclaimer in the documentation and/or other materials
   14  *      provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
   17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
   19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  */
   26 
   27 /* x86 64-bit arch dependent functions. */
   28 
   29 static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
   30 {
          /* Appends a two byte REX/opcode pair followed by the complete sljit_sw
             immediate, so that the whole word-sized value imm is loaded into reg.
             Returns SLJIT_SUCCESS; a NULL buffer is handled by FAIL_IF
             (presumably by returning an error code - the macro is defined elsewhere). */
   31     sljit_u8 *inst;
   32 
          /* One byte more than the instruction length is requested from ensure_buf.
             NOTE(review): the extra byte presumably holds the length recorded by INC_SIZE - confirm. */
   33     inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
   34     FAIL_IF(!inst);
   35     INC_SIZE(2 + sizeof(sljit_sw));
          /* REX_W is always set; REX_B is added only when the mapped register number is above 7. */
   36     *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
          /* The opcode byte carries the low three bits of the mapped register number. */
   37     *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
   38     sljit_unaligned_store_sw(inst, imm);
   39     return SLJIT_SUCCESS;
   40 }
   41 
   42 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
   43 {
          /* Writes at code_ptr a sequence that loads the jump target into TMP_REG2 and
             then jumps or calls through that register. For conditional types an
             inverted conditional jump that skips the sequence is written first.
             Returns the position right after the last written byte. */
   44     sljit_s32 type = jump->flags >> TYPE_SHIFT;
   45 
          /* A 32 bit immediate is enough only when the jump is not rewritable, does not
             target a label, and the absolute target fits into 32 unsigned bits. */
   46     int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);
   47 
   48     /* The relative jump below specialized for this case. */
   49     SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);
   50 
   51     if (type < SLJIT_JUMP) {
   52         /* Invert type. */
              /* NOTE(review): subtracting 0x10 presumably turns the opcode returned by
                 get_jump_code into its 8 bit displacement form - confirm. */
   53         *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
              /* Skip distance: 2 prefix/opcode bytes plus a 4 or 8 byte immediate (6 or 10),
                 plus the 3 byte indirect jump written at the end of this function. */
   54         *code_ptr++ = short_addr ? (6 + 3) : (10 + 3);
   55     }
   56 
          /* Without REX_W the load takes a 32 bit immediate; with it, a full word. */
   57     *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B);
   58     *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
          /* Remember where the immediate field starts. */
   59     jump->addr = (sljit_uw)code_ptr;
   60 
          /* Label targets are not stored here; the jump is only flagged with PATCH_MD
             (presumably the field is filled in later, outside this function). */
   61     if (jump->flags & JUMP_LABEL)
   62         jump->flags |= PATCH_MD;
   63     else if (short_addr)
   64         sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
   65     else
   66         sljit_unaligned_store_sw(code_ptr, jump->u.target);
   67 
          /* The immediate field is skipped even when nothing was stored into it. */
   68     code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);
   69 
          /* Register-direct GROUP_FF instruction on TMP_REG2: call for SLJIT_FAST_CALL
             and above, jump otherwise. */
   70     *code_ptr++ = REX_B;
   71     *code_ptr++ = GROUP_FF;
   72     *code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2];
   73 
   74     return code_ptr;
   75 }
   76 
   77 static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
   78 {
          /* Rewrites already generated code of a put_label so that a 32 bit immediate is
             used when max_label does not exceed HALFWORD_MAX. Returns the (possibly moved
             back) write position and updates put_label->addr / put_label->flags.
             put_label->flags is used here as a byte count (see the memmove below). */
   79     if (max_label > HALFWORD_MAX) {
              /* Word-sized form is kept: only the address is adjusted by the byte count
                 stored in flags, and the entry is flagged with PATCH_MD. */
   80         put_label->addr -= put_label->flags;
   81         put_label->flags = PATCH_MD;
   82         return code_ptr;
   83     }
   84 
   85     if (put_label->flags == 0) {
   86         /* Destination is register. */
              /* Step back to the start of the prefix + opcode + word immediate sequence. */
   87         code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw);
   88 
   89         SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
   90         SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);
   91 
   92         if ((code_ptr[0] & 0x07) != 0) {
                  /* Other prefix bits are in use: keep the prefix byte, clear only bit 0x08
                     (the W bit in the x86-64 REX layout). */
   93             code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x08);
   94             code_ptr += 2 + sizeof(sljit_s32);
   95         }
   96         else {
                  /* The prefix carries nothing else: overwrite it with the opcode byte,
                     which makes the instruction one byte shorter. */
   97             code_ptr[0] = code_ptr[1];
   98             code_ptr += 1 + sizeof(sljit_s32);
   99         }
  100 
              /* addr and the returned position both point right after the 32 bit immediate field. */
  101         put_label->addr = (sljit_uw)code_ptr;
  102         return code_ptr;
  103     }
  104 
          /* Otherwise: the flags bytes that follow the word-sized immediate load are moved
             back over that load (2 + sizeof(sljit_uw) bytes). */
  105     code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
  106     SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);
  107 
  108     SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
  109 
          /* If the moved code itself starts with a word-sized immediate load, step over it. */
  110     if ((code_ptr[1] & 0xf8) == MOV_r_i32) {
  111         code_ptr += 2 + sizeof(sljit_uw);
  112         SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
  113     }
  114 
  115     SLJIT_ASSERT(code_ptr[1] == MOV_rm_r);
  116 
          /* Turn the register store into an immediate store: clear bit 0x4 of the prefix
             (the R bit in the x86-64 REX layout), replace the opcode, and zero bits 3-5
             of the following byte. */
  117     code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x4);
  118     code_ptr[1] = MOV_rm_i32;
  119     code_ptr[2] = (sljit_u8)(code_ptr[2] & ~(0x7 << 3));
  120 
          /* New end position: the old addr moved back by the removed load and forward
             by the 32 bit immediate. */
  121     code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32));
  122     put_label->addr = (sljit_uw)code_ptr;
  123     put_label->flags = 0;
  124     return code_ptr;
  125 }
  126 
  127 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
  128     sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
  129     sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
  130 {
          /* Emits the function entry sequence: pushes registers, copies up to three
             incoming arguments into SLJIT_S0..SLJIT_S2, and lowers the stack pointer by
             the rounded local size. On _WIN64 it additionally reads the stack area page
             by page before lowering the stack pointer and stores xmm6 when needed.
             Returns SLJIT_SUCCESS, or leaves early through the CHECK / FAIL_IF macros. */
  131     sljit_s32 args, i, tmp, size, saved_register_size;
  132     sljit_u8 *inst;
  133 
  134     CHECK_ERROR();
  135     CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
  136     set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
  137 
  138     compiler->mode32 = 0;
  139 
  140 #ifdef _WIN64
  141     /* Two/four register slots for parameters plus space for xmm6 register if needed. */
  142     if (fscratches >= 6 || fsaveds >= 1)
  143         compiler->locals_offset = 6 * sizeof(sljit_sw);
  144     else
  145         compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
  146 #endif
  147 
  148     /* Including the return address saved by the call instruction. */
  149     saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
  150 
          /* Push the requested saved registers, starting at SLJIT_S0 and going downwards.
             A REX_B prefix byte is needed whenever the mapped register number is 8 or above. */
  151     tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
  152     for (i = SLJIT_S0; i >= tmp; i--) {
  153         size = reg_map[i] >= 8 ? 2 : 1;
  154         inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
  155         FAIL_IF(!inst);
  156         INC_SIZE(size);
  157         if (reg_map[i] >= 8)
  158             *inst++ = REX_B;
  159         PUSH_REG(reg_lmap[i]);
  160     }
  161 
          /* Push those scratch registers whose index is at or above SLJIT_FIRST_SAVED_REG. */
  162     for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
  163         size = reg_map[i] >= 8 ? 2 : 1;
  164         inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
  165         FAIL_IF(!inst);
  166         INC_SIZE(size);
  167         if (reg_map[i] >= 8)
  168             *inst++ = REX_B;
  169         PUSH_REG(reg_lmap[i]);
  170     }
  171 
  172     args = get_arg_count(arg_types);
  173 
  174     if (args > 0) {
              /* Each argument copy is a three byte register move: prefix, opcode, mod r/m. */
  175         size = args * 3;
  176         inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
  177         FAIL_IF(!inst);
  178 
  179         INC_SIZE(size);
  180 
  181 #ifndef _WIN64
              /* Sources are rdi, rsi and rdx, as named by the per-line comments below. */
  182         if (args > 0) {
  183             inst[0] = REX_W;
  184             inst[1] = MOV_r_rm;
  185             inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
  186             inst += 3;
  187         }
  188         if (args > 1) {
  189             inst[0] = REX_W | REX_R;
  190             inst[1] = MOV_r_rm;
  191             inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
  192             inst += 3;
  193         }
  194         if (args > 2) {
  195             inst[0] = REX_W | REX_R;
  196             inst[1] = MOV_r_rm;
  197             inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
  198         }
  199 #else
              /* Sources are rcx, rdx and r8, as named by the per-line comments below. */
  200         if (args > 0) {
  201             inst[0] = REX_W;
  202             inst[1] = MOV_r_rm;
  203             inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
  204             inst += 3;
  205         }
  206         if (args > 1) {
  207             inst[0] = REX_W;
  208             inst[1] = MOV_r_rm;
  209             inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
  210             inst += 3;
  211         }
  212         if (args > 2) {
  213             inst[0] = REX_W | REX_B;
  214             inst[1] = MOV_r_rm;
  215             inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
  216         }
  217 #endif
  218     }
  219 
          /* Round up so that local_size + saved_register_size is a multiple of 16. */
  220     local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
  221     compiler->local_size = local_size;
  222 
  223 #ifdef _WIN64
          /* Reads one word at every 4096 byte step below the stack pointer before the
             stack pointer is lowered. NOTE(review): presumably this touches the stack
             guard pages in order, as Windows requires - confirm. */
  224     if (local_size > 0) {
  225         if (local_size <= 4 * 4096) {
  226             if (local_size > 4096)
  227                 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
  228             if (local_size > 2 * 4096)
  229                 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
  230             if (local_size > 3 * 4096)
  231                 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
  232         }
  233         else {
                  /* Loop form: SLJIT_R0 walks down from the stack pointer in 4096 byte
                     steps and TMP_REG1 counts the remaining steps. */
  234             EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
  235             EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);
  236 
  237             SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
  238 
  239             EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
  240             FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
  241                 SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
  242             FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
  243                 TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));
  244 
  245             inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
  246             FAIL_IF(!inst);
  247 
                  /* Short backward jump with a hard-coded displacement.
                     NOTE(review): -19 presumably equals the encoded length of the three
                     instructions above plus this jump - re-check if their encoding changes. */
  248             INC_SIZE(2);
  249             inst[0] = JNE_i8;
  250             inst[1] = (sljit_s8) -19;
  251         }
  252 
              /* Final read at the lowest address of the new local area. */
  253         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
  254     }
  255 #endif
  256 
          /* Reserve the local area by subtracting local_size from the stack pointer. */
  257     if (local_size > 0) {
  258         FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
  259             SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
  260     }
  261 
  262 #ifdef _WIN64
  263     /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
  264     if (fscratches >= 6 || fsaveds >= 1) {
  265         inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
  266         FAIL_IF(!inst);
  267         INC_SIZE(5);
  268         *inst++ = GROUP_0F;
              /* The remaining four instruction bytes are stored as one 32 bit constant. */
  269         sljit_unaligned_store_s32(inst, 0x20247429);
  270     }
  271 #endif
  272 
  273     return SLJIT_SUCCESS;
  274 }
  275 
  276 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
  277     sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
  278     sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
  279 {
          /* Records the same frame parameters as sljit_emit_enter (locals_offset on
             _WIN64 and the rounded local_size) but emits no machine code.
             Returns SLJIT_SUCCESS, or leaves early through the CHECK macros. */
  280     sljit_s32 saved_register_size;
  281 
  282     CHECK_ERROR();
  283     CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
  284     set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
  285 
  286 #ifdef _WIN64
  287     /* Two/four register slots for parameters plus space for xmm6 register if needed. */
  288     if (fscratches >= 6 || fsaveds >= 1)
  289         compiler->locals_offset = 6 * sizeof(sljit_sw);
  290     else
  291         compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
  292 #endif
  293 
  294     /* Including the return address saved by the call instruction. */
  295     saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
          /* Same rounding as in sljit_emit_enter: local_size + saved_register_size
             becomes a multiple of 16. */
  296     compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
  297     return SLJIT_SUCCESS;
  298 }
  299 
  300 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
  301 {
          /* Emits the function exit sequence: the optional return value move, the xmm6
             reload on _WIN64, an add that releases the local area, pops of the registers
             pushed by sljit_emit_enter in the opposite order, and the return instruction.
             Returns SLJIT_SUCCESS, or leaves early through the CHECK / FAIL_IF macros. */
  302     sljit_s32 i, tmp, size;
  303     sljit_u8 *inst;
  304 
  305     CHECK_ERROR();
  306     CHECK(check_sljit_emit_return(compiler, op, src, srcw));
  307 
  308     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
  309 
  310 #ifdef _WIN64
  311     /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
  312     if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
  313         inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
  314         FAIL_IF(!inst);
  315         INC_SIZE(5);
  316         *inst++ = GROUP_0F;
  317         sljit_unaligned_store_s32(inst, 0x20247428);
  318     }
  319 #endif
  320 
  321     if (compiler->local_size > 0) {
              /* Add local_size to the register encoded as 4 in the mod r/m byte: the short
                 form with an 8 bit immediate up to 127, the 32 bit immediate form otherwise. */
  322         if (compiler->local_size <= 127) {
  323             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
  324             FAIL_IF(!inst);
  325             INC_SIZE(4);
  326             *inst++ = REX_W;
  327             *inst++ = GROUP_BINARY_83;
  328             *inst++ = MOD_REG | ADD | 4;
  329             *inst = compiler->local_size;
  330         }
  331         else {
  332             inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
  333             FAIL_IF(!inst);
  334             INC_SIZE(7);
  335             *inst++ = REX_W;
  336             *inst++ = GROUP_BINARY_81;
  337             *inst++ = MOD_REG | ADD | 4;
  338             sljit_unaligned_store_s32(inst, compiler->local_size);
  339         }
  340     }
  341 
          /* Pop the scratch registers at or above SLJIT_FIRST_SAVED_REG, in ascending order
             (the reverse of the push order in sljit_emit_enter). */
  342     tmp = compiler->scratches;
  343     for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
  344         size = reg_map[i] >= 8 ? 2 : 1;
  345         inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
  346         FAIL_IF(!inst);
  347         INC_SIZE(size);
  348         if (reg_map[i] >= 8)
  349             *inst++ = REX_B;
  350         POP_REG(reg_lmap[i]);
  351     }
  352 
          /* Pop the saved registers, ending with SLJIT_S0. */
  353     tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
  354     for (i = tmp; i <= SLJIT_S0; i++) {
  355         size = reg_map[i] >= 8 ? 2 : 1;
  356         inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
  357         FAIL_IF(!inst);
  358         INC_SIZE(size);
  359         if (reg_map[i] >= 8)
  360             *inst++ = REX_B;
  361         POP_REG(reg_lmap[i]);
  362     }
  363 
  364     inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  365     FAIL_IF(!inst);
  366     INC_SIZE(1);
  367     RET();
  368     return SLJIT_SUCCESS;
  369 }
  370 
  371 /* --------------------------------------------------------------------- */
  372 /*  Operators                                                            */
  373 /* --------------------------------------------------------------------- */
  374 
  375 static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
  376 {
          /* Appends an optional prefix byte (only when rex is non-zero), the opcode byte
             and imm stored as a 32 bit value. Returns SLJIT_SUCCESS; a NULL buffer is
             handled by FAIL_IF. */
  377     sljit_u8 *inst;
          /* Opcode byte + optional prefix byte + 32 bit immediate. */
  378     sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32);
  379 
  380     inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
  381     FAIL_IF(!inst);
  382     INC_SIZE(length);
  383     if (rex)
  384         *inst++ = rex;
  385     *inst++ = opcode;
  386     sljit_unaligned_store_s32(inst, imm);
  387     return SLJIT_SUCCESS;
  388 }
  389 
  390 static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
  391     /* The register or immediate operand. */
  392     sljit_s32 a, sljit_sw imma,
  393     /* The general operand (not immediate). */
  394     sljit_s32 b, sljit_sw immb)
  395 {
          /* Generic instruction encoder. It first computes the total encoded length,
             then reserves the buffer and writes the legacy prefixes, the REX prefix, the
             mod r/m byte, an optional SIB byte, the displacement and the immediate.
             The low four bits of size give the number of opcode bytes; the remaining
             bits are EX86_* flags. The opcode bytes are left for the caller to fill in,
             except for binary-with-immediate and shift instructions, whose group opcode
             is written here. Returns a pointer to the opcode position (one byte further
             for shift instructions); failures leave through PTR_FAIL_IF, and callers in
             this file treat a NULL result as failure. */
  396     sljit_u8 *inst;
  397     sljit_u8 *buf_ptr;
  398     sljit_u8 rex = 0;
  399     sljit_s32 flags = size & ~0xf;
  400     sljit_s32 inst_size;
  401 
  402     /* The immediate operand must be 32 bit. */
  403     SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
  404     /* Both cannot be switched on. */
  405     SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
  406     /* Size flags not allowed for typed instructions. */
  407     SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
  408     /* Both size flags cannot be switched on. */
  409     SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
  410     /* SSE2 and immediate is not possible. */
  411     SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
          /* At most one of the F2 / F3 / 66 prefixes may be requested. */
  412     SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
  413         && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
  414         && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
  415 
  416     size &= 0xf;
  417     inst_size = size;
  418 
          /* REX_W is used outside mode32 unless EX86_NO_REXW is given; otherwise
             EX86_REX forces a plain REX prefix. */
  419     if (!compiler->mode32 && !(flags & EX86_NO_REXW))
  420         rex |= REX_W;
  421     else if (flags & EX86_REX)
  422         rex |= REX;
  423 
  424     if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
  425         inst_size++;
  426     if (flags & EX86_PREF_66)
  427         inst_size++;
  428 
  429     /* Calculate size of b. */
  430     inst_size += 1; /* mod r/m byte. */
  431     if (b & SLJIT_MEM) {
  432         if (!(b & OFFS_REG_MASK)) {
  433             if (NOT_HALFWORD(immb)) {
                      /* The displacement does not fit into 32 bits: load it into TMP_REG2 and
                         use that register as the index (when a base exists) or as the base. */
  434                 PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
  435                 immb = 0;
  436                 if (b & REG_MASK)
  437                     b |= TO_OFFS_REG(TMP_REG2);
  438                 else
  439                     b |= TMP_REG2;
  440             }
                  /* A base whose low register bits are 4 cannot be put into the r/m field
                     directly (that value selects the SIB form in the x86 encoding), so a
                     SIB byte is forced by marking SLJIT_SP as the offset register. */
  441             else if (reg_lmap[b & REG_MASK] == 4)
  442                 b |= TO_OFFS_REG(SLJIT_SP);
  443         }
  444 
  445         if ((b & REG_MASK) == SLJIT_UNUSED)
  446             inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
  447         else {
  448             if (reg_map[b & REG_MASK] >= 8)
  449                 rex |= REX_B;
  450 
  451             if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
  452                 /* Immediate operand. */
  453                 if (immb <= 127 && immb >= -128)
  454                     inst_size += sizeof(sljit_s8);
  455                 else
  456                     inst_size += sizeof(sljit_s32);
  457             }
                  /* A base whose low register bits are 5 always gets a displacement byte,
                     even when it is zero (see the matching encoding code below). */
  458             else if (reg_lmap[b & REG_MASK] == 5)
  459                 inst_size += sizeof(sljit_s8);
  460 
  461             if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
  462                 inst_size += 1; /* SIB byte. */
  463                 if (reg_map[OFFS_REG(b)] >= 8)
  464                     rex |= REX_X;
  465             }
  466         }
  467     }
  468     else if (!(flags & EX86_SSE2_OP2)) {
  469         if (reg_map[b] >= 8)
  470             rex |= REX_B;
  471     }
  472     else if (freg_map[b] >= 8)
  473         rex |= REX_B;
  474 
          /* Size of the immediate (if a is an immediate), or the REX_R bit for a register. */
  475     if (a & SLJIT_IMM) {
  476         if (flags & EX86_BIN_INS) {
  477             if (imma <= 127 && imma >= -128) {
  478                 inst_size += 1;
  479                 flags |= EX86_BYTE_ARG;
  480             } else
  481                 inst_size += 4;
  482         }
  483         else if (flags & EX86_SHIFT_INS) {
                  /* The shift count is masked to the operand width; a count of 1 needs no
                     immediate byte (GROUP_SHIFT_1 is used for it below). */
  484             imma &= compiler->mode32 ? 0x1f : 0x3f;
  485             if (imma != 1) {
  486                 inst_size ++;
  487                 flags |= EX86_BYTE_ARG;
  488             }
  489         } else if (flags & EX86_BYTE_ARG)
  490             inst_size++;
  491         else if (flags & EX86_HALF_ARG)
  492             inst_size += sizeof(short);
  493         else
  494             inst_size += sizeof(sljit_s32);
  495     }
  496     else {
  497         SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
  498         /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
  499         if (!(flags & EX86_SSE2_OP1)) {
  500             if (reg_map[a] >= 8)
  501                 rex |= REX_R;
  502         }
  503         else if (freg_map[a] >= 8)
  504             rex |= REX_R;
  505     }
  506 
  507     if (rex)
  508         inst_size++;
  509 
  510     inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
  511     PTR_FAIL_IF(!inst);
  512 
  513     /* Encoding the byte. */
  514     INC_SIZE(inst_size);
  515     if (flags & EX86_PREF_F2)
  516         *inst++ = 0xf2;
  517     if (flags & EX86_PREF_F3)
  518         *inst++ = 0xf3;
  519     if (flags & EX86_PREF_66)
  520         *inst++ = 0x66;
  521     if (rex)
  522         *inst++ = rex;
          /* inst now points at the opcode position; buf_ptr points at the mod r/m byte
             that follows the size opcode bytes. */
  523     buf_ptr = inst + size;
  524 
  525     /* Encode mod/rm byte. */
  526     if (!(flags & EX86_SHIFT_INS)) {
  527         if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
  528             *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
  529 
              /* Bits 3-5 of the mod r/m byte: zero for an immediate operand, otherwise the
                 low three bits of the (general or float) register a. */
  530         if (a & SLJIT_IMM)
  531             *buf_ptr = 0;
  532         else if (!(flags & EX86_SSE2_OP1))
  533             *buf_ptr = reg_lmap[a] << 3;
  534         else
  535             *buf_ptr = freg_lmap[a] << 3;
  536     }
  537     else {
  538         if (a & SLJIT_IMM) {
  539             if (imma == 1)
  540                 *inst = GROUP_SHIFT_1;
  541             else
  542                 *inst = GROUP_SHIFT_N;
  543         } else
  544             *inst = GROUP_SHIFT_CL;
  545         *buf_ptr = 0;
  546     }
  547 
  548     if (!(b & SLJIT_MEM))
  549         *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : freg_lmap[b]);
  550     else if ((b & REG_MASK) != SLJIT_UNUSED) {
  551         if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
                  /* Base register with an optional displacement: 0x40 selects the 8 bit
                     displacement form and 0x80 the 32 bit form. */
  552             if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
  553                 if (immb <= 127 && immb >= -128)
  554                     *buf_ptr |= 0x40;
  555                 else
  556                     *buf_ptr |= 0x80;
  557             }
  558 
  559             if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
  560                 *buf_ptr++ |= reg_lmap[b & REG_MASK];
  561             else {
                      /* r/m value 4 announces a SIB byte, which is written right after. */
  562                 *buf_ptr++ |= 0x04;
  563                 *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
  564             }
  565 
  566             if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
  567                 if (immb <= 127 && immb >= -128)
  568                     *buf_ptr++ = immb; /* 8 bit displacement. */
  569                 else {
  570                     sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
  571                     buf_ptr += sizeof(sljit_s32);
  572                 }
  573             }
  574         }
  575         else {
                  /* Base + index form: here immb is placed into the top two bits of the
                     SIB byte (the scale field) instead of being used as a displacement. */
  576             if (reg_lmap[b & REG_MASK] == 5)
  577                 *buf_ptr |= 0x40;
  578             *buf_ptr++ |= 0x04;
  579             *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
  580             if (reg_lmap[b & REG_MASK] == 5)
  581                 *buf_ptr++ = 0;
  582         }
  583     }
  584     else {
              /* No base register: SIB byte 0x25 followed by a 32 bit displacement
                 (absolute address, see the size calculation above). */
  585         *buf_ptr++ |= 0x04;
  586         *buf_ptr++ = 0x25;
  587         sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
  588         buf_ptr += sizeof(sljit_s32);
  589     }
  590 
          /* Trailing immediate. A shift by one has neither size flag set and is excluded
             by the last condition, so nothing is written for it. */
  591     if (a & SLJIT_IMM) {
  592         if (flags & EX86_BYTE_ARG)
  593             *buf_ptr = imma;
  594         else if (flags & EX86_HALF_ARG)
  595             sljit_unaligned_store_s16(buf_ptr, imma);
  596         else if (!(flags & EX86_SHIFT_INS))
  597             sljit_unaligned_store_s32(buf_ptr, imma);
  598     }
  599 
          /* For shift instructions the group opcode is already written, so the returned
             pointer is one byte further (NOTE(review): presumably so the caller can OR
             the shift operation into the mod r/m byte - confirm at the call sites). */
  600     return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
  601 }
  602 
  603 /* --------------------------------------------------------------------- */
  604 /*  Call / return instructions                                           */
  605 /* --------------------------------------------------------------------- */
  606 
  607 #ifndef _WIN64
  608 
  609 static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw)
  610 {
          /* Non-_WIN64 variant. Emits the register moves needed before a call when word
             (non-float) arguments are present, and redirects *src_ptr when the call
             target would be overwritten by those moves or lives in memory.
             src_ptr may be NULL (sljit_emit_call passes NULL); src is then 0 and neither
             branch that writes through src_ptr is taken as long as SLJIT_R2 is non-zero
             (NOTE(review): confirm SLJIT_R2 != 0).
             Returns SLJIT_SUCCESS or the error code of a failed move. */
  611     sljit_s32 src = src_ptr ? (*src_ptr) : 0;
  612     sljit_s32 word_arg_count = 0;
  613 
  614     SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);
  615 
  616     compiler->mode32 = 0;
  617 
  618     /* Remove return value. */
  619     arg_types >>= SLJIT_DEF_SHIFT;
  620 
          /* Count the arguments whose type is below SLJIT_ARG_TYPE_F32 (the word arguments). */
  621     while (arg_types) {
  622         if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32)
  623             word_arg_count++;
  624         arg_types >>= SLJIT_DEF_SHIFT;
  625     }
  626 
  627     if (word_arg_count == 0)
  628         return SLJIT_SUCCESS;
  629 
  630     if (src & SLJIT_MEM) {
              /* Load a memory call target into TMP_REG2 before the argument registers change. */
  631         ADJUST_LOCAL_OFFSET(src, srcw);
  632         EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
  633         *src_ptr = TMP_REG2;
  634     }
          /* SLJIT_R2 is copied to TMP_REG1 below, so a target held in SLJIT_R2 is taken
             from TMP_REG1 instead. NOTE(review): the count is compared against the
             register id SLJIT_R2, presumably equal to 3 - confirm. */
  635     else if (src == SLJIT_R2 && word_arg_count >= SLJIT_R2)
  636         *src_ptr = TMP_REG1;
  637 
          /* Third argument: SLJIT_R2 -> TMP_REG1 (machine register 2 per the assert above). */
  638     if (word_arg_count >= 3)
  639         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
          /* First argument: SLJIT_R0 -> SLJIT_R2. */
  640     return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
  641 }
  642 
  643 #else
  644 
  645 static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw)
  646 {
          /* _WIN64 variant. The first pass walks the argument types, builds a reversed
             copy of them in types and decides whether any register move is needed.
             The second pass walks that reversed list, i.e. from the last argument to the
             first, and emits the moves. *src_ptr is redirected to TMP_REG2 when the call
             target is in memory or in a register that an argument move will overwrite.
             Returns SLJIT_SUCCESS or the error code of a failed move. */
  647     sljit_s32 src = src_ptr ? (*src_ptr) : 0;
  648     sljit_s32 arg_count = 0;
  649     sljit_s32 word_arg_count = 0;
  650     sljit_s32 float_arg_count = 0;
  651     sljit_s32 types = 0;
          /* Set when at least one argument has to be moved (the name is spelled this way
             throughout the function). */
  652     sljit_s32 data_trandfer = 0;
          /* Destination register of the n-th argument when it is a word argument; index 0 is unused. */
  653     static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };
  654 
  655     SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
  656 
  657     compiler->mode32 = 0;
          /* Drop the return type, as in the other variant. */
  658     arg_types >>= SLJIT_DEF_SHIFT;
  659 
  660     while (arg_types) {
  661         types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
  662 
  663         switch (arg_types & SLJIT_DEF_MASK) {
  664         case SLJIT_ARG_TYPE_F32:
  665         case SLJIT_ARG_TYPE_F64:
  666             arg_count++;
  667             float_arg_count++;
  668 
                  /* A float argument whose overall position differs from its position
                     among the float arguments has to be moved. */
  669             if (arg_count != float_arg_count)
  670                 data_trandfer = 1;
  671             break;
  672         default:
  673             arg_count++;
  674             word_arg_count++;
  675 
                  /* A word argument has to be moved unless it already sits in the register
                     assigned to its position. */
  676             if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
  677                 data_trandfer = 1;
  678 
                      /* The call target is in a register that is about to be overwritten:
                         copy it to TMP_REG2 first. With src_ptr == NULL, src is 0 and never
                         equals a non-zero table entry (NOTE(review): assumes the register
                         ids in the table are non-zero - confirm). */
  679                 if (src == word_arg_regs[arg_count]) {
  680                     EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
  681                     *src_ptr = TMP_REG2;
  682                 }
  683             }
  684             break;
  685         }
  686 
  687         arg_types >>= SLJIT_DEF_SHIFT;
  688     }
  689 
  690     if (!data_trandfer)
  691         return SLJIT_SUCCESS;
  692 
  693     if (src & SLJIT_MEM) {
  694         ADJUST_LOCAL_OFFSET(src, srcw);
  695         EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
  696         *src_ptr = TMP_REG2;
  697     }
  698 
          /* The counters now hold the totals and are decremented while walking the
             arguments in reverse order. */
  699     while (types) {
  700         switch (types & SLJIT_DEF_MASK) {
  701         case SLJIT_ARG_TYPE_F32:
  702             if (arg_count != float_arg_count)
  703                 FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
  704             arg_count--;
  705             float_arg_count--;
  706             break;
  707         case SLJIT_ARG_TYPE_F64:
  708             if (arg_count != float_arg_count)
  709                 FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
  710             arg_count--;
  711             float_arg_count--;
  712             break;
  713         default:
                  /* word_arg_count is used directly as the source register id here. */
  714             if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
  715                 EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
  716             arg_count--;
  717             word_arg_count--;
  718             break;
  719         }
  720 
  721         types >>= SLJIT_DEF_SHIFT;
  722     }
  723 
  724     return SLJIT_SUCCESS;
  725 }
  726 
  727 #endif
  728 
  729 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
  730     sljit_s32 arg_types)
  731 {
          /* Emits the argument register setup through call_with_args (with no call
             target operand, hence NULL / 0) and then delegates to sljit_emit_jump.
             Returns whatever sljit_emit_jump returns; earlier failures leave through the
             *_PTR macros. */
  732     CHECK_ERROR_PTR();
  733     CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
  734 
  735     PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL, 0));
  736 
  737 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
  738         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
          /* NOTE(review): presumably suppresses the checks of the nested sljit_emit_jump
             call, since they already ran above - confirm. */
  739     compiler->skip_checks = 1;
  740 #endif
  741 
  742     return sljit_emit_jump(compiler, type);
  743 }
  744 
  745 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
  746     sljit_s32 arg_types,
  747     sljit_s32 src, sljit_sw srcw)
  748 {
          /* Indirect call: emits the argument register setup through call_with_args,
             which may replace src with a temporary register, and then delegates to
             sljit_emit_ijump with the possibly updated src.
             Returns the result of sljit_emit_ijump, or leaves early through CHECK / FAIL_IF. */
  749     CHECK_ERROR();
  750     CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
  751 
  752     FAIL_IF(call_with_args(compiler, arg_types, &src, srcw));
  753 
  754 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
  755         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
          /* NOTE(review): presumably suppresses the checks of the nested sljit_emit_ijump
             call, since they already ran above - confirm. */
  756     compiler->skip_checks = 1;
  757 #endif
  758 
  759     return sljit_emit_ijump(compiler, type, src, srcw);
  760 }
  761 
  762 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
  763 {
          /* Emits a pop of the word on top of the stack into dst (NOTE(review): presumably
             the return address pushed by a fast call - confirm against the API docs).
             Register destinations use the one byte pop, with a REX_B prefix when the
             mapped register number is 8 or above; other destinations use the POP_rm form.
             Returns SLJIT_SUCCESS, or leaves early through CHECK / FAIL_IF. */
  764     sljit_u8 *inst;
  765 
  766     CHECK_ERROR();
  767     CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
  768     ADJUST_LOCAL_OFFSET(dst, dstw);
  769 
  770     /* For UNUSED dst. Uncommon, but possible. */
  771     if (dst == SLJIT_UNUSED)
  772         dst = TMP_REG1;
  773 
  774     if (FAST_IS_REG(dst)) {
  775         if (reg_map[dst] < 8) {
  776             inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  777             FAIL_IF(!inst);
  778             INC_SIZE(1);
  779             POP_REG(reg_lmap[dst]);
  780             return SLJIT_SUCCESS;
  781         }
  782 
  783         inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
  784         FAIL_IF(!inst);
  785         INC_SIZE(2);
  786         *inst++ = REX_B;
  787         POP_REG(reg_lmap[dst]);
  788         return SLJIT_SUCCESS;
  789     }
  790 
  791     /* REX_W is not necessary (src is not immediate). */
          /* mode32 is left set to 1 when this function returns. */
  792     compiler->mode32 = 1;
  793     inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  794     FAIL_IF(!inst);
  795     *inst++ = POP_rm;
  796     return SLJIT_SUCCESS;
  797 }
  798 
  799 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
  800 {
          /* Emits a push of src followed by a return instruction, so execution continues
             at the address held in src. For register sources the push and the return are
             reserved as one buffer entry; for other sources the push goes through
             emit_x86_instruction and the return gets its own one byte entry.
             Returns SLJIT_SUCCESS, or leaves early through CHECK / FAIL_IF. */
  801     sljit_u8 *inst;
  802 
  803     CHECK_ERROR();
  804     CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
  805     ADJUST_LOCAL_OFFSET(src, srcw);
  806 
  807     if (FAST_IS_REG(src)) {
  808         if (reg_map[src] < 8) {
                  /* One push byte plus one byte for the RET() at the end of the function. */
  809             inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
  810             FAIL_IF(!inst);
  811 
  812             INC_SIZE(1 + 1);
  813             PUSH_REG(reg_lmap[src]);
  814         }
  815         else {
                  /* Same as above with an additional REX_B prefix byte. */
  816             inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
  817             FAIL_IF(!inst);
  818 
  819             INC_SIZE(2 + 1);
  820             *inst++ = REX_B;
  821             PUSH_REG(reg_lmap[src]);
  822         }
  823     }
  824     else {
  825         /* REX_W is not necessary (src is not immediate). */
  826         compiler->mode32 = 1;
  827         inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
  828         FAIL_IF(!inst);
              /* The opcode byte is GROUP_FF; PUSH_rm is OR-ed into the byte that follows it. */
  829         *inst++ = GROUP_FF;
  830         *inst |= PUSH_rm;
  831 
              /* Separate one byte entry for the RET() below. */
  832         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  833         FAIL_IF(!inst);
  834         INC_SIZE(1);
  835     }
  836 
          /* In every path inst now points at the byte reserved for the return instruction. */
  837     RET();
  838     return SLJIT_SUCCESS;
  839 }
  840 
  841 /* --------------------------------------------------------------------- */
  842 /*  Extend input                                                         */
  843 /* --------------------------------------------------------------------- */
  844 
  845 static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
  846     sljit_s32 dst, sljit_sw dstw,
  847     sljit_s32 src, sljit_sw srcw)
  848 {
          /* Emits a move of a 32 bit value from src to dst. A non-zero sign selects the
             MOVSXD form for non-immediate sources; otherwise a plain move is emitted in
             mode32. Stores to memory are always done in mode32. compiler->mode32 is 0
             again on every successful return.
             Returns SLJIT_SUCCESS, or leaves early through FAIL_IF. */
  849     sljit_u8* inst;
  850     sljit_s32 dst_r;
  851 
  852     compiler->mode32 = 0;
  853 
  854     if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
  855         return SLJIT_SUCCESS; /* Empty instruction. */
  856 
  857     if (src & SLJIT_IMM) {
  858         if (FAST_IS_REG(dst)) {
                  /* The MOV_rm_i32 form with the immediate truncated to sljit_s32 is used
                     when sign is requested or the value does not exceed 0x7fffffff; any
                     other value is loaded as a full word by emit_load_imm64. */
  859             if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
  860                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
  861                 FAIL_IF(!inst);
  862                 *inst = MOV_rm_i32;
  863                 return SLJIT_SUCCESS;
  864             }
  865             return emit_load_imm64(compiler, dst, srcw);
  866         }
              /* Immediate to a non-register destination: 32 bit store in mode32. */
  867         compiler->mode32 = 1;
  868         inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
  869         FAIL_IF(!inst);
  870         *inst = MOV_rm_i32;
  871         compiler->mode32 = 0;
  872         return SLJIT_SUCCESS;
  873     }
  874 
          /* Register that receives the value: dst itself, or TMP_REG1 when dst is not a register. */
  875     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
  876 
          /* Register source and memory destination: no load is needed, the store below
             reads src directly. */
  877     if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
  878         dst_r = src;
  879     else {
  880         if (sign) {
  881             inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
  882             FAIL_IF(!inst);
  883             *inst++ = MOVSXD_r_rm;
  884         } else {
  885             compiler->mode32 = 1;
  886             FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
  887             compiler->mode32 = 0;
  888         }
  889     }
  890 
  891     if (dst & SLJIT_MEM) {
              /* 32 bit store of dst_r into the memory destination. */
  892         compiler->mode32 = 1;
  893         inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
  894         FAIL_IF(!inst);
  895         *inst = MOV_rm_r;
  896         compiler->mode32 = 0;
  897     }
  898 
  899     return SLJIT_SUCCESS;
  900 }