"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.42/sljit/sljitNativeX86_64.c" (13 Mar 2018, 24103 Bytes) of package /linux/misc/pcre-8.42.tar.bz2:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "sljitNativeX86_64.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 8.41_vs_8.42.

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 64-bit arch dependent functions. */

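/* Emits a 10-byte move: REX.W (plus REX.B for r8-r15), then B8+reg, then
   the full 64-bit immediate, i.e. a "movabs reg, imm64". */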
static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
    sljit_u8 *inst;

    inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
    FAIL_IF(!inst);
    INC_SIZE(2 + sizeof(sljit_sw));
    *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
    *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
    sljit_unaligned_store_sw(inst, imm);
    return SLJIT_SUCCESS;
}

static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
{
    int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);

    /* The relative jump below is specialized for this case. */
    SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);

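    /* Emitted sequence:
         [jcc rel8]                (conditional types only; the inverted
                                    condition skips the two instructions below)
         mov TMP_REG2, imm32/64    (6 or 10 bytes, depending on short_addr)
         jmp/call TMP_REG2         (3 bytes)
       hence the rel8 skip distances 6 + 3 and 10 + 3. */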
    if (type < SLJIT_JUMP) {
        /* Invert type. */
        *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
        *code_ptr++ = short_addr ? (6 + 3) : (10 + 3);
    }

    *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B);
    *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
    jump->addr = (sljit_uw)code_ptr;

    if (jump->flags & JUMP_LABEL)
        jump->flags |= PATCH_MD;
    else if (short_addr)
        sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
    else
        sljit_unaligned_store_sw(code_ptr, jump->u.target);

    code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);

    *code_ptr++ = REX_B;
    *code_ptr++ = GROUP_FF;
    *code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2];

    return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
    sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
    sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
    sljit_s32 args, i, tmp, size, saved_register_size;
    sljit_u8 *inst;

    CHECK_ERROR();
    CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
    set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

    compiler->mode32 = 0;

#ifdef _WIN64
    /* Two/four register slots for parameters plus space for xmm6 register if needed. */
    if (fscratches >= 6 || fsaveds >= 1)
        compiler->locals_offset = 6 * sizeof(sljit_sw);
    else
        compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

    /* Including the return address saved by the call instruction. */
    saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);

    tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
    for (i = SLJIT_S0; i >= tmp; i--) {
        size = reg_map[i] >= 8 ? 2 : 1;
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);
        INC_SIZE(size);
        if (reg_map[i] >= 8)
            *inst++ = REX_B;
        PUSH_REG(reg_lmap[i]);
    }

    for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
        size = reg_map[i] >= 8 ? 2 : 1;
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);
        INC_SIZE(size);
        if (reg_map[i] >= 8)
            *inst++ = REX_B;
        PUSH_REG(reg_lmap[i]);
    }

    args = get_arg_count(arg_types);

    if (args > 0) {
        size = args * 3;
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);

        INC_SIZE(size);

#ifndef _WIN64
        if (args > 0) {
            inst[0] = REX_W;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
            inst += 3;
        }
        if (args > 1) {
            inst[0] = REX_W | REX_R;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
            inst += 3;
        }
        if (args > 2) {
            inst[0] = REX_W | REX_R;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
        }
#else
        if (args > 0) {
            inst[0] = REX_W;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
            inst += 3;
        }
        if (args > 1) {
            inst[0] = REX_W;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
            inst += 3;
        }
        if (args > 2) {
            inst[0] = REX_W | REX_B;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
        }
#endif
    }

    local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
    compiler->local_size = local_size;

#ifdef _WIN64
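    /* Windows grows the stack with guard pages: each new page must be
       touched in order, so every 4K page of the frame is probed before
       rsp is moved below it. */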
    if (local_size > 0) {
        if (local_size <= 4 * 4096) {
            if (local_size > 4096)
                EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
            if (local_size > 2 * 4096)
                EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
            if (local_size > 3 * 4096)
                EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
        }
        else {
            EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);

            SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);

            EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
            FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
                SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
            FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
                TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));

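            /* jne rel8 with a -19 displacement branches back to the
               EMIT_MOV probe above, touching one page per iteration
               until the TMP_REG1 page counter reaches zero. */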
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
            FAIL_IF(!inst);

            INC_SIZE(2);
            inst[0] = JNE_i8;
            inst[1] = (sljit_s8) -19;
        }

        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
    }
#endif

    if (local_size > 0) {
        FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
            SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
    }

#ifdef _WIN64
    /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
    if (fscratches >= 6 || fsaveds >= 1) {
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
        FAIL_IF(!inst);
        INC_SIZE(5);
        *inst++ = GROUP_0F;
        sljit_unaligned_store_s32(inst, 0x20247429);
    }
#endif

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
    sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
    sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
    sljit_s32 saved_register_size;

    CHECK_ERROR();
    CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
    set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
    /* Two/four register slots for parameters plus space for xmm6 register if needed. */
    if (fscratches >= 6 || fsaveds >= 1)
        compiler->locals_offset = 6 * sizeof(sljit_sw);
    else
        compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

    /* Including the return address saved by the call instruction. */
    saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
    compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
    sljit_s32 i, tmp, size;
    sljit_u8 *inst;

    CHECK_ERROR();
    CHECK(check_sljit_emit_return(compiler, op, src, srcw));

    FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

#ifdef _WIN64
    /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
    if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
        FAIL_IF(!inst);
        INC_SIZE(5);
        *inst++ = GROUP_0F;
        sljit_unaligned_store_s32(inst, 0x20247428);
    }
#endif

    if (compiler->local_size > 0) {
        if (compiler->local_size <= 127) {
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
            FAIL_IF(!inst);
            INC_SIZE(4);
            *inst++ = REX_W;
            *inst++ = GROUP_BINARY_83;
            *inst++ = MOD_REG | ADD | 4;
            *inst = compiler->local_size;
        }
        else {
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
            FAIL_IF(!inst);
            INC_SIZE(7);
            *inst++ = REX_W;
            *inst++ = GROUP_BINARY_81;
            *inst++ = MOD_REG | ADD | 4;
            sljit_unaligned_store_s32(inst, compiler->local_size);
        }
    }

    tmp = compiler->scratches;
    for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
        size = reg_map[i] >= 8 ? 2 : 1;
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);
        INC_SIZE(size);
        if (reg_map[i] >= 8)
            *inst++ = REX_B;
        POP_REG(reg_lmap[i]);
    }

    tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
    for (i = tmp; i <= SLJIT_S0; i++) {
        size = reg_map[i] >= 8 ? 2 : 1;
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);
        INC_SIZE(size);
        if (reg_map[i] >= 8)
            *inst++ = REX_B;
        POP_REG(reg_lmap[i]);
    }

    inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
    FAIL_IF(!inst);
    INC_SIZE(1);
    RET();
    return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
    sljit_u8 *inst;
    sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32);

    inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
    FAIL_IF(!inst);
    INC_SIZE(length);
    if (rex)
        *inst++ = rex;
    *inst++ = opcode;
    sljit_unaligned_store_s32(inst, imm);
    return SLJIT_SUCCESS;
}

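/* Emits one instruction with the general x86-64 layout
     [66/F2/F3 prefix] [REX] [opcode ("size" bytes)] [ModRM] [SIB] [disp] [imm]
   and returns a pointer into the opcode field so the caller can fill in
   (or adjust) the opcode bytes and the ModRM reg bits. */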
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
    /* The register or immediate operand. */
    sljit_s32 a, sljit_sw imma,
    /* The general operand (not immediate). */
    sljit_s32 b, sljit_sw immb)
{
    sljit_u8 *inst;
    sljit_u8 *buf_ptr;
    sljit_u8 rex = 0;
    sljit_s32 flags = size & ~0xf;
    sljit_s32 inst_size;

    /* The immediate operand must be 32 bit. */
    SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
    /* Both cannot be switched on. */
    SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
    /* Size flags not allowed for typed instructions. */
    SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
    /* Both size flags cannot be switched on. */
    SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
    /* SSE2 and immediate is not possible. */
    SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
    SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
        && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
        && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

    size &= 0xf;
    inst_size = size;

    if (!compiler->mode32 && !(flags & EX86_NO_REXW))
        rex |= REX_W;
    else if (flags & EX86_REX)
        rex |= REX;

    if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
        inst_size++;
    if (flags & EX86_PREF_66)
        inst_size++;

    /* Calculate size of b. */
    inst_size += 1; /* mod r/m byte. */
    if (b & SLJIT_MEM) {
        if (!(b & OFFS_REG_MASK)) {
            if (NOT_HALFWORD(immb)) {
                PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
                immb = 0;
                if (b & REG_MASK)
                    b |= TO_OFFS_REG(TMP_REG2);
                else
                    b |= TMP_REG2;
            }
            else if (reg_lmap[b & REG_MASK] == 4)
                b |= TO_OFFS_REG(SLJIT_SP);
        }

        if ((b & REG_MASK) == SLJIT_UNUSED)
            inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
        else {
            if (reg_map[b & REG_MASK] >= 8)
                rex |= REX_B;

            if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
                /* Immediate operand. */
                if (immb <= 127 && immb >= -128)
                    inst_size += sizeof(sljit_s8);
                else
                    inst_size += sizeof(sljit_s32);
            }
            else if (reg_lmap[b & REG_MASK] == 5)
                inst_size += sizeof(sljit_s8);

            if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
                inst_size += 1; /* SIB byte. */
                if (reg_map[OFFS_REG(b)] >= 8)
                    rex |= REX_X;
            }
        }
    }
    else if (!(flags & EX86_SSE2_OP2)) {
        if (reg_map[b] >= 8)
            rex |= REX_B;
    }
    else if (freg_map[b] >= 8)
        rex |= REX_B;

    if (a & SLJIT_IMM) {
        if (flags & EX86_BIN_INS) {
            if (imma <= 127 && imma >= -128) {
                inst_size += 1;
                flags |= EX86_BYTE_ARG;
            } else
                inst_size += 4;
        }
        else if (flags & EX86_SHIFT_INS) {
            imma &= compiler->mode32 ? 0x1f : 0x3f;
            if (imma != 1) {
                inst_size++;
                flags |= EX86_BYTE_ARG;
            }
        } else if (flags & EX86_BYTE_ARG)
            inst_size++;
        else if (flags & EX86_HALF_ARG)
            inst_size += sizeof(short);
        else
            inst_size += sizeof(sljit_s32);
    }
    else {
        SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
        /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
        if (!(flags & EX86_SSE2_OP1)) {
            if (reg_map[a] >= 8)
                rex |= REX_R;
        }
        else if (freg_map[a] >= 8)
            rex |= REX_R;
    }

    if (rex)
        inst_size++;

    inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
    PTR_FAIL_IF(!inst);

    /* Encoding the byte. */
    INC_SIZE(inst_size);
    if (flags & EX86_PREF_F2)
        *inst++ = 0xf2;
    if (flags & EX86_PREF_F3)
        *inst++ = 0xf3;
    if (flags & EX86_PREF_66)
        *inst++ = 0x66;
    if (rex)
        *inst++ = rex;
    buf_ptr = inst + size;

    /* Encode mod/rm byte. */
    if (!(flags & EX86_SHIFT_INS)) {
        if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
            *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

        if (a & SLJIT_IMM)
            *buf_ptr = 0;
        else if (!(flags & EX86_SSE2_OP1))
            *buf_ptr = reg_lmap[a] << 3;
        else
            *buf_ptr = freg_lmap[a] << 3;
    }
    else {
        if (a & SLJIT_IMM) {
            if (imma == 1)
                *inst = GROUP_SHIFT_1;
            else
                *inst = GROUP_SHIFT_N;
        } else
            *inst = GROUP_SHIFT_CL;
        *buf_ptr = 0;
    }

    if (!(b & SLJIT_MEM))
        *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : freg_lmap[b]);
    else if ((b & REG_MASK) != SLJIT_UNUSED) {
        if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
            if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
                if (immb <= 127 && immb >= -128)
                    *buf_ptr |= 0x40;
                else
                    *buf_ptr |= 0x80;
            }

            if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
                *buf_ptr++ |= reg_lmap[b & REG_MASK];
            else {
                *buf_ptr++ |= 0x04;
                *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
            }

            if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
                if (immb <= 127 && immb >= -128)
                    *buf_ptr++ = immb; /* 8 bit displacement. */
                else {
                    sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
                    buf_ptr += sizeof(sljit_s32);
                }
            }
        }
        else {
            if (reg_lmap[b & REG_MASK] == 5)
                *buf_ptr |= 0x40;
            *buf_ptr++ |= 0x04;
            *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
            if (reg_lmap[b & REG_MASK] == 5)
                *buf_ptr++ = 0;
        }
    }
    else {
        *buf_ptr++ |= 0x04;
        *buf_ptr++ = 0x25;
        sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
        buf_ptr += sizeof(sljit_s32);
    }

    if (a & SLJIT_IMM) {
        if (flags & EX86_BYTE_ARG)
            *buf_ptr = imma;
        else if (flags & EX86_HALF_ARG)
            sljit_unaligned_store_s16(buf_ptr, imma);
        else if (!(flags & EX86_SHIFT_INS))
            sljit_unaligned_store_s32(buf_ptr, imma);
    }

    return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

#ifndef _WIN64

static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw)
{
    sljit_s32 src = src_ptr ? (*src_ptr) : 0;
    sljit_s32 word_arg_count = 0;

    SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);
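
    /* System V passes the first three word arguments in rdi, rsi, rdx.
       SLJIT_R1 is already rsi; SLJIT_R2 is staged through TMP_REG1 (rdx)
       before the first argument register is overwritten with SLJIT_R0. */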

    compiler->mode32 = 0;

    /* Remove return value. */
    arg_types >>= SLJIT_DEF_SHIFT;

    while (arg_types) {
        if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32)
            word_arg_count++;
        arg_types >>= SLJIT_DEF_SHIFT;
    }

    if (word_arg_count == 0)
        return SLJIT_SUCCESS;

    if (src & SLJIT_MEM) {
        ADJUST_LOCAL_OFFSET(src, srcw);
        EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
        *src_ptr = TMP_REG2;
    }
    else if (src == SLJIT_R2 && word_arg_count >= SLJIT_R2)
        *src_ptr = TMP_REG1;

    if (word_arg_count >= 3)
        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
    return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
}

#else

static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw)
{
    sljit_s32 src = src_ptr ? (*src_ptr) : 0;
    sljit_s32 arg_count = 0;
    sljit_s32 word_arg_count = 0;
    sljit_s32 float_arg_count = 0;
    sljit_s32 types = 0;
    sljit_s32 data_transfer = 0;
    static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };

    SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
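
    /* Win64 passes the first four arguments in rcx, rdx, r8, r9; per the
       assert above these are SLJIT_R3, SLJIT_R1, SLJIT_R2 and TMP_REG1,
       which is exactly the word_arg_regs table. Float arguments occupy
       the same positional slots (xmm0-xmm3). */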

    compiler->mode32 = 0;
    arg_types >>= SLJIT_DEF_SHIFT;

    while (arg_types) {
        types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);

        switch (arg_types & SLJIT_DEF_MASK) {
        case SLJIT_ARG_TYPE_F32:
        case SLJIT_ARG_TYPE_F64:
            arg_count++;
            float_arg_count++;

            if (arg_count != float_arg_count)
                data_transfer = 1;
            break;
        default:
            arg_count++;
            word_arg_count++;

            if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
                data_transfer = 1;

                if (src == word_arg_regs[arg_count]) {
                    EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
                    *src_ptr = TMP_REG2;
                }
            }
            break;
        }

        arg_types >>= SLJIT_DEF_SHIFT;
    }

    if (!data_transfer)
        return SLJIT_SUCCESS;

    if (src & SLJIT_MEM) {
        ADJUST_LOCAL_OFFSET(src, srcw);
        EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
        *src_ptr = TMP_REG2;
    }

    while (types) {
        switch (types & SLJIT_DEF_MASK) {
        case SLJIT_ARG_TYPE_F32:
            if (arg_count != float_arg_count)
                FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
            arg_count--;
            float_arg_count--;
            break;
        case SLJIT_ARG_TYPE_F64:
            if (arg_count != float_arg_count)
                FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
            arg_count--;
            float_arg_count--;
            break;
        default:
            if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
                EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
            arg_count--;
            word_arg_count--;
            break;
        }

        types >>= SLJIT_DEF_SHIFT;
    }

    return SLJIT_SUCCESS;
}

#endif

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
    sljit_s32 arg_types)
{
    CHECK_ERROR_PTR();
    CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

    PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
        || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
    compiler->skip_checks = 1;
#endif

    return sljit_emit_jump(compiler, type);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
    sljit_s32 arg_types,
    sljit_s32 src, sljit_sw srcw)
{
    CHECK_ERROR();
    CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

    FAIL_IF(call_with_args(compiler, arg_types, &src, srcw));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
        || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
    compiler->skip_checks = 1;
#endif

    return sljit_emit_ijump(compiler, type, src, srcw);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
    sljit_u8 *inst;

    CHECK_ERROR();
    CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
    ADJUST_LOCAL_OFFSET(dst, dstw);

    /* For UNUSED dst. Uncommon, but possible. */
    if (dst == SLJIT_UNUSED)
        dst = TMP_REG1;

    if (FAST_IS_REG(dst)) {
        if (reg_map[dst] < 8) {
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
            FAIL_IF(!inst);
            INC_SIZE(1);
            POP_REG(reg_lmap[dst]);
            return SLJIT_SUCCESS;
        }

        inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
        FAIL_IF(!inst);
        INC_SIZE(2);
        *inst++ = REX_B;
        POP_REG(reg_lmap[dst]);
        return SLJIT_SUCCESS;
    }

    /* REX_W is not necessary (src is not immediate). */
    compiler->mode32 = 1;
    inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
    FAIL_IF(!inst);
    *inst++ = POP_rm;
    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
    sljit_u8 *inst;

    CHECK_ERROR();
    CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
    ADJUST_LOCAL_OFFSET(src, srcw);

    if (FAST_IS_REG(src)) {
        if (reg_map[src] < 8) {
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
            FAIL_IF(!inst);

            INC_SIZE(1 + 1);
            PUSH_REG(reg_lmap[src]);
        }
        else {
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
            FAIL_IF(!inst);

            INC_SIZE(2 + 1);
            *inst++ = REX_B;
            PUSH_REG(reg_lmap[src]);
        }
    }
    else {
        /* REX_W is not necessary (src is not immediate). */
        compiler->mode32 = 1;
        inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
        FAIL_IF(!inst);
        *inst++ = GROUP_FF;
        *inst |= PUSH_rm;

        inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
        FAIL_IF(!inst);
        INC_SIZE(1);
    }

    RET();
    return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Extend input                                                         */
/* --------------------------------------------------------------------- */

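/* Moves a 32-bit value into a 64-bit destination: MOVSXD for the signed
   case; in the unsigned case a plain 32-bit mov suffices, since x86-64
   zero-extends every 32-bit register write. */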
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
{
    sljit_u8* inst;
    sljit_s32 dst_r;

    compiler->mode32 = 0;

    if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
        return SLJIT_SUCCESS; /* Empty instruction. */

    if (src & SLJIT_IMM) {
        if (FAST_IS_REG(dst)) {
            if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
                inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
                FAIL_IF(!inst);
                *inst = MOV_rm_i32;
                return SLJIT_SUCCESS;
            }
            return emit_load_imm64(compiler, dst, srcw);
        }
        compiler->mode32 = 1;
        inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
        FAIL_IF(!inst);
        *inst = MOV_rm_i32;
        compiler->mode32 = 0;
        return SLJIT_SUCCESS;
    }

    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

    if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
        dst_r = src;
    else {
        if (sign) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
            FAIL_IF(!inst);
            *inst++ = MOVSXD_r_rm;
        } else {
            compiler->mode32 = 1;
            FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
            compiler->mode32 = 0;
        }
    }

    if (dst & SLJIT_MEM) {
        compiler->mode32 = 1;
        inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
        FAIL_IF(!inst);
        *inst = MOV_rm_r;
        compiler->mode32 = 0;
    }

    return SLJIT_SUCCESS;
}