"Fossies" - the Fresh Open Source Software Archive

Member "AutoHotkey_L-1.1.33.09/source/lib_pcre/pcre/sljit/sljitNativeX86_64.c" (8 May 2021, 22576 Bytes) of package /windows/misc/AutoHotkey_L-1.1.33.09.zip:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "sljitNativeX86_64.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  *    Stack-less Just-In-Time compiler
    3  *
    4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without modification, are
    7  * permitted provided that the following conditions are met:
    8  *
    9  *   1. Redistributions of source code must retain the above copyright notice, this list of
   10  *      conditions and the following disclaimer.
   11  *
   12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
   13  *      of conditions and the following disclaimer in the documentation and/or other materials
   14  *      provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
   17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
   19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  */
   26 
   27 /* x86 64-bit arch dependent functions. */
   28 
   29 static int emit_load_imm64(struct sljit_compiler *compiler, int reg, sljit_w imm)
   30 {
   31     sljit_ub *buf;
   32 
   33     buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_w));
   34     FAIL_IF(!buf);
   35     INC_SIZE(2 + sizeof(sljit_w));
   36     *buf++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
   37     *buf++ = 0xb8 + (reg_map[reg] & 0x7);
   38     *(sljit_w*)buf = imm;
   39     return SLJIT_SUCCESS;
   40 }
   41 
   42 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type)
   43 {
   44     if (type < SLJIT_JUMP) {
   45         *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
   46         *code_ptr++ = 10 + 3;
   47     }
   48 
   49     SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first);
   50     *code_ptr++ = REX_W | REX_B;
   51     *code_ptr++ = 0xb8 + 1;
   52     jump->addr = (sljit_uw)code_ptr;
   53 
   54     if (jump->flags & JUMP_LABEL)
   55         jump->flags |= PATCH_MD;
   56     else
   57         *(sljit_w*)code_ptr = jump->u.target;
   58 
   59     code_ptr += sizeof(sljit_w);
   60     *code_ptr++ = REX_B;
   61     *code_ptr++ = 0xff;
   62     *code_ptr++ = (type >= SLJIT_FAST_CALL) ? 0xd1 /* call */ : 0xe1 /* jmp */;
   63 
   64     return code_ptr;
   65 }
   66 
   67 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type)
   68 {
   69     sljit_w delta = addr - ((sljit_w)code_ptr + 1 + sizeof(sljit_hw));
   70 
   71     if (delta <= SLJIT_W(0x7fffffff) && delta >= SLJIT_W(-0x80000000)) {
   72         *code_ptr++ = (type == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
   73         *(sljit_w*)code_ptr = delta;
   74     }
   75     else {
   76         SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
   77         *code_ptr++ = REX_W | REX_B;
   78         *code_ptr++ = 0xb8 + 1;
   79         *(sljit_w*)code_ptr = addr;
   80         code_ptr += sizeof(sljit_w);
   81         *code_ptr++ = REX_B;
   82         *code_ptr++ = 0xff;
   83         *code_ptr++ = (type == 2) ? 0xd1 /* call */ : 0xe1 /* jmp */;
   84     }
   85 
   86     return code_ptr;
   87 }
   88 
   89 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
   90 {
          /* Emits the function prologue:
               1. pushes the saved registers in use (and, on Win64, one extra
                  temporary register and the locals register when needed),
               2. copies the first `args` incoming argument registers into
                  SLJIT_SAVED_REG1..3,
               3. lowers the stack pointer to make room for the local area.
             Also records temporaries, saveds and the final local_size in the
             compiler so that sljit_emit_return can undo the frame.
             Returns SLJIT_SUCCESS, or leaves early through FAIL_IF when a
             buffer reservation or a nested emit call fails.
             NOTE: the byte pointer must be called "buf"; INC_SIZE and PUSH_REG
             take no pointer argument and presumably write through it. */
   91     int size, pushed_size;
   92     sljit_ub *buf;
   93 
   94     CHECK_ERROR();
   95     check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size);
   96 
   97     compiler->temporaries = temporaries;
   98     compiler->saveds = saveds;
   99     compiler->flags_saved = 0;
  100 
          /* size     = number of code bytes for the pushes and argument moves:
                        one byte per push plus one REX prefix byte for every
                        pushed register that is encoded as r8-r15.
             pushed_size = number of stack bytes occupied once the pushes are done. */
  101     size = saveds;
  102     /* Including the return address saved by the call instruction. */
  103     pushed_size = (saveds + 1) * sizeof(sljit_w);
  104 #ifndef _WIN64
          /* Non-Win64: every saved register from the second one on needs a REX prefix
             (see the *_is_hireg asserts below). */
  105     if (saveds >= 2)
  106         size += saveds - 1;
  107 #else
  108     /* Saving the virtual stack pointer. */
  109     compiler->has_locals = local_size > 0;
  110     if (local_size > 0) {
  111         size += 2;
  112         pushed_size += sizeof(sljit_w);
  113     }
          /* Win64: only the fourth and fifth saved registers need a REX prefix. */
  114     if (saveds >= 4)
  115         size += saveds - 3;
          /* Win64: the fifth temporary register is pushed as well (REX + push). */
  116     if (temporaries >= 5) {
  117         size += (5 - 4) * 2;
  118         pushed_size += sizeof(sljit_w);
  119     }
  120 #endif
          /* Each argument move below is three bytes: REX, 0x8b, mod r/m. */
  121     size += args * 3;
  122     if (size > 0) {
  123         buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
  124         FAIL_IF(!buf);
  125 
  126         INC_SIZE(size);
              /* Pushes are emitted from the highest saved register down to the first;
                 sljit_emit_return pops in the reverse order. */
  127         if (saveds >= 5) {
  128             SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg);
  129             *buf++ = REX_B;
  130             PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]);
  131         }
  132         if (saveds >= 4) {
  133             SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg);
  134             *buf++ = REX_B;
  135             PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]);
  136         }
  137         if (saveds >= 3) {
  138 #ifndef _WIN64
  139             SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg);
  140             *buf++ = REX_B;
  141 #else
  142             SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg);
  143 #endif
  144             PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]);
  145         }
  146         if (saveds >= 2) {
  147 #ifndef _WIN64
  148             SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg);
  149             *buf++ = REX_B;
  150 #else
  151             SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg);
  152 #endif
  153             PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]);
  154         }
  155         if (saveds >= 1) {
  156             SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg);
  157             PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]);
  158         }
  159 #ifdef _WIN64
  160         if (temporaries >= 5) {
  161             SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg);
  162             *buf++ = REX_B;
  163             PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
  164         }
  165         if (local_size > 0) {
  166             SLJIT_COMPILE_ASSERT(reg_map[SLJIT_LOCALS_REG] >= 8, locals_reg_is_hireg);
  167             *buf++ = REX_B;
  168             PUSH_REG(reg_lmap[SLJIT_LOCALS_REG]);
  169         }
  170 #endif
  171 
              /* Argument moves: "mov saved_reg, arg_reg" (opcode 0x8b, register-direct mod r/m).
                 The r/m field selects the incoming argument register:
                 7, 6, 2 (rdi, rsi, rdx) in the non-Win64 branch and
                 1, 2, r8 (rcx, rdx, r8) in the Win64 branch. */
  172 #ifndef _WIN64
  173         if (args > 0) {
  174             *buf++ = REX_W;
  175             *buf++ = 0x8b;
  176             *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7;
  177         }
  178         if (args > 1) {
  179             *buf++ = REX_W | REX_R;
  180             *buf++ = 0x8b;
  181             *buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6;
  182         }
  183         if (args > 2) {
  184             *buf++ = REX_W | REX_R;
  185             *buf++ = 0x8b;
  186             *buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2;
  187         }
  188 #else
  189         if (args > 0) {
  190             *buf++ = REX_W;
  191             *buf++ = 0x8b;
  192             *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1;
  193         }
  194         if (args > 1) {
  195             *buf++ = REX_W;
  196             *buf++ = 0x8b;
  197             *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2;
  198         }
  199         if (args > 2) {
  200             *buf++ = REX_W | REX_B;
  201             *buf++ = 0x8b;
  202             *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0;
  203         }
  204 #endif
  205     }
  206 
          /* Round the local area up so that pushed_size + local_size is a multiple of 16. */
  207     local_size = ((local_size + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
  208 #ifdef _WIN64
          /* Four extra words are always reserved on Win64 (presumably the area a
             Win64 callee may use to store its register arguments - confirm). */
  209     local_size += 4 * sizeof(sljit_w);
  210     compiler->local_size = local_size;
  211     if (local_size > 1024) {
              /* Large frame: first lower the stack pointer by only 4 or 5 words,
                 then call sljit_touch_stack with the remaining size in
                 SLJIT_TEMPORARY_REG1 (presumably to touch the stack pages before
                 they are used - confirm). The remaining size is subtracted from
                 the stack pointer by the common code further down. */
  212         /* Allocate the stack for the function itself. */
  213         buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
  214         FAIL_IF(!buf);
  215         INC_SIZE(4);
              /* sub rsp, imm8 (0x83 /5, r/m = 4). */
  216         *buf++ = REX_W;
  217         *buf++ = 0x83;
  218         *buf++ = 0xc0 | (5 << 3) | 4;
  219         /* Pushed size must be divisible by 8. */
  220         SLJIT_ASSERT(!(pushed_size & 0x7));
              /* 5 words when pushed_size is an odd multiple of 8, 4 words otherwise,
                 so that pushed_size plus this adjustment is a multiple of 16. */
  221         if (pushed_size & 0x8) {
  222             *buf++ = 5 * sizeof(sljit_w);
  223             local_size -= 5 * sizeof(sljit_w);
  224         } else {
  225             *buf++ = 4 * sizeof(sljit_w);
  226             local_size -= 4 * sizeof(sljit_w);
  227         }
  228         FAIL_IF(emit_load_imm64(compiler, SLJIT_TEMPORARY_REG1, local_size));
  229         FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_touch_stack)));
  230     }
  231 #else
  232     compiler->local_size = local_size;
          /* NOTE: this opening brace is closed inside the "#ifndef _WIN64" block below;
             on Win64 the stack adjustment is emitted unconditionally. */
  233     if (local_size > 0) {
  234 #endif
  235         /* In case of Win64, local_size is always > 4 * sizeof(sljit_w) */
              /* sub rsp, local_size: imm8 form (0x83) up to 127, imm32 form (0x81) otherwise. */
  236         if (local_size <= 127) {
  237             buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
  238             FAIL_IF(!buf);
  239             INC_SIZE(4);
  240             *buf++ = REX_W;
  241             *buf++ = 0x83;
  242             *buf++ = 0xc0 | (5 << 3) | 4;
  243             *buf++ = local_size;
  244         }
  245         else {
  246             buf = (sljit_ub*)ensure_buf(compiler, 1 + 7);
  247             FAIL_IF(!buf);
  248             INC_SIZE(7);
  249             *buf++ = REX_W;
  250             *buf++ = 0x81;
  251             *buf++ = 0xc0 | (5 << 3) | 4;
  252             *(sljit_hw*)buf = local_size;
  253             buf += sizeof(sljit_hw);
  254         }
  255 #ifndef _WIN64
  256     }
  257 #endif
  258 
  259 #ifdef _WIN64
          /* Win64 with locals: lea SLJIT_LOCALS_REG, [rsp + 4 * sizeof(sljit_w)]
             (0x8d, mod r/m with 8-bit displacement, SIB 0x24), i.e. the locals
             register points just above the four reserved words. */
  260     if (compiler->has_locals) {
  261         buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
  262         FAIL_IF(!buf);
  263         INC_SIZE(5);
  264         *buf++ = REX_W | REX_R;
  265         *buf++ = 0x8d;
  266         *buf++ = 0x40 | (reg_lmap[SLJIT_LOCALS_REG] << 3) | 0x4;
  267         *buf++ = 0x24;
  268         *buf = 4 * sizeof(sljit_w);
  269     }
  270 #endif
  271 
  272     return SLJIT_SUCCESS;
  273 }
  274 
  275 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
  276 {
  277     int pushed_size;
  278 
  279     CHECK_ERROR_VOID();
  280     check_sljit_set_context(compiler, args, temporaries, saveds, local_size);
  281 
  282     compiler->temporaries = temporaries;
  283     compiler->saveds = saveds;
  284     /* Including the return address saved by the call instruction. */
  285     pushed_size = (saveds + 1) * sizeof(sljit_w);
  286 #ifdef _WIN64
  287     compiler->has_locals = local_size > 0;
  288     if (local_size > 0)
  289         pushed_size += sizeof(sljit_w);
  290     if (temporaries >= 5)
  291         pushed_size += sizeof(sljit_w);
  292 #endif
  293     compiler->local_size = ((local_size + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
  294 #ifdef _WIN64
  295     compiler->local_size += 4 * sizeof(sljit_w);
  296 #endif
  297 }
  298 
  299 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw)
  300 {
  301     int size;
  302     sljit_ub *buf;
  303 
  304     CHECK_ERROR();
  305     check_sljit_emit_return(compiler, op, src, srcw);
  306 
  307     compiler->flags_saved = 0;
  308     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
  309 
  310     if (compiler->local_size > 0) {
  311         if (compiler->local_size <= 127) {
  312             buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
  313             FAIL_IF(!buf);
  314             INC_SIZE(4);
  315             *buf++ = REX_W;
  316             *buf++ = 0x83;
  317             *buf++ = 0xc0 | (0 << 3) | 4;
  318             *buf = compiler->local_size;
  319         }
  320         else {
  321             buf = (sljit_ub*)ensure_buf(compiler, 1 + 7);
  322             FAIL_IF(!buf);
  323             INC_SIZE(7);
  324             *buf++ = REX_W;
  325             *buf++ = 0x81;
  326             *buf++ = 0xc0 | (0 << 3) | 4;
  327             *(sljit_hw*)buf = compiler->local_size;
  328         }
  329     }
  330 
  331     size = 1 + compiler->saveds;
  332 #ifndef _WIN64
  333     if (compiler->saveds >= 2)
  334         size += compiler->saveds - 1;
  335 #else
  336     if (compiler->has_locals)
  337         size += 2;
  338     if (compiler->saveds >= 4)
  339         size += compiler->saveds - 3;
  340     if (compiler->temporaries >= 5)
  341         size += (5 - 4) * 2;
  342 #endif
  343     buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
  344     FAIL_IF(!buf);
  345 
  346     INC_SIZE(size);
  347 
  348 #ifdef _WIN64
  349     if (compiler->has_locals) {
  350         *buf++ = REX_B;
  351         POP_REG(reg_lmap[SLJIT_LOCALS_REG]);
  352     }
  353     if (compiler->temporaries >= 5) {
  354         *buf++ = REX_B;
  355         POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
  356     }
  357 #endif
  358     if (compiler->saveds >= 1)
  359         POP_REG(reg_map[SLJIT_SAVED_REG1]);
  360     if (compiler->saveds >= 2) {
  361 #ifndef _WIN64
  362         *buf++ = REX_B;
  363 #endif
  364         POP_REG(reg_lmap[SLJIT_SAVED_REG2]);
  365     }
  366     if (compiler->saveds >= 3) {
  367 #ifndef _WIN64
  368         *buf++ = REX_B;
  369 #endif
  370         POP_REG(reg_lmap[SLJIT_SAVED_REG3]);
  371     }
  372     if (compiler->saveds >= 4) {
  373         *buf++ = REX_B;
  374         POP_REG(reg_lmap[SLJIT_SAVED_EREG1]);
  375     }
  376     if (compiler->saveds >= 5) {
  377         *buf++ = REX_B;
  378         POP_REG(reg_lmap[SLJIT_SAVED_EREG2]);
  379     }
  380 
  381     RET();
  382     return SLJIT_SUCCESS;
  383 }
  384 
  385 /* --------------------------------------------------------------------- */
  386 /*  Operators                                                            */
  387 /* --------------------------------------------------------------------- */
  388 
  389 static int emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_w imm)
  390 {
  391     sljit_ub *buf;
  392 
  393     if (rex != 0) {
  394         buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_hw));
  395         FAIL_IF(!buf);
  396         INC_SIZE(2 + sizeof(sljit_hw));
  397         *buf++ = rex;
  398         *buf++ = opcode;
  399         *(sljit_hw*)buf = (sljit_hw)imm;
  400     }
  401     else {
  402         buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_hw));
  403         FAIL_IF(!buf);
  404         INC_SIZE(1 + sizeof(sljit_hw));
  405         *buf++ = opcode;
  406         *(sljit_hw*)buf = (sljit_hw)imm;
  407     }
  408     return SLJIT_SUCCESS;
  409 }
  410 
  411 static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, int size,
  412     /* The register or immediate operand. */
  413     int a, sljit_w imma,
  414     /* The general operand (not immediate). */
  415     int b, sljit_w immb)
  416 {
          /* Reserves buffer space for one instruction and encodes everything except
             the opcode: prefixes (0xf2, 0x66, REX), the mod r/m byte, an optional SIB
             byte, the displacement of operand b and the immediate of operand a.
               size: low 4 bits = number of opcode bytes the caller will write,
                     remaining bits = EX86_* flags.
             For EX86_BIN_INS with an immediate and for EX86_SHIFT_INS the first
             opcode byte is written here as well.
             Returns a pointer to the opcode position (one byte further for shift
             instructions, i.e. the mod r/m byte), or NULL when a nested
             emit_load_imm64 call fails; PTR_FAIL_IF presumably returns NULL too
             when ensure_buf fails.
             NOTE: the byte pointer must be called "buf"; INC_SIZE presumably
             writes through it. */
  417     sljit_ub *buf;
  418     sljit_ub *buf_ptr;
  419     sljit_ub rex = 0;
  420     int flags = size & ~0xf;
  421     int inst_size;
  422 
  423     /* The immediate operand must be 32 bit. */
  424     SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
  425     /* Both cannot be switched on. */
  426     SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
  427     /* Size flags not allowed for typed instructions. */
  428     SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
  429     /* Both size flags cannot be switched on. */
  430     SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
  431 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
  432     /* SSE2 and immediate is not possible. */
  433     SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
  434 #endif
  435 
          /* From here on "size" is only the opcode byte count; inst_size accumulates
             the total number of bytes that will be reserved. */
  436     size &= 0xf;
  437     inst_size = size;
  438 
          /* A memory operand without an index register whose displacement does not fit
             in 32 bits: load the displacement into TMP_REG3 and use that register as
             the index (when a base register is present) or as the base. */
  439     if ((b & SLJIT_MEM) && !(b & 0xf0) && NOT_HALFWORD(immb)) {
  440         if (emit_load_imm64(compiler, TMP_REG3, immb))
  441             return NULL;
  442         immb = 0;
  443         if (b & 0xf)
  444             b |= TMP_REG3 << 4;
  445         else
  446             b |= TMP_REG3;
  447     }
  448 
          /* REX.W unless 32-bit mode or EX86_NO_REXW is requested; otherwise a plain
             REX prefix only when EX86_REX asks for one. */
  449     if (!compiler->mode32 && !(flags & EX86_NO_REXW))
  450         rex |= REX_W;
  451     else if (flags & EX86_REX)
  452         rex |= REX;
  453 
  454 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
  455     if (flags & EX86_PREF_F2)
  456         inst_size++;
  457 #endif
  458     if (flags & EX86_PREF_66)
  459         inst_size++;
  460 
  461     /* Calculate size of b. */
  462     inst_size += 1; /* mod r/m byte. */
  463     if (b & SLJIT_MEM) {
  464         if ((b & 0x0f) == SLJIT_UNUSED)
  465             inst_size += 1 + sizeof(sljit_hw); /* SIB byte required to avoid RIP based addressing. */
  466         else {
  467             if (reg_map[b & 0x0f] >= 8)
  468                 rex |= REX_B;
  469             if (immb != 0 && !(b & 0xf0)) {
  470                 /* Immediate operand. */
  471                 if (immb <= 127 && immb >= -128)
  472                     inst_size += sizeof(sljit_b);
  473                 else
  474                     inst_size += sizeof(sljit_hw);
  475             }
  476         }
  477 
  478 #ifndef _WIN64
              /* Non-Win64: a plain [SLJIT_LOCALS_REG + disp] operand is marked by also
                 placing SLJIT_LOCALS_REG in the index field, which forces the SIB form
                 below (presumably because SLJIT_LOCALS_REG maps to rsp, whose r/m
                 encoding always requires a SIB byte - confirm). */
  479         if ((b & 0xf) == SLJIT_LOCALS_REG && (b & 0xf0) == 0)
  480             b |= SLJIT_LOCALS_REG << 4;
  481 #endif
  482 
  483         if ((b & 0xf0) != SLJIT_UNUSED) {
  484             inst_size += 1; /* SIB byte. */
  485             if (reg_map[(b >> 4) & 0x0f] >= 8)
  486                 rex |= REX_X;
  487         }
  488     }
  489 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
  490     else if (!(flags & EX86_SSE2) && reg_map[b] >= 8)
  491         rex |= REX_B;
  492 #else
  493     else if (reg_map[b] >= 8)
  494         rex |= REX_B;
  495 #endif
  496 
          /* Size of the immediate belonging to operand a, or the REX.R bit when a is a register. */
  497     if (a & SLJIT_IMM) {
  498         if (flags & EX86_BIN_INS) {
  499             if (imma <= 127 && imma >= -128) {
  500                 inst_size += 1;
  501                 flags |= EX86_BYTE_ARG;
  502             } else
  503                 inst_size += 4;
  504         }
  505         else if (flags & EX86_SHIFT_INS) {
                  /* The shift count is masked to the operand width; a count of 1 uses the
                     opcode form without an immediate byte. */
  506             imma &= compiler->mode32 ? 0x1f : 0x3f;
  507             if (imma != 1) {
  508                 inst_size ++;
  509                 flags |= EX86_BYTE_ARG;
  510             }
  511         } else if (flags & EX86_BYTE_ARG)
  512             inst_size++;
  513         else if (flags & EX86_HALF_ARG)
  514             inst_size += sizeof(short);
  515         else
  516             inst_size += sizeof(sljit_hw);
  517     }
  518     else {
  519         SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
  520         /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
  521 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
  522         if (!(flags & EX86_SSE2) && reg_map[a] >= 8)
  523             rex |= REX_R;
  524 #else
  525         if (reg_map[a] >= 8)
  526             rex |= REX_R;
  527 #endif
  528     }
  529 
  530     if (rex)
  531         inst_size++;
  532 
  533     buf = (sljit_ub*)ensure_buf(compiler, 1 + inst_size);
  534     PTR_FAIL_IF(!buf);
  535 
  536     /* Encoding the byte. */
  537     INC_SIZE(inst_size);
  538 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
  539     if (flags & EX86_PREF_F2)
  540         *buf++ = 0xf2;
  541 #endif
  542     if (flags & EX86_PREF_66)
  543         *buf++ = 0x66;
  544     if (rex)
  545         *buf++ = rex;
          /* buf now addresses the opcode position; buf_ptr addresses the mod r/m byte
             that follows the "size" opcode bytes. */
  546     buf_ptr = buf + size;
  547 
  548     /* Encode mod/rm byte. */
  549     if (!(flags & EX86_SHIFT_INS)) {
              /* Binary instruction with immediate: opcode 0x83 (8-bit immediate) or 0x81. */
  550         if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
  551             *buf = (flags & EX86_BYTE_ARG) ? 0x83 : 0x81;
  552 
              /* The reg field starts as 0 for immediates (the caller may OR an opcode
                 extension into it) and otherwise holds the low bits of register a. */
  553         if ((a & SLJIT_IMM) || (a == 0))
  554             *buf_ptr = 0;
  555 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
  556         else if (!(flags & EX86_SSE2))
  557             *buf_ptr = reg_lmap[a] << 3;
  558         else
  559             *buf_ptr = a << 3;
  560 #else
  561         else
  562             *buf_ptr = reg_lmap[a] << 3;
  563 #endif
  564     }
  565     else {
              /* Shift opcodes: 0xd1 (count 1), 0xc1 (8-bit immediate count), 0xd3 (count in
                 the shift register). */
  566         if (a & SLJIT_IMM) {
  567             if (imma == 1)
  568                 *buf = 0xd1;
  569             else
  570                 *buf = 0xc1;
  571         } else
  572             *buf = 0xd3;
  573         *buf_ptr = 0;
  574     }
  575 
          /* Fill in the mod and r/m fields (and SIB / displacement) for operand b. */
  576     if (!(b & SLJIT_MEM))
  577 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
  578         *buf_ptr++ |= 0xc0 + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b);
  579 #else
  580         *buf_ptr++ |= 0xc0 + reg_lmap[b];
  581 #endif
  582     else if ((b & 0x0f) != SLJIT_UNUSED) {
  583 #ifdef _WIN64
  584         SLJIT_ASSERT((b & 0xf0) != (SLJIT_LOCALS_REG << 4));
  585 #endif
              /* Base register with displacement and no real index register. */
  586         if ((b & 0xf0) == SLJIT_UNUSED || (b & 0xf0) == (SLJIT_LOCALS_REG << 4)) {
  587             if (immb != 0) {
                      /* mod = 01 for an 8-bit displacement, mod = 10 for a 32-bit one. */
  588                 if (immb <= 127 && immb >= -128)
  589                     *buf_ptr |= 0x40;
  590                 else
  591                     *buf_ptr |= 0x80;
  592             }
  593 
  594             if ((b & 0xf0) == SLJIT_UNUSED)
  595                 *buf_ptr++ |= reg_lmap[b & 0x0f];
  596             else {
  597                 *buf_ptr++ |= 0x04;
  598                 *buf_ptr++ = reg_lmap[b & 0x0f] | (reg_lmap[(b >> 4) & 0x0f] << 3);
  599             }
  600 
  601             if (immb != 0) {
  602                 if (immb <= 127 && immb >= -128)
  603                     *buf_ptr++ = (sljit_ub)immb; /* 8 bit displacement. */
  604                 else {
  605                     *(sljit_hw*)buf_ptr = (sljit_hw)immb; /* 32 bit displacement. */
  606                     buf_ptr += sizeof(sljit_hw);
  607                 }
  608             }
  609         }
  610         else {
                  /* Base + index: here immb is placed in the top two bits of the SIB byte
                     (the scale field) rather than being used as a displacement. */
  611             *buf_ptr++ |= 0x04;
  612             *buf_ptr++ = (sljit_ub)(reg_lmap[b & 0x0f] | (reg_lmap[(b >> 4) & 0x0f] << 3) | (immb << 6));
  613         }
  614     }
  615     else {
              /* No base register: r/m = 100 with SIB byte 0x25 followed by a 32-bit
                 displacement (absolute address, see the RIP remark above). */
  616         *buf_ptr++ |= 0x04;
  617         *buf_ptr++ = 0x25;
  618         *(sljit_hw*)buf_ptr = (sljit_hw)immb; /* 32 bit displacement. */
  619         buf_ptr += sizeof(sljit_hw);
  620     }
  621 
          /* Finally store the immediate of operand a. A shift by 1 carries no immediate
             (EX86_BYTE_ARG is not set for it), so nothing is written in that case. */
  622     if (a & SLJIT_IMM) {
  623         if (flags & EX86_BYTE_ARG)
  624             *buf_ptr = (sljit_ub)imma;
  625         else if (flags & EX86_HALF_ARG)
  626             *(short*)buf_ptr = (short)imma;
  627         else if (!(flags & EX86_SHIFT_INS))
  628             *(sljit_hw*)buf_ptr = (sljit_hw)imma;
  629     }
  630 
          /* Shift opcodes were already written above, so the caller gets the mod r/m byte. */
  631     return !(flags & EX86_SHIFT_INS) ? buf : (buf + 1);
  632 }
  633 
  634 /* --------------------------------------------------------------------- */
  635 /*  Call / return instructions                                           */
  636 /* --------------------------------------------------------------------- */
  637 
  638 static SLJIT_INLINE int call_with_args(struct sljit_compiler *compiler, int type)
  639 {
  640     sljit_ub *buf;
  641 
  642 #ifndef _WIN64
  643     SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_REG2] == 6 && reg_map[SLJIT_TEMPORARY_REG1] < 8 && reg_map[SLJIT_TEMPORARY_REG3] < 8, args_registers);
  644 
  645     buf = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
  646     FAIL_IF(!buf);
  647     INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
  648     if (type >= SLJIT_CALL3) {
  649         *buf++ = REX_W;
  650         *buf++ = 0x8b;
  651         *buf++ = 0xc0 | (0x2 << 3) | reg_lmap[SLJIT_TEMPORARY_REG3];
  652     }
  653     *buf++ = REX_W;
  654     *buf++ = 0x8b;
  655     *buf++ = 0xc0 | (0x7 << 3) | reg_lmap[SLJIT_TEMPORARY_REG1];
  656 #else
  657     SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_REG2] == 2 && reg_map[SLJIT_TEMPORARY_REG1] < 8 && reg_map[SLJIT_TEMPORARY_REG3] < 8, args_registers);
  658 
  659     buf = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
  660     FAIL_IF(!buf);
  661     INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
  662     if (type >= SLJIT_CALL3) {
  663         *buf++ = REX_W | REX_R;
  664         *buf++ = 0x8b;
  665         *buf++ = 0xc0 | (0x0 << 3) | reg_lmap[SLJIT_TEMPORARY_REG3];
  666     }
  667     *buf++ = REX_W;
  668     *buf++ = 0x8b;
  669     *buf++ = 0xc0 | (0x1 << 3) | reg_lmap[SLJIT_TEMPORARY_REG1];
  670 #endif
  671     return SLJIT_SUCCESS;
  672 }
  673 
  674 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size)
  675 {
  676     sljit_ub *buf;
  677 
  678     CHECK_ERROR();
  679     check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size);
  680 
  681     compiler->temporaries = temporaries;
  682     compiler->saveds = saveds;
  683     compiler->local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1);
  684 #ifdef _WIN64
  685     compiler->local_size += 4 * sizeof(sljit_w);
  686 #endif
  687 
  688     /* For UNUSED dst. Uncommon, but possible. */
  689     if (dst == SLJIT_UNUSED)
  690         dst = TMP_REGISTER;
  691 
  692     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
  693         if (reg_map[dst] < 8) {
  694             buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  695             FAIL_IF(!buf);
  696 
  697             INC_SIZE(1);
  698             POP_REG(reg_lmap[dst]);
  699         }
  700         else {
  701             buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
  702             FAIL_IF(!buf);
  703 
  704             INC_SIZE(2);
  705             *buf++ = REX_B;
  706             POP_REG(reg_lmap[dst]);
  707         }
  708     }
  709     else if (dst & SLJIT_MEM) {
  710 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  711         /* REX_W is not necessary (src is not immediate). */
  712         compiler->mode32 = 1;
  713 #endif
  714         buf = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  715         FAIL_IF(!buf);
  716         *buf++ = 0x8f;
  717     }
  718     return SLJIT_SUCCESS;
  719 }
  720 
  721 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw)
  722 {
  723     sljit_ub *buf;
  724 
  725     CHECK_ERROR();
  726     check_sljit_emit_fast_return(compiler, src, srcw);
  727 
  728     CHECK_EXTRA_REGS(src, srcw, (void)0);
  729 
  730     if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
  731         FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, srcw));
  732         src = TMP_REGISTER;
  733     }
  734 
  735     if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
  736         if (reg_map[src] < 8) {
  737             buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1);
  738             FAIL_IF(!buf);
  739 
  740             INC_SIZE(1 + 1);
  741             PUSH_REG(reg_lmap[src]);
  742         }
  743         else {
  744             buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1);
  745             FAIL_IF(!buf);
  746 
  747             INC_SIZE(2 + 1);
  748             *buf++ = REX_B;
  749             PUSH_REG(reg_lmap[src]);
  750         }
  751     }
  752     else if (src & SLJIT_MEM) {
  753 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  754         /* REX_W is not necessary (src is not immediate). */
  755         compiler->mode32 = 1;
  756 #endif
  757         buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
  758         FAIL_IF(!buf);
  759         *buf++ = 0xff;
  760         *buf |= 6 << 3;
  761 
  762         buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  763         FAIL_IF(!buf);
  764         INC_SIZE(1);
  765     }
  766     else {
  767         SLJIT_ASSERT(IS_HALFWORD(srcw));
  768         /* SLJIT_IMM. */
  769         buf = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1);
  770         FAIL_IF(!buf);
  771 
  772         INC_SIZE(5 + 1);
  773         *buf++ = 0x68;
  774         *(sljit_hw*)buf = (sljit_hw)srcw;
  775         buf += sizeof(sljit_hw);
  776     }
  777 
  778     RET();
  779     return SLJIT_SUCCESS;
  780 }
  781 
  782 
  783 /* --------------------------------------------------------------------- */
  784 /*  Extend input                                                         */
  785 /* --------------------------------------------------------------------- */
  786 
  787 static int emit_mov_int(struct sljit_compiler *compiler, int sign,
  788     int dst, sljit_w dstw,
  789     int src, sljit_w srcw)
  790 {
  791     sljit_ub* code;
  792     int dst_r;
  793 
  794     compiler->mode32 = 0;
  795 
  796     if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
  797         return SLJIT_SUCCESS; /* Empty instruction. */
  798 
  799     if (src & SLJIT_IMM) {
  800         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
  801             if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
  802                 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_w)(sljit_i)srcw, dst, dstw);
  803                 FAIL_IF(!code);
  804                 *code = 0xc7;
  805                 return SLJIT_SUCCESS;
  806             }
  807             return emit_load_imm64(compiler, dst, srcw);
  808         }
  809         compiler->mode32 = 1;
  810         code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_w)(sljit_i)srcw, dst, dstw);
  811         FAIL_IF(!code);
  812         *code = 0xc7;
  813         compiler->mode32 = 0;
  814         return SLJIT_SUCCESS;
  815     }
  816 
  817     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_SAVED_REG3) ? dst : TMP_REGISTER;
  818 
  819     if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_SAVED_REG3))
  820         dst_r = src;
  821     else {
  822         if (sign) {
  823             code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
  824             FAIL_IF(!code);
  825             *code++ = 0x63;
  826         } else {
  827             compiler->mode32 = 1;
  828             FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
  829             compiler->mode32 = 0;
  830         }
  831     }
  832 
  833     if (dst & SLJIT_MEM) {
  834         compiler->mode32 = 1;
  835         code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
  836         FAIL_IF(!code);
  837         *code = 0x89;
  838         compiler->mode32 = 0;
  839     }
  840 
  841     return SLJIT_SUCCESS;
  842 }