"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.42/sljit/sljitNativeARM_64.c" (13 Mar 2018, 60027 Bytes) of package /linux/misc/pcre-8.42.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "sljitNativeARM_64.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 8.41_vs_8.42.

    1 /*
    2  *    Stack-less Just-In-Time compiler
    3  *
    4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without modification, are
    7  * permitted provided that the following conditions are met:
    8  *
    9  *   1. Redistributions of source code must retain the above copyright notice, this list of
   10  *      conditions and the following disclaimer.
   11  *
   12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
   13  *      of conditions and the following disclaimer in the documentation and/or other materials
   14  *      provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
   17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
   19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  */
   26 
   27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
   28 {
   29     return "ARM-64" SLJIT_CPUINFO;
   30 }
   31 
   32 /* Type of one emitted instruction word (a sljit_u32). */
   33 typedef sljit_u32 sljit_ins;
   34 
      /* reg_map index of the zero register slot: reg_map[TMP_ZERO] is 31. The
         code generator passes TMP_ZERO wherever it needs a zero operand or
         wants to discard a result. */
   35 #define TMP_ZERO    (0)
   36 
      /* reg_map indices reserved for the code generator itself; they start at
         SLJIT_NUMBER_OF_REGISTERS + 2.
         NOTE(review): judging by the names, TMP_LR / TMP_FP stand for the link
         and frame registers -- confirm against reg_map and the entry/exit code. */
   37 #define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
   38 #define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
   39 #define TMP_LR      (SLJIT_NUMBER_OF_REGISTERS + 4)
   40 #define TMP_FP      (SLJIT_NUMBER_OF_REGISTERS + 5)
   41 
      /* freg_map indices of the two floating point scratch registers. */
   42 #define TMP_FREG1   (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
   43 #define TMP_FREG2   (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
   44 
      /* Translates an sljit register index into the register number that the
         RD / RT / RN / RT2 / RM macros place into an instruction word.
         The value 18 does not occur in the table: */
   45 /* r18 - platform register, currently not used */
   46 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
   47     31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29
   48 };
   49 
      /* The same translation for floating point register indices, used by the
         VD / VT / VN / VM macros. */
   50 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
   51     0, 0, 1, 2, 3, 4, 5, 6, 7
   52 };
   53 
   54 #define W_OP (1 << 31)
   55 #define RD(rd) (reg_map[rd])
   56 #define RT(rt) (reg_map[rt])
   57 #define RN(rn) (reg_map[rn] << 5)
   58 #define RT2(rt2) (reg_map[rt2] << 10)
   59 #define RM(rm) (reg_map[rm] << 16)
   60 #define VD(vd) (freg_map[vd])
   61 #define VT(vt) (freg_map[vt])
   62 #define VN(vn) (freg_map[vn] << 5)
   63 #define VM(vm) (freg_map[vm] << 16)
   64 
   65 /* --------------------------------------------------------------------- */
   66 /*  Instruction forms                                                    */
   67 /* --------------------------------------------------------------------- */
   68 
      /* Base bit patterns of the instructions emitted by this file. A complete
         instruction word is built by OR-ing one of these constants with the
         register fields (RD, RN, RM, ...) and any immediate bits, and is then
         handed to push_inst(). Several patterns have bit 31 set; the code
         XORs those with W_OP or with a local inv_bits mask to derive variants.
         NOTE(review): the names follow the A64 mnemonics; the individual
         encodings were not re-checked against the architecture manual here. */
   69 #define ADC 0x9a000000
   70 #define ADD 0x8b000000
   71 #define ADDE 0x8b200000
   72 #define ADDI 0x91000000
   73 #define AND 0x8a000000
   74 #define ANDI 0x92000000
   75 #define ASRV 0x9ac02800
   76 #define B 0x14000000
   77 #define B_CC 0x54000000
   78 #define BL 0x94000000
   79 #define BLR 0xd63f0000
   80 #define BR 0xd61f0000
   81 #define BRK 0xd4200000
   82 #define CBZ 0xb4000000
   83 #define CLZ 0xdac01000
   84 #define CSEL 0x9a800000
   85 #define CSINC 0x9a800400
   86 #define EOR 0xca000000
   87 #define EORI 0xd2000000
   88 #define FABS 0x1e60c000
   89 #define FADD 0x1e602800
   90 #define FCMP 0x1e602000
   91 #define FCVT 0x1e224000
   92 #define FCVTZS 0x9e780000
   93 #define FDIV 0x1e601800
   94 #define FMOV 0x1e604000
   95 #define FMUL 0x1e600800
   96 #define FNEG 0x1e614000
   97 #define FSUB 0x1e603800
   98 #define LDRI 0xf9400000
   99 #define LDP 0xa9400000
  100 #define LDP_PRE 0xa9c00000
  101 #define LDR_PRE 0xf8400c00
  102 #define LSLV 0x9ac02000
  103 #define LSRV 0x9ac02400
  104 #define MADD 0x9b000000
  105 #define MOVK 0xf2800000
  106 #define MOVN 0x92800000
  107 #define MOVZ 0xd2800000
  108 #define NOP 0xd503201f
  109 #define ORN 0xaa200000
  110 #define ORR 0xaa000000
  111 #define ORRI 0xb2000000
  112 #define RET 0xd65f0000
  113 #define SBC 0xda000000
  114 #define SBFM 0x93000000
  115 #define SCVTF 0x9e620000
  116 #define SDIV 0x9ac00c00
  117 #define SMADDL 0x9b200000
  118 #define SMULH 0x9b403c00
  119 #define STP 0xa9000000
  120 #define STP_PRE 0xa9800000
  121 #define STRB 0x38206800
  122 #define STRBI 0x39000000
  123 #define STRI 0xf9000000
  124 #define STR_FI 0x3d000000
  125 #define STR_FR 0x3c206800
  126 #define STUR_FI 0x3c000000
  127 #define STURBI 0x38000000
  128 #define SUB 0xcb000000
  129 #define SUBI 0xd1000000
  130 #define SUBS 0xeb000000
  131 #define UBFM 0xd3000000
  132 #define UDIV 0x9ac00800
  133 #define UMULH 0x9bc03c00
  134 
  135 /* dest_reg is the absolute name of the register
  136    Useful for reordering instructions in the delay slot. */
  137 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
  138 {
  139     sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
  140     FAIL_IF(!ptr);
  141     *ptr = ins;
  142     compiler->size++;
  143     return SLJIT_SUCCESS;
  144 }
  145 
  146 static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
  147 {
  148     FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
  149     FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21)));
  150     FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21)));
  151     return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21));
  152 }
  153 
  154 static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm)
  155 {
  156     sljit_s32 dst = inst[0] & 0x1f;
  157     SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21)));
  158     inst[0] = MOVZ | dst | ((new_imm & 0xffff) << 5);
  159     inst[1] = MOVK | dst | (((new_imm >> 16) & 0xffff) << 5) | (1 << 21);
  160     inst[2] = MOVK | dst | (((new_imm >> 32) & 0xffff) << 5) | (2 << 21);
  161     inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21);
  162 }
  163 
  164 static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
  165 {
          /* Chooses the encoding of one jump while the code is being copied and
             returns how many instruction words the caller can drop (the caller
             does code_ptr -= <return value>).
             On entry code_ptr[0] is the word just copied for this jump and
             jump->addr is code_ptr - 4. The four words at code_ptr[-4..-1] are
             the address load that the patch loop in sljit_generate_code()
             rewrites as MOVZ/MOVK. For IS_COND jumps code_ptr[-5] is the
             conditional branch in front of it (presumably emitted with the
             opposite condition so that it skips the sequence -- TODO confirm
             against the jump emitter).
             The decision is stored in jump->flags: PATCH_COND, PATCH_B,
             PATCH_ABS48, PATCH_ABS64, or none of them for a 32-bit address. */
  166     sljit_sw diff;
  167     sljit_uw target_addr;
  168 
          /* Rewritable jumps always keep the full four-instruction address load. */
  169     if (jump->flags & SLJIT_REWRITABLE_JUMP) {
  170         jump->flags |= PATCH_ABS64;
  171         return 0;
  172     }
  173 
          /* For label targets label->size is used as a word offset from code;
             the executable offset is added to get the final address. */
  174     if (jump->flags & JUMP_ADDR)
  175         target_addr = jump->u.target;
  176     else {
  177         SLJIT_ASSERT(jump->flags & JUMP_LABEL);
  178         target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
  179     }
  180 
          /* Byte distance used for the range checks below.
             NOTE(review): the base here is code_ptr + 4 words, while the patch
             loop computes branch offsets from jump->addr (code_ptr - 4, or one
             word earlier for PATCH_COND). The 32-byte difference only matters
             at the very edge of the branch range -- confirm it is intended. */
  181     diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset;
  182 
  183     if (jump->flags & IS_COND) {
  184         diff += sizeof(sljit_ins);
  185         if (diff <= 0xfffff && diff >= -0x100000) {
                  /* Close enough for the conditional branch itself: toggle
                     bit 24 (IS_CBZ) or bit 0 (otherwise) of that instruction,
                     presumably inverting its condition, make jump->addr point
                     to it and drop all five following words. */
  186             code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1;
  187             jump->addr -= sizeof(sljit_ins);
  188             jump->flags |= PATCH_COND;
  189             return 5;
  190         }
  191         diff -= sizeof(sljit_ins);
  192     }
  193 
          /* Within +-128 MB: a single B / BL replaces the address load. */
  194     if (diff <= 0x7ffffff && diff >= -0x8000000) {
  195         jump->flags |= PATCH_B;
  196         return 4;
  197     }
  198 
          /* The target address fits into 32 bits: two of the four load
             instructions are dropped and the word at code_ptr[0] is moved two
             slots down. For conditional jumps the value in the field at bit 5
             of the preceding branch is reduced by two to match. */
  199     if (target_addr <= 0xffffffffl) {
  200         if (jump->flags & IS_COND)
  201             code_ptr[-5] -= (2 << 5);
  202         code_ptr[-2] = code_ptr[0];
  203         return 2;
  204     }
  205 
          /* The target address fits into 48 bits: one load instruction is dropped. */
  206     if (target_addr <= 0xffffffffffffl) {
  207         if (jump->flags & IS_COND)
  208             code_ptr[-5] -= (1 << 5);
  209         jump->flags |= PATCH_ABS48;
  210         code_ptr[-1] = code_ptr[0];
  211         return 1;
  212     }
  213 
          /* Otherwise the complete 64-bit address load is kept. */
  214     jump->flags |= PATCH_ABS64;
  215     return 0;
  216 }
  217 
  218 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
  219 {
          /* Turns the instruction words collected in the compiler's buffer
             fragments into executable code. It works in two passes:
               1. copy every word into freshly allocated executable memory,
                  resolving label, jump and constant positions on the way and
                  shrinking jumps where detect_jump_type() allows it;
               2. patch every jump with its final target.
             Returns the executable address of the code. The early exits are
             hidden in the CHECK_* / PTR_FAIL_* macros. */
  220     struct sljit_memory_fragment *buf;
  221     sljit_ins *code;
  222     sljit_ins *code_ptr;
  223     sljit_ins *buf_ptr;
  224     sljit_ins *buf_end;
  225     sljit_uw word_count;
  226     sljit_sw executable_offset;
  227     sljit_uw addr;
  228     sljit_s32 dst;
  229 
  230     struct sljit_label *label;
  231     struct sljit_jump *jump;
  232     struct sljit_const *const_;
  233 
  234     CHECK_ERROR_PTR();
  235     CHECK_PTR(check_sljit_generate_code(compiler));
  236     reverse_buf(compiler);
  237 
          /* compiler->size counts instruction words (see push_inst). */
  238     code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
  239     PTR_FAIL_WITH_EXEC_IF(code);
  240     buf = compiler->buf;
  241 
          /* code_ptr is the write position in the output. word_count is the
             index of the word being read from the input; the two diverge once
             a jump has been shortened. */
  242     code_ptr = code;
  243     word_count = 0;
  244     executable_offset = SLJIT_EXEC_OFFSET(code);
  245 
  246     label = compiler->labels;
  247     jump = compiler->jumps;
  248     const_ = compiler->consts;
  249 
  250     do {
  251         buf_ptr = (sljit_ins*)buf->memory;
              /* used_size is in bytes; >> 2 converts it to 4-byte words. */
  252         buf_end = buf_ptr + (buf->used_size >> 2);
  253         do {
  254             *code_ptr = *buf_ptr++;
  255             /* These structures are ordered by their address. */
  256             SLJIT_ASSERT(!label || label->size >= word_count);
  257             SLJIT_ASSERT(!jump || jump->addr >= word_count);
  258             SLJIT_ASSERT(!const_ || const_->addr >= word_count);
                  /* A label at this word: store its executable address and
                     replace its input index (size) by the output word offset. */
  259             if (label && label->size == word_count) {
  260                 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
  261                 label->size = code_ptr - code;
  262                 label = label->next;
  263             }
                  /* A jump at this word: jump->addr becomes the address four
                     words earlier, then detect_jump_type() says how many words
                     can be removed from the output. */
  264             if (jump && jump->addr == word_count) {
  265                     jump->addr = (sljit_uw)(code_ptr - 4);
  266                     code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
  267                     jump = jump->next;
  268             }
                  /* A constant at this word: store its (non-offset) address. */
  269             if (const_ && const_->addr == word_count) {
  270                 const_->addr = (sljit_uw)code_ptr;
  271                 const_ = const_->next;
  272             }
  273             code_ptr ++;
  274             word_count ++;
  275         } while (buf_ptr < buf_end);
  276 
  277         buf = buf->next;
  278     } while (buf);
  279 
          /* A label may sit just past the last instruction. */
  280     if (label && label->size == word_count) {
  281         label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
  282         label->size = code_ptr - code;
  283         label = label->next;
  284     }
  285 
  286     SLJIT_ASSERT(!label);
  287     SLJIT_ASSERT(!jump);
  288     SLJIT_ASSERT(!const_);
  289     SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
  290 
          /* Second pass: every label address is known now, so each jump gets
             its final encoding according to the PATCH_* flag chosen above. */
  291     jump = compiler->jumps;
  292     while (jump) {
  293         do {
  294             addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
  295             buf_ptr = (sljit_ins *)jump->addr;
  296 
                  /* Relative B / BL: 26-bit offset counted in words. For a
                     conditional jump the value in the field at bit 5 of the
                     preceding branch is reduced by four as well. */
  297             if (jump->flags & PATCH_B) {
  298                 addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
  299                 SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000);
  300                 buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff);
  301                 if (jump->flags & IS_COND)
  302                     buf_ptr[-1] -= (4 << 5);
  303                 break;
  304             }
                  /* Conditional branch taken directly: 19-bit offset counted in
                     words, stored in bits 5-23 of the existing instruction. */
  305             if (jump->flags & PATCH_COND) {
  306                 addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
  307                 SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000);
  308                 buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5);
  309                 break;
  310             }
  311 
  312             SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl);
  313             SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl);
  314 
                  /* Absolute address: rewrite the MOVZ/MOVK sequence, writing
                     two, three or four instructions depending on the flags.
                     The destination register number is kept from the first word. */
  315             dst = buf_ptr[0] & 0x1f;
  316             buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5);
  317             buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21);
  318             if (jump->flags & (PATCH_ABS48 | PATCH_ABS64))
  319                 buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21);
  320             if (jump->flags & PATCH_ABS64)
  321                 buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21);
  322         } while (0);
  323         jump = jump->next;
  324     }
  325 
  326     compiler->error = SLJIT_ERR_COMPILED;
  327     compiler->executable_offset = executable_offset;
  328     compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
  329 
          /* Switch both pointers to their executable addresses before the
             cache flush and before handing the code to the caller. */
  330     code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
  331     code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
  332 
  333     SLJIT_CACHE_FLUSH(code, code_ptr);
  334     return code;
  335 }
  336 
  337 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
  338 {
  339     switch (feature_type) {
  340     case SLJIT_HAS_FPU:
  341 #ifdef SLJIT_IS_FPU_AVAILABLE
  342         return SLJIT_IS_FPU_AVAILABLE;
  343 #else
  344         /* Available by default. */
  345         return 1;
  346 #endif
  347 
  348     case SLJIT_HAS_CLZ:
  349     case SLJIT_HAS_CMOV:
  350         return 1;
  351 
  352     default:
  353         return 0;
  354     }
  355 }
  356 
  357 /* --------------------------------------------------------------------- */
  358 /*  Core code generator functions.                                       */
  359 /* --------------------------------------------------------------------- */
  360 
  361 #define COUNT_TRAILING_ZERO(value, result) \
  362     result = 0; \
  363     if (!(value & 0xffffffff)) { \
  364         result += 32; \
  365         value >>= 32; \
  366     } \
  367     if (!(value & 0xffff)) { \
  368         result += 16; \
  369         value >>= 16; \
  370     } \
  371     if (!(value & 0xff)) { \
  372         result += 8; \
  373         value >>= 8; \
  374     } \
  375     if (!(value & 0xf)) { \
  376         result += 4; \
  377         value >>= 4; \
  378     } \
  379     if (!(value & 0x3)) { \
  380         result += 2; \
  381         value >>= 2; \
  382     } \
  383     if (!(value & 0x1)) { \
  384         result += 1; \
  385         value >>= 1; \
  386     }
  387 
  388 #define LOGICAL_IMM_CHECK 0x100
  389 
  390 static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len)
  391 {
          /* Tries to express imm as the immediate of a logical instruction
             (the callers OR the result into ANDI / ORRI / EORI).
             len is 32 for a 64-bit operation and 16 for a 32-bit one, i.e. half
             of the operand width; it may be combined with LOGICAL_IMM_CHECK.
             Returns the immediate bits to OR into the instruction (fields at
             bit 10 and bit 16, plus bit 22 for a 64-bit wide pattern), or 0 when
             the value cannot be encoded.
             All-zero and all-one values are not encodable. With
             LOGICAL_IMM_CHECK they make the function return 0; without it the
             caller must not pass them (see the assertion). */
  392     sljit_s32 negated, ones, right;
  393     sljit_uw mask, uimm;
  394     sljit_ins ins;
  395 
  396     if (len & LOGICAL_IMM_CHECK) {
  397         len &= ~LOGICAL_IMM_CHECK;
  398         if (len == 32 && (imm == 0 || imm == -1))
  399             return 0;
  400         if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1))
  401             return 0;
  402     }
  403 
  404     SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1)
  405         || (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1));
  406 
          /* Find the shortest repeating element: keep halving len while the
             lowest len bits equal the len bits above them. */
  407     uimm = (sljit_uw)imm;
  408     while (1) {
  409         if (len <= 0) {
  410             SLJIT_UNREACHABLE();
  411             return 0;
  412         }
  413 
  414         mask = ((sljit_uw)1 << len) - 1;
  415         if ((uimm & mask) != ((uimm >> len) & mask))
  416             break;
  417         len >>= 1;
  418     }
  419 
          /* The two halves differ, so the element is twice as wide. */
  420     len <<= 1;
  421 
          /* Work on a pattern whose lowest bit is zero; remember if the value
             had to be inverted for that. */
  422     negated = 0;
  423     if (uimm & 0x1) {
  424         negated = 1;
  425         uimm = ~uimm;
  426     }
  427 
          /* Keep a single element only. */
  428     if (len < 64)
  429         uimm &= ((sljit_uw)1 << len) - 1;
  430 
          /* COUNT_TRAILING_ZERO also shifts its first argument: afterwards
             uimm starts with its run of one bits and `right` holds the number
             of zero bits that were below that run. */
  431     /* Unsigned right shift. */
  432     COUNT_TRAILING_ZERO(uimm, right);
  433 
          /* Inverting turns the run of ones into trailing zeros, so the same
             macro measures its length in `ones`. Because imm is signed here,
             the shifts inside the macro are expected to fill with one bits. */
  434     /* Signed shift. We also know that the highest bit is set. */
  435     imm = (sljit_sw)~uimm;
  436     SLJIT_ASSERT(imm < 0);
  437 
  438     COUNT_TRAILING_ZERO(imm, ones);
  439 
          /* Anything other than all ones left over means that the element held
             more than one run of one bits: not encodable. */
  440     if (~imm)
  441         return 0;
  442 
          /* Element size marker: bit 22 for a 64-bit element, otherwise a
             prefix in the upper bits of the field at bit 10. */
  443     if (len == 64)
  444         ins = 1 << 22;
  445     else
  446         ins = (0x3f - ((len << 1) - 1)) << 10;
  447 
          /* Run length minus one goes to the field at bit 10, the rotation to
             the field at bit 16. For an inverted pattern the run of ones of
             the original value is the complement of the measured one. */
  448     if (negated)
  449         return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16);
  450 
  451     return ins | ((ones - 1) << 10) | ((len - right) << 16);
  452 }
  453 
  454 #undef COUNT_TRAILING_ZERO
  455 
  456 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm)
  457 {
          /* Loads the constant simm into register dst, trying the cheap
             encodings first: one instruction where possible, two for the
             remaining 32-bit and small negative values, and up to four
             MOVZ / MOVN / MOVK instructions in general.
             Returns SLJIT_SUCCESS or the error reported through push_inst(). */
  458     sljit_uw imm = (sljit_uw)simm;
  459     sljit_s32 i, zeros, ones, first;
  460     sljit_ins bitmask;
  461 
  462     /* Handling simple immediates first. */
          /* 0 .. 0xffff: a single MOVZ. */
  463     if (imm <= 0xffff)
  464         return push_inst(compiler, MOVZ | RD(dst) | (imm << 5));
  465 
          /* -0x10000 .. -1: a single MOVN of the inverted low 16 bits. */
  466     if (simm < 0 && simm >= -0x10000)
  467         return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5));
  468 
          /* Values that fit into 32 unsigned bits. */
  469     if (imm <= 0xffffffffl) {
              /* Only bits 16-31 are set: MOVZ with the 16-bit shift selected. */
  470         if ((imm & 0xffff) == 0)
  471             return push_inst(compiler, MOVZ | RD(dst) | ((imm >> 16) << 5) | (1 << 21));
              /* Bits 16-31 all ones, or bits 0-15 all ones: MOVN with bit 31
                 of the opcode cleared (MOVN ^ W_OP). */
  472         if ((imm & 0xffff0000l) == 0xffff0000)
  473             return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5));
  474         if ((imm & 0xffff) == 0xffff)
  475             return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
  476 
              /* A logical immediate can be ORed with the zero register. */
  477         bitmask = logical_imm(simm, 16);
  478         if (bitmask != 0)
  479             return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask);
  480 
              /* Otherwise two instructions: low half, then high half. */
  481         FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
  482         return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
  483     }
  484 
          /* 64-bit logical immediate. */
  485     bitmask = logical_imm(simm, 32);
  486     if (bitmask != 0)
  487         return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask);
  488 
          /* Negative values whose upper 32 bits are all ones: MOVN supplies
             the ones, an optional MOVK fixes bits 16-31. */
  489     if (simm < 0 && simm >= -0x100000000l) {
  490         if ((imm & 0xffff) == 0xffff)
  491             return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
  492 
  493         FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)));
  494         return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
  495     }
  496 
  497     /* A large amount of number can be constructed from ORR and MOVx, but computing them is costly. */
  498 
          /* General case: count the 16-bit parts equal to 0 and to 0xffff to
             decide whether a zero or an all-ones background needs fewer
             instructions. */
  499     zeros = 0;
  500     ones = 0;
  501     for (i = 4; i > 0; i--) {
  502         if ((simm & 0xffff) == 0)
  503             zeros++;
  504         if ((simm & 0xffff) == 0xffff)
  505             ones++;
  506         simm >>= 16;
  507     }
  508 
          /* The counting loop consumed simm; restore it. */
  509     simm = (sljit_sw)imm;
  510     first = 1;
  511     if (ones > zeros) {
              /* All-ones background: work on the inverted value so that parts
                 which are 0xffff in the original read as zero and are skipped.
                 The first remaining part is written with MOVN (it takes the
                 inverted bits), the others with MOVK using the original bits.
                 At least one part is non-zero here, because -1 was handled by
                 the MOVN case near the top. */
  512         simm = ~simm;
  513         for (i = 0; i < 4; i++) {
  514             if (!(simm & 0xffff)) {
  515                 simm >>= 16;
  516                 continue;
  517             }
  518             if (first) {
  519                 first = 0;
  520                 FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
  521             }
  522             else
  523                 FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21)));
  524             simm >>= 16;
  525         }
  526         return SLJIT_SUCCESS;
  527     }
  528 
          /* Zero background: MOVZ for the first non-zero part, MOVK for every
             further non-zero part; i selects the 16-bit position. */
  529     for (i = 0; i < 4; i++) {
  530         if (!(simm & 0xffff)) {
  531             simm >>= 16;
  532             continue;
  533         }
  534         if (first) {
  535             first = 0;
  536             FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
  537         }
  538         else
  539             FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
  540         simm >>= 16;
  541     }
  542     return SLJIT_SUCCESS;
  543 }
  544 
  545 #define ARG1_IMM    0x0010000
  546 #define ARG2_IMM    0x0020000
  547 #define INT_OP      0x0040000
  548 #define SET_FLAGS   0x0080000
  549 #define UNUSED_RETURN   0x0100000
  550 
  551 #define CHECK_FLAGS(flag_bits) \
  552     if (flags & SET_FLAGS) { \
  553         inv_bits |= flag_bits; \
  554         if (flags & UNUSED_RETURN) \
  555             dst = TMP_ZERO; \
  556     }
  557 
  558 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_sw arg1, sljit_sw arg2)
  559 {
          /* Emits the instruction(s) for one integer operation.
             flags holds the operation code in its low 16 bits plus the
             ARG1_IMM / ARG2_IMM / INT_OP / SET_FLAGS / UNUSED_RETURN bits.
             arg1 and arg2 are either register indices or, when the matching
             *_IMM bit is set, immediate values.
             The first part tries to fold an immediate into the instruction. If
             that is not possible the immediate is loaded into TMP_REG1 or
             TMP_REG2 and the register form in the second switch is used.
             Returns SLJIT_SUCCESS or the error reported through push_inst(). */
  560     /* dst must be register, TMP_REG1
  561        arg1 must be register, TMP_REG1, imm
  562        arg2 must be register, TMP_REG2, imm */
          /* inv_bits is XORed into every opcode below. The opcode constants
             have bit 31 set, so flipping it yields the variant used for INT_OP.
             NOTE(review): 1 << 31 shifts into the sign bit of a signed int;
             an unsigned constant would avoid relying on compiler behaviour. */
  563     sljit_ins inv_bits = (flags & INT_OP) ? (1 << 31) : 0;
  564     sljit_ins inst_bits;
  565     sljit_s32 op = (flags & 0xffff);
  566     sljit_s32 reg;
  567     sljit_sw imm, nimm;
  568 
  569     if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
  570         /* Both are immediates. */
              /* arg1 is turned into a register first (the zero register for a
                 zero value, except for ADD / SUB); arg2 stays an immediate. */
  571         flags &= ~ARG1_IMM;
  572         if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB)
  573             arg1 = TMP_ZERO;
  574         else {
  575             FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
  576             arg1 = TMP_REG1;
  577         }
  578     }
  579 
  580     if (flags & (ARG1_IMM | ARG2_IMM)) {
              /* Exactly one operand is an immediate: reg is the register
                 operand, imm the immediate one. */
  581         reg = (flags & ARG2_IMM) ? arg1 : arg2;
  582         imm = (flags & ARG2_IMM) ? arg2 : arg1;
  583 
              /* A `break` out of this switch means that no immediate form
                 applies; the immediate is then loaded into a register below. */
  584         switch (op) {
  585         case SLJIT_MUL:
  586         case SLJIT_NEG:
  587         case SLJIT_CLZ:
  588         case SLJIT_ADDC:
  589         case SLJIT_SUBC:
  590             /* No form with immediate operand (except imm 0, which
  591             is represented by a ZERO register). */
  592             break;
  593         case SLJIT_MOV:
  594             SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
  595             return load_immediate(compiler, dst, imm);
  596         case SLJIT_NOT:
  597             SLJIT_ASSERT(flags & ARG2_IMM);
                  /* The complement is computed here; for INT_OP only the low
                     32 bits of it are loaded. */
  598             FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm));
  599             goto set_flags;
  600         case SLJIT_SUB:
                  /* imm - reg has no immediate form. reg - imm is handled as
                     reg + (-imm) by the ADD case. */
  601             if (flags & ARG1_IMM)
  602                 break;
  603             imm = -imm;
  604             /* Fall through. */
  605         case SLJIT_ADD:
                  /* The immediate field at bit 10 takes 12 bits; bit 22 selects
                     the variant where the immediate is shifted left by 12.
                     Negative values are emitted through the opposite
                     instruction (SUBI instead of ADDI). Without SET_FLAGS a
                     24-bit value may be split into two instructions. */
  606             if (imm == 0) {
  607                 CHECK_FLAGS(1 << 29);
  608                 return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg));
  609             }
  610             if (imm > 0 && imm <= 0xfff) {
  611                 CHECK_FLAGS(1 << 29);
  612                 return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10));
  613             }
  614             nimm = -imm;
  615             if (nimm > 0 && nimm <= 0xfff) {
  616                 CHECK_FLAGS(1 << 29);
  617                 return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10));
  618             }
  619             if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) {
  620                 CHECK_FLAGS(1 << 29);
  621                 return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22));
  622             }
  623             if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) {
  624                 CHECK_FLAGS(1 << 29);
  625                 return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22));
  626             }
  627             if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) {
  628                 FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)));
  629                 return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10));
  630             }
  631             if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) {
  632                 FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)));
  633                 return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10));
  634             }
  635             break;
  636         case SLJIT_AND:
                  /* logical_imm() returns 0 when the value has no encoding. */
  637             inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
  638             if (!inst_bits)
  639                 break;
  640             CHECK_FLAGS(3 << 29);
  641             return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits);
  642         case SLJIT_OR:
  643         case SLJIT_XOR:
  644             inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
  645             if (!inst_bits)
  646                 break;
  647             if (op == SLJIT_OR)
  648                 inst_bits |= ORRI;
  649             else
  650                 inst_bits |= EORI;
  651             FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
  652             goto set_flags;
  653         case SLJIT_SHL:
                  /* Shift by a constant, emitted as a UBFM bit field move; the
                     shift amount is reduced modulo the operand width. */
  654             if (flags & ARG1_IMM)
  655                 break;
  656             if (flags & INT_OP) {
  657                 imm &= 0x1f;
  658                 FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10)));
  659             }
  660             else {
  661                 imm &= 0x3f;
  662                 FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10)));
  663             }
  664             goto set_flags;
  665         case SLJIT_LSHR:
  666         case SLJIT_ASHR:
  667             if (flags & ARG1_IMM)
  668                 break;
                  /* Flipping bit 30 of UBFM (0xd3000000) gives SBFM (0x93000000). */
  669             if (op == SLJIT_ASHR)
  670                 inv_bits |= 1 << 30;
  671             if (flags & INT_OP) {
  672                 imm &= 0x1f;
  673                 FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10)));
  674             }
  675             else {
  676                 imm &= 0x3f;
  677                 FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10)));
  678             }
  679             goto set_flags;
  680         default:
  681             SLJIT_UNREACHABLE();
  682             break;
  683         }
  684 
              /* No immediate form was usable: move the immediate into a
                 register (the zero register for the value 0) and continue
                 with the register forms. */
  685         if (flags & ARG2_IMM) {
  686             if (arg2 == 0)
  687                 arg2 = TMP_ZERO;
  688             else {
  689                 FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
  690                 arg2 = TMP_REG2;
  691             }
  692         }
  693         else {
  694             if (arg1 == 0)
  695                 arg1 = TMP_ZERO;
  696             else {
  697                 FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
  698                 arg1 = TMP_REG1;
  699             }
  700         }
  701     }
  702 
  703     /* Both arguments are registers. */
          /* Cases that `return` are complete. Cases that `break` continue at
             set_flags, where the flags are derived from dst if requested. */
  704     switch (op) {
  705     case SLJIT_MOV:
  706     case SLJIT_MOV_P:
  707         SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
  708         if (dst == arg2)
  709             return SLJIT_SUCCESS;
  710         return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
  711     case SLJIT_MOV_U8:
  712         SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
  713         return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10));
  714     case SLJIT_MOV_S8:
  715         SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
  716         if (!(flags & INT_OP))
  717             inv_bits |= 1 << 22;
  718         return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
  719     case SLJIT_MOV_U16:
  720         SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
  721         return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10));
  722     case SLJIT_MOV_S16:
  723         SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
  724         if (!(flags & INT_OP))
  725             inv_bits |= 1 << 22;
  726         return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
  727     case SLJIT_MOV_U32:
  728         SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
  729         if ((flags & INT_OP) && dst == arg2)
  730             return SLJIT_SUCCESS;
  731         return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
  732     case SLJIT_MOV_S32:
  733         SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
  734         if ((flags & INT_OP) && dst == arg2)
  735             return SLJIT_SUCCESS;
  736         return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
  737     case SLJIT_NOT:
  738         SLJIT_ASSERT(arg1 == TMP_REG1);
  739         FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
  740         break; /* Set flags. */
  741     case SLJIT_NEG:
  742         SLJIT_ASSERT(arg1 == TMP_REG1);
  743         if (flags & SET_FLAGS)
  744             inv_bits |= 1 << 29;
  745         return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
  746     case SLJIT_CLZ:
  747         SLJIT_ASSERT(arg1 == TMP_REG1);
  748         return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
  749     case SLJIT_ADD:
  750         CHECK_FLAGS(1 << 29);
  751         return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
  752     case SLJIT_ADDC:
  753         CHECK_FLAGS(1 << 29);
  754         return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
  755     case SLJIT_SUB:
  756         CHECK_FLAGS(1 << 29);
  757         return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
  758     case SLJIT_SUBC:
  759         CHECK_FLAGS(1 << 29);
  760         return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
  761     case SLJIT_MUL:
  762         if (!(flags & SET_FLAGS))
  763             return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO));
              /* With SET_FLAGS three instructions are emitted: the product, a
                 second value in TMP_LR, and a SUBS of the two into the zero
                 register. Presumably this raises the flags when the product
                 overflowed -- TODO confirm which condition the callers test. */
  764         if (flags & INT_OP) {
  765             FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10)));
  766             FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10)));
  767             return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
  768         }
  769         FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2)));
  770         FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)));
  771         return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
  772     case SLJIT_AND:
  773         CHECK_FLAGS(3 << 29);
  774         return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
  775     case SLJIT_OR:
  776         FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
  777         break; /* Set flags. */
  778     case SLJIT_XOR:
  779         FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
  780         break; /* Set flags. */
  781     case SLJIT_SHL:
  782         FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
  783         break; /* Set flags. */
  784     case SLJIT_LSHR:
  785         FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
  786         break; /* Set flags. */
  787     case SLJIT_ASHR:
  788         FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
  789         break; /* Set flags. */
  790     default:
  791         SLJIT_UNREACHABLE();
  792         return SLJIT_SUCCESS;
  793     }
  794 
      /* Operations without a flag-setting form get their flags from a separate
         SUBS that subtracts the zero register from dst and discards the result. */
  795 set_flags:
  796     if (flags & SET_FLAGS)
  797         return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO));
  798     return SLJIT_SUCCESS;
  799 }
  800 
/* Flag bits that are OR-ed with one of the size codes below to form the
   `flags` argument of emit_op_mem() and emit_fop_mem(). */
/* STORE: emit a store; when it is absent the load opcode bits are added. */
#define STORE       0x10
/* SIGNED: for loads in emit_op_mem(), selects the signed load opcode bits. */
#define SIGNED      0x20

/* Size codes. The value is also used as a shift count when memory offsets
   are scaled to the access size. */
#define BYTE_SIZE   0x0
#define HALF_SIZE   0x1
#define INT_SIZE    0x2
#define WORD_SIZE   0x3

/* Extracts the size code (the low two bits) from a flags value. */
#define MEM_SIZE_SHIFT(flags) ((flags) & 0x3)
  810 
  811 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
  812     sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
  813 {
  814     sljit_u32 shift = MEM_SIZE_SHIFT(flags);
  815     sljit_u32 type = (shift << 30);
  816 
  817     if (!(flags & STORE))
  818         type |= (flags & SIGNED) ? 0x00800000 : 0x00400000;
  819 
  820     SLJIT_ASSERT(arg & SLJIT_MEM);
  821 
  822     if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
  823         argw &= 0x3;
  824 
  825         if (argw == 0 || argw == shift)
  826             return push_inst(compiler, STRB | type | RT(reg)
  827                 | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));
  828 
  829         FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10)));
  830         return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg));
  831     }
  832 
  833     arg &= REG_MASK;
  834 
  835     if (arg == SLJIT_UNUSED) {
  836         FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~(0xfff << shift)));
  837 
  838         argw = (argw >> shift) & 0xfff;
  839 
  840         return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10));
  841     }
  842 
  843     if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) {
  844         if ((argw >> shift) <= 0xfff) {
  845             return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | (argw << (10 - shift)));
  846         }
  847 
  848         if (argw <= 0xffffff) {
  849             FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | ((argw >> 12) << 10)));
  850 
  851             argw = ((argw & 0xfff) >> shift);
  852             return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10));
  853         }
  854     }
  855 
  856     if (argw <= 255 && argw >= -256)
  857         return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12));
  858 
  859     FAIL_IF(load_immediate(compiler, tmp_reg, argw));
  860 
  861     return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg));
  862 }
  863 
  864 /* --------------------------------------------------------------------- */
  865 /*  Entry, exit                                                          */
  866 /* --------------------------------------------------------------------- */
  867 
/* Emits the function prologue. In order, the generated code:
     - stores TMP_FP / TMP_LR with a pre-indexed store pair that also moves
       the stack pointer down by the register save area size,
     - stores the saved registers (and the scratch registers numbered from
       SLJIT_FIRST_SAVED_REG upwards) above that pair,
     - copies the stack pointer into TMP_FP,
     - copies up to three incoming arguments from R0-R2 into S0-S2,
     - reserves the (16 byte aligned) local area.
   compiler->local_size is set to the aligned local size plus the save area
   size. Returns SLJIT_SUCCESS or the compiler error code. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
    sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
    sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
    sljit_s32 args, i, tmp, offs, prev, saved_regs_size;

    CHECK_ERROR();
    CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
    set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

    /* Pad the save area by one word when bit 3 of its size is set
       (presumably to keep it a multiple of 16 bytes). */
    saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
    if (saved_regs_size & 0x8)
        saved_regs_size += sizeof(sljit_sw);

    /* Round the local area up to a multiple of 16. */
    local_size = (local_size + 15) & ~0xf;
    compiler->local_size = local_size + saved_regs_size;

    /* The offset field holds the negated save area size in 8 byte units,
       masked to 7 bits. */
    FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
        | RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15)));

#ifdef _WIN32
    /* Start computing the new stack bottom in TMP_REG1; SP itself is only
       updated after the loads from TMP_REG1 further below (presumably
       stack probes - the loaded value goes to TMP_ZERO). */
    if (local_size >= 4096)
        FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
    else if (local_size > 256)
        FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10)));
#endif

    /* Store registers in pairs: `prev` remembers the first register of a
       pair until its partner is found, `offs` is the pair offset field. */
    tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
    prev = -1;
    offs = 2 << 15;
    for (i = SLJIT_S0; i >= tmp; i--) {
        if (prev == -1) {
            prev = i;
            continue;
        }
        FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
        offs += 2 << 15;
        prev = -1;
    }

    /* `prev` is deliberately carried over from the loop above, so a
       leftover register can be paired with the first one handled here. */
    for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
        if (prev == -1) {
            prev = i;
            continue;
        }
        FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
        offs += 2 << 15;
        prev = -1;
    }

    /* Odd register count: store the last one alone. The pair offset field
       is moved from bit 15 to bit 10 for the single store. */
    if (prev != -1)
        FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));


    /* TMP_FP = SP + 0 */
    FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)));

    args = get_arg_count(arg_types);

    /* Copy the incoming arguments into the first saved registers. */
    if (args >= 1)
        FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0)));
    if (args >= 2)
        FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1)));
    if (args >= 3)
        FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));

#ifdef _WIN32
    /* TMP_REG1 is lowered in 4096 byte steps with a load from every step,
       then by the remainder, and finally copied into SP. */
    if (local_size >= 4096) {
        if (local_size < 4 * 4096) {
            /* No need for a loop. */
            if (local_size >= 2 * 4096) {
                FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
                FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
                local_size -= 4096;
            }

            if (local_size >= 2 * 4096) {
                FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
                FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
                local_size -= 4096;
            }

            FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
            local_size -= 4096;
        }
        else {
            /* Loop: TMP_REG2 counts (local_size / 4096) - 1 iterations; the
               conditional branch jumps three instructions back to the load. */
            FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5)));
            FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
            FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
            FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10)));
            FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
            FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));

            local_size &= 0xfff;
        }

        /* Remainder below 4096 bytes. */
        if (local_size > 256) {
            FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10)));
            FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
        }
        else if (local_size > 0)
            FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12)));

        /* SP = TMP_REG1 + 0 */
        FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
    }
    else if (local_size > 256) {
        FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
        FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
    }
    else if (local_size > 0)
        /* A single pre-indexed load both moves SP and reads the new bottom. */
        FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12)));

#else /* !_WIN32 */

    /* The local_size does not include saved registers size. */
    /* It is subtracted in up to two steps: the bits above the low 12
       (immediate shifted by 12), then the low 12 bits. */
    if (local_size > 0xfff) {
        FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
        local_size &= 0xfff;
    }
    if (local_size != 0)
        FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));

#endif /* _WIN32 */

    return SLJIT_SUCCESS;
}
  993 
  994 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
  995     sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
  996     sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
  997 {
  998     sljit_s32 saved_regs_size;
  999 
 1000     CHECK_ERROR();
 1001     CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 1002     set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 1003 
 1004     saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
 1005     if (saved_regs_size & 0x8)
 1006         saved_regs_size += sizeof(sljit_sw);
 1007 
 1008     compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf);
 1009     return SLJIT_SUCCESS;
 1010 }
 1011 
 1012 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
 1013 {
 1014     sljit_s32 local_size;
 1015     sljit_s32 i, tmp, offs, prev, saved_regs_size;
 1016 
 1017     CHECK_ERROR();
 1018     CHECK(check_sljit_emit_return(compiler, op, src, srcw));
 1019 
 1020     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
 1021 
 1022     saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2);
 1023     if (saved_regs_size & 0x8)
 1024         saved_regs_size += sizeof(sljit_sw);
 1025 
 1026     local_size = compiler->local_size - saved_regs_size;
 1027 
 1028     /* Load LR as early as possible. */
 1029     if (local_size == 0)
 1030         FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
 1031     else if (local_size < 63 * sizeof(sljit_sw)) {
 1032         FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR)
 1033             | RN(SLJIT_SP) | (local_size << (15 - 3))));
 1034     }
 1035     else {
 1036         if (local_size > 0xfff) {
 1037             FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
 1038             local_size &= 0xfff;
 1039         }
 1040         if (local_size)
 1041             FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
 1042 
 1043         FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
 1044     }
 1045 
 1046     tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
 1047     prev = -1;
 1048     offs = 2 << 15;
 1049     for (i = SLJIT_S0; i >= tmp; i--) {
 1050         if (prev == -1) {
 1051             prev = i;
 1052             continue;
 1053         }
 1054         FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
 1055         offs += 2 << 15;
 1056         prev = -1;
 1057     }
 1058 
 1059     for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
 1060         if (prev == -1) {
 1061             prev = i;
 1062             continue;
 1063         }
 1064         FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
 1065         offs += 2 << 15;
 1066         prev = -1;
 1067     }
 1068 
 1069     if (prev != -1)
 1070         FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
 1071 
 1072     /* These two can be executed in parallel. */
 1073     FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10)));
 1074     return push_inst(compiler, RET | RN(TMP_LR));
 1075 }
 1076 
 1077 /* --------------------------------------------------------------------- */
 1078 /*  Operators                                                            */
 1079 /* --------------------------------------------------------------------- */
 1080 
 1081 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
 1082 {
 1083     sljit_ins inv_bits = (op & SLJIT_I32_OP) ? (1 << 31) : 0;
 1084 
 1085     CHECK_ERROR();
 1086     CHECK(check_sljit_emit_op0(compiler, op));
 1087 
 1088     op = GET_OPCODE(op);
 1089     switch (op) {
 1090     case SLJIT_BREAKPOINT:
 1091         return push_inst(compiler, BRK);
 1092     case SLJIT_NOP:
 1093         return push_inst(compiler, NOP);
 1094     case SLJIT_LMUL_UW:
 1095     case SLJIT_LMUL_SW:
 1096         FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
 1097         FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
 1098         return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
 1099     case SLJIT_DIVMOD_UW:
 1100     case SLJIT_DIVMOD_SW:
 1101         FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
 1102         FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
 1103         FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
 1104         return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
 1105     case SLJIT_DIV_UW:
 1106     case SLJIT_DIV_SW:
 1107         return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1));
 1108     }
 1109 
 1110     return SLJIT_SUCCESS;
 1111 }
 1112 
 1113 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
 1114     sljit_s32 dst, sljit_sw dstw,
 1115     sljit_s32 src, sljit_sw srcw)
 1116 {
 1117     sljit_s32 dst_r, flags, mem_flags;
 1118     sljit_s32 op_flags = GET_ALL_FLAGS(op);
 1119 
 1120     CHECK_ERROR();
 1121     CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
 1122     ADJUST_LOCAL_OFFSET(dst, dstw);
 1123     ADJUST_LOCAL_OFFSET(src, srcw);
 1124 
 1125     if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
 1126         if (op <= SLJIT_MOV_P && (src & SLJIT_MEM)) {
 1127             SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4);
 1128 
 1129             if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
 1130                 dst = 5;
 1131             else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
 1132                 dst = 3;
 1133             else
 1134                 dst = 1;
 1135 
 1136             /* Signed word sized load is the prefetch instruction. */
 1137             return emit_op_mem(compiler, WORD_SIZE | SIGNED, dst, src, srcw, TMP_REG1);
 1138         }
 1139         return SLJIT_SUCCESS;
 1140     }
 1141 
 1142     dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
 1143 
 1144     op = GET_OPCODE(op);
 1145     if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
 1146         /* Both operands are registers. */
 1147         if (dst_r != TMP_REG1 && FAST_IS_REG(src))
 1148             return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src);
 1149 
 1150         switch (op) {
 1151         case SLJIT_MOV:
 1152         case SLJIT_MOV_P:
 1153             mem_flags = WORD_SIZE;
 1154             break;
 1155         case SLJIT_MOV_U8:
 1156             mem_flags = BYTE_SIZE;
 1157             if (src & SLJIT_IMM)
 1158                 srcw = (sljit_u8)srcw;
 1159             break;
 1160         case SLJIT_MOV_S8:
 1161             mem_flags = BYTE_SIZE | SIGNED;
 1162             if (src & SLJIT_IMM)
 1163                 srcw = (sljit_s8)srcw;
 1164             break;
 1165         case SLJIT_MOV_U16:
 1166             mem_flags = HALF_SIZE;
 1167             if (src & SLJIT_IMM)
 1168                 srcw = (sljit_u16)srcw;
 1169             break;
 1170         case SLJIT_MOV_S16:
 1171             mem_flags = HALF_SIZE | SIGNED;
 1172             if (src & SLJIT_IMM)
 1173                 srcw = (sljit_s16)srcw;
 1174             break;
 1175         case SLJIT_MOV_U32:
 1176             mem_flags = INT_SIZE;
 1177             if (src & SLJIT_IMM)
 1178                 srcw = (sljit_u32)srcw;
 1179             break;
 1180         case SLJIT_MOV_S32:
 1181             mem_flags = INT_SIZE | SIGNED;
 1182             if (src & SLJIT_IMM)
 1183                 srcw = (sljit_s32)srcw;
 1184             break;
 1185         default:
 1186             SLJIT_UNREACHABLE();
 1187             mem_flags = 0;
 1188             break;
 1189         }
 1190 
 1191         if (src & SLJIT_IMM)
 1192             FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
 1193         else if (!(src & SLJIT_MEM))
 1194             dst_r = src;
 1195         else
 1196             FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, src, srcw, TMP_REG1));
 1197 
 1198         if (dst & SLJIT_MEM)
 1199             return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
 1200         return SLJIT_SUCCESS;
 1201     }
 1202 
 1203     flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
 1204     mem_flags = WORD_SIZE;
 1205 
 1206     if (op_flags & SLJIT_I32_OP) {
 1207         flags |= INT_OP;
 1208         mem_flags = INT_SIZE;
 1209     }
 1210 
 1211     if (dst == SLJIT_UNUSED)
 1212         flags |= UNUSED_RETURN;
 1213 
 1214     if (src & SLJIT_MEM) {
 1215         FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src, srcw, TMP_REG2));
 1216         src = TMP_REG2;
 1217     }
 1218 
 1219     emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, src);
 1220 
 1221     if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
 1222         return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
 1223     return SLJIT_SUCCESS;
 1224 }
 1225 
 1226 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
 1227     sljit_s32 dst, sljit_sw dstw,
 1228     sljit_s32 src1, sljit_sw src1w,
 1229     sljit_s32 src2, sljit_sw src2w)
 1230 {
 1231     sljit_s32 dst_r, flags, mem_flags;
 1232 
 1233     CHECK_ERROR();
 1234     CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
 1235     ADJUST_LOCAL_OFFSET(dst, dstw);
 1236     ADJUST_LOCAL_OFFSET(src1, src1w);
 1237     ADJUST_LOCAL_OFFSET(src2, src2w);
 1238 
 1239     if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
 1240         return SLJIT_SUCCESS;
 1241 
 1242     dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
 1243     flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
 1244     mem_flags = WORD_SIZE;
 1245 
 1246     if (op & SLJIT_I32_OP) {
 1247         flags |= INT_OP;
 1248         mem_flags = INT_SIZE;
 1249     }
 1250 
 1251     if (dst == SLJIT_UNUSED)
 1252         flags |= UNUSED_RETURN;
 1253 
 1254     if (src1 & SLJIT_MEM) {
 1255         FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, src1, src1w, TMP_REG1));
 1256         src1 = TMP_REG1;
 1257     }
 1258 
 1259     if (src2 & SLJIT_MEM) {
 1260         FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src2, src2w, TMP_REG2));
 1261         src2 = TMP_REG2;
 1262     }
 1263 
 1264     if (src1 & SLJIT_IMM)
 1265         flags |= ARG1_IMM;
 1266     else
 1267         src1w = src1;
 1268 
 1269     if (src2 & SLJIT_IMM)
 1270         flags |= ARG2_IMM;
 1271     else
 1272         src2w = src2;
 1273 
 1274     emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
 1275 
 1276     if (dst & SLJIT_MEM)
 1277         return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
 1278     return SLJIT_SUCCESS;
 1279 }
 1280 
 1281 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
 1282 {
 1283     CHECK_REG_INDEX(check_sljit_get_register_index(reg));
 1284     return reg_map[reg];
 1285 }
 1286 
 1287 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
 1288 {
 1289     CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
 1290     return freg_map[reg];
 1291 }
 1292 
 1293 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
 1294     void *instruction, sljit_s32 size)
 1295 {
 1296     CHECK_ERROR();
 1297     CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
 1298 
 1299     return push_inst(compiler, *(sljit_ins*)instruction);
 1300 }
 1301 
 1302 /* --------------------------------------------------------------------- */
 1303 /*  Floating point operators                                             */
 1304 /* --------------------------------------------------------------------- */
 1305 
 1306 static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
 1307 {
 1308     sljit_u32 shift = MEM_SIZE_SHIFT(flags);
 1309     sljit_ins type = (shift << 30);
 1310 
 1311     SLJIT_ASSERT(arg & SLJIT_MEM);
 1312 
 1313     if (!(flags & STORE))
 1314         type |= 0x00400000;
 1315 
 1316     if (arg & OFFS_REG_MASK) {
 1317         argw &= 3;
 1318         if (argw == 0 || argw == shift)
 1319             return push_inst(compiler, STR_FR | type | VT(reg)
 1320                 | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));
 1321 
 1322         FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10)));
 1323         return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1));
 1324     }
 1325 
 1326     arg &= REG_MASK;
 1327 
 1328     if (arg == SLJIT_UNUSED) {
 1329         FAIL_IF(load_immediate(compiler, TMP_REG1, argw & ~(0xfff << shift)));
 1330 
 1331         argw = (argw >> shift) & 0xfff;
 1332 
 1333         return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10));
 1334     }
 1335 
 1336     if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) {
 1337         if ((argw >> shift) <= 0xfff)
 1338             return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | (argw << (10 - shift)));
 1339 
 1340         if (argw <= 0xffffff) {
 1341             FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | ((argw >> 12) << 10)));
 1342 
 1343             argw = ((argw & 0xfff) >> shift);
 1344             return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10));
 1345         }
 1346     }
 1347 
 1348     if (argw <= 255 && argw >= -256)
 1349         return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12));
 1350 
 1351     FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
 1352     return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG1));
 1353 }
 1354 
 1355 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
 1356     sljit_s32 dst, sljit_sw dstw,
 1357     sljit_s32 src, sljit_sw srcw)
 1358 {
 1359     sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 1360     sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
 1361 
 1362     if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64)
 1363         inv_bits |= (1 << 31);
 1364 
 1365     if (src & SLJIT_MEM) {
 1366         emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
 1367         src = TMP_FREG1;
 1368     }
 1369 
 1370     FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
 1371 
 1372     if (dst & SLJIT_MEM)
 1373         return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw, TMP_REG2);
 1374     return SLJIT_SUCCESS;
 1375 }
 1376 
 1377 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
 1378     sljit_s32 dst, sljit_sw dstw,
 1379     sljit_s32 src, sljit_sw srcw)
 1380 {
 1381     sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 1382     sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
 1383 
 1384     if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
 1385         inv_bits |= (1 << 31);
 1386 
 1387     if (src & SLJIT_MEM) {
 1388         emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1);
 1389         src = TMP_REG1;
 1390     } else if (src & SLJIT_IMM) {
 1391 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1392         if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
 1393             srcw = (sljit_s32)srcw;
 1394 #endif
 1395         FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 1396         src = TMP_REG1;
 1397     }
 1398 
 1399     FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src)));
 1400 
 1401     if (dst & SLJIT_MEM)
 1402         return emit_fop_mem(compiler, ((op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw);
 1403     return SLJIT_SUCCESS;
 1404 }
 1405 
 1406 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
 1407     sljit_s32 src1, sljit_sw src1w,
 1408     sljit_s32 src2, sljit_sw src2w)
 1409 {
 1410     sljit_s32 mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
 1411     sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
 1412 
 1413     if (src1 & SLJIT_MEM) {
 1414         emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
 1415         src1 = TMP_FREG1;
 1416     }
 1417 
 1418     if (src2 & SLJIT_MEM) {
 1419         emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
 1420         src2 = TMP_FREG2;
 1421     }
 1422 
 1423     return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2));
 1424 }
 1425 
 1426 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
 1427     sljit_s32 dst, sljit_sw dstw,
 1428     sljit_s32 src, sljit_sw srcw)
 1429 {
 1430     sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
 1431     sljit_ins inv_bits;
 1432 
 1433     CHECK_ERROR();
 1434 
 1435     SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x1) == WORD_SIZE, must_be_one_bit_difference);
 1436     SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
 1437 
 1438     inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
 1439     dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 1440 
 1441     if (src & SLJIT_MEM) {
 1442         emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw);
 1443         src = dst_r;
 1444     }
 1445 
 1446     switch (GET_OPCODE(op)) {
 1447     case SLJIT_MOV_F64:
 1448         if (src != dst_r) {
 1449             if (dst_r != TMP_FREG1)
 1450                 FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
 1451             else
 1452                 dst_r = src;
 1453         }
 1454         break;
 1455     case SLJIT_NEG_F64:
 1456         FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src)));
 1457         break;
 1458     case SLJIT_ABS_F64:
 1459         FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src)));
 1460         break;
 1461     case SLJIT_CONV_F64_FROM_F32:
 1462         FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_F32_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src)));
 1463         break;
 1464     }
 1465 
 1466     if (dst & SLJIT_MEM)
 1467         return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw);
 1468     return SLJIT_SUCCESS;
 1469 }
 1470 
 1471 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
 1472     sljit_s32 dst, sljit_sw dstw,
 1473     sljit_s32 src1, sljit_sw src1w,
 1474     sljit_s32 src2, sljit_sw src2w)
 1475 {
 1476     sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
 1477     sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
 1478 
 1479     CHECK_ERROR();
 1480     CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
 1481     ADJUST_LOCAL_OFFSET(dst, dstw);
 1482     ADJUST_LOCAL_OFFSET(src1, src1w);
 1483     ADJUST_LOCAL_OFFSET(src2, src2w);
 1484 
 1485     dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 1486     if (src1 & SLJIT_MEM) {
 1487         emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
 1488         src1 = TMP_FREG1;
 1489     }
 1490     if (src2 & SLJIT_MEM) {
 1491         emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
 1492         src2 = TMP_FREG2;
 1493     }
 1494 
 1495     switch (GET_OPCODE(op)) {
 1496     case SLJIT_ADD_F64:
 1497         FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
 1498         break;
 1499     case SLJIT_SUB_F64:
 1500         FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
 1501         break;
 1502     case SLJIT_MUL_F64:
 1503         FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
 1504         break;
 1505     case SLJIT_DIV_F64:
 1506         FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
 1507         break;
 1508     }
 1509 
 1510     if (!(dst & SLJIT_MEM))
 1511         return SLJIT_SUCCESS;
 1512     return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
 1513 }
 1514 
 1515 /* --------------------------------------------------------------------- */
 1516 /*  Other instructions                                                   */
 1517 /* --------------------------------------------------------------------- */
 1518 
 1519 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
 1520 {
 1521     CHECK_ERROR();
 1522     CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
 1523     ADJUST_LOCAL_OFFSET(dst, dstw);
 1524 
 1525     if (FAST_IS_REG(dst))
 1526         return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
 1527 
 1528     /* Memory. */
 1529     return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw, TMP_REG1);
 1530 }
 1531 
 1532 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
 1533 {
 1534     CHECK_ERROR();
 1535     CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
 1536     ADJUST_LOCAL_OFFSET(src, srcw);
 1537 
 1538     if (FAST_IS_REG(src))
 1539         FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
 1540     else
 1541         FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1));
 1542 
 1543     return push_inst(compiler, RET | RN(TMP_LR));
 1544 }
 1545 
 1546 /* --------------------------------------------------------------------- */
 1547 /*  Conditional instructions                                             */
 1548 /* --------------------------------------------------------------------- */
 1549 
 1550 static sljit_uw get_cc(sljit_s32 type)
 1551 {
 1552     switch (type) {
 1553     case SLJIT_EQUAL:
 1554     case SLJIT_MUL_NOT_OVERFLOW:
 1555     case SLJIT_EQUAL_F64:
 1556         return 0x1;
 1557 
 1558     case SLJIT_NOT_EQUAL:
 1559     case SLJIT_MUL_OVERFLOW:
 1560     case SLJIT_NOT_EQUAL_F64:
 1561         return 0x0;
 1562 
 1563     case SLJIT_LESS:
 1564     case SLJIT_LESS_F64:
 1565         return 0x2;
 1566 
 1567     case SLJIT_GREATER_EQUAL:
 1568     case SLJIT_GREATER_EQUAL_F64:
 1569         return 0x3;
 1570 
 1571     case SLJIT_GREATER:
 1572     case SLJIT_GREATER_F64:
 1573         return 0x9;
 1574 
 1575     case SLJIT_LESS_EQUAL:
 1576     case SLJIT_LESS_EQUAL_F64:
 1577         return 0x8;
 1578 
 1579     case SLJIT_SIG_LESS:
 1580         return 0xa;
 1581 
 1582     case SLJIT_SIG_GREATER_EQUAL:
 1583         return 0xb;
 1584 
 1585     case SLJIT_SIG_GREATER:
 1586         return 0xd;
 1587 
 1588     case SLJIT_SIG_LESS_EQUAL:
 1589         return 0xc;
 1590 
 1591     case SLJIT_OVERFLOW:
 1592     case SLJIT_UNORDERED_F64:
 1593         return 0x7;
 1594 
 1595     case SLJIT_NOT_OVERFLOW:
 1596     case SLJIT_ORDERED_F64:
 1597         return 0x6;
 1598 
 1599     default:
 1600         SLJIT_UNREACHABLE();
 1601         return 0xe;
 1602     }
 1603 }
 1604 
 1605 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
 1606 {
 1607     struct sljit_label *label;
 1608 
 1609     CHECK_ERROR_PTR();
 1610     CHECK_PTR(check_sljit_emit_label(compiler));
 1611 
 1612     if (compiler->last_label && compiler->last_label->size == compiler->size)
 1613         return compiler->last_label;
 1614 
 1615     label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
 1616     PTR_FAIL_IF(!label);
 1617     set_label(label, compiler);
 1618     return label;
 1619 }
 1620 
 1621 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
 1622 {
 1623     struct sljit_jump *jump;
 1624 
 1625     CHECK_ERROR_PTR();
 1626     CHECK_PTR(check_sljit_emit_jump(compiler, type));
 1627 
 1628     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 1629     PTR_FAIL_IF(!jump);
 1630     set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
 1631     type &= 0xff;
 1632 
 1633     if (type < SLJIT_JUMP) {
 1634         jump->flags |= IS_COND;
 1635         PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type)));
 1636     }
 1637     else if (type >= SLJIT_FAST_CALL)
 1638         jump->flags |= IS_BL;
 1639 
 1640     PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
 1641     jump->addr = compiler->size;
 1642     PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)));
 1643 
 1644     return jump;
 1645 }
 1646 
 1647 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
 1648     sljit_s32 arg_types)
 1649 {
 1650     CHECK_ERROR_PTR();
 1651     CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
 1652 
 1653 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 1654         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 1655     compiler->skip_checks = 1;
 1656 #endif
 1657 
 1658     return sljit_emit_jump(compiler, type);
 1659 }
 1660 
 1661 static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type,
 1662     sljit_s32 src, sljit_sw srcw)
 1663 {
 1664     struct sljit_jump *jump;
 1665     sljit_ins inv_bits = (type & SLJIT_I32_OP) ? (1 << 31) : 0;
 1666 
 1667     SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL);
 1668     ADJUST_LOCAL_OFFSET(src, srcw);
 1669 
 1670     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 1671     PTR_FAIL_IF(!jump);
 1672     set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
 1673     jump->flags |= IS_CBZ | IS_COND;
 1674 
 1675     if (src & SLJIT_MEM) {
 1676         PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
 1677         src = TMP_REG1;
 1678     }
 1679     else if (src & SLJIT_IMM) {
 1680         PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 1681         src = TMP_REG1;
 1682     }
 1683 
 1684     SLJIT_ASSERT(FAST_IS_REG(src));
 1685 
 1686     if ((type & 0xff) == SLJIT_EQUAL)
 1687         inv_bits |= 1 << 24;
 1688 
 1689     PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src)));
 1690     PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
 1691     jump->addr = compiler->size;
 1692     PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1)));
 1693     return jump;
 1694 }
 1695 
 1696 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
 1697 {
 1698     struct sljit_jump *jump;
 1699 
 1700     CHECK_ERROR();
 1701     CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
 1702     ADJUST_LOCAL_OFFSET(src, srcw);
 1703 
 1704     if (!(src & SLJIT_IMM)) {
 1705         if (src & SLJIT_MEM) {
 1706             FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
 1707             src = TMP_REG1;
 1708         }
 1709         return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src));
 1710     }
 1711 
 1712     /* These jumps are converted to jump/call instructions when possible. */
 1713     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 1714     FAIL_IF(!jump);
 1715     set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
 1716     jump->u.target = srcw;
 1717 
 1718     FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
 1719     jump->addr = compiler->size;
 1720     return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1));
 1721 }
 1722 
 1723 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
 1724     sljit_s32 arg_types,
 1725     sljit_s32 src, sljit_sw srcw)
 1726 {
 1727     CHECK_ERROR();
 1728     CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
 1729 
 1730 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 1731         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 1732     compiler->skip_checks = 1;
 1733 #endif
 1734 
 1735     return sljit_emit_ijump(compiler, type, src, srcw);
 1736 }
 1737 
 1738 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
 1739     sljit_s32 dst, sljit_sw dstw,
 1740     sljit_s32 type)
 1741 {
 1742     sljit_s32 dst_r, src_r, flags, mem_flags;
 1743     sljit_ins cc;
 1744 
 1745     CHECK_ERROR();
 1746     CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
 1747     ADJUST_LOCAL_OFFSET(dst, dstw);
 1748 
 1749     cc = get_cc(type & 0xff);
 1750     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 1751 
 1752     if (GET_OPCODE(op) < SLJIT_ADD) {
 1753         FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO)));
 1754 
 1755         if (dst_r == TMP_REG1) {
 1756             mem_flags = (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE;
 1757             return emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG2);
 1758         }
 1759 
 1760         return SLJIT_SUCCESS;
 1761     }
 1762 
 1763     flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
 1764     mem_flags = WORD_SIZE;
 1765 
 1766     if (op & SLJIT_I32_OP) {
 1767         flags |= INT_OP;
 1768         mem_flags = INT_SIZE;
 1769     }
 1770 
 1771     src_r = dst;
 1772 
 1773     if (dst & SLJIT_MEM) {
 1774         FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG1));
 1775         src_r = TMP_REG1;
 1776     }
 1777 
 1778     FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO)));
 1779     emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src_r, TMP_REG2);
 1780 
 1781     if (dst & SLJIT_MEM)
 1782         return emit_op_mem(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, TMP_REG2);
 1783     return SLJIT_SUCCESS;
 1784 }
 1785 
 1786 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
 1787     sljit_s32 dst_reg,
 1788     sljit_s32 src, sljit_sw srcw)
 1789 {
 1790     sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? (1 << 31) : 0;
 1791     sljit_ins cc;
 1792 
 1793     CHECK_ERROR();
 1794     CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
 1795 
 1796     if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
 1797         if (dst_reg & SLJIT_I32_OP)
 1798             srcw = (sljit_s32)srcw;
 1799         FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 1800         src = TMP_REG1;
 1801         srcw = 0;
 1802     }
 1803 
 1804     cc = get_cc(type & 0xff);
 1805     dst_reg &= ~SLJIT_I32_OP;
 1806 
 1807     return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src));
 1808 }
 1809 
 1810 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
 1811     sljit_s32 reg,
 1812     sljit_s32 mem, sljit_sw memw)
 1813 {
 1814     sljit_u32 sign = 0, inst;
 1815 
 1816     CHECK_ERROR();
 1817     CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
 1818 
 1819     if ((mem & OFFS_REG_MASK) || (memw > 255 && memw < -256))
 1820         return SLJIT_ERR_UNSUPPORTED;
 1821 
 1822     if (type & SLJIT_MEM_SUPP)
 1823         return SLJIT_SUCCESS;
 1824 
 1825     switch (type & 0xff) {
 1826     case SLJIT_MOV:
 1827     case SLJIT_MOV_P:
 1828         inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400;
 1829         break;
 1830     case SLJIT_MOV_S8:
 1831         sign = 1;
 1832     case SLJIT_MOV_U8:
 1833         inst = STURBI | (MEM_SIZE_SHIFT(BYTE_SIZE) << 30) | 0x400;
 1834         break;
 1835     case SLJIT_MOV_S16:
 1836         sign = 1;
 1837     case SLJIT_MOV_U16:
 1838         inst = STURBI | (MEM_SIZE_SHIFT(HALF_SIZE) << 30) | 0x400;
 1839         break;
 1840     case SLJIT_MOV_S32:
 1841         sign = 1;
 1842     case SLJIT_MOV_U32:
 1843         inst = STURBI | (MEM_SIZE_SHIFT(INT_SIZE) << 30) | 0x400;
 1844         break;
 1845     default:
 1846         SLJIT_UNREACHABLE();
 1847         inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400;
 1848         break;
 1849     }
 1850 
 1851     if (!(type & SLJIT_MEM_STORE))
 1852         inst |= sign ? 0x00800000 : 0x00400000;
 1853 
 1854     if (type & SLJIT_MEM_PRE)
 1855         inst |= 0x800;
 1856 
 1857     return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
 1858 }
 1859 
 1860 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
 1861     sljit_s32 freg,
 1862     sljit_s32 mem, sljit_sw memw)
 1863 {
 1864     sljit_u32 inst;
 1865 
 1866     CHECK_ERROR();
 1867     CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
 1868 
 1869     if ((mem & OFFS_REG_MASK) || (memw > 255 && memw < -256))
 1870         return SLJIT_ERR_UNSUPPORTED;
 1871 
 1872     if (type & SLJIT_MEM_SUPP)
 1873         return SLJIT_SUCCESS;
 1874 
 1875     inst = STUR_FI | 0x80000400;
 1876 
 1877     if (!(type & SLJIT_F32_OP))
 1878         inst |= 0x40000000;
 1879 
 1880     if (!(type & SLJIT_MEM_STORE))
 1881         inst |= 0x00400000;
 1882 
 1883     if (type & SLJIT_MEM_PRE)
 1884         inst |= 0x800;
 1885 
 1886     return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
 1887 }
 1888 
 1889 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
 1890 {
 1891     sljit_s32 dst_reg;
 1892     sljit_ins ins;
 1893 
 1894     CHECK_ERROR();
 1895     CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
 1896 
 1897     SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0);
 1898 
 1899     dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
 1900 
 1901     if (offset <= 0xffffff && offset >= -0xffffff) {
 1902         ins = ADDI;
 1903         if (offset < 0) {
 1904             offset = -offset;
 1905             ins = SUBI;
 1906         }
 1907 
 1908         if (offset <= 0xfff)
 1909             FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10)));
 1910         else {
 1911             FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22)));
 1912 
 1913             offset &= 0xfff;
 1914             if (offset != 0)
 1915                 FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10)));
 1916         }
 1917     }
 1918     else {
 1919         FAIL_IF(load_immediate (compiler, dst_reg, offset));
 1920         /* Add extended register form. */
 1921         FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg)));
 1922     }
 1923 
 1924     if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
 1925         return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
 1926     return SLJIT_SUCCESS;
 1927 }
 1928 
 1929 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 1930 {
 1931     struct sljit_const *const_;
 1932     sljit_s32 dst_r;
 1933 
 1934     CHECK_ERROR_PTR();
 1935     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
 1936     ADJUST_LOCAL_OFFSET(dst, dstw);
 1937 
 1938     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
 1939     PTR_FAIL_IF(!const_);
 1940     set_const(const_, compiler);
 1941 
 1942     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 1943     PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value));
 1944 
 1945     if (dst & SLJIT_MEM)
 1946         PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
 1947     return const_;
 1948 }
 1949 
 1950 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 1951 {
 1952     sljit_ins* inst = (sljit_ins*)addr;
 1953     modify_imm64_const(inst, new_target);
 1954     inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
 1955     SLJIT_CACHE_FLUSH(inst, inst + 4);
 1956 }
 1957 
 1958 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 1959 {
 1960     sljit_ins* inst = (sljit_ins*)addr;
 1961     modify_imm64_const(inst, new_constant);
 1962     inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
 1963     SLJIT_CACHE_FLUSH(inst, inst + 4);
 1964 }