"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.44/sljit/sljitNativeARM_32.c" (29 Nov 2019, 81540 Bytes) of package /linux/misc/pcre-8.44.tar.bz2:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "sljitNativeARM_32.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 8.43_vs_8.44.

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef __SOFTFP__
#define ARM_ABI_INFO " ABI:softfp"
#else
#define ARM_ABI_INFO " ABI:hardfp"
#endif

SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
    return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
    return "ARMv5" SLJIT_CPUINFO ARM_ABI_INFO;
#else
#error "Internal error: Unknown ARM architecture"
#endif
}

/* Last register + 1. */
#define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC      (SLJIT_NUMBER_OF_REGISTERS + 4)

#define TMP_FREG1   (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2   (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* In ARM instruction words.
   Cache lines are usually 32-byte aligned. */
#define CONST_POOL_ALIGNMENT    8
#define CONST_POOL_EMPTY    0xffffffff

#define ALIGN_INSTRUCTION(ptr) \
    (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
#define MAX_DIFFERENCE(max_diff) \
    (((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
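
/* Worked example of the two macros above (explanatory note): with
   CONST_POOL_ALIGNMENT == 8 words (32 bytes), ALIGN_INSTRUCTION rounds
   0x1004 up to 0x1020. MAX_DIFFERENCE(4092) yields 4092 / 4 - 7 = 1016
   words: a 12-bit ldr offset reaches at most 4095 bytes, and aligning
   the pool may insert up to 7 words of padding, so the distance check
   stays conservative. */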

/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
    0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};

static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
    0, 0, 1, 2, 3, 4, 5, 6, 7
};

#define RM(rm) (reg_map[rm])
#define RD(rd) (reg_map[rd] << 12)
#define RN(rn) (reg_map[rn] << 16)
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* The instruction includes the AL condition.
   INST_NAME - CONDITIONAL removes this flag. */
#define COND_MASK   0xf0000000
#define CONDITIONAL 0xe0000000
#define PUSH_POOL   0xff000000

#define ADC     0xe0a00000
#define ADD     0xe0800000
#define AND     0xe0000000
#define B       0xea000000
#define BIC     0xe1c00000
#define BL      0xeb000000
#define BLX     0xe12fff30
#define BX      0xe12fff10
#define CLZ     0xe16f0f10
#define CMN     0xe1600000
#define CMP     0xe1400000
#define BKPT        0xe1200070
#define EOR     0xe0200000
#define MOV     0xe1a00000
#define MUL     0xe0000090
#define MVN     0xe1e00000
#define NOP     0xe1a00000
#define ORR     0xe1800000
#define PUSH        0xe92d0000
#define POP     0xe8bd0000
#define RSB     0xe0600000
#define RSC     0xe0e00000
#define SBC     0xe0c00000
#define SMULL       0xe0c00090
#define SUB     0xe0400000
#define UMULL       0xe0800090
#define VABS_F32    0xeeb00ac0
#define VADD_F32    0xee300a00
#define VCMP_F32    0xeeb40a40
#define VCVT_F32_S32    0xeeb80ac0
#define VCVT_F64_F32    0xeeb70ac0
#define VCVT_S32_F32    0xeebd0ac0
#define VDIV_F32    0xee800a00
#define VMOV_F32    0xeeb00a40
#define VMOV        0xee000a10
#define VMOV2       0xec400a10
#define VMRS        0xeef1fa10
#define VMUL_F32    0xee200a00
#define VNEG_F32    0xeeb10a40
#define VSTR_F32    0xed000a00
#define VSUB_F32    0xee300a40

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Arm v7 specific instructions. */
#define MOVW        0xe3000000
#define MOVT        0xe3400000
#define SXTB        0xe6af0070
#define SXTH        0xe6bf0070
#define UXTB        0xe6ef0070
#define UXTH        0xe6ff0070
#endif

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)

static sljit_s32 push_cpool(struct sljit_compiler *compiler)
{
    /* Pushing the constant pool into the instruction stream. */
    sljit_uw* inst;
    sljit_uw* cpool_ptr;
    sljit_uw* cpool_end;
    sljit_s32 i;

    /* The label could point to the address after the constant pool. */
    if (compiler->last_label && compiler->last_label->size == compiler->size)
        compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;

    SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
    inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
    FAIL_IF(!inst);
    compiler->size++;
    *inst = 0xff000000 | compiler->cpool_fill;

    for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
        inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
        FAIL_IF(!inst);
        compiler->size++;
        *inst = 0;
    }

    cpool_ptr = compiler->cpool;
    cpool_end = cpool_ptr + compiler->cpool_fill;
    while (cpool_ptr < cpool_end) {
        inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
        FAIL_IF(!inst);
        compiler->size++;
        *inst = *cpool_ptr++;
    }
    compiler->cpool_diff = CONST_POOL_EMPTY;
    compiler->cpool_fill = 0;
    return SLJIT_SUCCESS;
}

static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
{
    sljit_uw* ptr;

    if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
        FAIL_IF(push_cpool(compiler));

    ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
    FAIL_IF(!ptr);
    compiler->size++;
    *ptr = inst;
    return SLJIT_SUCCESS;
}

static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
{
    sljit_uw* ptr;
    sljit_uw cpool_index = CPOOL_SIZE;
    sljit_uw* cpool_ptr;
    sljit_uw* cpool_end;
    sljit_u8* cpool_unique_ptr;

    if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
        FAIL_IF(push_cpool(compiler));
    else if (compiler->cpool_fill > 0) {
        cpool_ptr = compiler->cpool;
        cpool_end = cpool_ptr + compiler->cpool_fill;
        cpool_unique_ptr = compiler->cpool_unique;
        do {
            if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
                cpool_index = cpool_ptr - compiler->cpool;
                break;
            }
            cpool_ptr++;
            cpool_unique_ptr++;
        } while (cpool_ptr < cpool_end);
    }

    if (cpool_index == CPOOL_SIZE) {
        /* Must allocate a new entry in the literal pool. */
        if (compiler->cpool_fill < CPOOL_SIZE) {
            cpool_index = compiler->cpool_fill;
            compiler->cpool_fill++;
        }
        else {
            FAIL_IF(push_cpool(compiler));
            cpool_index = 0;
            compiler->cpool_fill = 1;
        }
    }

    SLJIT_ASSERT((inst & 0xfff) == 0);
    ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
    FAIL_IF(!ptr);
    compiler->size++;
    *ptr = inst | cpool_index;

    compiler->cpool[cpool_index] = literal;
    compiler->cpool_unique[cpool_index] = 0;
    if (compiler->cpool_diff == CONST_POOL_EMPTY)
        compiler->cpool_diff = compiler->size;
    return SLJIT_SUCCESS;
}

static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
{
    sljit_uw* ptr;
    if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
        FAIL_IF(push_cpool(compiler));

    SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
    ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
    FAIL_IF(!ptr);
    compiler->size++;
    *ptr = inst | compiler->cpool_fill;

    compiler->cpool[compiler->cpool_fill] = literal;
    compiler->cpool_unique[compiler->cpool_fill] = 1;
    compiler->cpool_fill++;
    if (compiler->cpool_diff == CONST_POOL_EMPTY)
        compiler->cpool_diff = compiler->size;
    return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
{
    /* Place for at least two instructions (it doesn't matter whether the first has a literal). */
    if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
        return push_cpool(compiler);
    return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
{
    /* Must closely follow the previous instruction (so it can be converted to a bl instruction). */
    SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
    SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

    return push_inst(compiler, BLX | RM(TMP_REG1));
}

static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
{
    sljit_uw diff;
    sljit_uw ind;
    sljit_uw counter = 0;
    sljit_uw* clear_const_pool = const_pool;
    sljit_uw* clear_const_pool_end = const_pool + cpool_size;

    SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
    /* Set unused flag for all literals in the constant pool.
       I.e.: unused literals can belong to branches, which can be encoded as B or BL.
       We can "compress" the constant pool by discarding these literals. */
    while (clear_const_pool < clear_const_pool_end)
        *clear_const_pool++ = (sljit_uw)(-1);

    while (last_pc_patch < code_ptr) {
        /* Data transfer instruction with Rn == r15. */
        if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) {
            diff = const_pool - last_pc_patch;
            ind = (*last_pc_patch) & 0xfff;

            /* Must be a load instruction with immediate offset. */
            SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
            if ((sljit_s32)const_pool[ind] < 0) {
                const_pool[ind] = counter;
                ind = counter;
                counter++;
            }
            else
                ind = const_pool[ind];

            SLJIT_ASSERT(diff >= 1);
            if (diff >= 2 || ind > 0) {
                diff = (diff + ind - 2) << 2;
                SLJIT_ASSERT(diff <= 0xfff);
                *last_pc_patch = (*last_pc_patch & ~0xfff) | diff;
            }
            else
                *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004;
        }
        last_pc_patch++;
    }
    return counter;
}
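
/* A note on the "diff + ind - 2" adjustment above: in ARM state the PC
   reads as the address of the current instruction plus 8 (two words).
   The byte offset a pc-relative load needs is therefore
   ((const_pool + ind) - (load address + 2)) * 4, which is exactly
   (diff + ind - 2) << 2 with diff and ind measured in words. */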

/* In some rare occasions we may need future patches. The probability is close to 0 in practice. */
struct future_patch {
    struct future_patch* next;
    sljit_s32 index;
    sljit_s32 value;
};

static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
{
    sljit_s32 value;
    struct future_patch *curr_patch, *prev_patch;

    SLJIT_UNUSED_ARG(compiler);

    /* Using the values generated by patch_pc_relative_loads. */
    if (!*first_patch)
        value = (sljit_s32)cpool_start_address[cpool_current_index];
    else {
        curr_patch = *first_patch;
        prev_patch = NULL;
        while (1) {
            if (!curr_patch) {
                value = (sljit_s32)cpool_start_address[cpool_current_index];
                break;
            }
            if ((sljit_uw)curr_patch->index == cpool_current_index) {
                value = curr_patch->value;
                if (prev_patch)
                    prev_patch->next = curr_patch->next;
                else
                    *first_patch = curr_patch->next;
                SLJIT_FREE(curr_patch, compiler->allocator_data);
                break;
            }
            prev_patch = curr_patch;
            curr_patch = curr_patch->next;
        }
    }

    if (value >= 0) {
        if ((sljit_uw)value > cpool_current_index) {
            curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
            if (!curr_patch) {
                while (*first_patch) {
                    curr_patch = *first_patch;
                    *first_patch = (*first_patch)->next;
                    SLJIT_FREE(curr_patch, compiler->allocator_data);
                }
                return SLJIT_ERR_ALLOC_FAILED;
            }
            curr_patch->next = *first_patch;
            curr_patch->index = value;
            curr_patch->value = cpool_start_address[value];
            *first_patch = curr_patch;
        }
        cpool_start_address[value] = *buf_ptr;
    }
    return SLJIT_SUCCESS;
}

#else

static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
{
    sljit_uw* ptr;

    ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
    FAIL_IF(!ptr);
    compiler->size++;
    *ptr = inst;
    return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
    FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
    return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
}
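
/* Example of the MOVW/MOVT pair: for imm == 0x12345678 the first
   instruction is movw reg, #0x5678 (imm4 0x5 in bits 19:16, imm12 0x678
   in bits 11:0) and the second is movt reg, #0x1234, which sets the
   upper halfword without touching the lower one. */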

#endif

static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
{
    sljit_sw diff;

    if (jump->flags & SLJIT_REWRITABLE_JUMP)
        return 0;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
    if (jump->flags & IS_BL)
        code_ptr--;

    if (jump->flags & JUMP_ADDR)
        diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset);
    else {
        SLJIT_ASSERT(jump->flags & JUMP_LABEL);
        diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
    }

    /* Branch to Thumb code has not been optimized yet. */
    if (diff & 0x3)
        return 0;

    if (jump->flags & IS_BL) {
        if (diff <= 0x01ffffff && diff >= -0x02000000) {
            *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
            jump->flags |= PATCH_B;
            return 1;
        }
    }
    else {
        if (diff <= 0x01ffffff && diff >= -0x02000000) {
            *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
            jump->flags |= PATCH_B;
        }
    }
#else
    if (jump->flags & JUMP_ADDR)
        diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset);
    else {
        SLJIT_ASSERT(jump->flags & JUMP_LABEL);
        diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
    }

    /* Branch to Thumb code has not been optimized yet. */
    if (diff & 0x3)
        return 0;

    if (diff <= 0x01ffffff && diff >= -0x02000000) {
        code_ptr -= 2;
        *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
        jump->flags |= PATCH_B;
        return 1;
    }
#endif
    return 0;
}
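
/* The range check above corresponds to the B/BL encoding: the byte
   difference is stored as a signed 24-bit word offset, so direct
   branches can reach roughly +/-32MB (0x01ffffff .. -0x02000000 bytes);
   anything farther keeps its constant-pool (v5) or movw/movt (v7) form. */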

static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
    sljit_uw *ptr = (sljit_uw *)jump_ptr;
    sljit_uw *inst = (sljit_uw *)ptr[0];
    sljit_uw mov_pc = ptr[1];
    sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
    sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);

    if (diff <= 0x7fffff && diff >= -0x800000) {
        /* Turn to branch. */
        if (!bl) {
            inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
            if (flush_cache) {
                inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
                SLJIT_CACHE_FLUSH(inst, inst + 1);
            }
        } else {
            inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
            inst[1] = NOP;
            if (flush_cache) {
                inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
                SLJIT_CACHE_FLUSH(inst, inst + 2);
            }
        }
    } else {
        /* Get the position of the constant. */
        if (mov_pc & (1 << 23))
            ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
        else
            ptr = inst + 1;

        if (*inst != mov_pc) {
            inst[0] = mov_pc;
            if (!bl) {
                if (flush_cache) {
                    inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
                    SLJIT_CACHE_FLUSH(inst, inst + 1);
                }
            } else {
                inst[1] = BLX | RM(TMP_REG1);
                if (flush_cache) {
                    inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
                    SLJIT_CACHE_FLUSH(inst, inst + 2);
                }
            }
        }
        *ptr = new_addr;
    }
#else
    sljit_uw *inst = (sljit_uw*)jump_ptr;
    SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
    inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
    inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);
    if (flush_cache) {
        inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
        SLJIT_CACHE_FLUSH(inst, inst + 2);
    }
#endif
}

static sljit_uw get_imm(sljit_uw imm);

static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
    sljit_uw *ptr = (sljit_uw*)addr;
    sljit_uw *inst = (sljit_uw*)ptr[0];
    sljit_uw ldr_literal = ptr[1];
    sljit_uw src2;

    src2 = get_imm(new_constant);
    if (src2) {
        *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;
        if (flush_cache) {
            inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
            SLJIT_CACHE_FLUSH(inst, inst + 1);
        }
        return;
    }

    src2 = get_imm(~new_constant);
    if (src2) {
        *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;
        if (flush_cache) {
            inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
            SLJIT_CACHE_FLUSH(inst, inst + 1);
        }
        return;
    }

    if (ldr_literal & (1 << 23))
        ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
    else
        ptr = inst + 1;

    if (*inst != ldr_literal) {
        *inst = ldr_literal;
        if (flush_cache) {
            inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
            SLJIT_CACHE_FLUSH(inst, inst + 1);
        }
    }
    *ptr = new_constant;
#else
    sljit_uw *inst = (sljit_uw*)addr;
    SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
    inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
    inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);
    if (flush_cache) {
        inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
        SLJIT_CACHE_FLUSH(inst, inst + 2);
    }
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
    struct sljit_memory_fragment *buf;
    sljit_uw *code;
    sljit_uw *code_ptr;
    sljit_uw *buf_ptr;
    sljit_uw *buf_end;
    sljit_uw size;
    sljit_uw word_count;
    sljit_uw next_addr;
    sljit_sw executable_offset;
    sljit_sw addr;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
    sljit_uw cpool_size;
    sljit_uw cpool_skip_alignment;
    sljit_uw cpool_current_index;
    sljit_uw *cpool_start_address;
    sljit_uw *last_pc_patch;
    struct future_patch *first_patch;
#endif

    struct sljit_label *label;
    struct sljit_jump *jump;
    struct sljit_const *const_;
    struct sljit_put_label *put_label;

    CHECK_ERROR_PTR();
    CHECK_PTR(check_sljit_generate_code(compiler));
    reverse_buf(compiler);

    /* Second code generation pass. */
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
    size = compiler->size + (compiler->patches << 1);
    if (compiler->cpool_fill > 0)
        size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
#else
    size = compiler->size;
#endif
    code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw));
    PTR_FAIL_WITH_EXEC_IF(code);
    buf = compiler->buf;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
    cpool_size = 0;
    cpool_skip_alignment = 0;
    cpool_current_index = 0;
    cpool_start_address = NULL;
    first_patch = NULL;
    last_pc_patch = code;
#endif

    code_ptr = code;
    word_count = 0;
    next_addr = 1;
    executable_offset = SLJIT_EXEC_OFFSET(code);

    label = compiler->labels;
    jump = compiler->jumps;
    const_ = compiler->consts;
    put_label = compiler->put_labels;

    if (label && label->size == 0) {
        label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
        label = label->next;
    }

    do {
        buf_ptr = (sljit_uw*)buf->memory;
        buf_end = buf_ptr + (buf->used_size >> 2);
        do {
            word_count++;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
            if (cpool_size > 0) {
                if (cpool_skip_alignment > 0) {
                    buf_ptr++;
                    cpool_skip_alignment--;
                }
                else {
                    if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
                        SLJIT_FREE_EXEC(code);
                        compiler->error = SLJIT_ERR_ALLOC_FAILED;
                        return NULL;
                    }
                    buf_ptr++;
                    if (++cpool_current_index >= cpool_size) {
                        SLJIT_ASSERT(!first_patch);
                        cpool_size = 0;
                        if (label && label->size == word_count) {
                            /* Points after the current instruction. */
                            label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
                            label->size = code_ptr - code;
                            label = label->next;

                            next_addr = compute_next_addr(label, jump, const_, put_label);
                        }
                    }
                }
            }
            else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
#endif
                *code_ptr = *buf_ptr++;
                if (next_addr == word_count) {
                    SLJIT_ASSERT(!label || label->size >= word_count);
                    SLJIT_ASSERT(!jump || jump->addr >= word_count);
                    SLJIT_ASSERT(!const_ || const_->addr >= word_count);
                    SLJIT_ASSERT(!put_label || put_label->addr >= word_count);

                    /* These structures are ordered by their address. */
                    if (jump && jump->addr == word_count) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
                        if (detect_jump_type(jump, code_ptr, code, executable_offset))
                            code_ptr--;
                        jump->addr = (sljit_uw)code_ptr;
#else
                        jump->addr = (sljit_uw)(code_ptr - 2);
                        if (detect_jump_type(jump, code_ptr, code, executable_offset))
                            code_ptr -= 2;
#endif
                        jump = jump->next;
                    }
                    if (label && label->size == word_count) {
                        /* code_ptr can be affected above. */
                        label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
                        label->size = (code_ptr + 1) - code;
                        label = label->next;
                    }
                    if (const_ && const_->addr == word_count) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
                        const_->addr = (sljit_uw)code_ptr;
#else
                        const_->addr = (sljit_uw)(code_ptr - 1);
#endif
                        const_ = const_->next;
                    }
                    if (put_label && put_label->addr == word_count) {
                        SLJIT_ASSERT(put_label->label);
                        put_label->addr = (sljit_uw)code_ptr;
                        put_label = put_label->next;
                    }
                    next_addr = compute_next_addr(label, jump, const_, put_label);
                }
                code_ptr++;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
            }
            else {
                /* Fortunately, no need to shift. */
                cpool_size = *buf_ptr++ & ~PUSH_POOL;
                SLJIT_ASSERT(cpool_size > 0);
                cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
                cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
                if (cpool_current_index > 0) {
                    /* Unconditional branch. */
                    *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
                    code_ptr = cpool_start_address + cpool_current_index;
                }
                cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
                cpool_current_index = 0;
                last_pc_patch = code_ptr;
            }
#endif
        } while (buf_ptr < buf_end);
        buf = buf->next;
    } while (buf);

    SLJIT_ASSERT(!label);
    SLJIT_ASSERT(!jump);
    SLJIT_ASSERT(!const_);
    SLJIT_ASSERT(!put_label);

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
    SLJIT_ASSERT(cpool_size == 0);
    if (compiler->cpool_fill > 0) {
        cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
        cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
        if (cpool_current_index > 0)
            code_ptr = cpool_start_address + cpool_current_index;

        buf_ptr = compiler->cpool;
        buf_end = buf_ptr + compiler->cpool_fill;
        cpool_current_index = 0;
        while (buf_ptr < buf_end) {
            if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
                SLJIT_FREE_EXEC(code);
                compiler->error = SLJIT_ERR_ALLOC_FAILED;
                return NULL;
            }
            buf_ptr++;
            cpool_current_index++;
        }
        SLJIT_ASSERT(!first_patch);
    }
#endif

    jump = compiler->jumps;
    while (jump) {
        buf_ptr = (sljit_uw *)jump->addr;

        if (jump->flags & PATCH_B) {
            addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
            if (!(jump->flags & JUMP_ADDR)) {
                SLJIT_ASSERT(jump->flags & JUMP_LABEL);
                SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - addr) >= -0x02000000);
                *buf_ptr |= (((sljit_sw)jump->u.label->addr - addr) >> 2) & 0x00ffffff;
            }
            else {
                SLJIT_ASSERT(((sljit_sw)jump->u.target - addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - addr) >= -0x02000000);
                *buf_ptr |= (((sljit_sw)jump->u.target - addr) >> 2) & 0x00ffffff;
            }
        }
        else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
            jump->addr = (sljit_uw)code_ptr;
            code_ptr[0] = (sljit_uw)buf_ptr;
            code_ptr[1] = *buf_ptr;
            inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
            code_ptr += 2;
#else
            inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
#endif
        }
        else {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
            if (jump->flags & IS_BL)
                buf_ptr--;
            if (*buf_ptr & (1 << 23))
                buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
            else
                buf_ptr += 1;
            *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
#else
            inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
#endif
        }
        jump = jump->next;
    }

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
    const_ = compiler->consts;
    while (const_) {
        buf_ptr = (sljit_uw*)const_->addr;
        const_->addr = (sljit_uw)code_ptr;

        code_ptr[0] = (sljit_uw)buf_ptr;
        code_ptr[1] = *buf_ptr;
        if (*buf_ptr & (1 << 23))
            buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
        else
            buf_ptr += 1;
        /* Set the value again (can be a simple constant). */
        inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
        code_ptr += 2;

        const_ = const_->next;
    }
#endif

    put_label = compiler->put_labels;
    while (put_label) {
        addr = put_label->label->addr;
        buf_ptr = (sljit_uw*)put_label->addr;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
        SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000);
        buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
#else
        SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT);
        buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff);
        buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
#endif
        put_label = put_label->next;
    }

    SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);

    compiler->error = SLJIT_ERR_COMPILED;
    compiler->executable_offset = executable_offset;
    compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);

    code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
    code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

    SLJIT_CACHE_FLUSH(code, code_ptr);
    return code;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
    switch (feature_type) {
    case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
        return SLJIT_IS_FPU_AVAILABLE;
#else
        /* Available by default. */
        return 1;
#endif

    case SLJIT_HAS_CLZ:
    case SLJIT_HAS_CMOV:
        return 1;

    default:
        return 0;
    }
}

/* --------------------------------------------------------------------- */
/*  Entry, exit                                                          */
/* --------------------------------------------------------------------- */

/* Creates an index into the data_transfer_insts array. */
#define WORD_SIZE   0x00
#define BYTE_SIZE   0x01
#define HALF_SIZE   0x02
#define PRELOAD     0x03
#define SIGNED      0x04
#define LOAD_DATA   0x08

/* Flag bits for emit_op. */
#define ALLOW_IMM   0x10
#define ALLOW_INV_IMM   0x20
#define ALLOW_ANY_IMM   (ALLOW_IMM | ALLOW_INV_IMM)

/* s/l - store/load (1 bit)
   u/s - unsigned/signed (1 bit)
   w/b/h/N - word/byte/half/NOT allowed (2 bits)
   Storing signed and unsigned values is the same operation. */

static const sljit_uw data_transfer_insts[16] = {
/* s u w */ 0xe5000000 /* str */,
/* s u b */ 0xe5400000 /* strb */,
/* s u h */ 0xe10000b0 /* strh */,
/* s u N */ 0x00000000 /* not allowed */,
/* s s w */ 0xe5000000 /* str */,
/* s s b */ 0xe5400000 /* strb */,
/* s s h */ 0xe10000b0 /* strh */,
/* s s N */ 0x00000000 /* not allowed */,

/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
/* l u p */ 0xf5500000 /* preload */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
/* l s N */ 0x00000000 /* not allowed */,
};

#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
    (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (arg))

/* Normal ldr/str instruction.
   Type2: ldrsb, ldrh, ldrsh */
#define IS_TYPE1_TRANSFER(type) \
    (data_transfer_insts[(type) & 0xf] & 0x04000000)
#define TYPE2_TRANSFER_IMM(imm) \
    (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
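
/* Example: type2 transfers (strh/ldrh/ldrsb/ldrsh) keep only an 8-bit
   immediate, split into two nibbles, so TYPE2_TRANSFER_IMM(0xab) places
   0xa in bits 11:8 and 0xb in bits 3:0 and sets bit 22 to select the
   immediate form. Type1 transfers (ldr/str/ldrb/strb, and preload) have
   bit 26 set and take a plain 12-bit immediate, which is why
   IS_TYPE1_TRANSFER tests 0x04000000. */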

static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w);

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
    sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
    sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
    sljit_s32 args, size, i, tmp;
    sljit_uw push;

    CHECK_ERROR();
    CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
    set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

    /* Push saved registers, temporary registers
       stmdb sp!, {..., lr} */
    push = PUSH | (1 << 14);

    tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
    for (i = SLJIT_S0; i >= tmp; i--)
        push |= 1 << reg_map[i];

    for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
        push |= 1 << reg_map[i];

    FAIL_IF(push_inst(compiler, push));

    /* Stack must be aligned to 8 bytes: */
    size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
    local_size = ((size + local_size + 7) & ~7) - size;
    compiler->local_size = local_size;
    if (local_size > 0)
        FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

    args = get_arg_count(arg_types);

    if (args >= 1)
        FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0) | RM(SLJIT_R0)));
    if (args >= 2)
        FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S1) | RM(SLJIT_R1)));
    if (args >= 3)
        FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S2) | RM(SLJIT_R2)));

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
    sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
    sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
    sljit_s32 size;

    CHECK_ERROR();
    CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
    set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

    size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
    compiler->local_size = ((size + local_size + 7) & ~7) - size;
    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
    sljit_s32 i, tmp;
    sljit_uw pop;

    CHECK_ERROR();
    CHECK(check_sljit_emit_return(compiler, op, src, srcw));

    FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

    if (compiler->local_size > 0)
        FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));

    /* Pop saved registers, temporary registers
       ldmia sp!, {..., pc} */
    pop = POP | (1 << 15);

    tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
    for (i = SLJIT_S0; i >= tmp; i--)
        pop |= 1 << reg_map[i];

    for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
        pop |= 1 << reg_map[i];

    return push_inst(compiler, pop);
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

/* flags: */
  /* Arguments are swapped. */
#define ARGS_SWAPPED    0x01
  /* Inverted immediate. */
#define INV_IMM     0x02
  /* Source and destination are registers. */
#define MOVE_REG_CONV   0x04
  /* Unused return value. */
#define UNUSED_RETURN   0x08
/* SET_FLAGS must be (1 << 20) as it is also the value of the S bit (can be used for optimization). */
#define SET_FLAGS   (1 << 20)
/* dst: reg
   src1: reg
   src2: reg or imm (if allowed)
   SRC2_IMM must be (1 << 25) as it is also the value of the I bit (can be used for optimization). */
#define SRC2_IMM    (1 << 25)

#define EMIT_SHIFT_INS_AND_RETURN(opcode) \
    SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \
    if (compiler->shift_imm != 0x20) { \
        SLJIT_ASSERT(src1 == TMP_REG1); \
        SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
        \
        if (compiler->shift_imm != 0) \
            return push_inst(compiler, MOV | (flags & SET_FLAGS) | \
                RD(dst) | (compiler->shift_imm << 7) | (opcode << 5) | RM(src2)); \
        return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2)); \
    } \
    return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | \
        (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1));
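
/* Shift encoding sketch: for an immediate shift the amount goes in bits
   11:7 and the shift type (0 lsl, 1 lsr, 2 asr) in bits 6:5; for a
   register shift the shifting register goes in bits 11:8 and bit 4
   (the 0x10 above) selects the register form. So opcode == 1 with
   shift_imm == 3 produces "mov dst, src2, lsr #3". */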

static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
    sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
{
    switch (GET_OPCODE(op)) {
    case SLJIT_MOV:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
        if (dst != src2) {
            if (src2 & SRC2_IMM) {
                return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
            }
            return push_inst(compiler, MOV | RD(dst) | RM(src2));
        }
        return SLJIT_SUCCESS;

    case SLJIT_MOV_U8:
    case SLJIT_MOV_S8:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
        if (flags & MOVE_REG_CONV) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
            if (op == SLJIT_MOV_U8)
                return push_inst(compiler, AND | RD(dst) | RN(src2) | SRC2_IMM | 0xff);
            FAIL_IF(push_inst(compiler, MOV | RD(dst) | (24 << 7) | RM(src2)));
            return push_inst(compiler, MOV | RD(dst) | (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst));
#else
            return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
#endif
        }
        else if (dst != src2) {
            SLJIT_ASSERT(src2 & SRC2_IMM);
            return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
        }
        return SLJIT_SUCCESS;

    case SLJIT_MOV_U16:
    case SLJIT_MOV_S16:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
        if (flags & MOVE_REG_CONV) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
            FAIL_IF(push_inst(compiler, MOV | RD(dst) | (16 << 7) | RM(src2)));
            return push_inst(compiler, MOV | RD(dst) | (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst));
#else
            return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
#endif
        }
        else if (dst != src2) {
            SLJIT_ASSERT(src2 & SRC2_IMM);
            return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
        }
        return SLJIT_SUCCESS;

    case SLJIT_NOT:
        if (src2 & SRC2_IMM) {
            return push_inst(compiler, ((flags & INV_IMM) ? MOV : MVN) | (flags & SET_FLAGS) | RD(dst) | src2);
        }
        return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2));

    case SLJIT_CLZ:
        SLJIT_ASSERT(!(flags & INV_IMM));
        SLJIT_ASSERT(!(src2 & SRC2_IMM));
        FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
        return SLJIT_SUCCESS;

    case SLJIT_ADD:
        SLJIT_ASSERT(!(flags & INV_IMM));
        if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED))
            return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
        return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

    case SLJIT_ADDC:
        SLJIT_ASSERT(!(flags & INV_IMM));
        return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

    case SLJIT_SUB:
        SLJIT_ASSERT(!(flags & INV_IMM));
        if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED))
            return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
        return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS)
            | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

    case SLJIT_SUBC:
        SLJIT_ASSERT(!(flags & INV_IMM));
        return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS)
            | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

    case SLJIT_MUL:
        SLJIT_ASSERT(!(flags & INV_IMM));
        SLJIT_ASSERT(!(src2 & SRC2_IMM));

        if (!HAS_FLAGS(op))
            return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]);

        FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1]));

        /* cmp TMP_REG1, dst asr #31. */
        return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0);

    case SLJIT_AND:
        return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS)
            | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

    case SLJIT_OR:
        SLJIT_ASSERT(!(flags & INV_IMM));
        return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

    case SLJIT_XOR:
        SLJIT_ASSERT(!(flags & INV_IMM));
        return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

    case SLJIT_SHL:
        EMIT_SHIFT_INS_AND_RETURN(0);

    case SLJIT_LSHR:
        EMIT_SHIFT_INS_AND_RETURN(1);

    case SLJIT_ASHR:
        EMIT_SHIFT_INS_AND_RETURN(2);
    }

    SLJIT_UNREACHABLE();
    return SLJIT_SUCCESS;
}

#undef EMIT_SHIFT_INS_AND_RETURN

/* Tests whether the immediate can be stored in the 12-bit imm field.
   Returns 0 if not possible. */
static sljit_uw get_imm(sljit_uw imm)
{
    sljit_s32 rol;

    if (imm <= 0xff)
        return SRC2_IMM | imm;

    if (!(imm & 0xff000000)) {
        imm <<= 8;
        rol = 8;
    }
    else {
        imm = (imm << 24) | (imm >> 8);
        rol = 0;
    }

    if (!(imm & 0xff000000)) {
        imm <<= 8;
        rol += 4;
    }

    if (!(imm & 0xf0000000)) {
        imm <<= 4;
        rol += 2;
    }

    if (!(imm & 0xc0000000)) {
        imm <<= 2;
        rol += 1;
    }

    if (!(imm & 0x00ffffff))
        return SRC2_IMM | (imm >> 24) | (rol << 8);
    else
        return 0;
}
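
/* The 12-bit operand2 field holds an 8-bit value rotated right by twice
   the 4-bit rotate count. Example: get_imm(0x0003fc00) returns
   SRC2_IMM | 0xff | (11 << 8), since 0xff rotated right by 22 bits
   (rotate count 11) is 0xff << 10 == 0x3fc00. A value such as 0x101
   cannot be encoded and yields 0. */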
 1221 
 1222 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 1223 static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm, sljit_s32 positive)
 1224 {
 1225     sljit_uw mask;
 1226     sljit_uw imm1;
 1227     sljit_uw imm2;
 1228     sljit_s32 rol;
 1229 
 1230     /* Step1: Search a zero byte (8 continous zero bit). */
 1231     mask = 0xff000000;
 1232     rol = 8;
 1233     while(1) {
 1234         if (!(imm & mask)) {
 1235             /* Rol imm by rol. */
 1236             imm = (imm << rol) | (imm >> (32 - rol));
 1237             /* Calculate arm rol. */
 1238             rol = 4 + (rol >> 1);
 1239             break;
 1240         }
 1241         rol += 2;
 1242         mask >>= 2;
 1243         if (mask & 0x3) {
 1244             /* rol by 8. */
 1245             imm = (imm << 8) | (imm >> 24);
 1246             mask = 0xff00;
 1247             rol = 24;
 1248             while (1) {
 1249                 if (!(imm & mask)) {
 1250                     /* Rol imm by rol. */
 1251                     imm = (imm << rol) | (imm >> (32 - rol));
 1252                     /* Calculate arm rol. */
 1253                     rol = (rol >> 1) - 8;
 1254                     break;
 1255                 }
 1256                 rol += 2;
 1257                 mask >>= 2;
 1258                 if (mask & 0x3)
 1259                     return 0;
 1260             }
 1261             break;
 1262         }
 1263     }
 1264 
 1265     /* The low 8 bit must be zero. */
 1266     SLJIT_ASSERT(!(imm & 0xff));
 1267 
 1268     if (!(imm & 0xff000000)) {
 1269         imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
 1270         imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
 1271     }
 1272     else if (imm & 0xc0000000) {
 1273         imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
 1274         imm <<= 8;
 1275         rol += 4;
 1276 
 1277         if (!(imm & 0xff000000)) {
 1278             imm <<= 8;
 1279             rol += 4;
 1280         }
 1281 
 1282         if (!(imm & 0xf0000000)) {
 1283             imm <<= 4;
 1284             rol += 2;
 1285         }
 1286 
 1287         if (!(imm & 0xc0000000)) {
 1288             imm <<= 2;
 1289             rol += 1;
 1290         }
 1291 
 1292         if (!(imm & 0x00ffffff))
 1293             imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
 1294         else
 1295             return 0;
 1296     }
 1297     else {
 1298         if (!(imm & 0xf0000000)) {
 1299             imm <<= 4;
 1300             rol += 2;
 1301         }
 1302 
 1303         if (!(imm & 0xc0000000)) {
 1304             imm <<= 2;
 1305             rol += 1;
 1306         }
 1307 
 1308         imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
 1309         imm <<= 8;
 1310         rol += 4;
 1311 
 1312         if (!(imm & 0xf0000000)) {
 1313             imm <<= 4;
 1314             rol += 2;
 1315         }
 1316 
 1317         if (!(imm & 0xc0000000)) {
 1318             imm <<= 2;
 1319             rol += 1;
 1320         }
 1321 
 1322         if (!(imm & 0x00ffffff))
 1323             imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
 1324         else
 1325             return 0;
 1326     }
 1327 
 1328     FAIL_IF(push_inst(compiler, (positive ? MOV : MVN) | RD(reg) | imm1));
 1329     FAIL_IF(push_inst(compiler, (positive ? ORR : BIC) | RD(reg) | RN(reg) | imm2));
 1330     return 1;
 1331 }
 1332 #endif
 1333 
 1334 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
 1335 {
 1336     sljit_uw tmp;
 1337 
 1338 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
 1339     if (!(imm & ~0xffff))
 1340         return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
 1341 #endif
 1342 
 1343     /* Create imm by 1 inst. */
 1344     tmp = get_imm(imm);
 1345     if (tmp)
 1346         return push_inst(compiler, MOV | RD(reg) | tmp);
 1347 
 1348     tmp = get_imm(~imm);
 1349     if (tmp)
 1350         return push_inst(compiler, MVN | RD(reg) | tmp);
 1351 
 1352 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 1353     /* Create imm by 2 inst. */
 1354     FAIL_IF(generate_int(compiler, reg, imm, 1));
 1355     FAIL_IF(generate_int(compiler, reg, ~imm, 0));
 1356 
 1357     /* Load integer. */
 1358     return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm);
 1359 #else
 1360     FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
 1361     if (imm <= 0xffff)
 1362         return SLJIT_SUCCESS;
 1363     return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
 1364 #endif
 1365 }
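
      /* Illustrative example (added note, not from the original source):
         load_immediate(compiler, reg, 0xff000000) fits one MOV (0xff rotated
         right by 8), while 0x12345678 spreads over more than two 8-bit
         chunks, so ARMv5 falls back to a literal pool load and ARMv7 emits
         a MOVW/MOVT pair. */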
 1366 
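      /* "Type 1" transfers (LDR/STR of words and unsigned bytes) accept a
         12-bit immediate offset, while "type 2" transfers (halfwords and
         signed loads) only accept a split 8-bit immediate, hence the
         different masks (0xfff vs 0xff) below. */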
 1367 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
 1368     sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
 1369 {
 1370     sljit_uw imm, offset_reg;
 1371     sljit_uw is_type1_transfer = IS_TYPE1_TRANSFER(flags);
 1372 
 1373     SLJIT_ASSERT(arg & SLJIT_MEM);
 1374     SLJIT_ASSERT((arg & REG_MASK) != tmp_reg);
 1375 
 1376     if ((arg & REG_MASK) == SLJIT_UNUSED) {
 1377         if (is_type1_transfer) {
 1378             FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xfff));
 1379             argw &= 0xfff;
 1380         }
 1381         else {
 1382             FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xff));
 1383             argw &= 0xff;
 1384         }
 1385 
 1386         return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg,
 1387             is_type1_transfer ? argw : TYPE2_TRANSFER_IMM(argw)));
 1388     }
 1389 
 1390     if (arg & OFFS_REG_MASK) {
 1391         offset_reg = OFFS_REG(arg);
 1392         arg &= REG_MASK;
 1393         argw &= 0x3;
 1394 
 1395         if (argw != 0 && !is_type1_transfer) {
 1396             FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | (argw << 7)));
 1397             return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
 1398         }
 1399 
 1400         /* Bit 25 set: the offset is register RM. */
 1401         return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
 1402             RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | (argw << 7)));
 1403     }
 1404 
 1405     arg &= REG_MASK;
 1406 
 1407     if (is_type1_transfer) {
 1408         if (argw > 0xfff) {
 1409             imm = get_imm(argw & ~0xfff);
 1410             if (imm) {
 1411                 FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
 1412                 argw = argw & 0xfff;
 1413                 arg = tmp_reg;
 1414             }
 1415         }
 1416         else if (argw < -0xfff) {
 1417             imm = get_imm(-argw & ~0xfff);
 1418             if (imm) {
 1419                 FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
 1420                 argw = -(-argw & 0xfff);
 1421                 arg = tmp_reg;
 1422             }
 1423         }
 1424 
 1425         if (argw >= 0 && argw <= 0xfff)
 1426             return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw));
 1427 
 1428         if (argw < 0 && argw >= -0xfff)
 1429             return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, -argw));
 1430     }
 1431     else {
 1432         if (argw > 0xff) {
 1433             imm = get_imm(argw & ~0xff);
 1434             if (imm) {
 1435                 FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
 1436                 argw = argw & 0xff;
 1437                 arg = tmp_reg;
 1438             }
 1439         }
 1440         else if (argw < -0xff) {
 1441             imm = get_imm(-argw & ~0xff);
 1442             if (imm) {
 1443                 FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
 1444                 argw = -(-argw & 0xff);
 1445                 arg = tmp_reg;
 1446             }
 1447         }
 1448 
 1449         if (argw >= 0 && argw <= 0xff)
 1450             return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, TYPE2_TRANSFER_IMM(argw)));
 1451 
 1452         if (argw < 0 && argw >= -0xff) {
 1453             argw = -argw;
 1454             return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, TYPE2_TRANSFER_IMM(argw)));
 1455         }
 1456     }
 1457 
 1458     FAIL_IF(load_immediate(compiler, tmp_reg, argw));
 1459     return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
 1460         RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0)));
 1461 }
 1462 
 1463 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
 1464     sljit_s32 dst, sljit_sw dstw,
 1465     sljit_s32 src1, sljit_sw src1w,
 1466     sljit_s32 src2, sljit_sw src2w)
 1467 {
 1468     /* src1 is a register or TMP_REG1,
 1469        src2 is a register, TMP_REG2, or an immediate,
 1470        the result goes to TMP_REG2, so storing the result can use TMP_REG1. */
 1471 
 1472     /* We prefer registers and simple constants. */
 1473     sljit_s32 dst_reg;
 1474     sljit_s32 src1_reg;
 1475     sljit_s32 src2_reg;
 1476     sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
 1477 
 1478     /* Destination check. */
 1479     if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
 1480         flags |= UNUSED_RETURN;
 1481 
 1482     SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));
 1483 
 1484     src2_reg = 0;
 1485 
 1486     do {
 1487         if (!(inp_flags & ALLOW_IMM))
 1488             break;
 1489 
 1490         if (src2 & SLJIT_IMM) {
 1491             src2_reg = get_imm(src2w);
 1492             if (src2_reg)
 1493                 break;
 1494             if (inp_flags & ALLOW_INV_IMM) {
 1495                 src2_reg = get_imm(~src2w);
 1496                 if (src2_reg) {
 1497                     flags |= INV_IMM;
 1498                     break;
 1499                 }
 1500             }
 1501             if (GET_OPCODE(op) == SLJIT_ADD) {
 1502                 src2_reg = get_imm(-src2w);
 1503                 if (src2_reg) {
 1504                     op = SLJIT_SUB | GET_ALL_FLAGS(op);
 1505                     break;
 1506                 }
 1507             }
 1508             if (GET_OPCODE(op) == SLJIT_SUB) {
 1509                 src2_reg = get_imm(-src2w);
 1510                 if (src2_reg) {
 1511                     op = SLJIT_ADD | GET_ALL_FLAGS(op);
 1512                     break;
 1513                 }
 1514             }
 1515         }
 1516 
 1517         if (src1 & SLJIT_IMM) {
 1518             src2_reg = get_imm(src1w);
 1519             if (src2_reg) {
 1520                 flags |= ARGS_SWAPPED;
 1521                 src1 = src2;
 1522                 src1w = src2w;
 1523                 break;
 1524             }
 1525             if (inp_flags & ALLOW_INV_IMM) {
 1526                 src2_reg = get_imm(~src1w);
 1527                 if (src2_reg) {
 1528                     flags |= ARGS_SWAPPED | INV_IMM;
 1529                     src1 = src2;
 1530                     src1w = src2w;
 1531                     break;
 1532                 }
 1533             }
 1534             if (GET_OPCODE(op) == SLJIT_ADD) {
 1535                 src2_reg = get_imm(-src1w);
 1536                 if (src2_reg) {
 1537                     /* Note: add is a commutative operation. */
 1538                     src1 = src2;
 1539                     src1w = src2w;
 1540                     op = SLJIT_SUB | GET_ALL_FLAGS(op);
 1541                     break;
 1542                 }
 1543             }
 1544         }
 1545     } while (0);
 1546 
 1547     /* Source 1. */
 1548     if (FAST_IS_REG(src1))
 1549         src1_reg = src1;
 1550     else if (src1 & SLJIT_MEM) {
 1551         FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
 1552         src1_reg = TMP_REG1;
 1553     }
 1554     else {
 1555         FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
 1556         src1_reg = TMP_REG1;
 1557     }
 1558 
 1559     /* Destination. */
 1560     dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
 1561 
 1562     if (op <= SLJIT_MOV_P) {
 1563         if (dst & SLJIT_MEM) {
 1564             if (inp_flags & BYTE_SIZE)
 1565                 inp_flags &= ~SIGNED;
 1566 
 1567             if (FAST_IS_REG(src2))
 1568                 return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2);
 1569         }
 1570 
 1571         if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
 1572             flags |= MOVE_REG_CONV;
 1573     }
 1574 
 1575     /* Source 2. */
 1576     if (src2_reg == 0) {
 1577         src2_reg = (op <= SLJIT_MOV_P) ? dst_reg : TMP_REG2;
 1578 
 1579         if (FAST_IS_REG(src2))
 1580             src2_reg = src2;
 1581         else if (src2 & SLJIT_MEM)
 1582             FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2));
 1583         else
 1584             FAIL_IF(load_immediate(compiler, src2_reg, src2w));
 1585     }
 1586 
 1587     FAIL_IF(emit_single_op(compiler, op, flags, dst_reg, src1_reg, src2_reg));
 1588 
 1589     if (!(dst & SLJIT_MEM))
 1590         return SLJIT_SUCCESS;
 1591 
 1592     return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
 1593 }
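
      /* Illustrative example (added note, not from the original source):
             sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0,
                 SLJIT_R0, 0, SLJIT_IMM, -1);
         get_imm(-1) fails, but get_imm(1) succeeds, so the immediate-matching
         loop in emit_op above rewrites the operation as SLJIT_SUB. */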
 1594 
 1595 #ifdef __cplusplus
 1596 extern "C" {
 1597 #endif
 1598 
 1599 #if defined(__GNUC__)
 1600 extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
 1601 extern int __aeabi_idivmod(int numerator, int denominator);
 1602 #else
 1603 #error "Software divmod functions are needed"
 1604 #endif
 1605 
 1606 #ifdef __cplusplus
 1607 }
 1608 #endif
 1609 
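      /* Per the ARM EABI, the helpers above return the quotient in r0 and
         the remainder in r1, matching what the SLJIT_DIVMOD_* opcodes expect
         in SLJIT_R0/SLJIT_R1. */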
 1610 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
 1611 {
 1612     sljit_sw saved_reg_list[3];
 1613     sljit_sw saved_reg_count;
 1614 
 1615     CHECK_ERROR();
 1616     CHECK(check_sljit_emit_op0(compiler, op));
 1617 
 1618     op = GET_OPCODE(op);
 1619     switch (op) {
 1620     case SLJIT_BREAKPOINT:
 1621         FAIL_IF(push_inst(compiler, BKPT));
 1622         break;
 1623     case SLJIT_NOP:
 1624         FAIL_IF(push_inst(compiler, NOP));
 1625         break;
 1626     case SLJIT_LMUL_UW:
 1627     case SLJIT_LMUL_SW:
 1628         return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
 1629             | (reg_map[SLJIT_R1] << 16)
 1630             | (reg_map[SLJIT_R0] << 12)
 1631             | (reg_map[SLJIT_R0] << 8)
 1632             | reg_map[SLJIT_R1]);
 1633     case SLJIT_DIVMOD_UW:
 1634     case SLJIT_DIVMOD_SW:
 1635     case SLJIT_DIV_UW:
 1636     case SLJIT_DIV_SW:
 1637         SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
 1638         SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);
 1639 
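              /* The helper call clobbers r0-r3: r2/r3 are saved when they hold
                 the SLJIT_R2/SLJIT_R3 scratch registers, and r1 as well for the
                 DIV-only opcodes, where the remainder is not part of the result. */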
 1640         saved_reg_count = 0;
 1641         if (compiler->scratches >= 4)
 1642             saved_reg_list[saved_reg_count++] = 3;
 1643         if (compiler->scratches >= 3)
 1644             saved_reg_list[saved_reg_count++] = 2;
 1645         if (op >= SLJIT_DIV_UW)
 1646             saved_reg_list[saved_reg_count++] = 1;
 1647 
 1648         if (saved_reg_count > 0) {
 1649             FAIL_IF(push_inst(compiler, 0xe52d0000 | (saved_reg_count >= 3 ? 16 : 8)
 1650                         | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
 1651             if (saved_reg_count >= 2) {
 1652                 SLJIT_ASSERT(saved_reg_list[1] < 8);
 1653                 FAIL_IF(push_inst(compiler, 0xe58d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
 1654             }
 1655             if (saved_reg_count >= 3) {
 1656                 SLJIT_ASSERT(saved_reg_list[2] < 8);
 1657                 FAIL_IF(push_inst(compiler, 0xe58d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
 1658             }
 1659         }
 1660 
 1661 #if defined(__GNUC__)
 1662         FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
 1663             ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
 1664 #else
 1665 #error "Software divmod functions are needed"
 1666 #endif
 1667 
 1668         if (saved_reg_count > 0) {
 1669             if (saved_reg_count >= 3) {
 1670                 SLJIT_ASSERT(saved_reg_list[2] < 8);
 1671                 FAIL_IF(push_inst(compiler, 0xe59d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
 1672             }
 1673             if (saved_reg_count >= 2) {
 1674                 SLJIT_ASSERT(saved_reg_list[1] < 8);
 1675                 FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
 1676             }
 1677             return push_inst(compiler, 0xe49d0000 | (saved_reg_count >= 3 ? 16 : 8)
 1678                         | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
 1679         }
 1680         return SLJIT_SUCCESS;
 1681     }
 1682 
 1683     return SLJIT_SUCCESS;
 1684 }
 1685 
 1686 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
 1687     sljit_s32 dst, sljit_sw dstw,
 1688     sljit_s32 src, sljit_sw srcw)
 1689 {
 1690     CHECK_ERROR();
 1691     CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
 1692     ADJUST_LOCAL_OFFSET(dst, dstw);
 1693     ADJUST_LOCAL_OFFSET(src, srcw);
 1694 
 1695     if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
 1696 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
 1697         if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
 1698             return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
 1699 #endif
 1700         return SLJIT_SUCCESS;
 1701     }
 1702 
 1703     switch (GET_OPCODE(op)) {
 1704     case SLJIT_MOV:
 1705     case SLJIT_MOV_U32:
 1706     case SLJIT_MOV_S32:
 1707     case SLJIT_MOV_P:
 1708         return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
 1709 
 1710     case SLJIT_MOV_U8:
 1711         return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
 1712 
 1713     case SLJIT_MOV_S8:
 1714         return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
 1715 
 1716     case SLJIT_MOV_U16:
 1717         return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
 1718 
 1719     case SLJIT_MOV_S16:
 1720         return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
 1721 
 1722     case SLJIT_NOT:
 1723         return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
 1724 
 1725     case SLJIT_NEG:
 1726 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 1727             || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 1728         compiler->skip_checks = 1;
 1729 #endif
 1730         return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);
 1731 
 1732     case SLJIT_CLZ:
 1733         return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
 1734     }
 1735 
 1736     return SLJIT_SUCCESS;
 1737 }
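
      /* Illustrative example (added note, not from the original source):
             sljit_emit_op1(compiler, SLJIT_MOV_U8,
                 SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0);
         emits a zero-extending byte load (LDRB); SLJIT_MOV_S8 selects the
         sign-extending form instead. */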
 1738 
 1739 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
 1740     sljit_s32 dst, sljit_sw dstw,
 1741     sljit_s32 src1, sljit_sw src1w,
 1742     sljit_s32 src2, sljit_sw src2w)
 1743 {
 1744     CHECK_ERROR();
 1745     CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
 1746     ADJUST_LOCAL_OFFSET(dst, dstw);
 1747     ADJUST_LOCAL_OFFSET(src1, src1w);
 1748     ADJUST_LOCAL_OFFSET(src2, src2w);
 1749 
 1750     if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
 1751         return SLJIT_SUCCESS;
 1752 
 1753     switch (GET_OPCODE(op)) {
 1754     case SLJIT_ADD:
 1755     case SLJIT_ADDC:
 1756     case SLJIT_SUB:
 1757     case SLJIT_SUBC:
 1758     case SLJIT_OR:
 1759     case SLJIT_XOR:
 1760         return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w);
 1761 
 1762     case SLJIT_MUL:
 1763         return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
 1764 
 1765     case SLJIT_AND:
 1766         return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);
 1767 
 1768     case SLJIT_SHL:
 1769     case SLJIT_LSHR:
 1770     case SLJIT_ASHR:
 1771         if (src2 & SLJIT_IMM) {
 1772             compiler->shift_imm = src2w & 0x1f;
 1773             return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
 1774         }
 1775         else {
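                  /* 0x20 (= 32) cannot be encoded as an immediate shift amount;
                     it tells emit_single_op to emit a register-specified shift. */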
 1776             compiler->shift_imm = 0x20;
 1777             return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
 1778         }
 1779     }
 1780 
 1781     return SLJIT_SUCCESS;
 1782 }
 1783 
 1784 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
 1785 {
 1786     CHECK_REG_INDEX(check_sljit_get_register_index(reg));
 1787     return reg_map[reg];
 1788 }
 1789 
 1790 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
 1791 {
 1792     CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
 1793     return (freg_map[reg] << 1);
 1794 }
 1795 
 1796 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
 1797     void *instruction, sljit_s32 size)
 1798 {
 1799     CHECK_ERROR();
 1800     CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
 1801 
 1802     return push_inst(compiler, *(sljit_uw*)instruction);
 1803 }
 1804 
 1805 /* --------------------------------------------------------------------- */
 1806 /*  Floating point operators                                             */
 1807 /* --------------------------------------------------------------------- */
 1808 
 1809 
 1810 #define FPU_LOAD (1 << 20)
 1811 #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
 1812     ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg_map[freg] << 12) | (offs))
 1813 #define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
 1814     ((opcode) | (mode) | (freg_map[dst] << 12) | freg_map[src1] | (freg_map[src2] << 16))
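
      /* Note (added commentary): SLJIT_F32_OP (0x100) coincides with the VFP
         "sz" bit, which is set for double precision. Since sljit sets the
         flag for single precision, the helpers below flip it
         (op ^= SLJIT_F32_OP) so the *_F32 opcode patterns become their F64
         forms when needed. */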
 1815 
 1816 static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
 1817 {
 1818     sljit_uw imm;
 1819     sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD));
 1820 
 1821     SLJIT_ASSERT(arg & SLJIT_MEM);
 1822     arg &= ~SLJIT_MEM;
 1823 
 1824     if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
 1825         FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)));
 1826         arg = TMP_REG2;
 1827         argw = 0;
 1828     }
 1829 
 1830     /* Fast path: VLDR/VSTR encode an 8-bit, word-scaled immediate offset. */
 1831     if (arg) {
 1832         if (!(argw & ~0x3fc))
 1833             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
 1834         if (!(-argw & ~0x3fc))
 1835             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));
 1836 
 1837         imm = get_imm(argw & ~0x3fc);
 1838         if (imm) {
 1839             FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
 1840             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2));
 1841         }
 1842         imm = get_imm(-argw & ~0x3fc);
 1843         if (imm) {
 1844             argw = -argw;
 1845             FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
 1846             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2));
 1847         }
 1848     }
 1849 
 1850     if (arg) {
 1851         FAIL_IF(load_immediate(compiler, TMP_REG2, argw));
 1852         FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2)));
 1853     }
 1854     else
 1855         FAIL_IF(load_immediate(compiler, TMP_REG2, argw));
 1856 
 1857     return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0));
 1858 }
 1859 
 1860 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
 1861     sljit_s32 dst, sljit_sw dstw,
 1862     sljit_s32 src, sljit_sw srcw)
 1863 {
 1864     op ^= SLJIT_F32_OP;
 1865 
 1866     if (src & SLJIT_MEM) {
 1867         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
 1868         src = TMP_FREG1;
 1869     }
 1870 
 1871     FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0)));
 1872 
 1873     if (FAST_IS_REG(dst))
 1874         return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (freg_map[TMP_FREG1] << 16));
 1875 
 1876     /* Store the integer value from a VFP register. */
 1877     return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
 1878 }
 1879 
 1880 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
 1881     sljit_s32 dst, sljit_sw dstw,
 1882     sljit_s32 src, sljit_sw srcw)
 1883 {
 1884     sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 1885 
 1886     op ^= SLJIT_F32_OP;
 1887 
 1888     if (FAST_IS_REG(src))
 1889         FAIL_IF(push_inst(compiler, VMOV | RD(src) | (freg_map[TMP_FREG1] << 16)));
 1890     else if (src & SLJIT_MEM) {
 1891         /* Load the integer value into a VFP register. */
 1892         FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
 1893     }
 1894     else {
 1895         FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 1896         FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (freg_map[TMP_FREG1] << 16)));
 1897     }
 1898 
 1899     FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0)));
 1900 
 1901     if (dst & SLJIT_MEM)
 1902         return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw);
 1903     return SLJIT_SUCCESS;
 1904 }
 1905 
 1906 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
 1907     sljit_s32 src1, sljit_sw src1w,
 1908     sljit_s32 src2, sljit_sw src2w)
 1909 {
 1910     op ^= SLJIT_F32_OP;
 1911 
 1912     if (src1 & SLJIT_MEM) {
 1913         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
 1914         src1 = TMP_FREG1;
 1915     }
 1916 
 1917     if (src2 & SLJIT_MEM) {
 1918         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
 1919         src2 = TMP_FREG2;
 1920     }
 1921 
 1922     FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_F32_OP, src1, src2, 0)));
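          /* VMRS copies FPSCR.NZCV into the APSR, so the conditional integer
             instructions emitted afterwards can test the comparison result. */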
 1923     return push_inst(compiler, VMRS);
 1924 }
 1925 
 1926 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
 1927     sljit_s32 dst, sljit_sw dstw,
 1928     sljit_s32 src, sljit_sw srcw)
 1929 {
 1930     sljit_s32 dst_r;
 1931 
 1932     CHECK_ERROR();
 1933 
 1934     SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error);
 1935     SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
 1936 
 1937     dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 1938 
 1939     if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
 1940         op ^= SLJIT_F32_OP;
 1941 
 1942     if (src & SLJIT_MEM) {
 1943         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw));
 1944         src = dst_r;
 1945     }
 1946 
 1947     switch (GET_OPCODE(op)) {
 1948     case SLJIT_MOV_F64:
 1949         if (src != dst_r) {
 1950             if (dst_r != TMP_FREG1)
 1951                 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
 1952             else
 1953                 dst_r = src;
 1954         }
 1955         break;
 1956     case SLJIT_NEG_F64:
 1957         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
 1958         break;
 1959     case SLJIT_ABS_F64:
 1960         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
 1961         break;
 1962     case SLJIT_CONV_F64_FROM_F32:
 1963         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
 1964         op ^= SLJIT_F32_OP;
 1965         break;
 1966     }
 1967 
 1968     if (dst & SLJIT_MEM)
 1969         return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw);
 1970     return SLJIT_SUCCESS;
 1971 }
 1972 
 1973 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
 1974     sljit_s32 dst, sljit_sw dstw,
 1975     sljit_s32 src1, sljit_sw src1w,
 1976     sljit_s32 src2, sljit_sw src2w)
 1977 {
 1978     sljit_s32 dst_r;
 1979 
 1980     CHECK_ERROR();
 1981     CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
 1982     ADJUST_LOCAL_OFFSET(dst, dstw);
 1983     ADJUST_LOCAL_OFFSET(src1, src1w);
 1984     ADJUST_LOCAL_OFFSET(src2, src2w);
 1985 
 1986     op ^= SLJIT_F32_OP;
 1987 
 1988     dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 1989 
 1990     if (src2 & SLJIT_MEM) {
 1991         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
 1992         src2 = TMP_FREG2;
 1993     }
 1994 
 1995     if (src1 & SLJIT_MEM) {
 1996         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
 1997         src1 = TMP_FREG1;
 1998     }
 1999 
 2000     switch (GET_OPCODE(op)) {
 2001     case SLJIT_ADD_F64:
 2002         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
 2003         break;
 2004 
 2005     case SLJIT_SUB_F64:
 2006         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
 2007         break;
 2008 
 2009     case SLJIT_MUL_F64:
 2010         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
 2011         break;
 2012 
 2013     case SLJIT_DIV_F64:
 2014         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
 2015         break;
 2016     }
 2017 
 2018     if (dst_r == TMP_FREG1)
 2019         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw));
 2020 
 2021     return SLJIT_SUCCESS;
 2022 }
 2023 
 2024 #undef FPU_LOAD
 2025 #undef EMIT_FPU_DATA_TRANSFER
 2026 
 2027 /* --------------------------------------------------------------------- */
 2028 /*  Other instructions                                                   */
 2029 /* --------------------------------------------------------------------- */
 2030 
 2031 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
 2032 {
 2033     CHECK_ERROR();
 2034     CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
 2035     ADJUST_LOCAL_OFFSET(dst, dstw);
 2036 
 2037     SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
 2038 
 2039     if (FAST_IS_REG(dst))
 2040         return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));
 2041 
 2042     /* Memory. */
 2043     return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);
 2044 }
 2045 
 2046 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
 2047 {
 2048     CHECK_ERROR();
 2049     CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
 2050     ADJUST_LOCAL_OFFSET(src, srcw);
 2051 
 2052     SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
 2053 
 2054     if (FAST_IS_REG(src))
 2055         FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
 2056     else
 2057         FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));
 2058 
 2059     return push_inst(compiler, BX | RM(TMP_REG2));
 2060 }
 2061 
 2062 /* --------------------------------------------------------------------- */
 2063 /*  Conditional instructions                                             */
 2064 /* --------------------------------------------------------------------- */
 2065 
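      /* Returns the ARM condition field (bits 31-28): 0x0 EQ, 0x1 NE, 0x2 CS,
         0x3 CC, 0x6 VS, 0x7 VC, 0x8 HI, 0x9 LS, 0xa GE, 0xb LT, 0xc GT,
         0xd LE, 0xe AL. */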
 2066 static sljit_uw get_cc(sljit_s32 type)
 2067 {
 2068     switch (type) {
 2069     case SLJIT_EQUAL:
 2070     case SLJIT_MUL_NOT_OVERFLOW:
 2071     case SLJIT_EQUAL_F64:
 2072         return 0x00000000;
 2073 
 2074     case SLJIT_NOT_EQUAL:
 2075     case SLJIT_MUL_OVERFLOW:
 2076     case SLJIT_NOT_EQUAL_F64:
 2077         return 0x10000000;
 2078 
 2079     case SLJIT_LESS:
 2080     case SLJIT_LESS_F64:
 2081         return 0x30000000;
 2082 
 2083     case SLJIT_GREATER_EQUAL:
 2084     case SLJIT_GREATER_EQUAL_F64:
 2085         return 0x20000000;
 2086 
 2087     case SLJIT_GREATER:
 2088     case SLJIT_GREATER_F64:
 2089         return 0x80000000;
 2090 
 2091     case SLJIT_LESS_EQUAL:
 2092     case SLJIT_LESS_EQUAL_F64:
 2093         return 0x90000000;
 2094 
 2095     case SLJIT_SIG_LESS:
 2096         return 0xb0000000;
 2097 
 2098     case SLJIT_SIG_GREATER_EQUAL:
 2099         return 0xa0000000;
 2100 
 2101     case SLJIT_SIG_GREATER:
 2102         return 0xc0000000;
 2103 
 2104     case SLJIT_SIG_LESS_EQUAL:
 2105         return 0xd0000000;
 2106 
 2107     case SLJIT_OVERFLOW:
 2108     case SLJIT_UNORDERED_F64:
 2109         return 0x60000000;
 2110 
 2111     case SLJIT_NOT_OVERFLOW:
 2112     case SLJIT_ORDERED_F64:
 2113         return 0x70000000;
 2114 
 2115     default:
 2116         SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL);
 2117         return 0xe0000000;
 2118     }
 2119 }
 2120 
 2121 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
 2122 {
 2123     struct sljit_label *label;
 2124 
 2125     CHECK_ERROR_PTR();
 2126     CHECK_PTR(check_sljit_emit_label(compiler));
 2127 
 2128     if (compiler->last_label && compiler->last_label->size == compiler->size)
 2129         return compiler->last_label;
 2130 
 2131     label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
 2132     PTR_FAIL_IF(!label);
 2133     set_label(label, compiler);
 2134     return label;
 2135 }
 2136 
 2137 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
 2138 {
 2139     struct sljit_jump *jump;
 2140 
 2141     CHECK_ERROR_PTR();
 2142     CHECK_PTR(check_sljit_emit_jump(compiler, type));
 2143 
 2144     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2145     PTR_FAIL_IF(!jump);
 2146     set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
 2147     type &= 0xff;
 2148 
 2149     SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
 2150 
 2151 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 2152     if (type >= SLJIT_FAST_CALL)
 2153         PTR_FAIL_IF(prepare_blx(compiler));
 2154     PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
 2155         type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
 2156 
 2157     if (jump->flags & SLJIT_REWRITABLE_JUMP) {
 2158         jump->addr = compiler->size;
 2159         compiler->patches++;
 2160     }
 2161 
 2162     if (type >= SLJIT_FAST_CALL) {
 2163         jump->flags |= IS_BL;
 2164         PTR_FAIL_IF(emit_blx(compiler));
 2165     }
 2166 
 2167     if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
 2168         jump->addr = compiler->size;
 2169 #else
 2170     if (type >= SLJIT_FAST_CALL)
 2171         jump->flags |= IS_BL;
 2172     PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
 2173     PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type)));
 2174     jump->addr = compiler->size;
 2175 #endif
 2176     return jump;
 2177 }
 2178 
 2179 #ifdef __SOFTFP__
 2180 
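      /* With the soft-float ABI the first 16 bytes of the argument area are
         passed in r0-r3 and the rest on the stack, so offsets below 16 become
         register moves and larger ones SP-relative stores; offsets[] records
         where each argument ends up. */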
 2181 static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src)
 2182 {
 2183     sljit_s32 stack_offset = 0;
 2184     sljit_s32 arg_count = 0;
 2185     sljit_s32 word_arg_offset = 0;
 2186     sljit_s32 float_arg_count = 0;
 2187     sljit_s32 types = 0;
 2188     sljit_s32 src_offset = 4 * sizeof(sljit_sw);
 2189     sljit_u8 offsets[4];
 2190 
 2191     if (src && FAST_IS_REG(*src))
 2192         src_offset = reg_map[*src] * sizeof(sljit_sw);
 2193 
 2194     arg_types >>= SLJIT_DEF_SHIFT;
 2195 
 2196     while (arg_types) {
 2197         types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
 2198 
 2199         switch (arg_types & SLJIT_DEF_MASK) {
 2200         case SLJIT_ARG_TYPE_F32:
 2201             offsets[arg_count] = (sljit_u8)stack_offset;
 2202             stack_offset += sizeof(sljit_f32);
 2203             arg_count++;
 2204             float_arg_count++;
 2205             break;
 2206         case SLJIT_ARG_TYPE_F64:
 2207             if (stack_offset & 0x7)
 2208                 stack_offset += sizeof(sljit_sw);
 2209             offsets[arg_count] = (sljit_u8)stack_offset;
 2210             stack_offset += sizeof(sljit_f64);
 2211             arg_count++;
 2212             float_arg_count++;
 2213             break;
 2214         default:
 2215             offsets[arg_count] = (sljit_u8)stack_offset;
 2216             stack_offset += sizeof(sljit_sw);
 2217             arg_count++;
 2218             word_arg_offset += sizeof(sljit_sw);
 2219             break;
 2220         }
 2221 
 2222         arg_types >>= SLJIT_DEF_SHIFT;
 2223     }
 2224 
 2225     if (stack_offset > 16)
 2226         FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_offset - 16) + 0x7) & ~0x7)));
 2227 
 2228     /* Process the arguments in reverse order. */
 2229     while (types) {
 2230         switch (types & SLJIT_DEF_MASK) {
 2231         case SLJIT_ARG_TYPE_F32:
 2232             arg_count--;
 2233             float_arg_count--;
 2234             stack_offset = offsets[arg_count];
 2235 
 2236             if (stack_offset < 16) {
 2237                 if (src_offset == stack_offset) {
 2238                     FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
 2239                     *src = TMP_REG1;
 2240                 }
 2241                 FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10)));
 2242             } else
 2243                 FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2)));
 2244             break;
 2245         case SLJIT_ARG_TYPE_F64:
 2246             arg_count--;
 2247             float_arg_count--;
 2248             stack_offset = offsets[arg_count];
 2249 
 2250             SLJIT_ASSERT((stack_offset & 0x7) == 0);
 2251 
 2252             if (stack_offset < 16) {
 2253                 if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) {
 2254                     FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
 2255                     *src = TMP_REG1;
 2256                 }
 2257                 FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count));
 2258             } else
 2259                 FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2)));
 2260             break;
 2261         default:
 2262             arg_count--;
 2263             word_arg_offset -= sizeof(sljit_sw);
 2264             stack_offset = offsets[arg_count];
 2265 
 2266             SLJIT_ASSERT(stack_offset >= word_arg_offset);
 2267 
 2268             if (stack_offset != word_arg_offset) {
 2269                 if (stack_offset < 16) {
 2270                     if (src_offset == stack_offset) {
 2271                         FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
 2272                         *src = TMP_REG1;
 2273                     }
 2274                     else if (src_offset == word_arg_offset) {
 2275                         *src = 1 + (stack_offset >> 2);
 2276                         src_offset = stack_offset;
 2277                     }
 2278                     FAIL_IF(push_inst(compiler, MOV | (stack_offset << 10) | (word_arg_offset >> 2)));
 2279                 } else
 2280                     FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (stack_offset - 16)));
 2281             }
 2282             break;
 2283         }
 2284 
 2285         types >>= SLJIT_DEF_SHIFT;
 2286     }
 2287 
 2288     return SLJIT_SUCCESS;
 2289 }
 2290 
 2291 static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
 2292 {
 2293     sljit_s32 stack_size = 0;
 2294 
 2295     if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32)
 2296         FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12)));
 2297     if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64)
 2298         FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0));
 2299 
 2300     arg_types >>= SLJIT_DEF_SHIFT;
 2301 
 2302     while (arg_types) {
 2303         switch (arg_types & SLJIT_DEF_MASK) {
 2304         case SLJIT_ARG_TYPE_F32:
 2305             stack_size += sizeof(sljit_f32);
 2306             break;
 2307         case SLJIT_ARG_TYPE_F64:
 2308             if (stack_size & 0x7)
 2309                 stack_size += sizeof(sljit_sw);
 2310             stack_size += sizeof(sljit_f64);
 2311             break;
 2312         default:
 2313             stack_size += sizeof(sljit_sw);
 2314             break;
 2315         }
 2316 
 2317         arg_types >>= SLJIT_DEF_SHIFT;
 2318     }
 2319 
 2320     if (stack_size <= 16)
 2321         return SLJIT_SUCCESS;
 2322 
 2323     return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_size - 16) + 0x7) & ~0x7));
 2324 }
 2325 
 2326 #else /* !__SOFTFP__ */
 2327 
 2328 static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
 2329 {
 2330     sljit_u32 remap = 0;
 2331     sljit_u32 offset = 0;
 2332     sljit_u32 new_offset, mask;
 2333 
 2334     /* Remove return value. */
 2335     arg_types >>= SLJIT_DEF_SHIFT;
 2336 
 2337     while (arg_types) {
 2338         if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) {
 2339             new_offset = 0;
 2340             mask = 1;
 2341 
 2342             while (remap & mask) {
 2343                 new_offset++;
 2344                 mask <<= 1;
 2345             }
 2346             remap |= mask;
 2347 
 2348             if (offset != new_offset)
 2349                 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
 2350                     0, (new_offset >> 1) + 1, (offset >> 1) + 1, 0) | ((new_offset & 0x1) ? 0x400000 : 0)));
 2351 
 2352             offset += 2;
 2353         }
 2354         else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) {
 2355             new_offset = 0;
 2356             mask = 3;
 2357 
 2358             while (remap & mask) {
 2359                 new_offset += 2;
 2360                 mask <<= 2;
 2361             }
 2362             remap |= mask;
 2363 
 2364             if (offset != new_offset)
 2365                 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, SLJIT_F32_OP, (new_offset >> 1) + 1, (offset >> 1) + 1, 0)));
 2366 
 2367             offset += 2;
 2368         }
 2369         arg_types >>= SLJIT_DEF_SHIFT;
 2370     }
 2371 
 2372     return SLJIT_SUCCESS;
 2373 }
 2374 
 2375 #endif /* __SOFTFP__ */
 2376 
 2377 #undef EMIT_FPU_OPERATION
 2378 
 2379 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
 2380     sljit_s32 arg_types)
 2381 {
 2382 #ifdef __SOFTFP__
 2383     struct sljit_jump *jump;
 2384 #endif
 2385 
 2386     CHECK_ERROR_PTR();
 2387     CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
 2388 
 2389 #ifdef __SOFTFP__
 2390     PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL));
 2391 
 2392 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2393         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2394     compiler->skip_checks = 1;
 2395 #endif
 2396 
 2397     jump = sljit_emit_jump(compiler, type);
 2398     PTR_FAIL_IF(jump == NULL);
 2399 
 2400     PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
 2401     return jump;
 2402 #else /* !__SOFTFP__ */
 2403     PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
 2404 
 2405 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2406         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2407     compiler->skip_checks = 1;
 2408 #endif
 2409 
 2410     return sljit_emit_jump(compiler, type);
 2411 #endif /* __SOFTFP__ */
 2412 }
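
      /* Note (added commentary): arg_types packs one 4-bit SLJIT_ARG_TYPE_*
         entry per value, with the return type in the lowest nibble and the
         first argument one SLJIT_DEF_SHIFT above it, hence the
         ">>= SLJIT_DEF_SHIFT" loops in the helpers above. */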
 2413 
 2414 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
 2415 {
 2416     struct sljit_jump *jump;
 2417 
 2418     CHECK_ERROR();
 2419     CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
 2420     ADJUST_LOCAL_OFFSET(src, srcw);
 2421 
 2422     SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
 2423 
 2424     if (!(src & SLJIT_IMM)) {
 2425         if (FAST_IS_REG(src)) {
 2426             SLJIT_ASSERT(reg_map[src] != 14);
 2427             return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
 2428         }
 2429 
 2430         SLJIT_ASSERT(src & SLJIT_MEM);
 2431         FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
 2432         return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1));
 2433     }
 2434 
 2435     /* These jumps are converted to jump/call instructions when possible. */
 2436     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2437     FAIL_IF(!jump);
 2438     set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
 2439     jump->u.target = srcw;
 2440 
 2441 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 2442     if (type >= SLJIT_FAST_CALL)
 2443         FAIL_IF(prepare_blx(compiler));
 2444     FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
 2445     if (type >= SLJIT_FAST_CALL)
 2446         FAIL_IF(emit_blx(compiler));
 2447 #else
 2448     FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
 2449     FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
 2450 #endif
 2451     jump->addr = compiler->size;
 2452     return SLJIT_SUCCESS;
 2453 }
 2454 
 2455 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
 2456     sljit_s32 arg_types,
 2457     sljit_s32 src, sljit_sw srcw)
 2458 {
 2459     CHECK_ERROR();
 2460     CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
 2461 
 2462 #ifdef __SOFTFP__
 2463     if (src & SLJIT_MEM) {
 2464         FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
 2465         src = TMP_REG1;
 2466     }
 2467 
 2468     FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src));
 2469 
 2470 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2471         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2472     compiler->skip_checks = 1;
 2473 #endif
 2474 
 2475     FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
 2476 
 2477     return softfloat_post_call_with_args(compiler, arg_types);
 2478 #else /* !__SOFTFP__ */
 2479     FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
 2480 
 2481 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2482         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2483     compiler->skip_checks = 1;
 2484 #endif
 2485 
 2486     return sljit_emit_ijump(compiler, type, src, srcw);
 2487 #endif /* __SOFTFP__ */
 2488 }
 2489 
 2490 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
 2491     sljit_s32 dst, sljit_sw dstw,
 2492     sljit_s32 type)
 2493 {
 2494     sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
 2495     sljit_uw cc, ins;
 2496 
 2497     CHECK_ERROR();
 2498     CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
 2499     ADJUST_LOCAL_OFFSET(dst, dstw);
 2500 
 2501     op = GET_OPCODE(op);
 2502     cc = get_cc(type & 0xff);
 2503     dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
 2504 
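          /* Instructions are normally emitted with the AL condition; clearing
             COND_MASK and ORing in cc makes them conditional. */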
 2505     if (op < SLJIT_ADD) {
 2506         FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0));
 2507         FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
 2508         if (dst & SLJIT_MEM)
 2509             return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
 2510         return SLJIT_SUCCESS;
 2511     }
 2512 
 2513     ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR));
 2514 
 2515     if (dst & SLJIT_MEM)
 2516         FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));
 2517 
 2518     FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
 2519 
 2520     if (op == SLJIT_AND)
 2521         FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
 2522 
 2523     if (dst & SLJIT_MEM)
 2524         FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));
 2525 
 2526     if (flags & SLJIT_SET_Z)
 2527         return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg));
 2528     return SLJIT_SUCCESS;
 2529 }
 2530 
 2531 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
 2532     sljit_s32 dst_reg,
 2533     sljit_s32 src, sljit_sw srcw)
 2534 {
 2535     sljit_uw cc, tmp;
 2536 
 2537     CHECK_ERROR();
 2538     CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
 2539 
 2540     dst_reg &= ~SLJIT_I32_OP;
 2541 
 2542     cc = get_cc(type & 0xff);
 2543 
 2544     if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
 2545         tmp = get_imm(srcw);
 2546         if (tmp)
 2547             return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);
 2548 
 2549         tmp = get_imm(~srcw);
 2550         if (tmp)
 2551             return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc);
 2552 
 2553 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
 2554         tmp = (sljit_uw) srcw;
 2555         FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
 2556         if (tmp <= 0xffff)
 2557             return SLJIT_SUCCESS;
 2558         return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
 2559 #else
 2560         FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 2561         src = TMP_REG1;
 2562 #endif
 2563     }
 2564 
 2565     return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src)) & ~COND_MASK) | cc);
 2566 }
 2567 
 2568 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
 2569     sljit_s32 reg,
 2570     sljit_s32 mem, sljit_sw memw)
 2571 {
 2572     sljit_s32 flags;
 2573     sljit_uw is_type1_transfer, inst;
 2574 
 2575     CHECK_ERROR();
 2576     CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
 2577 
 2578     is_type1_transfer = 1;
 2579 
 2580     switch (type & 0xff) {
 2581     case SLJIT_MOV:
 2582     case SLJIT_MOV_U32:
 2583     case SLJIT_MOV_S32:
 2584     case SLJIT_MOV_P:
 2585         flags = WORD_SIZE;
 2586         break;
 2587     case SLJIT_MOV_U8:
 2588         flags = BYTE_SIZE;
 2589         break;
 2590     case SLJIT_MOV_S8:
 2591         if (!(type & SLJIT_MEM_STORE))
 2592             is_type1_transfer = 0;
 2593         flags = BYTE_SIZE | SIGNED;
 2594         break;
 2595     case SLJIT_MOV_U16:
 2596         is_type1_transfer = 0;
 2597         flags = HALF_SIZE;
 2598         break;
 2599     case SLJIT_MOV_S16:
 2600         is_type1_transfer = 0;
 2601         flags = HALF_SIZE | SIGNED;
 2602         break;
 2603     default:
 2604         SLJIT_UNREACHABLE();
 2605         flags = WORD_SIZE;
 2606         break;
 2607     }
 2608 
 2609     if (!(type & SLJIT_MEM_STORE))
 2610         flags |= LOAD_DATA;
 2611 
 2612     SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags));
 2613 
 2614     if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
 2615         if (!is_type1_transfer && memw != 0)
 2616             return SLJIT_ERR_UNSUPPORTED;
 2617     }
 2618     else {
 2619         if (is_type1_transfer) {
 2620             if (memw > 4095 || memw < -4095)
 2621                 return SLJIT_ERR_UNSUPPORTED;
 2622         }
 2623         else {
 2624             if (memw > 255 || memw < -255)
 2625                 return SLJIT_ERR_UNSUPPORTED;
 2626         }
 2627     }
 2628 
 2629     if (type & SLJIT_MEM_SUPP)
 2630         return SLJIT_SUCCESS;
 2631 
 2632     if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
 2633         memw &= 0x3;
 2634 
 2635         inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | (memw << 7));
 2636 
 2637         if (is_type1_transfer)
 2638             inst |= (1 << 25);
 2639 
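              /* Bit 24 (P) selects pre-indexed addressing and bit 21 (W)
                 write-back: SLJIT_MEM_PRE keeps P and sets W, while the
                 post-indexed form clears P (write-back is then implicit). */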
 2640         if (type & SLJIT_MEM_PRE)
 2641             inst |= (1 << 21);
 2642         else
 2643             inst ^= (1 << 24);
 2644 
 2645         return push_inst(compiler, inst);
 2646     }
 2647 
 2648     inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0);
 2649 
 2650     if (type & SLJIT_MEM_PRE)
 2651         inst |= (1 << 21);
 2652     else
 2653         inst ^= (1 << 24);
 2654 
 2655     if (is_type1_transfer) {
 2656         if (memw >= 0)
 2657             inst |= (1 << 23);
 2658         else
 2659             memw = -memw;
 2660 
 2661         return push_inst(compiler, inst | memw);
 2662     }
 2663 
 2664     if (memw >= 0)
 2665         inst |= (1 << 23);
 2666     else
 2667         memw = -memw;
 2668 
 2669     return push_inst(compiler, inst | TYPE2_TRANSFER_IMM(memw));
 2670 }
 2671 
 2672 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 2673 {
 2674     struct sljit_const *const_;
 2675     sljit_s32 dst_r;
 2676 
 2677     CHECK_ERROR_PTR();
 2678     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
 2679     ADJUST_LOCAL_OFFSET(dst, dstw);
 2680 
 2681     dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2;
 2682 
 2683 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 2684     PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), init_value));
 2685     compiler->patches++;
 2686 #else
 2687     PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
 2688 #endif
 2689 
 2690     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
 2691     PTR_FAIL_IF(!const_);
 2692     set_const(const_, compiler);
 2693 
 2694     if (dst & SLJIT_MEM)
 2695         PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
 2696     return const_;
 2697 }
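
      /* Illustrative usage (added note, not from the original source): the
         constant can later be patched in place via
             sljit_set_const(sljit_get_const_addr(const_), new_constant,
                 executable_offset);
         without regenerating the function. */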
 2698 
 2699 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
 2700 {
 2701     struct sljit_put_label *put_label;
 2702     sljit_s32 dst_r;
 2703 
 2704     CHECK_ERROR_PTR();
 2705     CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
 2706     ADJUST_LOCAL_OFFSET(dst, dstw);
 2707 
 2708     dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2;
 2709 
 2710 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 2711     PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0));
 2712     compiler->patches++;
 2713 #else
 2714     PTR_FAIL_IF(emit_imm(compiler, dst_r, 0));
 2715 #endif
 2716 
 2717     put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
 2718     PTR_FAIL_IF(!put_label);
 2719     set_put_label(put_label, compiler, 0);
 2720 
 2721     if (dst & SLJIT_MEM)
 2722         PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
 2723     return put_label;
 2724 }
 2725 
 2726 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 2727 {
 2728     inline_set_jump_addr(addr, executable_offset, new_target, 1);
 2729 }
 2730 
 2731 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 2732 {
 2733     inline_set_const(addr, executable_offset, new_constant, 1);
 2734 }