"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.43/sljit/sljitNativeARM_32.c" (8 Jan 2018, 79539 Bytes) of package /linux/misc/pcre-8.43.tar.bz2:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "sljitNativeARM_32.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 8.41_vs_8.42.

    1 /*
    2  *    Stack-less Just-In-Time compiler
    3  *
    4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without modification, are
    7  * permitted provided that the following conditions are met:
    8  *
    9  *   1. Redistributions of source code must retain the above copyright notice, this list of
   10  *      conditions and the following disclaimer.
   11  *
   12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
   13  *      of conditions and the following disclaimer in the documentation and/or other materials
   14  *      provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
   17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
   19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  */
   26 
   27 #ifdef __SOFTFP__
   28 #define ARM_ABI_INFO " ABI:softfp"
   29 #else
   30 #define ARM_ABI_INFO " ABI:hardfp"
   31 #endif
   32 
   33 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
   34 {
   35 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
   36     return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
   37 #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
   38     return "ARMv5" SLJIT_CPUINFO ARM_ABI_INFO;
   39 #else
   40 #error "Internal error: Unknown ARM architecture"
   41 #endif
   42 }
   43 
   44 /* Last register + 1. */
   45 #define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
   46 #define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
   47 #define TMP_PC      (SLJIT_NUMBER_OF_REGISTERS + 4)
   48 
   49 #define TMP_FREG1   (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
   50 #define TMP_FREG2   (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
   51 
   52 /* In ARM instruction words.
   53    Cache lines are usually 32 byte aligned. */
   54 #define CONST_POOL_ALIGNMENT    8
   55 #define CONST_POOL_EMPTY    0xffffffff
   56 
   57 #define ALIGN_INSTRUCTION(ptr) \
   58     (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
   59 #define MAX_DIFFERENCE(max_diff) \
   60     (((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
   61 
   62 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
   63 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
   64     0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
   65 };
   66 
   67 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
   68     0, 0, 1, 2, 3, 4, 5, 6, 7
   69 };
   70 
   71 #define RM(rm) (reg_map[rm])
   72 #define RD(rd) (reg_map[rd] << 12)
   73 #define RN(rn) (reg_map[rn] << 16)
   74 
   75 /* --------------------------------------------------------------------- */
   76 /*  Instrucion forms                                                     */
   77 /* --------------------------------------------------------------------- */
   78 
   79 /* The instruction includes the AL condition.
   80    INST_NAME - CONDITIONAL remove this flag. */
   81 #define COND_MASK   0xf0000000
   82 #define CONDITIONAL 0xe0000000
   83 #define PUSH_POOL   0xff000000
   84 
   85 #define ADC     0xe0a00000
   86 #define ADD     0xe0800000
   87 #define AND     0xe0000000
   88 #define B       0xea000000
   89 #define BIC     0xe1c00000
   90 #define BL      0xeb000000
   91 #define BLX     0xe12fff30
   92 #define BX      0xe12fff10
   93 #define CLZ     0xe16f0f10
   94 #define CMN     0xe1600000
   95 #define CMP     0xe1400000
   96 #define BKPT        0xe1200070
   97 #define EOR     0xe0200000
   98 #define MOV     0xe1a00000
   99 #define MUL     0xe0000090
  100 #define MVN     0xe1e00000
  101 #define NOP     0xe1a00000
  102 #define ORR     0xe1800000
  103 #define PUSH        0xe92d0000
  104 #define POP     0xe8bd0000
  105 #define RSB     0xe0600000
  106 #define RSC     0xe0e00000
  107 #define SBC     0xe0c00000
  108 #define SMULL       0xe0c00090
  109 #define SUB     0xe0400000
  110 #define UMULL       0xe0800090
  111 #define VABS_F32    0xeeb00ac0
  112 #define VADD_F32    0xee300a00
  113 #define VCMP_F32    0xeeb40a40
  114 #define VCVT_F32_S32    0xeeb80ac0
  115 #define VCVT_F64_F32    0xeeb70ac0
  116 #define VCVT_S32_F32    0xeebd0ac0
  117 #define VDIV_F32    0xee800a00
  118 #define VMOV_F32    0xeeb00a40
  119 #define VMOV        0xee000a10
  120 #define VMOV2       0xec400a10
  121 #define VMRS        0xeef1fa10
  122 #define VMUL_F32    0xee200a00
  123 #define VNEG_F32    0xeeb10a40
  124 #define VSTR_F32    0xed000a00
  125 #define VSUB_F32    0xee300a40
  126 
  127 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
  128 /* Arm v7 specific instructions. */
  129 #define MOVW        0xe3000000
  130 #define MOVT        0xe3400000
  131 #define SXTB        0xe6af0070
  132 #define SXTH        0xe6bf0070
  133 #define UXTB        0xe6ef0070
  134 #define UXTH        0xe6ff0070
  135 #endif
  136 
  137 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  138 
  139 static sljit_s32 push_cpool(struct sljit_compiler *compiler)
  140 {
  141     /* Pushing the constant pool into the instruction stream. */
  142     sljit_uw* inst;
  143     sljit_uw* cpool_ptr;
  144     sljit_uw* cpool_end;
  145     sljit_s32 i;
  146 
  147     /* The label could point the address after the constant pool. */
  148     if (compiler->last_label && compiler->last_label->size == compiler->size)
  149         compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;
  150 
  151     SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
  152     inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  153     FAIL_IF(!inst);
  154     compiler->size++;
  155     *inst = 0xff000000 | compiler->cpool_fill;
  156 
  157     for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
  158         inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  159         FAIL_IF(!inst);
  160         compiler->size++;
  161         *inst = 0;
  162     }
  163 
  164     cpool_ptr = compiler->cpool;
  165     cpool_end = cpool_ptr + compiler->cpool_fill;
  166     while (cpool_ptr < cpool_end) {
  167         inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  168         FAIL_IF(!inst);
  169         compiler->size++;
  170         *inst = *cpool_ptr++;
  171     }
  172     compiler->cpool_diff = CONST_POOL_EMPTY;
  173     compiler->cpool_fill = 0;
  174     return SLJIT_SUCCESS;
  175 }
  176 
  177 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
  178 {
  179     sljit_uw* ptr;
  180 
  181     if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
  182         FAIL_IF(push_cpool(compiler));
  183 
  184     ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  185     FAIL_IF(!ptr);
  186     compiler->size++;
  187     *ptr = inst;
  188     return SLJIT_SUCCESS;
  189 }
  190 
  191 static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
  192 {
  193     sljit_uw* ptr;
  194     sljit_uw cpool_index = CPOOL_SIZE;
  195     sljit_uw* cpool_ptr;
  196     sljit_uw* cpool_end;
  197     sljit_u8* cpool_unique_ptr;
  198 
  199     if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
  200         FAIL_IF(push_cpool(compiler));
  201     else if (compiler->cpool_fill > 0) {
  202         cpool_ptr = compiler->cpool;
  203         cpool_end = cpool_ptr + compiler->cpool_fill;
  204         cpool_unique_ptr = compiler->cpool_unique;
  205         do {
  206             if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
  207                 cpool_index = cpool_ptr - compiler->cpool;
  208                 break;
  209             }
  210             cpool_ptr++;
  211             cpool_unique_ptr++;
  212         } while (cpool_ptr < cpool_end);
  213     }
  214 
  215     if (cpool_index == CPOOL_SIZE) {
  216         /* Must allocate a new entry in the literal pool. */
  217         if (compiler->cpool_fill < CPOOL_SIZE) {
  218             cpool_index = compiler->cpool_fill;
  219             compiler->cpool_fill++;
  220         }
  221         else {
  222             FAIL_IF(push_cpool(compiler));
  223             cpool_index = 0;
  224             compiler->cpool_fill = 1;
  225         }
  226     }
  227 
  228     SLJIT_ASSERT((inst & 0xfff) == 0);
  229     ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  230     FAIL_IF(!ptr);
  231     compiler->size++;
  232     *ptr = inst | cpool_index;
  233 
  234     compiler->cpool[cpool_index] = literal;
  235     compiler->cpool_unique[cpool_index] = 0;
  236     if (compiler->cpool_diff == CONST_POOL_EMPTY)
  237         compiler->cpool_diff = compiler->size;
  238     return SLJIT_SUCCESS;
  239 }
  240 
  241 static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
  242 {
  243     sljit_uw* ptr;
  244     if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
  245         FAIL_IF(push_cpool(compiler));
  246 
  247     SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
  248     ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  249     FAIL_IF(!ptr);
  250     compiler->size++;
  251     *ptr = inst | compiler->cpool_fill;
  252 
  253     compiler->cpool[compiler->cpool_fill] = literal;
  254     compiler->cpool_unique[compiler->cpool_fill] = 1;
  255     compiler->cpool_fill++;
  256     if (compiler->cpool_diff == CONST_POOL_EMPTY)
  257         compiler->cpool_diff = compiler->size;
  258     return SLJIT_SUCCESS;
  259 }
  260 
  261 static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
  262 {
  263     /* Place for at least two instruction (doesn't matter whether the first has a literal). */
  264     if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
  265         return push_cpool(compiler);
  266     return SLJIT_SUCCESS;
  267 }
  268 
  269 static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
  270 {
  271     /* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
  272     SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
  273     SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
  274 
  275     return push_inst(compiler, BLX | RM(TMP_REG1));
  276 }
  277 
  278 static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
  279 {
  280     sljit_uw diff;
  281     sljit_uw ind;
  282     sljit_uw counter = 0;
  283     sljit_uw* clear_const_pool = const_pool;
  284     sljit_uw* clear_const_pool_end = const_pool + cpool_size;
  285 
  286     SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
  287     /* Set unused flag for all literals in the constant pool.
  288        I.e.: unused literals can belong to branches, which can be encoded as B or BL.
  289        We can "compress" the constant pool by discarding these literals. */
  290     while (clear_const_pool < clear_const_pool_end)
  291         *clear_const_pool++ = (sljit_uw)(-1);
  292 
  293     while (last_pc_patch < code_ptr) {
  294         /* Data transfer instruction with Rn == r15. */
  295         if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) {
  296             diff = const_pool - last_pc_patch;
  297             ind = (*last_pc_patch) & 0xfff;
  298 
  299             /* Must be a load instruction with immediate offset. */
  300             SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
  301             if ((sljit_s32)const_pool[ind] < 0) {
  302                 const_pool[ind] = counter;
  303                 ind = counter;
  304                 counter++;
  305             }
  306             else
  307                 ind = const_pool[ind];
  308 
  309             SLJIT_ASSERT(diff >= 1);
  310             if (diff >= 2 || ind > 0) {
  311                 diff = (diff + ind - 2) << 2;
  312                 SLJIT_ASSERT(diff <= 0xfff);
  313                 *last_pc_patch = (*last_pc_patch & ~0xfff) | diff;
  314             }
  315             else
  316                 *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004;
  317         }
  318         last_pc_patch++;
  319     }
  320     return counter;
  321 }
  322 
  323 /* In some rare ocasions we may need future patches. The probability is close to 0 in practice. */
  324 struct future_patch {
  325     struct future_patch* next;
  326     sljit_s32 index;
  327     sljit_s32 value;
  328 };
  329 
  330 static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
  331 {
  332     sljit_s32 value;
  333     struct future_patch *curr_patch, *prev_patch;
  334 
  335     SLJIT_UNUSED_ARG(compiler);
  336 
  337     /* Using the values generated by patch_pc_relative_loads. */
  338     if (!*first_patch)
  339         value = (sljit_s32)cpool_start_address[cpool_current_index];
  340     else {
  341         curr_patch = *first_patch;
  342         prev_patch = NULL;
  343         while (1) {
  344             if (!curr_patch) {
  345                 value = (sljit_s32)cpool_start_address[cpool_current_index];
  346                 break;
  347             }
  348             if ((sljit_uw)curr_patch->index == cpool_current_index) {
  349                 value = curr_patch->value;
  350                 if (prev_patch)
  351                     prev_patch->next = curr_patch->next;
  352                 else
  353                     *first_patch = curr_patch->next;
  354                 SLJIT_FREE(curr_patch, compiler->allocator_data);
  355                 break;
  356             }
  357             prev_patch = curr_patch;
  358             curr_patch = curr_patch->next;
  359         }
  360     }
  361 
  362     if (value >= 0) {
  363         if ((sljit_uw)value > cpool_current_index) {
  364             curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
  365             if (!curr_patch) {
  366                 while (*first_patch) {
  367                     curr_patch = *first_patch;
  368                     *first_patch = (*first_patch)->next;
  369                     SLJIT_FREE(curr_patch, compiler->allocator_data);
  370                 }
  371                 return SLJIT_ERR_ALLOC_FAILED;
  372             }
  373             curr_patch->next = *first_patch;
  374             curr_patch->index = value;
  375             curr_patch->value = cpool_start_address[value];
  376             *first_patch = curr_patch;
  377         }
  378         cpool_start_address[value] = *buf_ptr;
  379     }
  380     return SLJIT_SUCCESS;
  381 }
  382 
  383 #else
  384 
  385 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
  386 {
  387     sljit_uw* ptr;
  388 
  389     ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  390     FAIL_IF(!ptr);
  391     compiler->size++;
  392     *ptr = inst;
  393     return SLJIT_SUCCESS;
  394 }
  395 
  396 static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
  397 {
  398     FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
  399     return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
  400 }
  401 
  402 #endif
  403 
  404 static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
  405 {
  406     sljit_sw diff;
  407 
  408     if (jump->flags & SLJIT_REWRITABLE_JUMP)
  409         return 0;
  410 
  411 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  412     if (jump->flags & IS_BL)
  413         code_ptr--;
  414 
  415     if (jump->flags & JUMP_ADDR)
  416         diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset);
  417     else {
  418         SLJIT_ASSERT(jump->flags & JUMP_LABEL);
  419         diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
  420     }
  421 
  422     /* Branch to Thumb code has not been optimized yet. */
  423     if (diff & 0x3)
  424         return 0;
  425 
  426     if (jump->flags & IS_BL) {
  427         if (diff <= 0x01ffffff && diff >= -0x02000000) {
  428             *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
  429             jump->flags |= PATCH_B;
  430             return 1;
  431         }
  432     }
  433     else {
  434         if (diff <= 0x01ffffff && diff >= -0x02000000) {
  435             *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
  436             jump->flags |= PATCH_B;
  437         }
  438     }
  439 #else
  440     if (jump->flags & JUMP_ADDR)
  441         diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset);
  442     else {
  443         SLJIT_ASSERT(jump->flags & JUMP_LABEL);
  444         diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
  445     }
  446 
  447     /* Branch to Thumb code has not been optimized yet. */
  448     if (diff & 0x3)
  449         return 0;
  450 
  451     if (diff <= 0x01ffffff && diff >= -0x02000000) {
  452         code_ptr -= 2;
  453         *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
  454         jump->flags |= PATCH_B;
  455         return 1;
  456     }
  457 #endif
  458     return 0;
  459 }
  460 
  461 static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
  462 {
  463 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  464     sljit_uw *ptr = (sljit_uw *)jump_ptr;
  465     sljit_uw *inst = (sljit_uw *)ptr[0];
  466     sljit_uw mov_pc = ptr[1];
  467     sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
  468     sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);
  469 
  470     if (diff <= 0x7fffff && diff >= -0x800000) {
  471         /* Turn to branch. */
  472         if (!bl) {
  473             inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
  474             if (flush_cache) {
  475                 inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
  476                 SLJIT_CACHE_FLUSH(inst, inst + 1);
  477             }
  478         } else {
  479             inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
  480             inst[1] = NOP;
  481             if (flush_cache) {
  482                 inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
  483                 SLJIT_CACHE_FLUSH(inst, inst + 2);
  484             }
  485         }
  486     } else {
  487         /* Get the position of the constant. */
  488         if (mov_pc & (1 << 23))
  489             ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
  490         else
  491             ptr = inst + 1;
  492 
  493         if (*inst != mov_pc) {
  494             inst[0] = mov_pc;
  495             if (!bl) {
  496                 if (flush_cache) {
  497                     inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
  498                     SLJIT_CACHE_FLUSH(inst, inst + 1);
  499                 }
  500             } else {
  501                 inst[1] = BLX | RM(TMP_REG1);
  502                 if (flush_cache) {
  503                     inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
  504                     SLJIT_CACHE_FLUSH(inst, inst + 2);
  505                 }
  506             }
  507         }
  508         *ptr = new_addr;
  509     }
  510 #else
  511     sljit_uw *inst = (sljit_uw*)jump_ptr;
  512     SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
  513     inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
  514     inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);
  515     if (flush_cache) {
  516         inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
  517         SLJIT_CACHE_FLUSH(inst, inst + 2);
  518     }
  519 #endif
  520 }
  521 
  522 static sljit_uw get_imm(sljit_uw imm);
  523 
  524 static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache)
  525 {
  526 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  527     sljit_uw *ptr = (sljit_uw*)addr;
  528     sljit_uw *inst = (sljit_uw*)ptr[0];
  529     sljit_uw ldr_literal = ptr[1];
  530     sljit_uw src2;
  531 
  532     src2 = get_imm(new_constant);
  533     if (src2) {
  534         *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;
  535         if (flush_cache) {
  536             inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
  537             SLJIT_CACHE_FLUSH(inst, inst + 1);
  538         }
  539         return;
  540     }
  541 
  542     src2 = get_imm(~new_constant);
  543     if (src2) {
  544         *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;
  545         if (flush_cache) {
  546             inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
  547             SLJIT_CACHE_FLUSH(inst, inst + 1);
  548         }
  549         return;
  550     }
  551 
  552     if (ldr_literal & (1 << 23))
  553         ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
  554     else
  555         ptr = inst + 1;
  556 
  557     if (*inst != ldr_literal) {
  558         *inst = ldr_literal;
  559         if (flush_cache) {
  560             inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
  561             SLJIT_CACHE_FLUSH(inst, inst + 1);
  562         }
  563     }
  564     *ptr = new_constant;
  565 #else
  566     sljit_uw *inst = (sljit_uw*)addr;
  567     SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
  568     inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
  569     inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);
  570     if (flush_cache) {
  571         inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
  572         SLJIT_CACHE_FLUSH(inst, inst + 2);
  573     }
  574 #endif
  575 }
  576 
  577 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
  578 {
  579     struct sljit_memory_fragment *buf;
  580     sljit_uw *code;
  581     sljit_uw *code_ptr;
  582     sljit_uw *buf_ptr;
  583     sljit_uw *buf_end;
  584     sljit_uw size;
  585     sljit_uw word_count;
  586     sljit_sw executable_offset;
  587     sljit_sw jump_addr;
  588 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  589     sljit_uw cpool_size;
  590     sljit_uw cpool_skip_alignment;
  591     sljit_uw cpool_current_index;
  592     sljit_uw *cpool_start_address;
  593     sljit_uw *last_pc_patch;
  594     struct future_patch *first_patch;
  595 #endif
  596 
  597     struct sljit_label *label;
  598     struct sljit_jump *jump;
  599     struct sljit_const *const_;
  600 
  601     CHECK_ERROR_PTR();
  602     CHECK_PTR(check_sljit_generate_code(compiler));
  603     reverse_buf(compiler);
  604 
  605     /* Second code generation pass. */
  606 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  607     size = compiler->size + (compiler->patches << 1);
  608     if (compiler->cpool_fill > 0)
  609         size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
  610 #else
  611     size = compiler->size;
  612 #endif
  613     code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw));
  614     PTR_FAIL_WITH_EXEC_IF(code);
  615     buf = compiler->buf;
  616 
  617 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  618     cpool_size = 0;
  619     cpool_skip_alignment = 0;
  620     cpool_current_index = 0;
  621     cpool_start_address = NULL;
  622     first_patch = NULL;
  623     last_pc_patch = code;
  624 #endif
  625 
  626     code_ptr = code;
  627     word_count = 0;
  628     executable_offset = SLJIT_EXEC_OFFSET(code);
  629 
  630     label = compiler->labels;
  631     jump = compiler->jumps;
  632     const_ = compiler->consts;
  633 
  634     if (label && label->size == 0) {
  635         label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
  636         label = label->next;
  637     }
  638 
  639     do {
  640         buf_ptr = (sljit_uw*)buf->memory;
  641         buf_end = buf_ptr + (buf->used_size >> 2);
  642         do {
  643             word_count++;
  644 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  645             if (cpool_size > 0) {
  646                 if (cpool_skip_alignment > 0) {
  647                     buf_ptr++;
  648                     cpool_skip_alignment--;
  649                 }
  650                 else {
  651                     if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
  652                         SLJIT_FREE_EXEC(code);
  653                         compiler->error = SLJIT_ERR_ALLOC_FAILED;
  654                         return NULL;
  655                     }
  656                     buf_ptr++;
  657                     if (++cpool_current_index >= cpool_size) {
  658                         SLJIT_ASSERT(!first_patch);
  659                         cpool_size = 0;
  660                         if (label && label->size == word_count) {
  661                             /* Points after the current instruction. */
  662                             label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
  663                             label->size = code_ptr - code;
  664                             label = label->next;
  665                         }
  666                     }
  667                 }
  668             }
  669             else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
  670 #endif
  671                 *code_ptr = *buf_ptr++;
  672                 /* These structures are ordered by their address. */
  673                 SLJIT_ASSERT(!label || label->size >= word_count);
  674                 SLJIT_ASSERT(!jump || jump->addr >= word_count);
  675                 SLJIT_ASSERT(!const_ || const_->addr >= word_count);
  676                 if (jump && jump->addr == word_count) {
  677 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  678                     if (detect_jump_type(jump, code_ptr, code, executable_offset))
  679                         code_ptr--;
  680                     jump->addr = (sljit_uw)code_ptr;
  681 #else
  682                     jump->addr = (sljit_uw)(code_ptr - 2);
  683                     if (detect_jump_type(jump, code_ptr, code, executable_offset))
  684                         code_ptr -= 2;
  685 #endif
  686                     jump = jump->next;
  687                 }
  688                 if (label && label->size == word_count) {
  689                     /* code_ptr can be affected above. */
  690                     label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
  691                     label->size = (code_ptr + 1) - code;
  692                     label = label->next;
  693                 }
  694                 if (const_ && const_->addr == word_count) {
  695 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  696                     const_->addr = (sljit_uw)code_ptr;
  697 #else
  698                     const_->addr = (sljit_uw)(code_ptr - 1);
  699 #endif
  700                     const_ = const_->next;
  701                 }
  702                 code_ptr++;
  703 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  704             }
  705             else {
  706                 /* Fortunately, no need to shift. */
  707                 cpool_size = *buf_ptr++ & ~PUSH_POOL;
  708                 SLJIT_ASSERT(cpool_size > 0);
  709                 cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
  710                 cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
  711                 if (cpool_current_index > 0) {
  712                     /* Unconditional branch. */
  713                     *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
  714                     code_ptr = cpool_start_address + cpool_current_index;
  715                 }
  716                 cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
  717                 cpool_current_index = 0;
  718                 last_pc_patch = code_ptr;
  719             }
  720 #endif
  721         } while (buf_ptr < buf_end);
  722         buf = buf->next;
  723     } while (buf);
  724 
  725     SLJIT_ASSERT(!label);
  726     SLJIT_ASSERT(!jump);
  727     SLJIT_ASSERT(!const_);
  728 
  729 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  730     SLJIT_ASSERT(cpool_size == 0);
  731     if (compiler->cpool_fill > 0) {
  732         cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
  733         cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
  734         if (cpool_current_index > 0)
  735             code_ptr = cpool_start_address + cpool_current_index;
  736 
  737         buf_ptr = compiler->cpool;
  738         buf_end = buf_ptr + compiler->cpool_fill;
  739         cpool_current_index = 0;
  740         while (buf_ptr < buf_end) {
  741             if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
  742                 SLJIT_FREE_EXEC(code);
  743                 compiler->error = SLJIT_ERR_ALLOC_FAILED;
  744                 return NULL;
  745             }
  746             buf_ptr++;
  747             cpool_current_index++;
  748         }
  749         SLJIT_ASSERT(!first_patch);
  750     }
  751 #endif
  752 
  753     jump = compiler->jumps;
  754     while (jump) {
  755         buf_ptr = (sljit_uw *)jump->addr;
  756 
  757         if (jump->flags & PATCH_B) {
  758             jump_addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
  759             if (!(jump->flags & JUMP_ADDR)) {
  760                 SLJIT_ASSERT(jump->flags & JUMP_LABEL);
  761                 SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - jump_addr) >= -0x02000000);
  762                 *buf_ptr |= (((sljit_sw)jump->u.label->addr - jump_addr) >> 2) & 0x00ffffff;
  763             }
  764             else {
  765                 SLJIT_ASSERT(((sljit_sw)jump->u.target - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - jump_addr) >= -0x02000000);
  766                 *buf_ptr |= (((sljit_sw)jump->u.target - jump_addr) >> 2) & 0x00ffffff;
  767             }
  768         }
  769         else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
  770 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  771             jump->addr = (sljit_uw)code_ptr;
  772             code_ptr[0] = (sljit_uw)buf_ptr;
  773             code_ptr[1] = *buf_ptr;
  774             inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
  775             code_ptr += 2;
  776 #else
  777             inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
  778 #endif
  779         }
  780         else {
  781 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  782             if (jump->flags & IS_BL)
  783                 buf_ptr--;
  784             if (*buf_ptr & (1 << 23))
  785                 buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
  786             else
  787                 buf_ptr += 1;
  788             *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
  789 #else
  790             inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
  791 #endif
  792         }
  793         jump = jump->next;
  794     }
  795 
  796 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  797     const_ = compiler->consts;
  798     while (const_) {
  799         buf_ptr = (sljit_uw*)const_->addr;
  800         const_->addr = (sljit_uw)code_ptr;
  801 
  802         code_ptr[0] = (sljit_uw)buf_ptr;
  803         code_ptr[1] = *buf_ptr;
  804         if (*buf_ptr & (1 << 23))
  805             buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
  806         else
  807             buf_ptr += 1;
  808         /* Set the value again (can be a simple constant). */
  809         inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
  810         code_ptr += 2;
  811 
  812         const_ = const_->next;
  813     }
  814 #endif
  815 
  816     SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);
  817 
  818     compiler->error = SLJIT_ERR_COMPILED;
  819     compiler->executable_offset = executable_offset;
  820     compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);
  821 
  822     code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
  823     code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
  824 
  825     SLJIT_CACHE_FLUSH(code, code_ptr);
  826     return code;
  827 }
  828 
  829 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
  830 {
  831     /* Answers feature queries for this backend: non-zero means available. */
  832     /* Count leading zeros and conditional move are always reported. */
  833     if (feature_type == SLJIT_HAS_CLZ || feature_type == SLJIT_HAS_CMOV)
  834         return 1;
  835 
  836     /* Apart from the FPU, nothing else is reported as available. */
  837     if (feature_type != SLJIT_HAS_FPU)
  838         return 0;
  839 
  840 #ifdef SLJIT_IS_FPU_AVAILABLE
  841     /* The build configuration decides. */
  842     return SLJIT_IS_FPU_AVAILABLE;
  843 #else
  844     /* Available by default. */
  845     return 1;
  846 #endif
  847 }
  848 
  849 /* --------------------------------------------------------------------- */
  850 /*  Entry, exit                                                          */
  851 /* --------------------------------------------------------------------- */
  852 
  853 /* Transfer selectors. Their low four bits index the data_transfer_insts
         array: bits 0-1 pick the access size (or preload), SIGNED asks for the
         signed variant and LOAD_DATA picks a load instead of a store. */
  854 #define WORD_SIZE   0x00
  855 #define BYTE_SIZE   0x01
  856 #define HALF_SIZE   0x02
  857 #define PRELOAD     0x03
  858 #define SIGNED      0x04
  859 #define LOAD_DATA   0x08
  860 
  861 /* Flag bits for emit_op. They sit above the four selector bits, so both
         kinds can travel together in one inp_flags value. ALLOW_IMM lets
         emit_op try an encoded immediate operand; ALLOW_INV_IMM additionally
         lets it try the bitwise complement of the immediate. */
  862 #define ALLOW_IMM   0x10
  863 #define ALLOW_INV_IMM   0x20
  864 #define ALLOW_ANY_IMM   (ALLOW_IMM | ALLOW_INV_IMM)
  865 
  866 /* Instruction templates indexed by the low four bits of the transfer type.
         s/l - store/load (1 bit, set by LOAD_DATA)
  867    u/s - unsigned/signed (1 bit, set by SIGNED)
  868    w/b/h/N - word/byte/half/NOT allowed (2 bit); index 11 holds the preload
  869    Storing signed and unsigned values is the same operation, so the store
         rows of the unsigned and the signed half are identical. */
  870 
  871 static const sljit_uw data_transfer_insts[16] = {
  872 /* s u w */ 0xe5000000 /* str */,
  873 /* s u b */ 0xe5400000 /* strb */,
  874 /* s u h */ 0xe10000b0 /* strh */,
  875 /* s u N */ 0x00000000 /* not allowed */,
  876 /* s s w */ 0xe5000000 /* str */,
  877 /* s s b */ 0xe5400000 /* strb */,
  878 /* s s h */ 0xe10000b0 /* strh */,
  879 /* s s N */ 0x00000000 /* not allowed */,
  880 
  881 /* l u w */ 0xe5100000 /* ldr */,
  882 /* l u b */ 0xe5500000 /* ldrb */,
  883 /* l u h */ 0xe11000b0 /* ldrh */,
  884 /* l u p */ 0xf5500000 /* preload */,
  885 /* l s w */ 0xe5100000 /* ldr */,
  886 /* l s b */ 0xe11000d0 /* ldrsb */,
  887 /* l s h */ 0xe11000f0 /* ldrsh */,
  888 /* l s N */ 0x00000000 /* not allowed */,
  889 };
  890 /* Builds a data transfer instruction word: the template selected by
         (type & 0xf), with add placed in bit 23 (emit_op_mem passes 1 when the
         offset is added to the base and 0 when it is subtracted), the
         transferred register in RD, the base register in RN and the already
         encoded offset field arg. */
  891 #define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
  892     (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (arg))
  893 
  894 /* Normal ldr/str instruction: non-zero for templates with bit 26 set (str,
         strb, ldr, ldrb and preload in the table). emit_op_mem gives these a
         12 bit offset.
  895    Type2: strh, ldrh, ldrsb, ldrsh. emit_op_mem gives these an 8 bit offset. */
  896 #define IS_TYPE1_TRANSFER(type) \
  897     (data_transfer_insts[(type) & 0xf] & 0x04000000)
      /* Splits an 8 bit offset for type2 transfers: low nibble into bits 0-3,
         high nibble into bits 8-11. Bit 22 is always set.
         NOTE(review): bit 22 is presumably the immediate-offset selector of
         this encoding - confirm against the instruction set reference. */
  898 #define TYPE2_TRANSFER_IMM(imm) \
  899     (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
  900 /* Forward declaration: emit_op is defined further below, but it is already
         called by sljit_emit_enter and sljit_emit_return. */
  901 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
  902     sljit_s32 dst, sljit_sw dstw,
  903     sljit_s32 src1, sljit_sw src1w,
  904     sljit_s32 src2, sljit_sw src2w);
  905 
  906 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
  907     sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
  908     sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
  909 {
  910     sljit_s32 arg_count, saved_size, reg, lowest_saved;
  911     sljit_uw reg_list;
  912 
  913     CHECK_ERROR();
  914     CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
  915     set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
  916 
  917     /* Prologue, stmdb sp!, {..., lr}: bit 14 (lr) is always stored, together
  918        with the saved registers in use and the upper scratch registers. */
  919     lowest_saved = SLJIT_FIRST_SAVED_REG;
  920     if (saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS)
  921         lowest_saved = SLJIT_S0 + 1 - saveds;
  922     reg_list = PUSH | (1 << 14);
  923     for (reg = SLJIT_S0; reg >= lowest_saved; reg--)
  924         reg_list |= 1 << reg_map[reg];
  925     for (reg = scratches; reg >= SLJIT_FIRST_SAVED_REG; reg--)
  926         reg_list |= 1 << reg_map[reg];
  927     FAIL_IF(push_inst(compiler, reg_list));
  928 
  929     /* Pad the local area so that registers + locals is a multiple of 8 bytes. */
  930     saved_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
  931     local_size = ((saved_size + local_size + 7) & ~7) - saved_size;
  932     compiler->local_size = local_size;
  933     if (local_size > 0)
  934         FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
  935 
  936     /* Move up to three incoming arguments (R0-R2) into S0-S2. */
  937     arg_count = get_arg_count(arg_types);
  938     if (arg_count >= 1) {
  939         FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0) | RM(SLJIT_R0)));
  940         if (arg_count >= 2) {
  941             FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S1) | RM(SLJIT_R1)));
  942             if (arg_count >= 3)
  943                 FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S2) | RM(SLJIT_R2)));
  944         }
  945     }
  946     return SLJIT_SUCCESS;
  947 }
  948 
  949 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
  950     sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
  951     sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
  952 {
  953     sljit_s32 saved_bytes, frame_bytes;
  954     CHECK_ERROR();
  955     CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
  956     set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
  957     /* Round registers + locals up to 8 bytes and record only the locals part. */
  958     saved_bytes = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
  959     frame_bytes = (saved_bytes + local_size + 7) & ~7;
  960     compiler->local_size = frame_bytes - saved_bytes;
  961     return SLJIT_SUCCESS;
  962 }
  963 
  964 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
  965 {
  966     sljit_s32 reg, lowest_saved;
  967     sljit_uw reg_list;
  968 
  969     CHECK_ERROR();
  970     CHECK(check_sljit_emit_return(compiler, op, src, srcw));
  971 
  972     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
  973 
  974     /* Give back the local area that was reserved on entry. */
  975     if (compiler->local_size > 0)
  976         FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
  977 
  978     /* Epilogue, ldmia sp!, {..., pc}: pops the registers selected the same way
  979        as in sljit_emit_enter, with bit 15 (pc) taking the place of lr. */
  980     lowest_saved = SLJIT_FIRST_SAVED_REG;
  981     if (compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS)
  982         lowest_saved = SLJIT_S0 + 1 - compiler->saveds;
  983     reg_list = POP | (1 << 15);
  984     for (reg = SLJIT_S0; reg >= lowest_saved; reg--)
  985         reg_list |= 1 << reg_map[reg];
  986     for (reg = compiler->scratches; reg >= SLJIT_FIRST_SAVED_REG; reg--)
  987         reg_list |= 1 << reg_map[reg];
  988     return push_inst(compiler, reg_list);
  989 }
  990 
  991 /* --------------------------------------------------------------------- */
  992 /*  Operators                                                            */
  993 /* --------------------------------------------------------------------- */
  994 
  995 /* Bits of the flags argument of emit_single_op (computed in emit_op): */
  996   /* Arguments are swapped: the immediate in src2 was taken from the first
           operand, so SUB and SUBC are emitted in their reversed form. */
  997 #define ARGS_SWAPPED    0x01
  998   /* Inverted immediate: src2 encodes the bitwise complement of the requested
           value, and the emitted instruction compensates (e.g. MVN for MOV,
           BIC for AND). */
  999 #define INV_IMM     0x02
 1000   /* Source and destination is register: a narrowing move has to extend the
           value with explicit instructions. */
 1001 #define MOVE_REG_CONV   0x04
 1002   /* Unused return value: set by emit_op when dst is SLJIT_UNUSED. */
 1003 #define UNUSED_RETURN   0x08
 1004 /* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization):
         (flags & SET_FLAGS) is OR-ed straight into the instruction words. */
 1005 #define SET_FLAGS   (1 << 20)
 1006 /* dst: reg
 1007    src1: reg
 1008    src2: reg or imm (if allowed); an immediate is a value returned by get_imm
 1009    SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization):
         an encoded immediate is OR-ed into the instruction word as it is. */
 1010 #define SRC2_IMM    (1 << 25)
 1011 /* Emits the instruction of a shift operation and returns from the expanding
         function. The macro reads the locals compiler, flags, dst, src1 and src2
         of that function directly. opcode is the shift type placed at bit 5
         (emit_single_op passes 0, 1 and 2 for SHL, LSHR and ASHR).
         When compiler->shift_imm is not 0x20 the amount is that constant, placed
         at bit 7, and src2 is the shifted register; an amount of 0 becomes a
         plain MOV. Otherwise the amount comes from a register (bits 8-11, with
         bit 4 set), and ARGS_SWAPPED exchanges the roles of src1 and src2.
         NOTE(review): 0x20 appears to mean "no constant amount"; the place where
         shift_imm is set is not visible here - confirm. */
 1012 #define EMIT_SHIFT_INS_AND_RETURN(opcode) \
 1013     SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \
 1014     if (compiler->shift_imm != 0x20) { \
 1015         SLJIT_ASSERT(src1 == TMP_REG1); \
 1016         SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
 1017         \
 1018         if (compiler->shift_imm != 0) \
 1019             return push_inst(compiler, MOV | (flags & SET_FLAGS) | \
 1020                 RD(dst) | (compiler->shift_imm << 7) | (opcode << 5) | RM(src2)); \
 1021         return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2)); \
 1022     } \
 1023     return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | \
 1024         (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1));
 1025 /* Emits the instruction(s) of one operation whose operands are already in
         their final form: dst and src1 are registers, src2 is either a register
         or an immediate field produced by get_imm (recognizable by SRC2_IMM).
         flags combines ARGS_SWAPPED, INV_IMM, MOVE_REG_CONV, UNUSED_RETURN and
         SET_FLAGS. Returns the result of the last push_inst call, or
         SLJIT_SUCCESS when a move needs no instruction at all. */
 1026 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
 1027     sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
 1028 {
 1029     switch (GET_OPCODE(op)) {
 1030     case SLJIT_MOV:
 1031         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
 1032         if (dst != src2) {
 1033             if (src2 & SRC2_IMM) {
 1034                 return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
 1035             }
 1036             return push_inst(compiler, MOV | RD(dst) | RM(src2));
 1037         }
              /* The value is already in dst. */
 1038         return SLJIT_SUCCESS;
 1039 
 1040     case SLJIT_MOV_U8:
 1041     case SLJIT_MOV_S8:
 1042         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
 1043         if (flags & MOVE_REG_CONV) {
 1044 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
                  /* Unsigned: mask with 0xff. Signed: shift left by 24, then shift
                     right by 24 (0x20 and 0x40 are shift types 1 and 2 at bit 5,
                     the values used for LSHR and ASHR below). */
 1045             if (op == SLJIT_MOV_U8)
 1046                 return push_inst(compiler, AND | RD(dst) | RN(src2) | SRC2_IMM | 0xff);
 1047             FAIL_IF(push_inst(compiler, MOV | RD(dst) | (24 << 7) | RM(src2)));
 1048             return push_inst(compiler, MOV | RD(dst) | (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst));
 1049 #else
 1050             return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
 1051 #endif
 1052         }
 1053         else if (dst != src2) {
 1054             SLJIT_ASSERT(src2 & SRC2_IMM);
 1055             return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
 1056         }
 1057         return SLJIT_SUCCESS;
 1058 
 1059     case SLJIT_MOV_U16:
 1060     case SLJIT_MOV_S16:
 1061         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
 1062         if (flags & MOVE_REG_CONV) {
 1063 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
                  /* Shift left by 16, then shift right by 16 with shift type 1
                     (unsigned) or 2 (signed). */
 1064             FAIL_IF(push_inst(compiler, MOV | RD(dst) | (16 << 7) | RM(src2)));
 1065             return push_inst(compiler, MOV | RD(dst) | (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst));
 1066 #else
 1067             return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
 1068 #endif
 1069         }
 1070         else if (dst != src2) {
 1071             SLJIT_ASSERT(src2 & SRC2_IMM);
 1072             return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
 1073         }
 1074         return SLJIT_SUCCESS;
 1075 
 1076     case SLJIT_NOT:
              /* The complement of an already inverted immediate is a plain MOV. */
 1077         if (src2 & SRC2_IMM) {
 1078             return push_inst(compiler, ((flags & INV_IMM) ? MOV : MVN) | (flags & SET_FLAGS) | RD(dst) | src2);
 1079         }
 1080         return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2));
 1081 
 1082     case SLJIT_CLZ:
 1083         SLJIT_ASSERT(!(flags & INV_IMM));
 1084         SLJIT_ASSERT(!(src2 & SRC2_IMM));
 1085         FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
 1086         return SLJIT_SUCCESS;
 1087 
 1088     case SLJIT_ADD:
 1089         SLJIT_ASSERT(!(flags & INV_IMM));
              /* Only the flags are wanted: CMN is emitted without a destination. */
 1090         if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED))
 1091             return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
 1092         return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
 1093 
 1094     case SLJIT_ADDC:
 1095         SLJIT_ASSERT(!(flags & INV_IMM));
 1096         return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
 1097 
 1098     case SLJIT_SUB:
 1099         SLJIT_ASSERT(!(flags & INV_IMM));
              /* Only the flags are wanted: CMP is emitted without a destination. */
 1100         if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED))
 1101             return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
              /* Swapped operands are handled by the reversed form (RSB). */
 1102         return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS)
 1103             | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
 1104 
 1105     case SLJIT_SUBC:
 1106         SLJIT_ASSERT(!(flags & INV_IMM));
 1107         return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS)
 1108             | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
 1109 
 1110     case SLJIT_MUL:
 1111         SLJIT_ASSERT(!(flags & INV_IMM));
 1112         SLJIT_ASSERT(!(src2 & SRC2_IMM));
 1113 
 1114         if (!HAS_FLAGS(op))
 1115             return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]);
 1116 
              /* NOTE(review): with flags requested a long multiply is used;
                 presumably TMP_REG1 receives the high word and dst the low word,
                 and the compare below checks the high word against the sign of
                 the low word - confirm against the SMULL encoding. */
 1117         FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1]));
 1118 
 1119         /* cmp TMP_REG1, dst asr #31. */
 1120         return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0);
 1121 
 1122     case SLJIT_AND:
              /* AND with an inverted immediate is emitted as BIC. */
 1123         return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS)
 1124             | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
 1125 
 1126     case SLJIT_OR:
 1127         SLJIT_ASSERT(!(flags & INV_IMM));
 1128         return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
 1129 
 1130     case SLJIT_XOR:
 1131         SLJIT_ASSERT(!(flags & INV_IMM));
 1132         return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
 1133 
          /* Each shift macro below returns from this function. */
 1134     case SLJIT_SHL:
 1135         EMIT_SHIFT_INS_AND_RETURN(0);
 1136 
 1137     case SLJIT_LSHR:
 1138         EMIT_SHIFT_INS_AND_RETURN(1);
 1139 
 1140     case SLJIT_ASHR:
 1141         EMIT_SHIFT_INS_AND_RETURN(2);
 1142     }
 1143 
 1144     SLJIT_UNREACHABLE();
 1145     return SLJIT_SUCCESS;
 1146 }
 1147 
 1148 #undef EMIT_SHIFT_INS_AND_RETURN
 1149 
 1150 /* Tries to express imm in the 12 bit immediate field of an instruction
 1151    (8 bit payload in bits 0-7, rotation count in bits 8-11).
 1152    Returns the field with SRC2_IMM set, or 0 when imm does not fit. */
 1153 static sljit_uw get_imm(sljit_uw imm)
 1154 {
 1155     sljit_s32 rotation = 0;
 1156 
 1157     /* Plain 8 bit values need no rotation at all. */
 1158     if (imm <= 0xff)
 1159         return SRC2_IMM | imm;
 1160 
 1161     /* Move the candidate bits towards the top byte of the word. */
 1162     if (imm & 0xff000000)
 1163         imm = (imm << 24) | (imm >> 8);
 1164     else {
 1165         imm <<= 8;
 1166         rotation = 8;
 1167     }
 1168 
 1169     /* Normalize: shift left by 8, 4 and 2 bits while the top bits are clear. */
 1170     if ((imm & 0xff000000) == 0) {
 1171         imm <<= 8;
 1172         rotation += 4;
 1173     }
 1174     if ((imm & 0xf0000000) == 0) {
 1175         imm <<= 4;
 1176         rotation += 2;
 1177     }
 1178     if ((imm & 0xc0000000) == 0) {
 1179         imm <<= 2;
 1180         rotation += 1;
 1181     }
 1182 
 1183     /* Anything left below the top byte cannot be encoded. */
 1184     if (imm & 0x00ffffff)
 1185         return 0;
 1186     return SRC2_IMM | (imm >> 24) | (rotation << 8);
 1187 }
 1188 
 1189 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
      /* Tries to load imm into reg with two instructions, each carrying an 8 bit
         immediate with a rotation count: MOV + ORR when positive is non-zero,
         MVN + BIC otherwise (load_immediate passes ~imm in that case).
         Returns 1 after both instructions were pushed and 0 when imm cannot be
         split this way.
         NOTE(review): a failing push_inst leaves through FAIL_IF, whose
         definition is not visible here - presumably it returns the error code. */
 1190 static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm, sljit_s32 positive)
 1191 {
 1192     sljit_uw mask;
 1193     sljit_uw imm1;
 1194     sljit_uw imm2;
 1195     sljit_s32 rol;
 1196 
 1197     /* Step1: Search for a zero byte (8 continuous zero bits). The 8 bit window
             moves down by two bits per iteration. */
 1198     mask = 0xff000000;
 1199     rol = 8;
 1200     while(1) {
 1201         if (!(imm & mask)) {
 1202             /* Rol imm by rol. */
 1203             imm = (imm << rol) | (imm >> (32 - rol));
 1204             /* Calculate arm rol. */
 1205             rol = 4 + (rol >> 1);
 1206             break;
 1207         }
 1208         rol += 2;
 1209         mask >>= 2;
 1210         if (mask & 0x3) {
                  /* The window reached the lowest byte: rotate the word by one
                     byte and scan the positions that wrap around its ends. */
 1211             /* rol by 8. */
 1212             imm = (imm << 8) | (imm >> 24);
 1213             mask = 0xff00;
 1214             rol = 24;
 1215             while (1) {
 1216                 if (!(imm & mask)) {
 1217                     /* Rol imm by rol. */
 1218                     imm = (imm << rol) | (imm >> (32 - rol));
 1219                     /* Calculate arm rol. */
 1220                     rol = (rol >> 1) - 8;
 1221                     break;
 1222                 }
 1223                 rol += 2;
 1224                 mask >>= 2;
                      /* No zero byte at any position: give up. */
 1225                 if (mask & 0x3)
 1226                     return 0;
 1227             }
 1228             break;
 1229         }
 1230     }
 1231 
 1232     /* The low 8 bit must be zero. */
 1233     SLJIT_ASSERT(!(imm & 0xff));
 1234 
          /* Step2: split the remaining three bytes into two 8 bit immediates. */
 1235     if (!(imm & 0xff000000)) {
              /* The top byte is zero as well: the two middle bytes are used as
                 they are. */
 1236         imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
 1237         imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
 1238     }
 1239     else if (imm & 0xc0000000) {
              /* The top two bits are in use: the first immediate is the top byte
                 as it is, the rest is normalized the same way as in get_imm. */
 1240         imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
 1241         imm <<= 8;
 1242         rol += 4;
 1243 
 1244         if (!(imm & 0xff000000)) {
 1245             imm <<= 8;
 1246             rol += 4;
 1247         }
 1248 
 1249         if (!(imm & 0xf0000000)) {
 1250             imm <<= 4;
 1251             rol += 2;
 1252         }
 1253 
 1254         if (!(imm & 0xc0000000)) {
 1255             imm <<= 2;
 1256             rol += 1;
 1257         }
 1258 
 1259         if (!(imm & 0x00ffffff))
 1260             imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
 1261         else
 1262             return 0;
 1263     }
 1264     else {
              /* Leading zero bits exist: normalize before taking the first
                 immediate, then normalize the remainder for the second one. */
 1265         if (!(imm & 0xf0000000)) {
 1266             imm <<= 4;
 1267             rol += 2;
 1268         }
 1269 
 1270         if (!(imm & 0xc0000000)) {
 1271             imm <<= 2;
 1272             rol += 1;
 1273         }
 1274 
 1275         imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
 1276         imm <<= 8;
 1277         rol += 4;
 1278 
 1279         if (!(imm & 0xf0000000)) {
 1280             imm <<= 4;
 1281             rol += 2;
 1282         }
 1283 
 1284         if (!(imm & 0xc0000000)) {
 1285             imm <<= 2;
 1286             rol += 1;
 1287         }
 1288 
 1289         if (!(imm & 0x00ffffff))
 1290             imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
 1291         else
 1292             return 0;
 1293     }
 1294 
          /* The first instruction sets reg, the second one merges the other part. */
 1295     FAIL_IF(push_inst(compiler, (positive ? MOV : MVN) | RD(reg) | imm1));
 1296     FAIL_IF(push_inst(compiler, (positive ? ORR : BIC) | RD(reg) | RN(reg) | imm2));
 1297     return 1;
 1298 }
 1299 #endif
 1300 
 1301 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
 1302 {
 1303     sljit_uw encoded;
 1304 
 1305 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
 1306     /* Values that fit into 16 bits are loaded by a single MOVW. */
 1307     if (imm <= 0xffff)
 1308         return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
 1309 #endif
 1310     /* One instruction: MOV of the value, or MVN of its complement. */
 1311     encoded = get_imm(imm);
 1312     if (encoded != 0)
 1313         return push_inst(compiler, MOV | RD(reg) | encoded);
 1314     encoded = get_imm(~imm);
 1315     if (encoded != 0)
 1316         return push_inst(compiler, MVN | RD(reg) | encoded);
 1317 
 1318 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 1319     /* Two instructions, tried for the value and for its complement. */
 1320     FAIL_IF(generate_int(compiler, reg, imm, 1));
 1321     FAIL_IF(generate_int(compiler, reg, ~imm, 0));
 1322 
 1323     /* Otherwise load the word through push_inst_with_literal (base: TMP_PC). */
 1324     return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm);
 1325 #else
 1326     /* MOVW carries the low 16 bits; MOVT follows only for a non-zero high half. */
 1327     FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
 1328     if (imm > 0xffff)
 1329         return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
 1330     return SLJIT_SUCCESS;
 1331 #endif
 1332 }
 1333 /* Emits one load or store of reg for the memory operand (arg, argw).
         flags selects the instruction: its low four bits index
         data_transfer_insts. tmp_reg is a scratch register for addresses that do
         not fit into the offset field of the instruction; it must differ from the
         base register of arg (asserted below).
         Type1 transfers take a 12 bit offset, type2 transfers an 8 bit offset
         (see IS_TYPE1_TRANSFER). Returns the result of the last push_inst. */
 1334 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
 1335     sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
 1336 {
 1337     sljit_uw imm, offset_reg;
 1338     sljit_uw is_type1_transfer = IS_TYPE1_TRANSFER(flags);
 1339 
 1340     SLJIT_ASSERT (arg & SLJIT_MEM);
 1341     SLJIT_ASSERT((arg & REG_MASK) != tmp_reg);
 1342 
          /* No base register: argw is the address itself. Its upper part is
             loaded into tmp_reg and the low bits become the offset. */
 1343     if ((arg & REG_MASK) == SLJIT_UNUSED) {
 1344         if (is_type1_transfer) {
 1345             FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xfff));
 1346             argw &= 0xfff;
 1347         }
 1348         else {
 1349             FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xff));
 1350             argw &= 0xff;
 1351         }
 1352 
 1353         return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg,
 1354             is_type1_transfer ? argw : TYPE2_TRANSFER_IMM(argw)));
 1355     }
 1356 
          /* Base register plus offset register; the low two bits of argw give the
             left shift applied to the offset register. */
 1357     if (arg & OFFS_REG_MASK) {
 1358         offset_reg = OFFS_REG(arg);
 1359         arg &= REG_MASK;
 1360         argw &= 0x3;
 1361 
              /* A shifted offset of a type2 transfer is first added to the base
                 in tmp_reg, then the transfer uses a zero offset. */
 1362         if (argw != 0 && !is_type1_transfer) {
 1363             FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | (argw << 7)));
 1364             return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
 1365         }
 1366 
 1367         /* Bit 25: RM is offset. */
 1368         return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
 1369             RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | (argw << 7)));
 1370     }
 1371 
          /* Base register plus constant offset. */
 1372     arg &= REG_MASK;
 1373 
 1374     if (is_type1_transfer) {
              /* Offset out of range: try to add (or subtract) its upper part to
                 the base with one instruction, keeping only the low 12 bits. */
 1375         if (argw > 0xfff) {
 1376             imm = get_imm(argw & ~0xfff);
 1377             if (imm) {
 1378                 FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
 1379                 argw = argw & 0xfff;
 1380                 arg = tmp_reg;
 1381             }
 1382         }
 1383         else if (argw < -0xfff) {
 1384             imm = get_imm(-argw & ~0xfff);
 1385             if (imm) {
 1386                 FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
 1387                 argw = -(-argw & 0xfff);
 1388                 arg = tmp_reg;
 1389             }
 1390         }
 1391 
              /* In range: the offset is added for argw >= 0 and its magnitude is
                 subtracted otherwise. */
 1392         if (argw >= 0 && argw <= 0xfff)
 1393             return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw));
 1394 
 1395         if (argw < 0 && argw >= -0xfff)
 1396             return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, -argw));
 1397     }
 1398     else {
              /* Same scheme as above with the 8 bit offset of type2 transfers. */
 1399         if (argw > 0xff) {
 1400             imm = get_imm(argw & ~0xff);
 1401             if (imm) {
 1402                 FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
 1403                 argw = argw & 0xff;
 1404                 arg = tmp_reg;
 1405             }
 1406         }
 1407         else if (argw < -0xff) {
 1408             imm = get_imm(-argw & ~0xff);
 1409             if (imm) {
 1410                 FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
 1411                 argw = -(-argw & 0xff);
 1412                 arg = tmp_reg;
 1413             }
 1414         }
 1415 
 1416         if (argw >= 0 && argw <= 0xff)
 1417             return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, TYPE2_TRANSFER_IMM(argw)));
 1418 
 1419         if (argw < 0 && argw >= -0xff) {
 1420             argw = -argw;
 1421             return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, TYPE2_TRANSFER_IMM(argw)));
 1422         }
 1423     }
 1424 
          /* Last resort: the whole offset goes into tmp_reg, which is then used
             as an offset register. */
 1425     FAIL_IF(load_immediate(compiler, tmp_reg, argw));
 1426     return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
 1427         RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0)));
 1428 }
 1429 /* Generic emitter of one operation: brings the operands into registers (or an
         encoded immediate), calls emit_single_op, and stores the result when dst
         is a memory operand.
         op is the operation, possibly with flag requests (see HAS_FLAGS below).
         inp_flags carries the transfer selector bits used for memory operands
         plus ALLOW_IMM / ALLOW_INV_IMM. (dst, dstw), (src1, src1w) and
         (src2, src2w) are operand / value pairs: a register, a memory operand
         (SLJIT_MEM) or an immediate (SLJIT_IMM). */
 1430 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
 1431     sljit_s32 dst, sljit_sw dstw,
 1432     sljit_s32 src1, sljit_sw src1w,
 1433     sljit_s32 src2, sljit_sw src2w)
 1434 {
 1435     /* src1 is reg or TMP_REG1
 1436        src2 is reg, TMP_REG2, or imm
 1437        the result goes to TMP_REG2, so storing the result can use TMP_REG1. */
 1438 
 1439     /* We prefer registers and simple constants. */
 1440     sljit_s32 dst_reg;
 1441     sljit_s32 src1_reg;
 1442     sljit_s32 src2_reg;
 1443     sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
 1444 
 1445     /* Destination check. */
 1446     if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
 1447         flags |= UNUSED_RETURN;
 1448 
 1449     SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));
 1450 
          /* src2_reg stays 0 until an encodable immediate has been found. */
 1451     src2_reg = 0;
 1452 
          /* Immediate selection; the first encodable form wins. */
 1453     do {
 1454         if (!(inp_flags & ALLOW_IMM))
 1455             break;
 1456 
 1457         if (src2 & SLJIT_IMM) {
 1458             src2_reg = get_imm(src2w);
 1459             if (src2_reg)
 1460                 break;
 1461             if (inp_flags & ALLOW_INV_IMM) {
 1462                 src2_reg = get_imm(~src2w);
 1463                 if (src2_reg) {
 1464                     flags |= INV_IMM;
 1465                     break;
 1466                 }
 1467             }
                  /* Adding a constant is the same as subtracting its negation,
                     and the other way around. */
 1468             if (GET_OPCODE(op) == SLJIT_ADD) {
 1469                 src2_reg = get_imm(-src2w);
 1470                 if (src2_reg) {
 1471                     op = SLJIT_SUB | GET_ALL_FLAGS(op);
 1472                     break;
 1473                 }
 1474             }
 1475             if (GET_OPCODE(op) == SLJIT_SUB) {
 1476                 src2_reg = get_imm(-src2w);
 1477                 if (src2_reg) {
 1478                     op = SLJIT_ADD | GET_ALL_FLAGS(op);
 1479                     break;
 1480                 }
 1481             }
 1482         }
 1483 
              /* The first operand is an immediate: it takes the place of src2 and
                 the former second operand becomes src1. */
 1484         if (src1 & SLJIT_IMM) {
 1485             src2_reg = get_imm(src1w);
 1486             if (src2_reg) {
 1487                 flags |= ARGS_SWAPPED;
 1488                 src1 = src2;
 1489                 src1w = src2w;
 1490                 break;
 1491             }
 1492             if (inp_flags & ALLOW_INV_IMM) {
 1493                 src2_reg = get_imm(~src1w);
 1494                 if (src2_reg) {
 1495                     flags |= ARGS_SWAPPED | INV_IMM;
 1496                     src1 = src2;
 1497                     src1w = src2w;
 1498                     break;
 1499                 }
 1500             }
 1501             if (GET_OPCODE(op) == SLJIT_ADD) {
 1502                 src2_reg = get_imm(-src1w);
 1503                 if (src2_reg) {
 1504                     /* Note: add is commutative operation. */
 1505                     src1 = src2;
 1506                     src1w = src2w;
 1507                     op = SLJIT_SUB | GET_ALL_FLAGS(op);
 1508                     break;
 1509                 }
 1510             }
 1511         }
 1512     } while(0);
 1513 
 1514     /* Source 1. */
 1515     if (FAST_IS_REG(src1))
 1516         src1_reg = src1;
 1517     else if (src1 & SLJIT_MEM) {
 1518         FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
 1519         src1_reg = TMP_REG1;
 1520     }
 1521     else {
 1522         FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
 1523         src1_reg = TMP_REG1;
 1524     }
 1525 
 1526     /* Destination. */
 1527     dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
 1528 
          /* Move operations. */
 1529     if (op <= SLJIT_MOV_P) {
 1530         if (dst & SLJIT_MEM) {
                  /* NOTE(review): SIGNED is dropped for a store whose size
                     selector has bit 0 set; presumably so that a memory source
                     is read with the unsigned load, as only the low part is
                     stored afterwards - confirm. */
 1531             if (inp_flags & BYTE_SIZE)
 1532                 inp_flags &= ~SIGNED;
 1533 
                  /* Register to memory: a single store does the whole move. */
 1534             if (FAST_IS_REG(src2))
 1535                 return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2);
 1536         }
 1537 
 1538         if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
 1539             flags |= MOVE_REG_CONV;
 1540     }
 1541 
 1542     /* Source 2. */
 1543     if (src2_reg == 0) {
              /* Moves load straight into the destination register. */
 1544         src2_reg = (op <= SLJIT_MOV_P) ? dst_reg : TMP_REG2;
 1545 
 1546         if (FAST_IS_REG(src2))
 1547             src2_reg = src2;
 1548         else if (src2 & SLJIT_MEM)
 1549             FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2));
 1550         else
 1551             FAIL_IF(load_immediate(compiler, src2_reg, src2w));
 1552     }
 1553 
 1554     FAIL_IF(emit_single_op(compiler, op, flags, dst_reg, src1_reg, src2_reg));
 1555 
 1556     if (!(dst & SLJIT_MEM))
 1557         return SLJIT_SUCCESS;
 1558 
          /* The result is in dst_reg: store it, with TMP_REG1 as scratch. */
 1559     return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
 1560 }
 1561 
 1562 #ifdef __cplusplus
 1563 extern "C" {
 1564 #endif
 1565 
 1566 #if defined(__GNUC__)
 1567 extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
 1568 extern int __aeabi_idivmod(int numerator, int denominator);
 1569 #else
 1570 #error "Software divmod functions are needed"
 1571 #endif
 1572 
 1573 #ifdef __cplusplus
 1574 }
 1575 #endif
 1576 
 1577 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
 1578 {
 1579     sljit_sw saved_reg_list[3];
 1580     sljit_sw saved_reg_count;
 1581 
 1582     CHECK_ERROR();
 1583     CHECK(check_sljit_emit_op0(compiler, op));
 1584 
 1585     op = GET_OPCODE(op);
 1586     switch (op) {
 1587     case SLJIT_BREAKPOINT:
 1588         FAIL_IF(push_inst(compiler, BKPT));
 1589         break;
 1590     case SLJIT_NOP:
 1591         FAIL_IF(push_inst(compiler, NOP));
 1592         break;
 1593     case SLJIT_LMUL_UW:
 1594     case SLJIT_LMUL_SW:
 1595         return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
 1596             | (reg_map[SLJIT_R1] << 16)
 1597             | (reg_map[SLJIT_R0] << 12)
 1598             | (reg_map[SLJIT_R0] << 8)
 1599             | reg_map[SLJIT_R1]);
 1600     case SLJIT_DIVMOD_UW:
 1601     case SLJIT_DIVMOD_SW:
 1602     case SLJIT_DIV_UW:
 1603     case SLJIT_DIV_SW:
 1604         SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
 1605         SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);
 1606 
 1607         saved_reg_count = 0;
 1608         if (compiler->scratches >= 4)
 1609             saved_reg_list[saved_reg_count++] = 3;
 1610         if (compiler->scratches >= 3)
 1611             saved_reg_list[saved_reg_count++] = 2;
 1612         if (op >= SLJIT_DIV_UW)
 1613             saved_reg_list[saved_reg_count++] = 1;
 1614 
 1615         if (saved_reg_count > 0) {
 1616             FAIL_IF(push_inst(compiler, 0xe52d0000 | (saved_reg_count >= 3 ? 16 : 8)
 1617                         | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
 1618             if (saved_reg_count >= 2) {
 1619                 SLJIT_ASSERT(saved_reg_list[1] < 8);
 1620                 FAIL_IF(push_inst(compiler, 0xe58d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
 1621             }
 1622             if (saved_reg_count >= 3) {
 1623                 SLJIT_ASSERT(saved_reg_list[2] < 8);
 1624                 FAIL_IF(push_inst(compiler, 0xe58d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
 1625             }
 1626         }
 1627 
 1628 #if defined(__GNUC__)
 1629         FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
 1630             ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
 1631 #else
 1632 #error "Software divmod functions are needed"
 1633 #endif
 1634 
 1635         if (saved_reg_count > 0) {
 1636             if (saved_reg_count >= 3) {
 1637                 SLJIT_ASSERT(saved_reg_list[2] < 8);
 1638                 FAIL_IF(push_inst(compiler, 0xe59d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
 1639             }
 1640             if (saved_reg_count >= 2) {
 1641                 SLJIT_ASSERT(saved_reg_list[1] < 8);
 1642                 FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
 1643             }
 1644             return push_inst(compiler, 0xe49d0000 | (saved_reg_count >= 3 ? 16 : 8)
 1645                         | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
 1646         }
 1647         return SLJIT_SUCCESS;
 1648     }
 1649 
 1650     return SLJIT_SUCCESS;
 1651 }
 1652 
 1653 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
 1654     sljit_s32 dst, sljit_sw dstw,
 1655     sljit_s32 src, sljit_sw srcw)
 1656 {
 1657     CHECK_ERROR();
 1658     CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
 1659     ADJUST_LOCAL_OFFSET(dst, dstw);
 1660     ADJUST_LOCAL_OFFSET(src, srcw);
 1661 
 1662     if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
 1663 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
 1664         if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
 1665             return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
 1666 #endif
 1667         return SLJIT_SUCCESS;
 1668     }
 1669 
 1670     switch (GET_OPCODE(op)) {
 1671     case SLJIT_MOV:
 1672     case SLJIT_MOV_U32:
 1673     case SLJIT_MOV_S32:
 1674     case SLJIT_MOV_P:
 1675         return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
 1676 
 1677     case SLJIT_MOV_U8:
 1678         return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
 1679 
 1680     case SLJIT_MOV_S8:
 1681         return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
 1682 
 1683     case SLJIT_MOV_U16:
 1684         return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
 1685 
 1686     case SLJIT_MOV_S16:
 1687         return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
 1688 
 1689     case SLJIT_NOT:
 1690         return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
 1691 
 1692     case SLJIT_NEG:
 1693 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 1694             || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 1695         compiler->skip_checks = 1;
 1696 #endif
 1697         return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);
 1698 
 1699     case SLJIT_CLZ:
 1700         return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
 1701     }
 1702 
 1703     return SLJIT_SUCCESS;
 1704 }
 1705 
 1706 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
 1707     sljit_s32 dst, sljit_sw dstw,
 1708     sljit_s32 src1, sljit_sw src1w,
 1709     sljit_s32 src2, sljit_sw src2w)
 1710 {
 1711     CHECK_ERROR();
 1712     CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
 1713     ADJUST_LOCAL_OFFSET(dst, dstw);
 1714     ADJUST_LOCAL_OFFSET(src1, src1w);
 1715     ADJUST_LOCAL_OFFSET(src2, src2w);
 1716 
 1717     if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
 1718         return SLJIT_SUCCESS;
 1719 
 1720     switch (GET_OPCODE(op)) {
 1721     case SLJIT_ADD:
 1722     case SLJIT_ADDC:
 1723     case SLJIT_SUB:
 1724     case SLJIT_SUBC:
 1725     case SLJIT_OR:
 1726     case SLJIT_XOR:
 1727         return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w);
 1728 
 1729     case SLJIT_MUL:
 1730         return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
 1731 
 1732     case SLJIT_AND:
 1733         return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);
 1734 
 1735     case SLJIT_SHL:
 1736     case SLJIT_LSHR:
 1737     case SLJIT_ASHR:
 1738         if (src2 & SLJIT_IMM) {
 1739             compiler->shift_imm = src2w & 0x1f;
 1740             return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
 1741         }
 1742         else {
 1743             compiler->shift_imm = 0x20;
 1744             return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
 1745         }
 1746     }
 1747 
 1748     return SLJIT_SUCCESS;
 1749 }
 1750 
 1751 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
 1752 {
 1753     CHECK_REG_INDEX(check_sljit_get_register_index(reg));
 1754     return reg_map[reg];
 1755 }
 1756 
 1757 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
 1758 {
 1759     CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
 1760     return (freg_map[reg] << 1);
 1761 }
 1762 
 1763 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
 1764     void *instruction, sljit_s32 size)
 1765 {
 1766     CHECK_ERROR();
 1767     CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
 1768 
 1769     return push_inst(compiler, *(sljit_uw*)instruction);
 1770 }
 1771 
 1772 /* --------------------------------------------------------------------- */
 1773 /*  Floating point operators                                             */
 1774 /* --------------------------------------------------------------------- */
 1775 
 1776 
 1777 #define FPU_LOAD (1 << 20)
 1778 #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
 1779     ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg_map[freg] << 12) | (offs))
 1780 #define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
 1781     ((opcode) | (mode) | (freg_map[dst] << 12) | freg_map[src1] | (freg_map[src2] << 16))
 1782 
 1783 static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
 1784 {
 1785     sljit_uw imm;
 1786     sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD));
 1787 
 1788     SLJIT_ASSERT(arg & SLJIT_MEM);
 1789     arg &= ~SLJIT_MEM;
 1790 
 1791     if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
 1792         FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)));
 1793         arg = TMP_REG2;
 1794         argw = 0;
 1795     }
 1796 
 1797     /* Fast loads and stores. */
 1798     if (arg) {
 1799         if (!(argw & ~0x3fc))
 1800             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
 1801         if (!(-argw & ~0x3fc))
 1802             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));
 1803 
 1804         imm = get_imm(argw & ~0x3fc);
 1805         if (imm) {
 1806             FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
 1807             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2));
 1808         }
 1809         imm = get_imm(-argw & ~0x3fc);
 1810         if (imm) {
 1811             argw = -argw;
 1812             FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
 1813             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2));
 1814         }
 1815     }
 1816 
 1817     if (arg) {
 1818         FAIL_IF(load_immediate(compiler, TMP_REG2, argw));
 1819         FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2)));
 1820     }
 1821     else
 1822         FAIL_IF(load_immediate(compiler, TMP_REG2, argw));
 1823 
 1824     return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0));
 1825 }
 1826 
 1827 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
 1828     sljit_s32 dst, sljit_sw dstw,
 1829     sljit_s32 src, sljit_sw srcw)
 1830 {
 1831     op ^= SLJIT_F32_OP;
 1832 
 1833     if (src & SLJIT_MEM) {
 1834         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
 1835         src = TMP_FREG1;
 1836     }
 1837 
 1838     FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0)));
 1839 
 1840     if (FAST_IS_REG(dst))
 1841         return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (freg_map[TMP_FREG1] << 16));
 1842 
 1843     /* Store the integer value from a VFP register. */
 1844     return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
 1845 }
 1846 
 1847 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
 1848     sljit_s32 dst, sljit_sw dstw,
 1849     sljit_s32 src, sljit_sw srcw)
 1850 {
 1851     sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 1852 
 1853     op ^= SLJIT_F32_OP;
 1854 
 1855     if (FAST_IS_REG(src))
 1856         FAIL_IF(push_inst(compiler, VMOV | RD(src) | (freg_map[TMP_FREG1] << 16)));
 1857     else if (src & SLJIT_MEM) {
 1858         /* Load the integer value into a VFP register. */
 1859         FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
 1860     }
 1861     else {
 1862         FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 1863         FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (freg_map[TMP_FREG1] << 16)));
 1864     }
 1865 
 1866     FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0)));
 1867 
 1868     if (dst & SLJIT_MEM)
 1869         return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw);
 1870     return SLJIT_SUCCESS;
 1871 }
 1872 
 1873 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
 1874     sljit_s32 src1, sljit_sw src1w,
 1875     sljit_s32 src2, sljit_sw src2w)
 1876 {
 1877     op ^= SLJIT_F32_OP;
 1878 
 1879     if (src1 & SLJIT_MEM) {
 1880         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
 1881         src1 = TMP_FREG1;
 1882     }
 1883 
 1884     if (src2 & SLJIT_MEM) {
 1885         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
 1886         src2 = TMP_FREG2;
 1887     }
 1888 
 1889     FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_F32_OP, src1, src2, 0)));
 1890     return push_inst(compiler, VMRS);
 1891 }
 1892 
 1893 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
 1894     sljit_s32 dst, sljit_sw dstw,
 1895     sljit_s32 src, sljit_sw srcw)
 1896 {
 1897     sljit_s32 dst_r;
 1898 
 1899     CHECK_ERROR();
 1900 
 1901     SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error);
 1902     SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
 1903 
 1904     dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 1905 
 1906     if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
 1907         op ^= SLJIT_F32_OP;
 1908 
 1909     if (src & SLJIT_MEM) {
 1910         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw));
 1911         src = dst_r;
 1912     }
 1913 
 1914     switch (GET_OPCODE(op)) {
 1915     case SLJIT_MOV_F64:
 1916         if (src != dst_r) {
 1917             if (dst_r != TMP_FREG1)
 1918                 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
 1919             else
 1920                 dst_r = src;
 1921         }
 1922         break;
 1923     case SLJIT_NEG_F64:
 1924         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
 1925         break;
 1926     case SLJIT_ABS_F64:
 1927         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
 1928         break;
 1929     case SLJIT_CONV_F64_FROM_F32:
 1930         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
 1931         op ^= SLJIT_F32_OP;
 1932         break;
 1933     }
 1934 
 1935     if (dst & SLJIT_MEM)
 1936         return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw);
 1937     return SLJIT_SUCCESS;
 1938 }
 1939 
 1940 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
 1941     sljit_s32 dst, sljit_sw dstw,
 1942     sljit_s32 src1, sljit_sw src1w,
 1943     sljit_s32 src2, sljit_sw src2w)
 1944 {
 1945     sljit_s32 dst_r;
 1946 
 1947     CHECK_ERROR();
 1948     CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
 1949     ADJUST_LOCAL_OFFSET(dst, dstw);
 1950     ADJUST_LOCAL_OFFSET(src1, src1w);
 1951     ADJUST_LOCAL_OFFSET(src2, src2w);
 1952 
 1953     op ^= SLJIT_F32_OP;
 1954 
 1955     dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 1956 
 1957     if (src2 & SLJIT_MEM) {
 1958         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
 1959         src2 = TMP_FREG2;
 1960     }
 1961 
 1962     if (src1 & SLJIT_MEM) {
 1963         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
 1964         src1 = TMP_FREG1;
 1965     }
 1966 
 1967     switch (GET_OPCODE(op)) {
 1968     case SLJIT_ADD_F64:
 1969         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
 1970         break;
 1971 
 1972     case SLJIT_SUB_F64:
 1973         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
 1974         break;
 1975 
 1976     case SLJIT_MUL_F64:
 1977         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
 1978         break;
 1979 
 1980     case SLJIT_DIV_F64:
 1981         FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
 1982         break;
 1983     }
 1984 
 1985     if (dst_r == TMP_FREG1)
 1986         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw));
 1987 
 1988     return SLJIT_SUCCESS;
 1989 }
 1990 
 1991 #undef FPU_LOAD
 1992 #undef EMIT_FPU_DATA_TRANSFER
 1993 
 1994 /* --------------------------------------------------------------------- */
 1995 /*  Other instructions                                                   */
 1996 /* --------------------------------------------------------------------- */
 1997 
 1998 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
 1999 {
 2000     CHECK_ERROR();
 2001     CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
 2002     ADJUST_LOCAL_OFFSET(dst, dstw);
 2003 
 2004     SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
 2005 
 2006     if (FAST_IS_REG(dst))
 2007         return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));
 2008 
 2009     /* Memory. */
 2010     return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);
 2011 }
 2012 
 2013 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
 2014 {
 2015     CHECK_ERROR();
 2016     CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
 2017     ADJUST_LOCAL_OFFSET(src, srcw);
 2018 
 2019     SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
 2020 
 2021     if (FAST_IS_REG(src))
 2022         FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
 2023     else
 2024         FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));
 2025 
 2026     return push_inst(compiler, BX | RM(TMP_REG2));
 2027 }
 2028 
 2029 /* --------------------------------------------------------------------- */
 2030 /*  Conditional instructions                                             */
 2031 /* --------------------------------------------------------------------- */
 2032 
 2033 static sljit_uw get_cc(sljit_s32 type)
 2034 {
 2035     switch (type) {
 2036     case SLJIT_EQUAL:
 2037     case SLJIT_MUL_NOT_OVERFLOW:
 2038     case SLJIT_EQUAL_F64:
 2039         return 0x00000000;
 2040 
 2041     case SLJIT_NOT_EQUAL:
 2042     case SLJIT_MUL_OVERFLOW:
 2043     case SLJIT_NOT_EQUAL_F64:
 2044         return 0x10000000;
 2045 
 2046     case SLJIT_LESS:
 2047     case SLJIT_LESS_F64:
 2048         return 0x30000000;
 2049 
 2050     case SLJIT_GREATER_EQUAL:
 2051     case SLJIT_GREATER_EQUAL_F64:
 2052         return 0x20000000;
 2053 
 2054     case SLJIT_GREATER:
 2055     case SLJIT_GREATER_F64:
 2056         return 0x80000000;
 2057 
 2058     case SLJIT_LESS_EQUAL:
 2059     case SLJIT_LESS_EQUAL_F64:
 2060         return 0x90000000;
 2061 
 2062     case SLJIT_SIG_LESS:
 2063         return 0xb0000000;
 2064 
 2065     case SLJIT_SIG_GREATER_EQUAL:
 2066         return 0xa0000000;
 2067 
 2068     case SLJIT_SIG_GREATER:
 2069         return 0xc0000000;
 2070 
 2071     case SLJIT_SIG_LESS_EQUAL:
 2072         return 0xd0000000;
 2073 
 2074     case SLJIT_OVERFLOW:
 2075     case SLJIT_UNORDERED_F64:
 2076         return 0x60000000;
 2077 
 2078     case SLJIT_NOT_OVERFLOW:
 2079     case SLJIT_ORDERED_F64:
 2080         return 0x70000000;
 2081 
 2082     default:
 2083         SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL);
 2084         return 0xe0000000;
 2085     }
 2086 }
 2087 
 2088 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
 2089 {
 2090     struct sljit_label *label;
 2091 
 2092     CHECK_ERROR_PTR();
 2093     CHECK_PTR(check_sljit_emit_label(compiler));
 2094 
 2095     if (compiler->last_label && compiler->last_label->size == compiler->size)
 2096         return compiler->last_label;
 2097 
 2098     label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
 2099     PTR_FAIL_IF(!label);
 2100     set_label(label, compiler);
 2101     return label;
 2102 }
 2103 
 2104 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
 2105 {
 2106     struct sljit_jump *jump;
 2107 
 2108     CHECK_ERROR_PTR();
 2109     CHECK_PTR(check_sljit_emit_jump(compiler, type));
 2110 
 2111     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2112     PTR_FAIL_IF(!jump);
 2113     set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
 2114     type &= 0xff;
 2115 
 2116     SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
 2117 
 2118 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 2119     if (type >= SLJIT_FAST_CALL)
 2120         PTR_FAIL_IF(prepare_blx(compiler));
 2121     PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
 2122         type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
 2123 
 2124     if (jump->flags & SLJIT_REWRITABLE_JUMP) {
 2125         jump->addr = compiler->size;
 2126         compiler->patches++;
 2127     }
 2128 
 2129     if (type >= SLJIT_FAST_CALL) {
 2130         jump->flags |= IS_BL;
 2131         PTR_FAIL_IF(emit_blx(compiler));
 2132     }
 2133 
 2134     if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
 2135         jump->addr = compiler->size;
 2136 #else
 2137     if (type >= SLJIT_FAST_CALL)
 2138         jump->flags |= IS_BL;
 2139     PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
 2140     PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type)));
 2141     jump->addr = compiler->size;
 2142 #endif
 2143     return jump;
 2144 }
 2145 
 2146 #ifdef __SOFTFP__
 2147 
 2148 static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src)
 2149 {
 2150     sljit_s32 stack_offset = 0;
 2151     sljit_s32 arg_count = 0;
 2152     sljit_s32 word_arg_offset = 0;
 2153     sljit_s32 float_arg_count = 0;
 2154     sljit_s32 types = 0;
 2155     sljit_s32 src_offset = 4 * sizeof(sljit_sw);
 2156     sljit_u8 offsets[4];
 2157 
 2158     if (src && FAST_IS_REG(*src))
 2159         src_offset = reg_map[*src] * sizeof(sljit_sw);
 2160 
 2161     arg_types >>= SLJIT_DEF_SHIFT;
 2162 
 2163     while (arg_types) {
 2164         types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
 2165 
 2166         switch (arg_types & SLJIT_DEF_MASK) {
 2167         case SLJIT_ARG_TYPE_F32:
 2168             offsets[arg_count] = (sljit_u8)stack_offset;
 2169             stack_offset += sizeof(sljit_f32);
 2170             arg_count++;
 2171             float_arg_count++;
 2172             break;
 2173         case SLJIT_ARG_TYPE_F64:
 2174             if (stack_offset & 0x7)
 2175                 stack_offset += sizeof(sljit_sw);
 2176             offsets[arg_count] = (sljit_u8)stack_offset;
 2177             stack_offset += sizeof(sljit_f64);
 2178             arg_count++;
 2179             float_arg_count++;
 2180             break;
 2181         default:
 2182             offsets[arg_count] = (sljit_u8)stack_offset;
 2183             stack_offset += sizeof(sljit_sw);
 2184             arg_count++;
 2185             word_arg_offset += sizeof(sljit_sw);
 2186             break;
 2187         }
 2188 
 2189         arg_types >>= SLJIT_DEF_SHIFT;
 2190     }
 2191 
 2192     if (stack_offset > 16)
 2193         FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_offset - 16) + 0x7) & ~0x7)));
 2194 
 2195     /* Process arguments in reversed direction. */
 2196     while (types) {
 2197         switch (types & SLJIT_DEF_MASK) {
 2198         case SLJIT_ARG_TYPE_F32:
 2199             arg_count--;
 2200             float_arg_count--;
 2201             stack_offset = offsets[arg_count];
 2202 
 2203             if (stack_offset < 16) {
 2204                 if (src_offset == stack_offset) {
 2205                     FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
 2206                     *src = TMP_REG1;
 2207                 }
 2208                 FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10)));
 2209             } else
 2210                 FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2)));
 2211             break;
 2212         case SLJIT_ARG_TYPE_F64:
 2213             arg_count--;
 2214             float_arg_count--;
 2215             stack_offset = offsets[arg_count];
 2216 
 2217             SLJIT_ASSERT((stack_offset & 0x7) == 0);
 2218 
 2219             if (stack_offset < 16) {
 2220                 if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) {
 2221                     FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
 2222                     *src = TMP_REG1;
 2223                 }
 2224                 FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count));
 2225             } else
 2226                 FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2)));
 2227             break;
 2228         default:
 2229             arg_count--;
 2230             word_arg_offset -= sizeof(sljit_sw);
 2231             stack_offset = offsets[arg_count];
 2232 
 2233             SLJIT_ASSERT(stack_offset >= word_arg_offset);
 2234 
 2235             if (stack_offset != word_arg_offset) {
 2236                 if (stack_offset < 16) {
 2237                     if (src_offset == stack_offset) {
 2238                         FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
 2239                         *src = TMP_REG1;
 2240                     }
 2241                     else if (src_offset == word_arg_offset) {
 2242                         *src = 1 + (stack_offset >> 2);
 2243                         src_offset = stack_offset;
 2244                     }
 2245                     FAIL_IF(push_inst(compiler, MOV | (stack_offset << 10) | (word_arg_offset >> 2)));
 2246                 } else
 2247                     FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (stack_offset - 16)));
 2248             }
 2249             break;
 2250         }
 2251 
 2252         types >>= SLJIT_DEF_SHIFT;
 2253     }
 2254 
 2255     return SLJIT_SUCCESS;
 2256 }
 2257 
 2258 static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
 2259 {
 2260     sljit_s32 stack_size = 0;
 2261 
 2262     if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32)
 2263         FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12)));
 2264     if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64)
 2265         FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0));
 2266 
 2267     arg_types >>= SLJIT_DEF_SHIFT;
 2268 
 2269     while (arg_types) {
 2270         switch (arg_types & SLJIT_DEF_MASK) {
 2271         case SLJIT_ARG_TYPE_F32:
 2272             stack_size += sizeof(sljit_f32);
 2273             break;
 2274         case SLJIT_ARG_TYPE_F64:
 2275             if (stack_size & 0x7)
 2276                 stack_size += sizeof(sljit_sw);
 2277             stack_size += sizeof(sljit_f64);
 2278             break;
 2279         default:
 2280             stack_size += sizeof(sljit_sw);
 2281             break;
 2282         }
 2283 
 2284         arg_types >>= SLJIT_DEF_SHIFT;
 2285     }
 2286 
 2287     if (stack_size <= 16)
 2288         return SLJIT_SUCCESS;
 2289 
 2290     return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_size - 16) + 0x7) & ~0x7));
 2291 }
 2292 
 2293 #else /* !__SOFTFP__ */
 2294 
 2295 static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
 2296 {
 2297     sljit_u32 remap = 0;
 2298     sljit_u32 offset = 0;
 2299     sljit_u32 new_offset, mask;
 2300 
 2301     /* Remove return value. */
 2302     arg_types >>= SLJIT_DEF_SHIFT;
 2303 
 2304     while (arg_types) {
 2305         if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) {
 2306             new_offset = 0;
 2307             mask = 1;
 2308 
 2309             while (remap & mask) {
 2310                 new_offset++;
 2311                 mask <<= 1;
 2312             }
 2313             remap |= mask;
 2314 
 2315             if (offset != new_offset)
 2316                 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
 2317                     0, (new_offset >> 1) + 1, (offset >> 1) + 1, 0) | ((new_offset & 0x1) ? 0x400000 : 0)));
 2318 
 2319             offset += 2;
 2320         }
 2321         else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) {
 2322             new_offset = 0;
 2323             mask = 3;
 2324 
 2325             while (remap & mask) {
 2326                 new_offset += 2;
 2327                 mask <<= 2;
 2328             }
 2329             remap |= mask;
 2330 
 2331             if (offset != new_offset)
 2332                 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, SLJIT_F32_OP, (new_offset >> 1) + 1, (offset >> 1) + 1, 0)));
 2333 
 2334             offset += 2;
 2335         }
 2336         arg_types >>= SLJIT_DEF_SHIFT;
 2337     }
 2338 
 2339     return SLJIT_SUCCESS;
 2340 }
 2341 
 2342 #endif /* __SOFTFP__ */
 2343 
 2344 #undef EMIT_FPU_OPERATION
 2345 
 2346 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
 2347     sljit_s32 arg_types)
 2348 {
 2349 #ifdef __SOFTFP__
 2350     struct sljit_jump *jump;
 2351 #endif
 2352 
 2353     CHECK_ERROR_PTR();
 2354     CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
 2355 
 2356 #ifdef __SOFTFP__
 2357     PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL));
 2358 
 2359 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2360         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2361     compiler->skip_checks = 1;
 2362 #endif
 2363 
 2364     jump = sljit_emit_jump(compiler, type);
 2365     PTR_FAIL_IF(jump == NULL);
 2366 
 2367     PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
 2368     return jump;
 2369 #else /* !__SOFTFP__ */
 2370     PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
 2371 
 2372 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2373         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2374     compiler->skip_checks = 1;
 2375 #endif
 2376 
 2377     return sljit_emit_jump(compiler, type);
 2378 #endif /* __SOFTFP__ */
 2379 }
 2380 
 2381 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
 2382 {
 2383     struct sljit_jump *jump;
 2384 
 2385     CHECK_ERROR();
 2386     CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
 2387     ADJUST_LOCAL_OFFSET(src, srcw);
 2388 
 2389     SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
 2390 
 2391     if (!(src & SLJIT_IMM)) {
 2392         if (FAST_IS_REG(src)) {
 2393             SLJIT_ASSERT(reg_map[src] != 14);
 2394             return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
 2395         }
 2396 
 2397         SLJIT_ASSERT(src & SLJIT_MEM);
 2398         FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
 2399         return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1));
 2400     }
 2401 
 2402     /* These jumps are converted to jump/call instructions when possible. */
 2403     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2404     FAIL_IF(!jump);
 2405     set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
 2406     jump->u.target = srcw;
 2407 
 2408 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 2409     if (type >= SLJIT_FAST_CALL)
 2410         FAIL_IF(prepare_blx(compiler));
 2411     FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
 2412     if (type >= SLJIT_FAST_CALL)
 2413         FAIL_IF(emit_blx(compiler));
 2414 #else
 2415     FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
 2416     FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
 2417 #endif
 2418     jump->addr = compiler->size;
 2419     return SLJIT_SUCCESS;
 2420 }
 2421 
 2422 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
 2423     sljit_s32 arg_types,
 2424     sljit_s32 src, sljit_sw srcw)
 2425 {
 2426     CHECK_ERROR();
 2427     CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
 2428 
 2429 #ifdef __SOFTFP__
 2430     if (src & SLJIT_MEM) {
 2431         FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
 2432         src = TMP_REG1;
 2433     }
 2434 
 2435     FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src));
 2436 
 2437 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2438         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2439     compiler->skip_checks = 1;
 2440 #endif
 2441 
 2442     FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
 2443 
 2444     return softfloat_post_call_with_args(compiler, arg_types);
 2445 #else /* !__SOFTFP__ */
 2446     FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
 2447 
 2448 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 2449         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 2450     compiler->skip_checks = 1;
 2451 #endif
 2452 
 2453     return sljit_emit_ijump(compiler, type, src, srcw);
 2454 #endif /* __SOFTFP__ */
 2455 }
 2456 
 2457 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
 2458     sljit_s32 dst, sljit_sw dstw,
 2459     sljit_s32 type)
 2460 {
 2461     sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
 2462     sljit_uw cc, ins;
 2463 
 2464     CHECK_ERROR();
 2465     CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
 2466     ADJUST_LOCAL_OFFSET(dst, dstw);
 2467 
 2468     op = GET_OPCODE(op);
 2469     cc = get_cc(type & 0xff);
 2470     dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
 2471 
 2472     if (op < SLJIT_ADD) {
 2473         FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0));
 2474         FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
 2475         if (dst & SLJIT_MEM)
 2476             return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
 2477         return SLJIT_SUCCESS;
 2478     }
 2479 
 2480     ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR));
 2481 
 2482     if (dst & SLJIT_MEM)
 2483         FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));
 2484 
 2485     FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
 2486 
 2487     if (op == SLJIT_AND)
 2488         FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
 2489 
 2490     if (dst & SLJIT_MEM)
 2491         FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));
 2492 
 2493     if (flags & SLJIT_SET_Z)
 2494         return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg));
 2495     return SLJIT_SUCCESS;
 2496 }
 2497 
 2498 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
 2499     sljit_s32 dst_reg,
 2500     sljit_s32 src, sljit_sw srcw)
 2501 {
 2502     sljit_uw cc, tmp;
 2503 
 2504     CHECK_ERROR();
 2505     CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
 2506 
 2507     dst_reg &= ~SLJIT_I32_OP;
 2508 
 2509     cc = get_cc(type & 0xff);
 2510 
 2511     if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
 2512         tmp = get_imm(srcw);
 2513         if (tmp)
 2514             return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);
 2515 
 2516         tmp = get_imm(~srcw);
 2517         if (tmp)
 2518             return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc);
 2519 
 2520 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
 2521         tmp = (sljit_uw) srcw;
 2522         FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
 2523         if (tmp <= 0xffff)
 2524             return SLJIT_SUCCESS;
 2525         return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
 2526 #else
 2527         FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 2528         src = TMP_REG1;
 2529 #endif
 2530     }
 2531 
 2532     return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src)) & ~COND_MASK) | cc);
 2533 }
 2534 
 2535 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
 2536     sljit_s32 reg,
 2537     sljit_s32 mem, sljit_sw memw)
 2538 {
 2539     sljit_s32 flags;
 2540     sljit_uw is_type1_transfer, inst;
 2541 
 2542     CHECK_ERROR();
 2543     CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
 2544 
 2545     is_type1_transfer = 1;
 2546 
 2547     switch (type & 0xff) {
 2548     case SLJIT_MOV:
 2549     case SLJIT_MOV_U32:
 2550     case SLJIT_MOV_S32:
 2551     case SLJIT_MOV_P:
 2552         flags = WORD_SIZE;
 2553         break;
 2554     case SLJIT_MOV_U8:
 2555         flags = BYTE_SIZE;
 2556         break;
 2557     case SLJIT_MOV_S8:
 2558         if (!(type & SLJIT_MEM_STORE))
 2559             is_type1_transfer = 0;
 2560         flags = BYTE_SIZE | SIGNED;
 2561         break;
 2562     case SLJIT_MOV_U16:
 2563         is_type1_transfer = 0;
 2564         flags = HALF_SIZE;
 2565         break;
 2566     case SLJIT_MOV_S16:
 2567         is_type1_transfer = 0;
 2568         flags = HALF_SIZE | SIGNED;
 2569         break;
 2570     default:
 2571         SLJIT_UNREACHABLE();
 2572         flags = WORD_SIZE;
 2573         break;
 2574     }
 2575 
 2576     if (!(type & SLJIT_MEM_STORE))
 2577         flags |= LOAD_DATA;
 2578 
 2579     SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags));
 2580 
 2581     if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
 2582         if (!is_type1_transfer && memw != 0)
 2583             return SLJIT_ERR_UNSUPPORTED;
 2584     }
 2585     else {
 2586         if (is_type1_transfer) {
 2587             if (memw > 4095 && memw < -4095)
 2588                 return SLJIT_ERR_UNSUPPORTED;
 2589         }
 2590         else {
 2591             if (memw > 255 && memw < -255)
 2592                 return SLJIT_ERR_UNSUPPORTED;
 2593         }
 2594     }
 2595 
 2596     if (type & SLJIT_MEM_SUPP)
 2597         return SLJIT_SUCCESS;
 2598 
 2599     if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
 2600         memw &= 0x3;
 2601 
 2602         inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | (memw << 7));
 2603 
 2604         if (is_type1_transfer)
 2605             inst |= (1 << 25);
 2606 
 2607         if (type & SLJIT_MEM_PRE)
 2608             inst |= (1 << 21);
 2609         else
 2610             inst ^= (1 << 24);
 2611 
 2612         return push_inst(compiler, inst);
 2613     }
 2614 
 2615     inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0);
 2616 
 2617     if (type & SLJIT_MEM_PRE)
 2618         inst |= (1 << 21);
 2619     else
 2620         inst ^= (1 << 24);
 2621 
 2622     if (is_type1_transfer) {
 2623         if (memw >= 0)
 2624             inst |= (1 << 23);
 2625         else
 2626             memw = -memw;
 2627 
 2628         return push_inst(compiler, inst | memw);
 2629     }
 2630 
 2631     if (memw >= 0)
 2632         inst |= (1 << 23);
 2633     else
 2634         memw = -memw;
 2635 
 2636     return push_inst(compiler, inst | TYPE2_TRANSFER_IMM(memw));
 2637 }
 2638 
 2639 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 2640 {
 2641     struct sljit_const *const_;
 2642     sljit_s32 reg;
 2643 
 2644     CHECK_ERROR_PTR();
 2645     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
 2646     ADJUST_LOCAL_OFFSET(dst, dstw);
 2647 
 2648     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
 2649     PTR_FAIL_IF(!const_);
 2650 
 2651     reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
 2652 
 2653 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 2654     PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), init_value));
 2655     compiler->patches++;
 2656 #else
 2657     PTR_FAIL_IF(emit_imm(compiler, reg, init_value));
 2658 #endif
 2659     set_const(const_, compiler);
 2660 
 2661     if (dst & SLJIT_MEM)
 2662         PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
 2663     return const_;
 2664 }
 2665 
 2666 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 2667 {
 2668     inline_set_jump_addr(addr, executable_offset, new_target, 1);
 2669 }
 2670 
 2671 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 2672 {
 2673     inline_set_const(addr, executable_offset, new_constant, 1);
 2674 }