w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

pixman-arm-simd-asm.h
Go to the documentation of this file.
1 /*
2  * Copyright © 2012 Raspberry Pi Foundation
3  * Copyright © 2012 RISC OS Open Ltd
4  *
5  * Permission to use, copy, modify, distribute, and sell this software and its
6  * documentation for any purpose is hereby granted without fee, provided that
7  * the above copyright notice appear in all copies and that both that
8  * copyright notice and this permission notice appear in supporting
9  * documentation, and that the name of the copyright holders not be used in
10  * advertising or publicity pertaining to distribution of the software without
11  * specific, written prior permission. The copyright holders make no
12  * representations about the suitability of this software for any purpose. It
13  * is provided "as is" without express or implied warranty.
14  *
15  * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16  * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
20  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
21  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
22  * SOFTWARE.
23  *
24  * Author: Ben Avison (bavison@riscosopen.org)
25  *
26  */
27 
28 /*
29  * Because the alignment of pixel data to cachelines, and even the number of
30  * cachelines per row can vary from row to row, and because of the need to
31  * preload each scanline once and only once, this prefetch strategy treats
32  * each row of pixels independently. When a pixel row is long enough, there
33  * are three distinct phases of prefetch:
34  * * an inner loop section, where each time a cacheline of data is
35  * processed, another cacheline is preloaded (the exact distance ahead is
36  * determined empirically using profiling results from lowlevel-blt-bench)
37  * * a leading section, where enough cachelines are preloaded to ensure no
38  * cachelines escape being preloaded when the inner loop starts
39  * * a trailing section, where a limited number (0 or more) of cachelines
40  * are preloaded to deal with data (if any) that hangs off the end of the
41  * last iteration of the inner loop, plus any trailing bytes that were not
42  * enough to make up one whole iteration of the inner loop
43  *
44  * There are (in general) three distinct code paths, selected between
45  * depending upon how long the pixel row is. If it is long enough that there
46  * is at least one iteration of the inner loop (as described above) then
47  * this is described as the "wide" case. If it is shorter than that, but
48  * there are still enough bytes output that there is at least one 16-byte-
49  * long, 16-byte-aligned write to the destination (the optimum type of
50  * write), then this is the "medium" case. If it is not even this long, then
51  * this is the "narrow" case, and there is no attempt to align writes to
52  * 16-byte boundaries. In the "medium" and "narrow" cases, all the
53  * cachelines containing data from the pixel row are prefetched up-front.
54  */
55 
56 /*
57  * Determine whether we put the arguments on the stack for debugging.
58  */
59 #undef DEBUG_PARAMS
60 
61 /*
62  * Bit flags for 'generate_composite_function' macro which are used
63  * to tune generated functions behavior.
64  */
72 .set FLAG_PROCESS_DOES_STORE, 8 /* usually because it needs to conditionally skip it */
80 .set FLAG_PROCESS_CORRUPTS_WK0, 128 /* if possible, use the specified register(s) instead so WK0 can hold number of leading pixels */
83 
84 /*
85  * Number of bytes by which to adjust preload offset of destination
86  * buffer (allows preload instruction to be moved before the load(s))
87  */
89 
90 /*
91  * Offset into stack where mask and source pointer/stride can be accessed.
92  */
93 #ifdef DEBUG_PARAMS
94 .set ARGS_STACK_OFFSET, (9*4+9*4)
95 #else
96 .set ARGS_STACK_OFFSET, (9*4)
97 #endif
98 
99 /*
100  * Offset into stack where space allocated during init macro can be accessed.
101  */
103 
104 /*
105  * Constants for selecting preferable prefetch type.
106  */
109 
110 /*
111  * Definitions of macros for load/store of pixel data.
112  */
113 
114 .macro pixldst op, cond=al, numbytes, reg0, reg1, reg2, reg3, base, unaligned=0
115  .if numbytes == 16
116  .if unaligned == 1
117  op&r&cond WK&reg0, [base], #4
118  op&r&cond WK&reg1, [base], #4
119  op&r&cond WK&reg2, [base], #4
120  op&r&cond WK&reg3, [base], #4
121  .else
122  op&m&cond&ia base!, {WK&reg0,WK&reg1,WK&reg2,WK&reg3}
123  .endif
124  .elseif numbytes == 8
125  .if unaligned == 1
126  op&r&cond WK&reg0, [base], #4
127  op&r&cond WK&reg1, [base], #4
128  .else
129  op&m&cond&ia base!, {WK&reg0,WK&reg1}
130  .endif
131  .elseif numbytes == 4
132  op&r&cond WK&reg0, [base], #4
133  .elseif numbytes == 2
134  op&r&cond&h WK&reg0, [base], #2
135  .elseif numbytes == 1
136  op&r&cond&b WK&reg0, [base], #1
137  .else
138  .error "unsupported size: numbytes"
139  .endif
140 .endm
141 
142 .macro pixst_baseupdated cond, numbytes, reg0, reg1, reg2, reg3, base
143  .if numbytes == 16
144  stm&cond&db base, {WK&reg0,WK&reg1,WK&reg2,WK&reg3}
145  .elseif numbytes == 8
146  stm&cond&db base, {WK&reg0,WK&reg1}
147  .elseif numbytes == 4
148  str&cond WK&reg0, [base, #-4]
149  .elseif numbytes == 2
150  str&cond&h WK&reg0, [base, #-2]
151  .elseif numbytes == 1
152  str&cond&b WK&reg0, [base, #-1]
153  .else
154  .error "unsupported size: numbytes"
155  .endif
156 .endm
157 
159  pixldst ld, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base, unaligned
160 .endm
161 
162 .macro pixst cond, numbytes, firstreg, base
164  pixst_baseupdated cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base
165  .else
166  pixldst st, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base
167  .endif
168 .endm
169 
170 .macro PF a, x:vararg
172  a x
173  .endif
174 .endm
175 
176 
177 .macro preload_leading_step1 bpp, ptr, base
178 /* If the destination is already 16-byte aligned, then we need to preload
179  * between 0 and prefetch_distance (inclusive) cache lines ahead so there
180  * are no gaps when the inner loop starts.
181  */
182  .if bpp > 0
183  PF bic, ptr, base, #31
184  .set OFFSET, 0
185  .rept prefetch_distance+1
186  PF pld, [ptr, #OFFSET]
187  .set OFFSET, OFFSET+32
188  .endr
189  .endif
190 .endm
191 
192 .macro preload_leading_step2 bpp, bpp_shift, ptr, base
193 /* However, if the destination is not 16-byte aligned, we may need to
194  * preload more cache lines than that. The question we need to ask is:
195  * are the bytes corresponding to the leading pixels more than the amount
196  * by which the source pointer will be rounded down for preloading, and if
197  * so, by how many cache lines? Effectively, we want to calculate
198  * leading_bytes = ((-dst)&15)*src_bpp/dst_bpp
199  * inner_loop_offset = (src+leading_bytes)&31
200  * extra_needed = leading_bytes - inner_loop_offset
201  * and test if extra_needed is <= 0, <= 32, or > 32 (where > 32 is only
202  * possible when there are 4 src bytes for every 1 dst byte).
203  */
204  .if bpp > 0
205  .ifc base,DST
206  /* The test can be simplified further when preloading the destination */
207  PF tst, base, #16
208  PF beq, 61f
209  .else
210  .if bpp/dst_w_bpp == 4
212  PF and, SCRATCH, SCRATCH, #31
214  PF sub, SCRATCH, SCRATCH, #1 /* so now ranges are -16..-1 / 0..31 / 32..63 */
215  PF movs, SCRATCH, SCRATCH, lsl #32-6 /* so this sets NC / nc / Nc */
216  PF bcs, 61f
217  PF bpl, 60f
218  PF pld, [ptr, #32*(prefetch_distance+2)]
219  .else
220  PF mov, SCRATCH, base, lsl #32-5
223  PF bls, 61f
224  .endif
226 60: PF pld, [ptr, #32*(prefetch_distance+1)]
227 61:
228  .endif
229 .endm
230 
231 #define IS_END_OF_GROUP(INDEX,SIZE) ((SIZE) < 2 || ((INDEX) & ~((INDEX)+1)) & ((SIZE)/2))
232 .macro preload_middle bpp, base, scratch_holds_offset
233  .if bpp > 0
234  /* prefetch distance = 256/bpp, stm distance = 128/dst_w_bpp */
235  .if IS_END_OF_GROUP(SUBBLOCK,256/128*dst_w_bpp/bpp)
236  .if scratch_holds_offset
237  PF pld, [base, SCRATCH]
238  .else
239  PF bic, SCRATCH, base, #31
240  PF pld, [SCRATCH, #32*prefetch_distance]
241  .endif
242  .endif
243  .endif
244 .endm
245 
246 .macro preload_trailing bpp, bpp_shift, base
247  .if bpp > 0
248  .if bpp*pix_per_block > 256
249  /* Calculations are more complex if more than one fetch per block */
250  PF and, WK1, base, #31
251  PF add, WK1, WK1, WK0, lsl #bpp_shift
252  PF add, WK1, WK1, #32*(bpp*pix_per_block/256-1)*(prefetch_distance+1)
253  PF bic, SCRATCH, base, #31
254 80: PF pld, [SCRATCH, #32*(prefetch_distance+1)]
255  PF add, SCRATCH, SCRATCH, #32
256  PF subs, WK1, WK1, #32
257  PF bhi, 80b
258  .else
259  /* If exactly one fetch per block, then we need either 0, 1 or 2 extra preloads */
260  PF mov, SCRATCH, base, lsl #32-5
261  PF adds, SCRATCH, SCRATCH, X, lsl #32-5+bpp_shift
263  /* The instruction above has two effects: ensures Z is only
264  * set if C was clear (so Z indicates that both shifted quantities
265  * were 0), and clears C if Z was set (so C indicates that the sum
266  * of the shifted quantities was greater and not equal to 32) */
267  PF beq, 82f
268  PF bic, SCRATCH, base, #31
269  PF bcc, 81f
270  PF pld, [SCRATCH, #32*(prefetch_distance+2)]
271 81: PF pld, [SCRATCH, #32*(prefetch_distance+1)]
272 82:
273  .endif
274  .endif
275 .endm
276 
277 
278 .macro preload_line narrow_case, bpp, bpp_shift, base
279 /* "narrow_case" - just means that the macro was invoked from the "narrow"
280  * code path rather than the "medium" one - because in the narrow case,
281  * the row of pixels is known to output no more than 30 bytes, then
282  * (assuming the source pixels are no wider than the the destination
283  * pixels) they cannot possibly straddle more than 2 32-byte cachelines,
284  * meaning there's no need for a loop.
285  * "bpp" - number of bits per pixel in the channel (source, mask or
286  * destination) that's being preloaded, or 0 if this channel is not used
287  * for reading
288  * "bpp_shift" - log2 of ("bpp"/8) (except if "bpp"=0 of course)
289  * "base" - base address register of channel to preload (SRC, MASK or DST)
290  */
291  .if bpp > 0
292  .if narrow_case && (bpp <= dst_w_bpp)
293  /* In these cases, each line for each channel is in either 1 or 2 cache lines */
294  PF bic, WK0, base, #31
295  PF pld, [WK0]
296  PF add, WK1, base, X, LSL #bpp_shift
297  PF sub, WK1, WK1, #1
298  PF bic, WK1, WK1, #31
299  PF cmp, WK1, WK0
300  PF beq, 90f
301  PF pld, [WK1]
302 90:
303  .else
304  PF bic, WK0, base, #31
305  PF pld, [WK0]
306  PF add, WK1, base, X, lsl #bpp_shift
307  PF sub, WK1, WK1, #1
308  PF bic, WK1, WK1, #31
309  PF cmp, WK1, WK0
310  PF beq, 92f
311 91: PF add, WK0, WK0, #32
312  PF cmp, WK0, WK1
313  PF pld, [WK0]
314  PF bne, 91b
315 92:
316  .endif
317  .endif
318 .endm
319 
320 
321 .macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
323  .if decrementx
325  .endif
327  .if !((flags) & FLAG_PROCESS_DOES_STORE)
328  pixst cond, numbytes, firstreg, DST
329  .endif
330 .endm
331 
332 .macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
333  .if (flags) & FLAG_BRANCH_OVER
334  .ifc cond,mi
335  bpl 100f
336  .endif
337  .ifc cond,cs
338  bcc 100f
339  .endif
340  .ifc cond,ne
341  beq 100f
342  .endif
343  conditional_process1_helper , process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
344 100:
345  .else
346  conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
347  .endif
348 .endm
349 
350 .macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx
352  /* Can't interleave reads and writes */
353  test
354  conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx
356  test
357  .endif
358  conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx
359  .else
360  /* Can interleave reads and writes for better scheduling */
361  test
362  process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0
363  process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0
364  .if decrementx
365  sub&cond1 X, X, #8*numbytes1/dst_w_bpp
366  sub&cond2 X, X, #8*numbytes2/dst_w_bpp
367  .endif
368  process_tail cond1, numbytes1, firstreg1
369  process_tail cond2, numbytes2, firstreg2
370  pixst cond1, numbytes1, firstreg1, DST
371  pixst cond2, numbytes2, firstreg2, DST
372  .endif
373 .endm
374 
375 
376 .macro test_bits_1_0_ptr
378  movs SCRATCH, X, lsl #32-1 /* C,N = bits 1,0 of DST */
379  .else
380  movs SCRATCH, WK0, lsl #32-1 /* C,N = bits 1,0 of DST */
381  .endif
382 .endm
383 
384 .macro test_bits_3_2_ptr
386  movs SCRATCH, X, lsl #32-3 /* C,N = bits 3, 2 of DST */
387  .else
388  movs SCRATCH, WK0, lsl #32-3 /* C,N = bits 3, 2 of DST */
389  .endif
390 .endm
391 
392 .macro leading_15bytes process_head, process_tail
393  /* On entry, WK0 bits 0-3 = number of bytes until destination is 16-byte aligned */
394  .set DECREMENT_X, 1
396  .set DECREMENT_X, 0
397  sub X, X, WK0, lsr #dst_bpp_shift
398  str X, [sp, #LINE_SAVED_REG_COUNT*4]
399  mov X, WK0
400  .endif
401  /* Use unaligned loads in all cases for simplicity */
402  .if dst_w_bpp == 8
403  conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X
404  .elseif dst_w_bpp == 16
405  test_bits_1_0_ptr
406  conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X
407  .endif
408  conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X
410  ldr X, [sp, #LINE_SAVED_REG_COUNT*4]
411  .endif
412 .endm
413 
414 .macro test_bits_3_2_pix
415  movs SCRATCH, X, lsl #dst_bpp_shift+32-3
416 .endm
417 
418 .macro test_bits_1_0_pix
419  .if dst_w_bpp == 8
420  movs SCRATCH, X, lsl #dst_bpp_shift+32-1
421  .else
422  movs SCRATCH, X, lsr #1
423  .endif
424 .endm
425 
426 .macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask
427  conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0
428  .if dst_w_bpp == 16
429  test_bits_1_0_pix
430  conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0
431  .elseif dst_w_bpp == 8
432  conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0
433  .endif
434 .endm
435 
436 
437 .macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment
438 110:
439  .set SUBBLOCK, 0 /* this is a count of STMs; there can be up to 8 STMs per block */
440  .rept pix_per_block*dst_w_bpp/128
442  .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
443  preload_middle src_bpp, SRC, 1
444  .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
445  preload_middle mask_bpp, MASK, 1
446  .else
447  preload_middle src_bpp, SRC, 0
448  preload_middle mask_bpp, MASK, 0
449  .endif
450  .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0) && (((flags) & FLAG_NO_PRELOAD_DST) == 0)
451  /* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that
452  * destination prefetches are 32-byte aligned. It's also the easiest channel to offset
453  * preloads for, to achieve staggered prefetches for multiple channels, because there are
454  * always two STMs per prefetch, so there is always an opposite STM on which to put the
455  * preload. Note, no need to BIC the base register here */
456  PF pld, [DST, #32*prefetch_distance - dst_alignment]
457  .endif
458  process_tail , 16, 0
459  .if !((flags) & FLAG_PROCESS_DOES_STORE)
460  pixst , 16, 0, DST
461  .endif
462  .set SUBBLOCK, SUBBLOCK+1
463  .endr
464  subs X, X, #pix_per_block
465  bhs 110b
466 .endm
467 
468 .macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask
469  /* Destination now 16-byte aligned; we have at least one block before we have to stop preloading */
470  .if dst_r_bpp > 0
471  tst DST, #16
472  bne 111f
474  b 112f
475 111:
476  .endif
478 112:
479  /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
480  .if (src_bpp*pix_per_block > 256) || (mask_bpp*pix_per_block > 256) || (dst_r_bpp*pix_per_block > 256)
481  PF and, WK0, X, #pix_per_block-1
482  .endif
483  preload_trailing src_bpp, src_bpp_shift, SRC
484  preload_trailing mask_bpp, mask_bpp_shift, MASK
485  .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
486  preload_trailing dst_r_bpp, dst_bpp_shift, DST
487  .endif
489  /* The remainder of the line is handled identically to the medium case */
490  medium_case_inner_loop_and_trailing_pixels process_head, process_tail,, exit_label, unaligned_src, unaligned_mask
491 .endm
492 
493 .macro medium_case_inner_loop_and_trailing_pixels process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask
494 120:
496  process_tail , 16, 0
497  .if !((flags) & FLAG_PROCESS_DOES_STORE)
498  pixst , 16, 0, DST
499  .endif
500  subs X, X, #128/dst_w_bpp
501  bhs 120b
502  /* Trailing pixels */
503  tst X, #128/dst_w_bpp - 1
506 .endm
507 
508 .macro narrow_case_inner_loop_and_trailing_pixels process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask
509  tst X, #16*8/dst_w_bpp
510  conditional_process1 ne, process_head, process_tail, 16, 0, unaligned_src, unaligned_mask, 0
511  /* Trailing pixels */
512  /* In narrow case, it's relatively unlikely to be aligned, so let's do without a branch here */
514 .endm
515 
517  /* Note that if we're reading the destination, it's already guaranteed to be aligned at this point */
518  .if mask_bpp == 8 || mask_bpp == 16
519  tst MASK, #3
520  bne 141f
521  .endif
522  .if src_bpp == 8 || src_bpp == 16
523  tst SRC, #3
524  bne 140f
525  .endif
527  .if src_bpp == 8 || src_bpp == 16
528  b exit_label
529 140:
531  .endif
532  .if mask_bpp == 8 || mask_bpp == 16
533  b exit_label
534 141:
535  .if src_bpp == 8 || src_bpp == 16
536  tst SRC, #3
537  bne 142f
538  .endif
540  .if src_bpp == 8 || src_bpp == 16
541  b exit_label
542 142:
544  .endif
545  .endif
546 .endm
547 
548 
550  .if vars_spilled
551  /* Sadly, GAS doesn't seem have an equivalent of the DCI directive? */
552  /* This is ldmia sp,{} */
553  .word 0xE89D0000 | LINE_SAVED_REGS
554  .endif
555  subs Y, Y, #1
556  .if vars_spilled
557  .if (LINE_SAVED_REGS) & (1<<1)
558  str Y, [sp]
559  .endif
560  .endif
561  add DST, DST, STRIDE_D
562  .if src_bpp > 0
563  add SRC, SRC, STRIDE_S
564  .endif
565  .if mask_bpp > 0
566  add MASK, MASK, STRIDE_M
567  .endif
568  .if restore_x
569  mov X, ORIG_W
570  .endif
571  bhs loop_label
572  .ifc "last_one",""
573  .if vars_spilled
574  b 197f
575  .else
576  b 198f
577  .endif
578  .else
580  b 198f
581  .endif
582  .endif
583 .endm
584 
585 
586 .macro generate_composite_function fname, \
587  src_bpp_, \
588  mask_bpp_, \
589  dst_w_bpp_, \
590  flags_, \
591  prefetch_distance_, \
592  init, \
593  newline, \
594  cleanup, \
595  process_head, \
596  process_tail, \
597  process_inner_loop
598 
599  pixman_asm_function fname
600 
601 /*
602  * Make some macro arguments globally visible and accessible
603  * from other macros
604  */
605  .set src_bpp, src_bpp_
606  .set mask_bpp, mask_bpp_
607  .set dst_w_bpp, dst_w_bpp_
608  .set flags, flags_
610 
611 /*
612  * Select prefetch type for this function.
613  */
614  .if prefetch_distance == 0
616  .else
618  .endif
619 
620  .if src_bpp == 32
621  .set src_bpp_shift, 2
622  .elseif src_bpp == 24
623  .set src_bpp_shift, 0
624  .elseif src_bpp == 16
625  .set src_bpp_shift, 1
626  .elseif src_bpp == 8
627  .set src_bpp_shift, 0
628  .elseif src_bpp == 0
629  .set src_bpp_shift, -1
630  .else
631  .error "requested src bpp (src_bpp) is not supported"
632  .endif
633 
634  .if mask_bpp == 32
635  .set mask_bpp_shift, 2
636  .elseif mask_bpp == 24
637  .set mask_bpp_shift, 0
638  .elseif mask_bpp == 8
639  .set mask_bpp_shift, 0
640  .elseif mask_bpp == 0
641  .set mask_bpp_shift, -1
642  .else
643  .error "requested mask bpp (mask_bpp) is not supported"
644  .endif
645 
646  .if dst_w_bpp == 32
647  .set dst_bpp_shift, 2
648  .elseif dst_w_bpp == 24
649  .set dst_bpp_shift, 0
650  .elseif dst_w_bpp == 16
651  .set dst_bpp_shift, 1
652  .elseif dst_w_bpp == 8
653  .set dst_bpp_shift, 0
654  .else
655  .error "requested dst bpp (dst_w_bpp) is not supported"
656  .endif
657 
658  .if (((flags) & FLAG_DST_READWRITE) != 0)
659  .set dst_r_bpp, dst_w_bpp
660  .else
661  .set dst_r_bpp, 0
662  .endif
663 
665  .if src_bpp != 0
666  .if 32*8/src_bpp > pix_per_block
667  .set pix_per_block, 32*8/src_bpp
668  .endif
669  .endif
670  .if mask_bpp != 0
671  .if 32*8/mask_bpp > pix_per_block
672  .set pix_per_block, 32*8/mask_bpp
673  .endif
674  .endif
675  .if dst_r_bpp != 0
676  .if 32*8/dst_r_bpp > pix_per_block
677  .set pix_per_block, 32*8/dst_r_bpp
678  .endif
679  .endif
680 
681 /* The standard entry conditions set up by pixman-arm-common.h are:
682  * r0 = width (pixels)
683  * r1 = height (rows)
684  * r2 = pointer to top-left pixel of destination
685  * r3 = destination stride (pixels)
686  * [sp] = source pixel value, or pointer to top-left pixel of source
687  * [sp,#4] = 0 or source stride (pixels)
688  * The following arguments are unused for non-mask operations
689  * [sp,#8] = mask pixel value, or pointer to top-left pixel of mask
690  * [sp,#12] = 0 or mask stride (pixels)
691  */
692 
693 /*
694  * Assign symbolic names to registers
695  */
696  X .req r0 /* pixels to go on this line */
697  Y .req r1 /* lines to go */
698  DST .req r2 /* destination pixel pointer */
699  STRIDE_D .req r3 /* destination stride (bytes, minus width) */
700  SRC .req r4 /* source pixel pointer */
701  STRIDE_S .req r5 /* source stride (bytes, minus width) */
702  MASK .req r6 /* mask pixel pointer (if applicable) */
703  STRIDE_M .req r7 /* mask stride (bytes, minus width) */
704  WK0 .req r8 /* pixel data registers */
705  WK1 .req r9
706  WK2 .req r10
707  WK3 .req r11
708  SCRATCH .req r12
709  ORIG_W .req r14 /* width (pixels) */
710 
711  push {r4-r11, lr} /* save all registers */
712 
713  subs Y, Y, #1
714  blo 199f
715 
716 #ifdef DEBUG_PARAMS
717  sub sp, sp, #9*4
718 #endif
719 
720  .if src_bpp > 0
721  ldr SRC, [sp, #ARGS_STACK_OFFSET]
722  ldr STRIDE_S, [sp, #ARGS_STACK_OFFSET+4]
723  .endif
724  .if mask_bpp > 0
725  ldr MASK, [sp, #ARGS_STACK_OFFSET+8]
726  ldr STRIDE_M, [sp, #ARGS_STACK_OFFSET+12]
727  .endif
728 
729 #ifdef DEBUG_PARAMS
730  add Y, Y, #1
731  stmia sp, {r0-r7,pc}
732  sub Y, Y, #1
733 #endif
734 
735  init
736 
738  /* Reserve a word in which to store X during leading pixels */
739  sub sp, sp, #4
742  .endif
743 
744  lsl STRIDE_D, #dst_bpp_shift /* stride in bytes */
745  sub STRIDE_D, STRIDE_D, X, lsl #dst_bpp_shift
746  .if src_bpp > 0
747  lsl STRIDE_S, #src_bpp_shift
748  sub STRIDE_S, STRIDE_S, X, lsl #src_bpp_shift
749  .endif
750  .if mask_bpp > 0
751  lsl STRIDE_M, #mask_bpp_shift
752  sub STRIDE_M, STRIDE_M, X, lsl #mask_bpp_shift
753  .endif
754 
755  /* Are we not even wide enough to have one 16-byte aligned 16-byte block write? */
756  cmp X, #2*16*8/dst_w_bpp - 1
757  blo 170f
758  .if src_bpp || mask_bpp || dst_r_bpp /* Wide and medium cases are the same for fill */
759  /* To preload ahead on the current line, we need at least (prefetch_distance+2) 32-byte blocks on all prefetch channels */
761  blo 160f
762 
763  /* Wide case */
764  /* Adjust X so that the decrement instruction can also test for
765  * inner loop termination. We want it to stop when there are
766  * (prefetch_distance+1) complete blocks to go. */
768  mov ORIG_W, X
770  /* This is stmdb sp!,{} */
771  .word 0xE92D0000 | LINE_SAVED_REGS
772  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
773  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
774  .endif
775 151: /* New line */
776  newline
777  preload_leading_step1 src_bpp, WK1, SRC
778  preload_leading_step1 mask_bpp, WK2, MASK
779  .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
780  preload_leading_step1 dst_r_bpp, WK3, DST
781  .endif
782 
783  ands WK0, DST, #15
784  beq 154f
785  rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */
786 
787  preload_leading_step2 src_bpp, src_bpp_shift, WK1, SRC
788  preload_leading_step2 mask_bpp, mask_bpp_shift, WK2, MASK
789  .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
790  preload_leading_step2 dst_r_bpp, dst_bpp_shift, WK3, DST
791  .endif
792 
793  leading_15bytes process_head, process_tail
794 
795 154: /* Destination now 16-byte aligned; we have at least one prefetch on each channel as well as at least one 16-byte output block */
796  .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
797  and SCRATCH, SRC, #31
799  .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
800  and SCRATCH, MASK, #31
802  .endif
803  .ifc "process_inner_loop",""
804  switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, wide_case_inner_loop, 157f
805  .else
806  switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, process_inner_loop, 157f
807  .endif
808 
809 157: /* Check for another line */
812  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
813  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
814  .endif
815  .endif
816 
817  .ltorg
818 
819 160: /* Medium case */
820  mov ORIG_W, X
822  /* This is stmdb sp!,{} */
823  .word 0xE92D0000 | LINE_SAVED_REGS
824  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
825  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
826  .endif
827 161: /* New line */
828  newline
829  preload_line 0, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */
830  preload_line 0, mask_bpp, mask_bpp_shift, MASK
831  .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
832  preload_line 0, dst_r_bpp, dst_bpp_shift, DST
833  .endif
834 
835  sub X, X, #128/dst_w_bpp /* simplifies inner loop termination */
836  ands WK0, DST, #15
837  beq 164f
838  rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */
839 
840  leading_15bytes process_head, process_tail
841 
842 164: /* Destination now 16-byte aligned; we have at least one 16-byte output block */
843  switch_on_alignment medium_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 167f
844 
845 167: /* Check for another line */
847 
848  .ltorg
849 
850 170: /* Narrow case, less than 31 bytes, so no guarantee of at least one 16-byte block */
851  .if dst_w_bpp < 32
852  mov ORIG_W, X
853  .endif
855  /* This is stmdb sp!,{} */
856  .word 0xE92D0000 | LINE_SAVED_REGS
857  .endif
858 171: /* New line */
859  newline
860  preload_line 1, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */
861  preload_line 1, mask_bpp, mask_bpp_shift, MASK
862  .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
863  preload_line 1, dst_r_bpp, dst_bpp_shift, DST
864  .endif
865 
866  .if dst_w_bpp == 8
867  tst DST, #3
868  beq 174f
869 172: subs X, X, #1
870  blo 177f
871  process_head , 1, 0, 1, 1, 0
872  process_tail , 1, 0
873  .if !((flags) & FLAG_PROCESS_DOES_STORE)
874  pixst , 1, 0, DST
875  .endif
876  tst DST, #3
877  bne 172b
878  .elseif dst_w_bpp == 16
879  tst DST, #2
880  beq 174f
881  subs X, X, #1
882  blo 177f
883  process_head , 2, 0, 1, 1, 0
884  process_tail , 2, 0
885  .if !((flags) & FLAG_PROCESS_DOES_STORE)
886  pixst , 2, 0, DST
887  .endif
888  .endif
889 
890 174: /* Destination now 4-byte aligned; we have 0 or more output bytes to go */
891  switch_on_alignment narrow_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 177f
892 
893 177: /* Check for another line */
896  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
897  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
898  .endif
899 
900 197:
902  add sp, sp, #LINE_SAVED_REG_COUNT*4
903  .endif
904 198:
908  add sp, sp, #4
909  .endif
910 
911  cleanup
912 
913 #ifdef DEBUG_PARAMS
914  add sp, sp, #9*4 /* junk the debug copy of arguments */
915 #endif
916 199:
917  pop {r4-r11, pc} /* exit */
918 
919  .ltorg
920 
921  .unreq X
922  .unreq Y
923  .unreq DST
924  .unreq STRIDE_D
925  .unreq SRC
926  .unreq STRIDE_S
927  .unreq MASK
928  .unreq STRIDE_M
929  .unreq WK0
930  .unreq WK1
931  .unreq WK2
932  .unreq WK3
933  .unreq SCRATCH
934  .unreq ORIG_W
935  .endfunc
936 .endm
937 
938 .macro line_saved_regs x:vararg
939  .set LINE_SAVED_REGS, 0
940  .set LINE_SAVED_REG_COUNT, 0
941  .irp SAVED_REG,x
942  .ifc "SAVED_REG","Y"
943  .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<1)
944  .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
945  .endif
946  .ifc "SAVED_REG","STRIDE_D"
947  .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<3)
948  .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
949  .endif
950  .ifc "SAVED_REG","STRIDE_S"
951  .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<5)
952  .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
953  .endif
954  .ifc "SAVED_REG","STRIDE_M"
955  .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<7)
956  .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
957  .endif
958  .ifc "SAVED_REG","ORIG_W"
959  .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<14)
960  .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
961  .endif
962  .endr
963 .endm
964 
965 .macro nop_macro x:vararg
966 .endm
static integer lr
Definition: aptex.h:532
int cmp(const void *p, const void *q)
Definition: bkmk2uni.c:1611
#define end_of_line
Definition: devnag.c:315
long pc
Definition: disdvi.c:114
int h
Definition: dviconv.c:9
mpz_t * f
Definition: gen-fib.c:34
#define r1
#define r7
#define r3
#define r5
#define r2
#define r4
#define r0
#define r6
action
Definition: epdf.c:220
#define macro
Definition: ctangleboot.c:153
#define test
Definition: tie.c:129
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld pixld[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC(lowbit *8/dst_w_bpp)
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else flags_
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing mask_bpp
set set set set set set set set set set set set set set set FLAG_PROCESS_PRESERVES_WK0
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif endif
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop process_head
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF bic
set set set set set set set set set set set set set set set set set set FLAG_NO_PRELOAD_DST
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing src_bpp
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp dst_w_bpp
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line restore_x
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 bpp_shift
set set set set FLAG_BRANCH_OVER
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp SCRATCH
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else process_inner_loop pixman_asm_function fname set src_bpp_ set mask_bpp_ set dst_w_bpp_ set flags_ set prefetch_distance_ if PREFETCH_TYPE_NONE else set PREFETCH_TYPE_CURRENT
set set set set set set set set set set set set set set set set set set set set *set set PREFETCH_TYPE_NONE
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS b
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported firstreg
set set set set set set set set set set set set set set FLAG_PROCESS_PRESERVES_SCRATCH
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF movs
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else src_bpp_
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst cond
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst DST
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else mask_bpp_
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else process_inner_loop pixman_asm_function fname set src_bpp_ set mask_bpp_ set dst_w_bpp_ set flags_ set prefetch_distance_ if PREFETCH_TYPE_NONE else set PREFETCH_TYPE_STANDARD endif if elseif elseif elseif elseif else error requested src elseif elseif elseif else error requested mask elseif elseif elseif else error requested dst dst_w_bpp else set dst_r_bpp
set set set set set set set set set set set set set set set set set set set set *set set set PREFETCH_TYPE_STANDARD
set set set FLAG_COND_EXEC
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst unaligned
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF add
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst reg3
set set set set set set set set set set set set FLAG_SPILL_LINE_VARS
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop unaligned_src
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else cleanup
set set set set set set set set set set set set set set set set set set set DST_PRELOAD_BIAS
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add SRC
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF sub
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing src_bpp_shift
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF bcs
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing mask_bpp_shift
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF adceqs
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs Y
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF pld
set set FLAG_DST_READWRITE
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF bls
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop process_tail
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else process_inner_loop pixman_asm_function fname set src_bpp_ set mask_bpp_ set dst_w_bpp_ set flags_ set prefetch_distance
#define IS_END_OF_GROUP(INDEX, SIZE)
set set set set set set set set set set set FLAG_SPILL_LINE_VARS_NON_WIDE
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF bcc
set set set set set set set set set set set set set set set set set FLAG_PRELOAD_DST
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF and
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop unaligned_mask
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF rsb
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF mov
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF tst
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 ne
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst st
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF WK1
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else process_inner_loop pixman_asm_function fname set src_bpp_ set mask_bpp_ set dst_w_bpp_ set flags
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment process_inner_loop
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else init
set set set set set set set FLAG_PROCESS_DOESNT_STORE
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add MASK
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst reg2
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF bpl
set set set set set set set set set set set set set set set set set set set set ARGS_STACK_OFFSET
set set set set set FLAG_PROCESS_PRESERVES_PSR
set FLAG_DST_WRITEONLY
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF bhi
set set set set set set set set set set FLAG_SPILL_LINE_VARS_WIDE
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set OFFSET
set set set set set set set set set set set set set FLAG_PROCESS_CORRUPTS_SCRATCH
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else prefetch_distance_
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else process_inner_loop pixman_asm_function fname set src_bpp_ set mask_bpp_ set dst_w_bpp_ set flags_ set prefetch_distance_ if PREFETCH_TYPE_NONE else set PREFETCH_TYPE_STANDARD endif if elseif elseif elseif elseif else error requested src elseif elseif elseif else error requested mask elseif elseif elseif else error requested dst dst_w_bpp else set endif set pix_per_block
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst reg0
set set set set set set FLAG_PROCESS_CORRUPTS_PSR
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 bpp
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment action
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst ld
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF adds
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK dst_bpp_shift
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF subs
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF X
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst numbytes
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line vars_spilled
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst reg1
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line loop_label
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF beq
set set set set set set set set set FLAG_NO_SPILL_LINE_VARS
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp WK0
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else newline
set set set set set set set set set set set set set set set set FLAG_PROCESS_CORRUPTS_WK0
set set set set set set set set FLAG_PROCESS_DOES_STORE
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF ptr
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else dst_w_bpp_
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF x
set set set set set set set set set set set set set set set set set set set set *set LOCALS_STACK_OFFSET
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst base
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc last_one
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF rsbs
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels exit_label
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF a
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels unused
char * fname
Definition: plain2.c:121
static struct colourSystem * cs
Definition: ppmcie.c:241
int db
Definition: ppmqvga.c:68
int r
Definition: ppmqvga.c:68
#define r8(n)
Definition: pts_fax.c:3137
#define pop()
Definition: opcodes.h:78
#define push(n)
Definition: opcodes.h:77
test
Definition: parser.c:257
#define str(s)
Definition: sh6.c:399
Definition: pst1form.c:310
Definition: sh.h:1226
m
Definition: tex4ht.c:3990
#define sp
Definition: stack.c:11