w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

pixman-arm-neon-asm.h
Go to the documentation of this file.
1 /*
2  * Copyright © 2009 Nokia Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
24  */
25 
26 /*
27  * This file contains a macro ('generate_composite_function') which can
28  * construct 2D image processing functions, based on a common template.
29  * Any combinations of source, destination and mask images with 8bpp,
30  * 16bpp, 24bpp, 32bpp color formats are supported.
31  *
32  * This macro takes care of:
33  * - handling of leading and trailing unaligned pixels
34  * - doing most of the work related to L2 cache preload
35  * - encourages the use of software pipelining for better instructions
36  * scheduling
37  *
38  * The user of this macro has to provide some configuration parameters
39  * (bit depths for the images, prefetch distance, etc.) and a set of
40  * macros, which should implement basic code chunks responsible for
41  * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage
42  * examples.
43  *
44  * TODO:
45  * - try overlapped pixel method (from Ian Rickards) when processing
46  * exactly two blocks of pixels
47  * - maybe add an option to do reverse scanline processing
48  */
49 
50 /*
51  * Bit flags for 'generate_composite_function' macro which are used
52  * to tune generated functions behavior.
53  */
57 
58 /*
59  * Offset in stack where mask and source pointer/stride can be accessed
60  * from 'init' macro. This is useful for doing special handling for solid mask.
61  */
63 
64 /*
65  * Constants for selecting preferable prefetch type.
66  */
67 .set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */
68 .set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */
69 .set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */
70 
71 /*
72  * Definitions of supplementary pixld/pixst macros (for partial load/store of
73  * pixel data).
74  */
75 
76 .macro pixldst1 op, elem_size, reg1, mem_operand, abits
77 .if abits > 0
78  op&.&elem_size {d&reg1}, [&mem_operand&, :&abits&]!
79 .else
80  op&.&elem_size {d&reg1}, [&mem_operand&]!
81 .endif
82 .endm
83 
84 .macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits
85 .if abits > 0
86  op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&, :&abits&]!
87 .else
88  op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&]!
89 .endif
90 .endm
91 
92 .macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits
93 .if abits > 0
94  op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&, :&abits&]!
95 .else
96  op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&]!
97 .endif
98 .endm
99 
100 .macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits
101  op&.&elem_size {d&reg1[idx]}, [&mem_operand&]!
102 .endm
103 
104 .macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand
105  op&.&elem_size {d&reg1, d&reg2, d&reg3}, [&mem_operand&]!
106 .endm
107 
108 .macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand
109  op&.&elem_size {d&reg1[idx], d&reg2[idx], d&reg3[idx]}, [&mem_operand&]!
110 .endm
111 
113 .if numbytes == 32
114  pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \
115  %(basereg+6), %(basereg+7), mem_operand, abits
116 .elseif numbytes == 16
117  pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits
118 .elseif numbytes == 8
119  pixldst1 op, elem_size, %(basereg+1), mem_operand, abits
120 .elseif numbytes == 4
121  .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32)
122  pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits
123  .elseif elem_size == 16
124  pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits
125  pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits
126  .else
127  pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits
128  pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits
129  pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits
130  pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits
131  .endif
132 .elseif numbytes == 2
133  .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16)
134  pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits
135  .else
136  pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits
137  pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits
138  .endif
139 .elseif numbytes == 1
140  pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits
141 .else
142  .error "unsupported size: numbytes"
143 .endif
144 .endm
145 
147 .if bpp > 0
148 .if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
149  pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \
150  %(basereg+6), %(basereg+7), mem_operand, abits
151 .elseif (bpp == 24) && (numpix == 8)
152  pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
153 .elseif (bpp == 24) && (numpix == 4)
154  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
155  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
156  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
157  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
158 .elseif (bpp == 24) && (numpix == 2)
159  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
160  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
161 .elseif (bpp == 24) && (numpix == 1)
162  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
163 .else
164  pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits
165 .endif
166 .endif
167 .endm
168 
169 .macro pixst numpix, bpp, basereg, mem_operand, abits=0
170 .if bpp > 0
171 .if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
172  pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \
173  %(basereg+6), %(basereg+7), mem_operand, abits
174 .elseif (bpp == 24) && (numpix == 8)
175  pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
176 .elseif (bpp == 24) && (numpix == 4)
177  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
178  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
179  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
180  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
181 .elseif (bpp == 24) && (numpix == 2)
182  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
183  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
184 .elseif (bpp == 24) && (numpix == 1)
185  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
186 .else
187  pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits
188 .endif
189 .endif
190 .endm
191 
193 .if (bpp * numpix) <= 128
195 .else
197 .endif
198 .endm
199 
201 .if (bpp * numpix) <= 128
202  pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix)
203 .else
205 .endif
206 .endm
207 
208 /*
209  * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register
210  * aliases to be defined)
211  */
212 .macro pixld1_s elem_size, reg1, mem_operand
213 .if elem_size == 16
214  mov TMP1, VX, asr #16
216 5: subpls VX, VX, SRC_WIDTH_FIXED
217  bpl 5b
219  mov TMP2, VX, asr #16
220  adds VX, VX, UNIT_X
221 5: subpls VX, VX, SRC_WIDTH_FIXED
222  bpl 5b
223  add TMP2, mem_operand, TMP2, asl #1
224  vld1.16 {d&reg1&[0]}, [TMP1, :16]
225  mov TMP1, VX, asr #16
226  adds VX, VX, UNIT_X
227 5: subpls VX, VX, SRC_WIDTH_FIXED
228  bpl 5b
229  add TMP1, mem_operand, TMP1, asl #1
230  vld1.16 {d&reg1&[1]}, [TMP2, :16]
231  mov TMP2, VX, asr #16
232  adds VX, VX, UNIT_X
233 5: subpls VX, VX, SRC_WIDTH_FIXED
234  bpl 5b
235  add TMP2, mem_operand, TMP2, asl #1
236  vld1.16 {d&reg1&[2]}, [TMP1, :16]
237  vld1.16 {d&reg1&[3]}, [TMP2, :16]
238 .elseif elem_size == 32
239  mov TMP1, VX, asr #16
240  adds VX, VX, UNIT_X
241 5: subpls VX, VX, SRC_WIDTH_FIXED
242  bpl 5b
243  add TMP1, mem_operand, TMP1, asl #2
244  mov TMP2, VX, asr #16
245  adds VX, VX, UNIT_X
246 5: subpls VX, VX, SRC_WIDTH_FIXED
247  bpl 5b
248  add TMP2, mem_operand, TMP2, asl #2
249  vld1.32 {d&reg1&[0]}, [TMP1, :32]
250  vld1.32 {d&reg1&[1]}, [TMP2, :32]
251 .else
252  .error "unsupported"
253 .endif
254 .endm
255 
256 .macro pixld2_s elem_size, reg1, reg2, mem_operand
257 .if 0 /* elem_size == 32 */
258  mov TMP1, VX, asr #16
259  add VX, VX, UNIT_X, asl #1
260  add TMP1, mem_operand, TMP1, asl #2
261  mov TMP2, VX, asr #16
262  sub VX, VX, UNIT_X
263  add TMP2, mem_operand, TMP2, asl #2
264  vld1.32 {d&reg1&[0]}, [TMP1, :32]
265  mov TMP1, VX, asr #16
266  add VX, VX, UNIT_X, asl #1
267  add TMP1, mem_operand, TMP1, asl #2
268  vld1.32 {d&reg2&[0]}, [TMP2, :32]
269  mov TMP2, VX, asr #16
270  add VX, VX, UNIT_X
271  add TMP2, mem_operand, TMP2, asl #2
272  vld1.32 {d&reg1&[1]}, [TMP1, :32]
273  vld1.32 {d&reg2&[1]}, [TMP2, :32]
274 .else
275  pixld1_s elem_size, reg1, mem_operand
276  pixld1_s elem_size, reg2, mem_operand
277 .endif
278 .endm
279 
280 .macro pixld0_s elem_size, reg1, idx, mem_operand
281 .if elem_size == 16
282  mov TMP1, VX, asr #16
283  adds VX, VX, UNIT_X
284 5: subpls VX, VX, SRC_WIDTH_FIXED
285  bpl 5b
286  add TMP1, mem_operand, TMP1, asl #1
287  vld1.16 {d&reg1&[idx]}, [TMP1, :16]
288 .elseif elem_size == 32
289  mov TMP1, VX, asr #16
290  adds VX, VX, UNIT_X
291 5: subpls VX, VX, SRC_WIDTH_FIXED
292  bpl 5b
293  add TMP1, mem_operand, TMP1, asl #2
294  vld1.32 {d&reg1&[idx]}, [TMP1, :32]
295 .endif
296 .endm
297 
299 .if numbytes == 32
300  pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand
301  pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand
302  pixdeinterleave elem_size, %(basereg+4)
303 .elseif numbytes == 16
304  pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand
305 .elseif numbytes == 8
306  pixld1_s elem_size, %(basereg+1), mem_operand
307 .elseif numbytes == 4
308  .if elem_size == 32
309  pixld0_s elem_size, %(basereg+0), 1, mem_operand
310  .elseif elem_size == 16
311  pixld0_s elem_size, %(basereg+0), 2, mem_operand
312  pixld0_s elem_size, %(basereg+0), 3, mem_operand
313  .else
314  pixld0_s elem_size, %(basereg+0), 4, mem_operand
315  pixld0_s elem_size, %(basereg+0), 5, mem_operand
316  pixld0_s elem_size, %(basereg+0), 6, mem_operand
317  pixld0_s elem_size, %(basereg+0), 7, mem_operand
318  .endif
319 .elseif numbytes == 2
320  .if elem_size == 16
321  pixld0_s elem_size, %(basereg+0), 1, mem_operand
322  .else
323  pixld0_s elem_size, %(basereg+0), 2, mem_operand
324  pixld0_s elem_size, %(basereg+0), 3, mem_operand
325  .endif
326 .elseif numbytes == 1
327  pixld0_s elem_size, %(basereg+0), 1, mem_operand
328 .else
329  .error "unsupported size: numbytes"
330 .endif
331 .endm
332 
333 .macro pixld_s numpix, bpp, basereg, mem_operand
334 .if bpp > 0
336 .endif
337 .endm
338 
340  vuzp.8 d&reg1, d&reg2
341 .endm
342 
343 .macro vzip8 reg1, reg2
344  vzip.8 d&reg1, d&reg2
345 .endm
346 
347 /* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
348 .macro pixdeinterleave bpp, basereg
349 .if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
350  vuzp8 %(basereg+0), %(basereg+1)
351  vuzp8 %(basereg+2), %(basereg+3)
352  vuzp8 %(basereg+1), %(basereg+3)
353  vuzp8 %(basereg+0), %(basereg+2)
354 .endif
355 .endm
356 
357 /* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
358 .macro pixinterleave bpp, basereg
359 .if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
360  vzip8 %(basereg+0), %(basereg+2)
361  vzip8 %(basereg+1), %(basereg+3)
362  vzip8 %(basereg+2), %(basereg+3)
363  vzip8 %(basereg+0), %(basereg+1)
364 .endif
365 .endm
366 
367 /*
368  * This is a macro for implementing cache preload. The main idea is that
369  * cache preload logic is mostly independent from the rest of pixels
370  * processing code. It starts at the top left pixel and moves forward
371  * across pixels and can jump across scanlines. Prefetch distance is
372  * handled in an 'incremental' way: it starts from 0 and advances to the
373  * optimal distance over time. After reaching optimal prefetch distance,
374  * it is kept constant. There are some checks which prevent prefetching
375  * unneeded pixel lines below the image (but it still can prefetch a bit
376  * more data on the right side of the image - not a big issue and may
377  * be actually helpful when rendering text glyphs). Additional trick is
378  * the use of LDR instruction for prefetch instead of PLD when moving to
379  * the next line, the point is that we have a high chance of getting TLB
380  * miss in this case, and PLD would be useless.
381  *
382  * This sounds like it may introduce a noticeable overhead (when working with
383  * fully cached data). But in reality, due to having a separate pipeline and
384  * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can
385  * execute simultaneously with NEON and be completely shadowed by it. Thus
386  * we get no performance overhead at all (*). This looks like a very nice
387  * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher,
388  * but still can implement some rather advanced prefetch logic in software
389  * for almost zero cost!
390  *
391  * (*) The overhead of the prefetcher is visible when running some trivial
392  * pixels processing like simple copy. Anyway, having prefetch is a must
393  * when working with the graphics data.
394  */
395 .macro PF a, x:vararg
397  a x
398 .endif
399 .endm
400 
401 .macro cache_preload std_increment, boost_increment
402 .if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0)
403 .if regs_shortage
404  PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
405 .endif
408 .endif
409  PF tst PF_CTL, #0xF
410  PF addne PF_X, PF_X, #boost_increment
411  PF subne PF_CTL, PF_CTL, #1
412  PF cmp PF_X, ORIG_W
414  PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
415 .endif
416 .if dst_r_bpp != 0
417  PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
418 .endif
420  PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
421 .endif
422  PF subge PF_X, PF_X, ORIG_W
423  PF subges PF_CTL, PF_CTL, #0x10
424 .if src_bpp_shift >= 0
425  PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
426 .endif
427 .if dst_r_bpp != 0
428  PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
429 .endif
430 .if mask_bpp_shift >= 0
431  PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
432 .endif
433 .endif
434 .endm
435 
436 .macro cache_preload_simple
438 .if src_bpp > 0
439  pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)]
440 .endif
441 .if dst_r_bpp > 0
442  pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)]
443 .endif
444 .if mask_bpp > 0
445  pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)]
446 .endif
447 .endif
448 .endm
449 
450 .macro fetch_mask_pixblock
453 .endm
454 
455 /*
456  * Macro which is used to process leading pixels until destination
457  * pointer is properly aligned (at 16 bytes boundary). When destination
458  * buffer uses 16bpp format, this is unnecessary, or even pointless.
459  */
460 .macro ensure_destination_ptr_alignment process_pixblock_head, \
461  process_pixblock_tail, \
462  process_pixblock_tail_head
463 .if dst_w_bpp != 24
464  tst DST_R, #0xF
465  beq 2f
466 
467 .irp lowbit, 1, 2, 4, 8, 16
468 local skip1
469 .if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
470 .if lowbit < 16 /* we don't need more than 16-byte alignment */
471  tst DST_R, #lowbit
472  beq 1f
473 .endif
476 .if dst_r_bpp > 0
478 .else
480 .endif
481  PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
482  sub W, W, #(lowbit * 8 / dst_w_bpp)
483 1:
484 .endif
485 .endr
486  pixdeinterleave src_bpp, src_basereg
487  pixdeinterleave mask_bpp, mask_basereg
488  pixdeinterleave dst_r_bpp, dst_r_basereg
489 
492  cache_preload_simple
494 
495  pixinterleave dst_w_bpp, dst_w_basereg
496 .irp lowbit, 1, 2, 4, 8, 16
497 .if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
498 .if lowbit < 16 /* we don't need more than 16-byte alignment */
499  tst DST_W, #lowbit
500  beq 1f
501 .endif
503 1:
504 .endif
505 .endr
506 .endif
507 2:
508 .endm
509 
510 /*
511  * Special code for processing up to (pixblock_size - 1) remaining
512  * trailing pixels. As SIMD processing performs operation on
513  * pixblock_size pixels, anything smaller than this has to be loaded
514  * and stored in a special way. Loading and storing of pixel data is
515  * performed in such a way that we fill some 'slots' in the NEON
516  * registers (some slots naturally are unused), then perform compositing
517  * operation as usual. In the end, the data is taken from these 'slots'
518  * and saved to memory.
519  *
520  * cache_preload_flag - allows to suppress prefetch if
521  * set to 0
522  * dst_aligned_flag - selects whether destination buffer
523  * is aligned
524  */
525 .macro process_trailing_pixels cache_preload_flag, \
526  dst_aligned_flag, \
527  process_pixblock_head, \
528  process_pixblock_tail, \
529  process_pixblock_tail_head
530  tst W, #(pixblock_size - 1)
531  beq 2f
532 .irp chunk_size, 16, 8, 4, 2, 1
534  tst W, #chunk_size
535  beq 1f
538 .if dst_aligned_flag != 0
540 .else
542 .endif
544  PF add PF_X, PF_X, #chunk_size
546 1:
547 .endif
548 .endr
549  pixdeinterleave src_bpp, src_basereg
550  pixdeinterleave mask_bpp, mask_basereg
551  pixdeinterleave dst_r_bpp, dst_r_basereg
552 
554 .if cache_preload_flag != 0
556  cache_preload_simple
557 .endif
559  pixinterleave dst_w_bpp, dst_w_basereg
560 .irp chunk_size, 16, 8, 4, 2, 1
562  tst W, #chunk_size
563  beq 1f
564 .if dst_aligned_flag != 0
566 .else
568 .endif
569 1:
570 .endif
571 .endr
572 2:
573 .endm
574 
575 /*
576  * Macro, which performs all the needed operations to switch to the next
577  * scanline and start the next loop iteration unless all the scanlines
578  * are already processed.
579  */
580 .macro advance_to_next_scanline start_of_loop_label
581 .if regs_shortage
582  ldrd W, [sp] /* load W and H (width and height) from stack */
583 .else
584  mov W, ORIG_W
585 .endif
587 .if src_bpp != 0
589 .endif
590 .if mask_bpp != 0
592 .endif
593 .if (dst_w_bpp != 24)
594  sub DST_W, DST_W, W, lsl #dst_bpp_shift
595 .endif
596 .if (src_bpp != 24) && (src_bpp != 0)
597  sub SRC, SRC, W, lsl #src_bpp_shift
598 .endif
599 .if (mask_bpp != 24) && (mask_bpp != 0)
600  sub MASK, MASK, W, lsl #mask_bpp_shift
601 .endif
602  subs H, H, #1
603  mov DST_R, DST_W
604 .if regs_shortage
605  str H, [sp, #4] /* save updated height to stack */
606 .endif
607  bge start_of_loop_label
608 .endm
609 
610 /*
611  * Registers are allocated in the following way by default:
612  * d0, d1, d2, d3 - reserved for loading source pixel data
613  * d4, d5, d6, d7 - reserved for loading destination pixel data
614  * d24, d25, d26, d27 - reserved for loading mask pixel data
615  * d28, d29, d30, d31 - final destination pixel data for writeback to memory
616  */
617 .macro generate_composite_function fname, \
621  flags, \
624  init, \
633 
634  pixman_asm_function fname
635 
636  push {r4-r12, lr} /* save all registers */
637 
638 /*
639  * Select prefetch type for this function. If prefetch distance is
640  * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch
641  * has to be used instead of ADVANCED.
642  */
643  .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT
644 .if prefetch_distance == 0
647  ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24))
649 .endif
650 
651 /*
652  * Make some macro arguments globally visible and accessible
653  * from other macros
654  */
655  .set src_bpp, src_bpp_
656  .set mask_bpp, mask_bpp_
657  .set dst_w_bpp, dst_w_bpp_
663 
664  .macro pixld_src x:vararg
665  pixld x
666  .endm
667  .macro fetch_src_pixblock
670  .endm
671 /*
672  * Assign symbolic names to registers
673  */
674  W .req r0 /* width (is updated during processing) */
675  H .req r1 /* height (is updated during processing) */
676  DST_W .req r2 /* destination buffer pointer for writes */
677  DST_STRIDE .req r3 /* destination image stride */
678  SRC .req r4 /* source buffer pointer */
679  SRC_STRIDE .req r5 /* source image stride */
680  DST_R .req r6 /* destination buffer pointer for reads */
681 
682  MASK .req r7 /* mask pointer */
683  MASK_STRIDE .req r8 /* mask stride */
684 
685  PF_CTL .req r9 /* combined lines counter and prefetch */
686  /* distance increment counter */
687  PF_X .req r10 /* pixel index in a scanline for current */
688  /* pretetch position */
689  PF_SRC .req r11 /* pointer to source scanline start */
690  /* for prefetch purposes */
691  PF_DST .req r12 /* pointer to destination scanline start */
692  /* for prefetch purposes */
693  PF_MASK .req r14 /* pointer to mask scanline start */
694  /* for prefetch purposes */
695 /*
696  * Check whether we have enough registers for all the local variables.
697  * If we don't have enough registers, original width and height are
698  * kept on top of stack (and 'regs_shortage' variable is set to indicate
699  * this for the rest of code). Even if there are enough registers, the
700  * allocation scheme may be a bit different depending on whether source
701  * or mask is not used.
702  */
704  ORIG_W .req r10 /* saved original width */
705  DUMMY .req r12 /* temporary register */
706  .set regs_shortage, 0
707 .elseif mask_bpp == 0
708  ORIG_W .req r7 /* saved original width */
709  DUMMY .req r8 /* temporary register */
710  .set regs_shortage, 0
711 .elseif src_bpp == 0
712  ORIG_W .req r4 /* saved original width */
713  DUMMY .req r5 /* temporary register */
714  .set regs_shortage, 0
715 .else
716  ORIG_W .req r1 /* saved original width */
717  DUMMY .req r1 /* temporary register */
718  .set regs_shortage, 1
719 .endif
720 
721  .set mask_bpp_shift, -1
722 .if src_bpp == 32
723  .set src_bpp_shift, 2
724 .elseif src_bpp == 24
725  .set src_bpp_shift, 0
726 .elseif src_bpp == 16
727  .set src_bpp_shift, 1
728 .elseif src_bpp == 8
729  .set src_bpp_shift, 0
730 .elseif src_bpp == 0
731  .set src_bpp_shift, -1
732 .else
733  .error "requested src bpp (src_bpp) is not supported"
734 .endif
735 .if mask_bpp == 32
736  .set mask_bpp_shift, 2
737 .elseif mask_bpp == 24
738  .set mask_bpp_shift, 0
739 .elseif mask_bpp == 8
740  .set mask_bpp_shift, 0
741 .elseif mask_bpp == 0
742  .set mask_bpp_shift, -1
743 .else
744  .error "requested mask bpp (mask_bpp) is not supported"
745 .endif
746 .if dst_w_bpp == 32
747  .set dst_bpp_shift, 2
748 .elseif dst_w_bpp == 24
749  .set dst_bpp_shift, 0
750 .elseif dst_w_bpp == 16
751  .set dst_bpp_shift, 1
752 .elseif dst_w_bpp == 8
753  .set dst_bpp_shift, 0
754 .else
755  .error "requested dst bpp (dst_w_bpp) is not supported"
756 .endif
757 
758 .if (((flags) & FLAG_DST_READWRITE) != 0)
759  .set dst_r_bpp, dst_w_bpp
760 .else
761  .set dst_r_bpp, 0
762 .endif
763 .if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
764  .set DEINTERLEAVE_32BPP_ENABLED, 1
765 .else
766  .set DEINTERLEAVE_32BPP_ENABLED, 0
767 .endif
768 
769 .if prefetch_distance < 0 || prefetch_distance > 15
770  .error "invalid prefetch distance (prefetch_distance)"
771 .endif
772 
773 .if src_bpp > 0
774  ldr SRC, [sp, #40]
775 .endif
776 .if mask_bpp > 0
777  ldr MASK, [sp, #48]
778 .endif
779  PF mov PF_X, #0
780 .if src_bpp > 0
781  ldr SRC_STRIDE, [sp, #44]
782 .endif
783 .if mask_bpp > 0
784  ldr MASK_STRIDE, [sp, #52]
785 .endif
786  mov DST_R, DST_W
787 
788 .if src_bpp == 24
790  sub SRC_STRIDE, SRC_STRIDE, W, lsl #1
791 .endif
792 .if mask_bpp == 24
794  sub MASK_STRIDE, MASK_STRIDE, W, lsl #1
795 .endif
796 .if dst_w_bpp == 24
798  sub DST_STRIDE, DST_STRIDE, W, lsl #1
799 .endif
800 
801 /*
802  * Setup advanced prefetcher initial state
803  */
804  PF mov PF_SRC, SRC
805  PF mov PF_DST, DST_R
806  PF mov PF_MASK, MASK
807  /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
808  PF mov PF_CTL, H, lsl #4
809  PF add PF_CTL, #(prefetch_distance - 0x10)
810 
811  init
812 .if regs_shortage
813  push {r0, r1}
814 .endif
815  subs H, H, #1
816 .if regs_shortage
817  str H, [sp, #4] /* save updated height to stack */
818 .else
819  mov ORIG_W, W
820 .endif
821  blt 9f
822  cmp W, #(pixblock_size * 2)
823  blt 8f
824 /*
825  * This is the start of the pipelined loop, which if optimized for
826  * long scanlines
827  */
828 0:
829  ensure_destination_ptr_alignment process_pixblock_head, \
832 
833  /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
836  fetch_src_pixblock
839  PF add PF_X, PF_X, #pixblock_size
842  cache_preload_simple
843  subs W, W, #(pixblock_size * 2)
844  blt 2f
845 1:
847  cache_preload_simple
848  subs W, W, #pixblock_size
849  bge 1b
850 2:
854 
855  /* Process the remaining trailing pixels in the scanline */
856  process_trailing_pixels 1, 1, \
860  advance_to_next_scanline 0b
861 
862 .if regs_shortage
863  pop {r0, r1}
864 .endif
865  cleanup
866  pop {r4-r12, pc} /* exit */
867 /*
868  * This is the start of the loop, designed to process images with small width
869  * (less than pixblock_size * 2 pixels). In this case neither pipelining
870  * nor prefetch are used.
871  */
872 8:
873  /* Process exactly pixblock_size pixels if needed */
875  beq 1f
878  fetch_src_pixblock
883  pixst pixblock_size, dst_w_bpp, \
885 1:
886  /* Process the remaining trailing pixels in the scanline */
887  process_trailing_pixels 0, 0, \
891  advance_to_next_scanline 8b
892 9:
893 .if regs_shortage
894  pop {r0, r1}
895 .endif
896  cleanup
897  pop {r4-r12, pc} /* exit */
898 
899  .purgem fetch_src_pixblock
900  .purgem pixld_src
901 
902  .unreq SRC
903  .unreq MASK
904  .unreq DST_R
905  .unreq DST_W
906  .unreq ORIG_W
907  .unreq W
908  .unreq H
909  .unreq SRC_STRIDE
910  .unreq DST_STRIDE
911  .unreq MASK_STRIDE
912  .unreq PF_CTL
913  .unreq PF_X
914  .unreq PF_SRC
915  .unreq PF_DST
916  .unreq PF_MASK
917  .unreq DUMMY
918  .endfunc
919 .endm
920 
921 /*
922  * A simplified variant of function generation template for a single
923  * scanline processing (for implementing pixman combine functions)
924  */
925 .macro generate_composite_function_scanline use_nearest_scaling, \
926  fname, \
927  src_bpp_, \
928  mask_bpp_, \
929  dst_w_bpp_, \
930  flags, \
931  pixblock_size_, \
932  init, \
933  cleanup, \
934  process_pixblock_head, \
935  process_pixblock_tail, \
936  process_pixblock_tail_head, \
937  dst_w_basereg_ = 28, \
938  dst_r_basereg_ = 4, \
939  src_basereg_ = 0, \
940  mask_basereg_ = 24
941 
942  pixman_asm_function fname
943 
945 /*
946  * Make some macro arguments globally visible and accessible
947  * from other macros
948  */
949  .set src_bpp, src_bpp_
950  .set mask_bpp, mask_bpp_
951  .set dst_w_bpp, dst_w_bpp_
957 
958 .if use_nearest_scaling != 0
959  /*
960  * Assign symbolic names to registers for nearest scaling
961  */
962  W .req r0
963  DST_W .req r1
964  SRC .req r2
965  VX .req r3
966  UNIT_X .req ip
967  MASK .req lr
968  TMP1 .req r4
969  TMP2 .req r5
970  DST_R .req r6
971  SRC_WIDTH_FIXED .req r7
972 
973  .macro pixld_src x:vararg
974  pixld_s x
975  .endm
976 
977  ldr UNIT_X, [sp]
978  push {r4-r8, lr}
979  ldr SRC_WIDTH_FIXED, [sp, #(24 + 4)]
980  .if mask_bpp != 0
981  ldr MASK, [sp, #(24 + 8)]
982  .endif
983 .else
984  /*
985  * Assign symbolic names to registers
986  */
987  W .req r0 /* width (is updated during processing) */
988  DST_W .req r1 /* destination buffer pointer for writes */
989  SRC .req r2 /* source buffer pointer */
990  DST_R .req ip /* destination buffer pointer for reads */
991  MASK .req r3 /* mask pointer */
992 
993  .macro pixld_src x:vararg
994  pixld x
995  .endm
996 .endif
997 
998 .if (((flags) & FLAG_DST_READWRITE) != 0)
999  .set dst_r_bpp, dst_w_bpp
1000 .else
1001  .set dst_r_bpp, 0
1002 .endif
1003 .if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
1004  .set DEINTERLEAVE_32BPP_ENABLED, 1
1005 .else
1006  .set DEINTERLEAVE_32BPP_ENABLED, 0
1007 .endif
1008 
1009  .macro fetch_src_pixblock
1011  (src_basereg - pixblock_size * src_bpp / 64), SRC
1012  .endm
1013 
1014  init
1015  mov DST_R, DST_W
1016 
1017  cmp W, #pixblock_size
1018  blt 8f
1019 
1020  ensure_destination_ptr_alignment process_pixblock_head, \
1023 
1024  subs W, W, #pixblock_size
1025  blt 7f
1026 
1027  /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
1030  fetch_src_pixblock
1034  subs W, W, #pixblock_size
1035  blt 2f
1036 1:
1038  subs W, W, #pixblock_size
1039  bge 1b
1040 2:
1044 7:
1045  /* Process the remaining trailing pixels in the scanline (dst aligned) */
1046  process_trailing_pixels 0, 1, \
1047  process_pixblock_head, \
1048  process_pixblock_tail, \
1049  process_pixblock_tail_head
1050 
1051  cleanup
1052 .if use_nearest_scaling != 0
1053  pop {r4-r8, pc} /* exit */
1054 .else
1055  bx lr /* exit */
1056 .endif
1057 8:
1058  /* Process the remaining trailing pixels in the scanline (dst unaligned) */
1059  process_trailing_pixels 0, 0, \
1060  process_pixblock_head, \
1061  process_pixblock_tail, \
1062  process_pixblock_tail_head
1063 
1064  cleanup
1065 
1066 .if use_nearest_scaling != 0
1067  pop {r4-r8, pc} /* exit */
1068 
1069  .unreq DST_R
1070  .unreq SRC
1071  .unreq W
1072  .unreq VX
1073  .unreq UNIT_X
1074  .unreq TMP1
1075  .unreq TMP2
1076  .unreq DST_W
1077  .unreq MASK
1078  .unreq SRC_WIDTH_FIXED
1079 
1080 .else
1081  bx lr /* exit */
1082 
1083  .unreq SRC
1084  .unreq MASK
1085  .unreq DST_R
1086  .unreq DST_W
1087  .unreq W
1088 .endif
1089 
1090  .purgem fetch_src_pixblock
1091  .purgem pixld_src
1092 
1093  .endfunc
1094 .endm
1095 
1096 .macro generate_composite_function_single_scanline x:vararg
1097  generate_composite_function_scanline 0, x
1098 .endm
1099 
1100 .macro generate_composite_function_nearest_scanline x:vararg
1101  generate_composite_function_scanline 1, x
1102 .endm
1103 
1104 /* Default prologue/epilogue, nothing special needs to be done */
1105 
1106 .macro default_init
1107 .endm
1108 
1109 .macro default_cleanup
1110 .endm
1111 
1112 /*
1113  * Prologue/epilogue variant which additionally saves/restores d8-d15
1114  * registers (they need to be saved/restored by callee according to ABI).
1115  * This is required if the code needs to use all the NEON registers.
1116  */
1117 
1118 .macro default_init_need_all_regs
1119  vpush {d8-d15}
1120 .endm
1121 
1122 .macro default_cleanup_need_all_regs
1123  vpop {d8-d15}
1124 .endm
1125 
1126 /******************************************************************************/
1127 
1128 /*
1129  * Conversion of 8 r5g6b6 pixels packed in 128-bit register (in)
1130  * into a planar a8r8g8b8 format (with a, r, g, b color components
1131  * stored into 64-bit registers out_a, out_r, out_g, out_b respectively).
1132  *
1133  * Warning: the conversion is destructive and the original
1134  * value (in) is lost.
1135  */
1136 .macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b
1137  vshrn.u16 out_r, in, #8
1138  vshrn.u16 out_g, in, #3
1139  vsli.u16 in, in, #5
1140  vmov.u8 out_a, #255
1141  vsri.u8 out_r, out_r, #5
1142  vsri.u8 out_g, out_g, #6
1143  vshrn.u16 out_b, in, #2
1144 .endm
1145 
1146 .macro convert_0565_to_x888 in, out_r, out_g, out_b
1147  vshrn.u16 out_r, in, #8
1148  vshrn.u16 out_g, in, #3
1149  vsli.u16 in, in, #5
1150  vsri.u8 out_r, out_r, #5
1151  vsri.u8 out_g, out_g, #6
1152  vshrn.u16 out_b, in, #2
1153 .endm
1154 
1155 /*
1156  * Conversion from planar a8r8g8b8 format (with a, r, g, b color components
1157  * in 64-bit registers in_a, in_r, in_g, in_b respectively) into 8 r5g6b6
1158  * pixels packed in 128-bit register (out). Requires two temporary 128-bit
1159  * registers (tmp1, tmp2)
1160  */
1161 .macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2
1162  vshll.u8 tmp1, in_g, #8
1163  vshll.u8 out, in_r, #8
1164  vshll.u8 tmp2, in_b, #8
1165  vsri.u16 out, tmp1, #5
1166  vsri.u16 out, tmp2, #11
1167 .endm
1168 
1169 /*
1170  * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels
1171  * returned in (out0, out1) registers pair. Requires one temporary
1172  * 64-bit register (tmp). 'out1' and 'in' may overlap, the original
1173  * value from 'in' is lost
1174  */
1175 .macro convert_four_0565_to_x888_packed in, out0, out1, tmp
1176  vshl.u16 out0, in, #5 /* G top 6 bits */
1177  vshl.u16 tmp, in, #11 /* B top 5 bits */
1178  vsri.u16 in, in, #5 /* R is ready in top bits */
1179  vsri.u16 out0, out0, #6 /* G is ready in top bits */
1180  vsri.u16 tmp, tmp, #5 /* B is ready in top bits */
1181  vshr.u16 out1, in, #8 /* R is in place */
1182  vsri.u16 out0, tmp, #8 /* G & B is in place */
1183  vzip.u16 out0, out1 /* everything is in place */
1184 .endm
static integer lr
Definition: aptex.h:532
int cmp(const void *p, const void *q)
Definition: bkmk2uni.c:1611
#define b
Definition: jpegint.h:372
long pc
Definition: disdvi.c:114
#define error(a)
Definition: dviinfo.c:48
mpz_t * f
Definition: gen-fib.c:34
#define r1
#define r7
#define r3
#define r5
#define r2
#define r4
#define r0
#define r6
#define d(n)
Definition: gpos-common.c:151
FILE * out
Definition: hbf2gf.c:286
#define local
Definition: zutil.h:30
static pdf_obj * tmp1
Definition: pdfdoc.c:76
#define macro
Definition: ctangleboot.c:153
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld process_pixblock_tail_head[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld pixblock_size[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld DST_STRIDE[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 reg3
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld src_bpp[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld endif[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld mask_basereg_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 mem_operand
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld src_bpp_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld DST_W[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst numpix
set set set set set set set macro pixldst1 reg1
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl vld1
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 reg2
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF_CTL
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add TMP1
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_w_basereg[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if VX
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld prefetch_distance[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld init[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld flags[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld MASK_STRIDE[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld fname[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set PREFETCH_TYPE_NONE
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst basereg
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld lowbit[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp
set set FLAG_DST_READWRITE
set set set set set set PREFETCH_TYPE_SIMPLE
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld chunk_size[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_w_basereg_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_w_bpp_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld H[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if std_increment
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if mask_bpp_shift
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg vzip8(basereg+1)
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld src_basereg[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld process_pixblock_head[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF a
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_r_basereg[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld pixld_a[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp(lowbit *8/dst_r_bpp)
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld pixld[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC(lowbit *8/dst_w_bpp)
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF x
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp pixld_s_internal(numpix *bpp/8)
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld process_pixblock_tail[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld SRC[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov TMP2
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld mask_bpp[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld cache_preload[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_w_bpp[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if
set FLAG_DST_WRITEONLY
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld pixld_src[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif(lowbit *8/dst_w_bpp)
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld MASK[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 reg4
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported abits
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds UNIT_X
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld SRC_STRIDE[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if dst_r_bpp
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld cleanup[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst numbytes
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if PF_X
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld DST_R[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_r_basereg_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set FLAG_DEINTERLEAVE_32BPP
set set set set set set set PREFETCH_TYPE_ADVANCED
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if src_bpp_shift
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 idx
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_aligned_flag[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld src_basereg_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 op
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld pixblock_size_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 elem_size
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld mask_bpp_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set ARGS_STACK_OFFSET
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld W[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld mask_basereg[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld cache_preload_flag[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg vuzp8(basereg+2)
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld pixst_a[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif(lowbit *8/dst_w_bpp)
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if vars_spilled word LINE_SAVED_REGS endif subs if vars_spilled endif endif add STRIDE_D if src_bpp add STRIDE_S endif if mask_bpp add STRIDE_M endif if restore_x mov ORIG_W endif bhs loop_label ifc if vars_spilled b else b endif else process_inner_loop pixman_asm_function fname set src_bpp_ set mask_bpp_ set dst_w_bpp_ set flags_ set prefetch_distance_ if PREFETCH_TYPE_NONE else set PREFETCH_TYPE_CURRENT
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF add
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF sub
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF pld
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF mov
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF tst
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF bpl
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF adds
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK dst_bpp_shift
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF beq
static uint32_t convert_0565_to_8888(uint16_t s)
static uint16_t convert_8888_to_0565(uint32_t s)
#define DUMMY
Definition: ps_tiny.c:1
#define r8(n)
Definition: pts_fax.c:3137
#define pop()
Definition: opcodes.h:78
#define push(n)
Definition: opcodes.h:77
#define subs(p, n, t)
Definition: interp.c:107
#define str(s)
Definition: sh6.c:399
static FILE * in
Definition: squeeze.c:36
Definition: sh.h:1226
#define sp
Definition: stack.c:11
#define ip
Definition: stack.c:14