w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

pixman-mips-dspr2-asm.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012
3  * MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com)
30  */
31 
32 #ifndef PIXMAN_MIPS_DSPR2_ASM_H
33 #define PIXMAN_MIPS_DSPR2_ASM_H
34 
35 #define zero $0 /* symbolic names for the general-purpose registers $0..$31 follow */
36 #define AT $1
37 #define v0 $2
38 #define v1 $3
39 #define a0 $4 /* a0-a3 carry the input arguments of a function */
40 #define a1 $5
41 #define a2 $6
42 #define a3 $7
43 #define t0 $8
44 #define t1 $9
45 #define t2 $10
46 #define t3 $11
47 #define t4 $12
48 #define t5 $13
49 #define t6 $14
50 #define t7 $15
51 #define s0 $16
52 #define s1 $17
53 #define s2 $18
54 #define s3 $19
55 #define s4 $20
56 #define s5 $21
57 #define s6 $22
58 #define s7 $23
59 #define t8 $24
60 #define t9 $25
61 #define k0 $26
62 #define k1 $27
63 #define gp $28
64 #define sp $29 /* stack pointer, adjusted by SAVE_REGS_ON_STACK / RESTORE_REGS_FROM_STACK */
65 #define fp $30
66 #define s8 $30 /* second name for the same register as fp */
67 #define ra $31 /* named as the return register in the .frame directive of LEAF_MIPS32R2 */
68 
69 /*
70  * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2. Emits a global, hidden function label "symbol"; the routine must be closed with END(symbol).
71  */
72 #define LEAF_MIPS32R2(symbol) \
73  .globl symbol; \
74  .align 2; /* power-of-two alignment: 4 bytes */ \
75  .hidden symbol; /* ELF hidden visibility */ \
76  .type symbol, @function; \
77  .ent symbol, 0; \
78 symbol: .frame sp, 0, ra; /* frame register sp, frame size 0, return register ra */ \
79  .set push; /* save assembler options; END restores them with ".set pop" */ \
80  .set arch=mips32r2; \
81  .set noreorder; /* instructions are emitted in exactly the written order */ \
82  .set noat;
83 
84 /*
85  * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2 (LEAF_MIPS32R2 followed by ".set dspr2")
86  */
87 #define LEAF_MIPS_DSPR2(symbol) \
88 LEAF_MIPS32R2(symbol) \
89  .set dspr2;
90 
91 /*
92  * END - mark end of function opened with LEAF_MIPS32R2 / LEAF_MIPS_DSPR2
93  */
94 #define END(function) \
95  .set pop; /* restore the assembler options saved by ".set push" in LEAF_MIPS32R2 */ \
96  .end function; /* closes the ".ent" of the LEAF macro */ \
97  .size function,.-function
98 
99 /*
100  * Checks if stack offset is big enough for storing/restoring regs_num
101  * number of register to/from stack. Stack offset must be greater than
102  * or equal to the number of bytes needed for storing registers (regs_num*4)
103  * minus 16: since MIPS ABI allows usage of first 16 bytes of stack frame (this is
104  * reserved for input arguments of the functions, already stored in a0-a3),
105  * stack size can be further optimized by utilizing this space. Assembly stops with an error otherwise.
106  */
107 .macro CHECK_STACK_OFFSET regs_num, stack_offset
108 .if \stack_offset < \regs_num * 4 - 16 /* 16 = size of the reusable argument area */
109 .error "Stack offset too small."
110 .endif
111 .endm
112 
113 /*
114  * Saves set of registers on stack. Maximum number of registers that
115  * can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
116  * Stack offset is number of bytes that are subtracted from stack pointer (sp)
117  * before registers are pushed in order to provide enough space on stack
118  * (offset must be a non-negative multiple of 4, and must be big enough, as described by
119  * CHECK_STACK_OFFSET macro). This macro is intended to be used in
120  * combination with RESTORE_REGS_FROM_STACK macro. Example:
121  * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
122  * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
123  */
124 .macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
125  r2 = 0, r3 = 0, r4 = 0, \
126  r5 = 0, r6 = 0, r7 = 0, \
127  r8 = 0, r9 = 0, r10 = 0, \
128  r11 = 0, r12 = 0, r13 = 0, \
129  r14 = 0
130  .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4) /* negative, or remainder of division by 4 is non-zero */
131  .error "Stack offset must be non-negative and a multiple of 4."
132  .endif
133  .if \stack_offset != 0
134  addiu sp, sp, -\stack_offset /* make room below the current stack pointer */
135  .endif
136  sw \r1, 0(sp) /* r1 is mandatory: it is stored unconditionally */
137  .if \r2 != 0 /* 0 is the default value, i.e. the argument was not supplied */
138  sw \r2, 4(sp)
139  .endif
140  .if \r3 != 0
141  sw \r3, 8(sp)
142  .endif
143  .if \r4 != 0
144  sw \r4, 12(sp) /* slots 0..12 are not checked: they fit into the 16-byte argument area */
145  .endif
146  .if \r5 != 0
147  CHECK_STACK_OFFSET 5, \stack_offset
148  sw \r5, 16(sp)
149  .endif
150  .if \r6 != 0
151  CHECK_STACK_OFFSET 6, \stack_offset
152  sw \r6, 20(sp)
153  .endif
154  .if \r7 != 0
155  CHECK_STACK_OFFSET 7, \stack_offset
156  sw \r7, 24(sp)
157  .endif
158  .if \r8 != 0
159  CHECK_STACK_OFFSET 8, \stack_offset
160  sw \r8, 28(sp)
161  .endif
162  .if \r9 != 0
163  CHECK_STACK_OFFSET 9, \stack_offset
164  sw \r9, 32(sp)
165  .endif
166  .if \r10 != 0
167  CHECK_STACK_OFFSET 10, \stack_offset
168  sw \r10, 36(sp)
169  .endif
170  .if \r11 != 0
171  CHECK_STACK_OFFSET 11, \stack_offset
172  sw \r11, 40(sp)
173  .endif
174  .if \r12 != 0
175  CHECK_STACK_OFFSET 12, \stack_offset
176  sw \r12, 44(sp)
177  .endif
178  .if \r13 != 0
179  CHECK_STACK_OFFSET 13, \stack_offset
180  sw \r13, 48(sp)
181  .endif
182  .if \r14 != 0
183  CHECK_STACK_OFFSET 14, \stack_offset
184  sw \r14, 52(sp)
185  .endif
186 .endm
187 
188 /*
189  * Restores set of registers from stack. Maximum number of registers that
190  * can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
191  * Stack offset is number of bytes that are added to stack pointer (sp)
192  * after registers are restored (offset must be a non-negative multiple of 4, and must
193  * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is
194  * intended to be used in combination with SAVE_REGS_ON_STACK macro.
195  * Example:
196  * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
197  * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
198  */
199 .macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
200  r2 = 0, r3 = 0, r4 = 0, \
201  r5 = 0, r6 = 0, r7 = 0, \
202  r8 = 0, r9 = 0, r10 = 0, \
203  r11 = 0, r12 = 0, r13 = 0, \
204  r14 = 0
205  .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4) /* negative, or remainder of division by 4 is non-zero */
206  .error "Stack offset must be non-negative and a multiple of 4."
207  .endif
208  lw \r1, 0(sp) /* r1 is mandatory: it is loaded unconditionally */
209  .if \r2 != 0 /* 0 is the default value, i.e. the argument was not supplied */
210  lw \r2, 4(sp)
211  .endif
212  .if \r3 != 0
213  lw \r3, 8(sp)
214  .endif
215  .if \r4 != 0
216  lw \r4, 12(sp) /* slots 0..12 are not checked: they fit into the 16-byte argument area */
217  .endif
218  .if \r5 != 0
219  CHECK_STACK_OFFSET 5, \stack_offset
220  lw \r5, 16(sp)
221  .endif
222  .if \r6 != 0
223  CHECK_STACK_OFFSET 6, \stack_offset
224  lw \r6, 20(sp)
225  .endif
226  .if \r7 != 0
227  CHECK_STACK_OFFSET 7, \stack_offset
228  lw \r7, 24(sp)
229  .endif
230  .if \r8 != 0
231  CHECK_STACK_OFFSET 8, \stack_offset
232  lw \r8, 28(sp)
233  .endif
234  .if \r9 != 0
235  CHECK_STACK_OFFSET 9, \stack_offset
236  lw \r9, 32(sp)
237  .endif
238  .if \r10 != 0
239  CHECK_STACK_OFFSET 10, \stack_offset
240  lw \r10, 36(sp)
241  .endif
242  .if \r11 != 0
243  CHECK_STACK_OFFSET 11, \stack_offset
244  lw \r11, 40(sp)
245  .endif
246  .if \r12 != 0
247  CHECK_STACK_OFFSET 12, \stack_offset
248  lw \r12, 44(sp)
249  .endif
250  .if \r13 != 0
251  CHECK_STACK_OFFSET 13, \stack_offset
252  lw \r13, 48(sp)
253  .endif
254  .if \r14 != 0
255  CHECK_STACK_OFFSET 14, \stack_offset
256  lw \r14, 52(sp)
257  .endif
258  .if \stack_offset != 0
259  addiu sp, sp, \stack_offset /* release the space only after every register has been reloaded */
260  .endif
261 .endm
262 
263 /*
264  * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel
265  * returned in (out_8888) register. Requires two temporary registers
266  * (scratch1 and scratch2). Alpha is set to 0xff; each channel is widened by repeating its top bits in the low bits.
267  */
268 .macro CONVERT_1x0565_TO_1x8888 in_565, \
269  out_8888, \
270  scratch1, scratch2
271  lui \out_8888, 0xff00 /* out = 0xff000000 (alpha) */
272  sll \scratch1, \in_565, 0x3
273  andi \scratch2, \scratch1, 0xff /* blue field (in bits 4..0) moved to bits 7..3 */
274  ext \scratch1, \in_565, 0x2, 0x3 /* in bits 4..2: top three blue bits */
275  or \scratch1, \scratch2, \scratch1 /* 8-bit blue */
276  or \out_8888, \out_8888, \scratch1
277 
278  sll \scratch1, \in_565, 0x5
279  andi \scratch1, \scratch1, 0xfc00 /* green field (in bits 10..5) moved to bits 15..10 */
280  srl \scratch2, \in_565, 0x1
281  andi \scratch2, \scratch2, 0x300 /* in bits 10..9 (top two green bits) at bits 9..8 */
282  or \scratch2, \scratch1, \scratch2 /* 8-bit green in bits 15..8 */
283  or \out_8888, \out_8888, \scratch2
284 
285  andi \scratch1, \in_565, 0xf800 /* red field, bits 15..11 */
286  srl \scratch2, \scratch1, 0x5
287  andi \scratch2, \scratch2, 0xff00 /* top three red bits at bits 10..8 */
288  or \scratch1, \scratch1, \scratch2 /* 8-bit red in bits 15..8 */
289  sll \scratch1, \scratch1, 0x8 /* move red to bits 23..16 */
290  or \out_8888, \out_8888, \scratch1
291 .endm
292 
293 /*
294  * Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels
295  * returned in (out1_8888 and out2_8888) registers. Requires four scratch
296  * registers (scratch1 ... scratch4). It also requires maskG and maskB for
297  * color component extractions. These masks must have following values:
298  * li maskG, 0x07e007e0
299  * li maskB, 0x001F001F
300  */
301 .macro CONVERT_2x0565_TO_2x8888 in1_565, in2_565, \
302  out1_8888, out2_8888, \
303  maskG, maskB, \
304  scratch1, scratch2, scratch3, scratch4
305  sll \scratch1, \in1_565, 16
306  or \scratch1, \scratch1, \in2_565 /* in1 | in2 packed as two halfwords; assumes upper half of in2_565 is zero - TODO confirm with callers */
307  lui \out2_8888, 0xff00
308  ori \out2_8888, \out2_8888, 0xff00 /* 0xff00ff00: alpha byte for both pixels */
309  shrl.ph \scratch2, \scratch1, 11 /* per halfword: 5-bit red */
310  and \scratch3, \scratch1, \maskG /* per halfword: 6-bit green, still at bits 10..5 */
311  shra.ph \scratch4, \scratch2, 2 /* top three red bits */
312  shll.ph \scratch2, \scratch2, 3
313  shll.ph \scratch3, \scratch3, 5 /* green moved to bits 15..10 of each halfword */
314  or \scratch2, \scratch2, \scratch4 /* per halfword: 8-bit red in the low byte */
315  shrl.qb \scratch4, \scratch3, 6 /* per byte: top two green bits moved down to bits 1..0 of the high byte */
316  or \out2_8888, \out2_8888, \scratch2 /* A1 | R1 | A2 | R2 */
317  or \scratch3, \scratch3, \scratch4 /* per halfword: 8-bit green in the high byte */
318  and \scratch1, \scratch1, \maskB /* per halfword: 5-bit blue */
319  shll.ph \scratch2, \scratch1, 3
320  shra.ph \scratch4, \scratch1, 2 /* top three blue bits */
321  or \scratch2, \scratch2, \scratch4 /* per halfword: 8-bit blue in the low byte */
322  or \scratch3, \scratch2, \scratch3 /* G1 | B1 | G2 | B2 */
323  precrq.ph.w \out1_8888, \out2_8888, \scratch3 /* upper halfwords: A1 R1 G1 B1 */
324  precr_sra.ph.w \out2_8888, \scratch3, 0 /* lower halfwords: A2 R2 G2 B2 */
325 .endm
326 
327 /*
328  * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel
329  * returned in (out_565) register. Requires two temporary registers
330  * (scratch1 and scratch2). Alpha and the low bits of each color channel are dropped.
331  */
332 .macro CONVERT_1x8888_TO_1x0565 in_8888, \
333  out_565, \
334  scratch1, scratch2
335  ext \out_565, \in_8888, 0x3, 0x5 /* in bits 7..3 (blue) -> bits 4..0 */
336  srl \scratch1, \in_8888, 0x5
337  andi \scratch1, \scratch1, 0x07e0 /* in bits 15..10 (green) -> bits 10..5 */
338  srl \scratch2, \in_8888, 0x8
339  andi \scratch2, \scratch2, 0xf800 /* in bits 23..19 (red) -> bits 15..11 */
340  or \out_565, \out_565, \scratch1
341  or \out_565, \out_565, \scratch2
342 .endm
343 
344 /*
345  * Conversion of two a8r8g8b8 pixels (in1_8888 and in2_8888) to two r5g6b5
346  * pixels returned in (out1_565 and out2_565) registers. Requires two temporary
347  * registers (scratch1 and scratch2). It also requires maskR, maskG and maskB
348  * for color component extractions. These masks must have following values:
349  * li maskR, 0xf800f800
350  * li maskG, 0x07e007e0
351  * li maskB, 0x001F001F
352  * Value of input register in2_8888 is lost. Only scratch1 is written by the code below; scratch2 is not referenced. The upper halfword of out1_565 is left holding the in2-derived pixel.
353  */
354 .macro CONVERT_2x8888_TO_2x0565 in1_8888, in2_8888, \
355  out1_565, out2_565, \
356  maskR, maskG, maskB, \
357  scratch1, scratch2
358  precr.qb.ph \scratch1, \in2_8888, \in1_8888 /* R2 | B2 | R1 | B1 */
359  precrq.qb.ph \in2_8888, \in2_8888, \in1_8888 /* A2 | G2 | A1 | G1 */
360  and \out1_565, \scratch1, \maskR /* top five red bits at bits 15..11 of each halfword */
361  shrl.ph \scratch1, \scratch1, 3
362  shll.ph \in2_8888, \in2_8888, 3
363  and \scratch1, \scratch1, \maskB /* top five blue bits at bits 4..0 of each halfword */
364  or \out1_565, \out1_565, \scratch1
365  and \in2_8888, \in2_8888, \maskG /* top six green bits at bits 10..5 of each halfword */
366  or \out1_565, \out1_565, \in2_8888 /* pixel 2 in the upper halfword, pixel 1 in the lower halfword */
367  srl \out2_565, \out1_565, 16 /* pixel 2 alone */
368 .endm
369 
370 /*
371  * Multiply pixel (a8) with single pixel (a8r8g8b8). It requires maskLSR needed
372  * for rounding process. maskLSR must have following value:
373  * li maskLSR, 0x00ff00ff (result is written to d_8888; m_8 and scratch1..scratch3 are overwritten)
374  */
375 .macro MIPS_UN8x4_MUL_UN8 s_8888, \
376  m_8, \
377  d_8888, \
378  maskLSR, \
379  scratch1, scratch2, scratch3
380  replv.ph \m_8, \m_8 /* 0 | M | 0 | M */
381  muleu_s.ph.qbl \scratch1, \s_8888, \m_8 /* A*M | R*M */
382  muleu_s.ph.qbr \scratch2, \s_8888, \m_8 /* G*M | B*M */
383  shra_r.ph \scratch3, \scratch1, 8 /* rounded (product >> 8), per halfword */
384  shra_r.ph \d_8888, \scratch2, 8
385  and \scratch3, \scratch3, \maskLSR /* 0 |A*M| 0 |R*M */
386  and \d_8888, \d_8888, \maskLSR /* 0 |G*M| 0 |B*M */
387  addq.ph \scratch1, \scratch1, \scratch3 /* product + (product >> 8), for A and R */
388  addq.ph \scratch2, \scratch2, \d_8888 /* product + (product >> 8), for G and B */
389  shra_r.ph \scratch1, \scratch1, 8 /* second rounded shift: approximately product / 255 */
390  shra_r.ph \scratch2, \scratch2, 8
391  precr.qb.ph \d_8888, \scratch1, \scratch2 /* low byte of every halfword: A | R | G | B */
392 .endm
393 
394 /*
395  * Multiply two pixels (a8) with two pixels (a8r8g8b8). It requires maskLSR
396  * needed for rounding process. maskLSR must have following value:
397  * li maskLSR, 0x00ff00ff (results are written to d1_8888 and d2_8888; m1_8, m2_8 and scratch1..scratch6 are overwritten)
398  */
399 .macro MIPS_2xUN8x4_MUL_2xUN8 s1_8888, \
400  s2_8888, \
401  m1_8, \
402  m2_8, \
403  d1_8888, \
404  d2_8888, \
405  maskLSR, \
406  scratch1, scratch2, scratch3, \
407  scratch4, scratch5, scratch6
408  replv.ph \m1_8, \m1_8 /* 0 | M1 | 0 | M1 */
409  replv.ph \m2_8, \m2_8 /* 0 | M2 | 0 | M2 */
410  muleu_s.ph.qbl \scratch1, \s1_8888, \m1_8 /* A1*M1 | R1*M1 */
411  muleu_s.ph.qbr \scratch2, \s1_8888, \m1_8 /* G1*M1 | B1*M1 */
412  muleu_s.ph.qbl \scratch3, \s2_8888, \m2_8 /* A2*M2 | R2*M2 */
413  muleu_s.ph.qbr \scratch4, \s2_8888, \m2_8 /* G2*M2 | B2*M2 */
414  shra_r.ph \scratch5, \scratch1, 8 /* rounded (product >> 8), per halfword */
415  shra_r.ph \d1_8888, \scratch2, 8
416  shra_r.ph \scratch6, \scratch3, 8
417  shra_r.ph \d2_8888, \scratch4, 8
418  and \scratch5, \scratch5, \maskLSR /* 0 |A1*M1| 0 |R1*M1 */
419  and \d1_8888, \d1_8888, \maskLSR /* 0 |G1*M1| 0 |B1*M1 */
420  and \scratch6, \scratch6, \maskLSR /* 0 |A2*M2| 0 |R2*M2 */
421  and \d2_8888, \d2_8888, \maskLSR /* 0 |G2*M2| 0 |B2*M2 */
422  addq.ph \scratch1, \scratch1, \scratch5 /* product + (product >> 8) */
423  addq.ph \scratch2, \scratch2, \d1_8888
424  addq.ph \scratch3, \scratch3, \scratch6
425  addq.ph \scratch4, \scratch4, \d2_8888
426  shra_r.ph \scratch1, \scratch1, 8 /* second rounded shift: approximately product / 255 */
427  shra_r.ph \scratch2, \scratch2, 8
428  shra_r.ph \scratch3, \scratch3, 8
429  shra_r.ph \scratch4, \scratch4, 8
430  precr.qb.ph \d1_8888, \scratch1, \scratch2 /* pixel 1: A | R | G | B */
431  precr.qb.ph \d2_8888, \scratch3, \scratch4 /* pixel 2: A | R | G | B */
432 .endm
433 
434 /*
435  * Multiply pixel (a8r8g8b8) with single pixel (a8r8g8b8). It requires maskLSR
436  * needed for rounding process. maskLSR must have following value:
437  * li maskLSR, 0x00ff00ff (channel-wise product is written to d_8888; scratch1..scratch4 are overwritten)
438  */
439 .macro MIPS_UN8x4_MUL_UN8x4 s_8888, \
440  m_8888, \
441  d_8888, \
442  maskLSR, \
443  scratch1, scratch2, scratch3, scratch4
444  preceu.ph.qbl \scratch1, \m_8888 /* 0 | A | 0 | R */
445  preceu.ph.qbr \scratch2, \m_8888 /* 0 | G | 0 | B */
446  muleu_s.ph.qbl \scratch3, \s_8888, \scratch1 /* A*A | R*R */
447  muleu_s.ph.qbr \scratch4, \s_8888, \scratch2 /* G*G | B*B */
448  shra_r.ph \scratch1, \scratch3, 8 /* rounded (product >> 8), per halfword */
449  shra_r.ph \scratch2, \scratch4, 8
450  and \scratch1, \scratch1, \maskLSR /* 0 |A*A| 0 |R*R */
451  and \scratch2, \scratch2, \maskLSR /* 0 |G*G| 0 |B*B */
452  addq.ph \scratch1, \scratch1, \scratch3 /* product + (product >> 8) */
453  addq.ph \scratch2, \scratch2, \scratch4
454  shra_r.ph \scratch1, \scratch1, 8 /* second rounded shift: approximately product / 255 */
455  shra_r.ph \scratch2, \scratch2, 8
456  precr.qb.ph \d_8888, \scratch1, \scratch2 /* A | R | G | B */
457 .endm
458 
459 /*
460  * Multiply two pixels (a8r8g8b8) with two pixels (a8r8g8b8). It requires
461  * maskLSR needed for rounding process. maskLSR must have following value:
462  * li maskLSR, 0x00ff00ff (results are written to d1_8888 and d2_8888; scratch1..scratch6 are overwritten)
463  */
464 
465 .macro MIPS_2xUN8x4_MUL_2xUN8x4 s1_8888, \
466  s2_8888, \
467  m1_8888, \
468  m2_8888, \
469  d1_8888, \
470  d2_8888, \
471  maskLSR, \
472  scratch1, scratch2, scratch3, \
473  scratch4, scratch5, scratch6
474  preceu.ph.qbl \scratch1, \m1_8888 /* 0 | A | 0 | R */
475  preceu.ph.qbr \scratch2, \m1_8888 /* 0 | G | 0 | B */
476  preceu.ph.qbl \scratch3, \m2_8888 /* 0 | A | 0 | R */
477  preceu.ph.qbr \scratch4, \m2_8888 /* 0 | G | 0 | B */
478  muleu_s.ph.qbl \scratch5, \s1_8888, \scratch1 /* A*A | R*R */
479  muleu_s.ph.qbr \scratch6, \s1_8888, \scratch2 /* G*G | B*B */
480  muleu_s.ph.qbl \scratch1, \s2_8888, \scratch3 /* A*A | R*R */
481  muleu_s.ph.qbr \scratch2, \s2_8888, \scratch4 /* G*G | B*B */
482  shra_r.ph \scratch3, \scratch5, 8 /* rounded (product >> 8), per halfword */
483  shra_r.ph \scratch4, \scratch6, 8
484  shra_r.ph \d1_8888, \scratch1, 8 /* d1/d2 temporarily hold the A,R and G,B halves of pixel 2 */
485  shra_r.ph \d2_8888, \scratch2, 8
486  and \scratch3, \scratch3, \maskLSR /* 0 |A*A| 0 |R*R */
487  and \scratch4, \scratch4, \maskLSR /* 0 |G*G| 0 |B*B */
488  and \d1_8888, \d1_8888, \maskLSR /* 0 |A*A| 0 |R*R */
489  and \d2_8888, \d2_8888, \maskLSR /* 0 |G*G| 0 |B*B */
490  addq.ph \scratch3, \scratch3, \scratch5 /* product + (product >> 8) */
491  addq.ph \scratch4, \scratch4, \scratch6
492  addq.ph \d1_8888, \d1_8888, \scratch1
493  addq.ph \d2_8888, \d2_8888, \scratch2
494  shra_r.ph \scratch3, \scratch3, 8 /* second rounded shift: approximately product / 255 */
495  shra_r.ph \scratch4, \scratch4, 8
496  shra_r.ph \scratch5, \d1_8888, 8
497  shra_r.ph \scratch6, \d2_8888, 8
498  precr.qb.ph \d1_8888, \scratch3, \scratch4 /* pixel 1: A | R | G | B */
499  precr.qb.ph \d2_8888, \scratch5, \scratch6 /* pixel 2: A | R | G | B */
500 .endm
501 
502 /*
503  * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
504  * destination pixel (d_8888) using a8 mask (m_8). It also requires maskLSR
505  * needed for rounding process. maskLSR must have following value:
506  * li maskLSR, 0x00ff00ff (result is written to out_8888; d_8888, m_8 and scratch1..scratch4 are overwritten)
507  */
508 .macro OVER_8888_8_8888 s_8888, \
509  m_8, \
510  d_8888, \
511  out_8888, \
512  maskLSR, \
513  scratch1, scratch2, scratch3, scratch4
514  MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \
515  \scratch1, \maskLSR, \
516  \scratch2, \scratch3, \scratch4 /* scratch1 = source * mask */
517 
518  not \scratch2, \scratch1
519  srl \scratch2, \scratch2, 24 /* scratch2 = 255 - alpha of the masked source */
520 
521  MIPS_UN8x4_MUL_UN8 \d_8888, \scratch2, \
522  \d_8888, \maskLSR, \
523  \scratch3, \scratch4, \out_8888 /* d_8888 scaled in place; out_8888 serves as a scratch register here */
524 
525  addu_s.qb \out_8888, \d_8888, \scratch1 /* per-byte saturating add */
526 .endm
527 
528 /*
529  * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
530  * a8r8g8b8 destination pixels (d1_8888 and d2_8888) using a8 masks (m1_8 and
531  * m2_8). It also requires maskLSR needed for rounding process. maskLSR must
532  * have following value:
533  * li maskLSR, 0x00ff00ff (results are written to out1_8888 and out2_8888; d1_8888, d2_8888, m1_8, m2_8 and scratch1..scratch6 are overwritten)
534  */
535 .macro OVER_2x8888_2x8_2x8888 s1_8888, \
536  s2_8888, \
537  m1_8, \
538  m2_8, \
539  d1_8888, \
540  d2_8888, \
541  out1_8888, \
542  out2_8888, \
543  maskLSR, \
544  scratch1, scratch2, scratch3, \
545  scratch4, scratch5, scratch6
546  MIPS_2xUN8x4_MUL_2xUN8 \s1_8888, \s2_8888, \
547  \m1_8, \m2_8, \
548  \scratch1, \scratch2, \
549  \maskLSR, \
550  \scratch3, \scratch4, \out1_8888, \
551  \out2_8888, \scratch5, \scratch6 /* scratch1, scratch2 = source * mask; out1/out2 serve as scratch registers here */
552 
553  not \scratch3, \scratch1
554  srl \scratch3, \scratch3, 24 /* 255 - alpha of masked source 1 */
555  not \scratch4, \scratch2
556  srl \scratch4, \scratch4, 24 /* 255 - alpha of masked source 2 */
557 
558  MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \
559  \scratch3, \scratch4, \
560  \d1_8888, \d2_8888, \
561  \maskLSR, \
562  \scratch5, \scratch6, \out1_8888, \
563  \out2_8888, \scratch3, \scratch4 /* d1_8888, d2_8888 scaled in place */
564 
565  addu_s.qb \out1_8888, \d1_8888, \scratch1 /* per-byte saturating add */
566  addu_s.qb \out2_8888, \d2_8888, \scratch2
567 .endm
568 
569 /*
570  * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
571  * destination pixel (d_8888). It also requires maskLSR needed for rounding
572  * process. maskLSR must have following value:
573  * li maskLSR, 0x00ff00ff (result is written to out_8888; scratch1..scratch4 are overwritten)
574  */
575 .macro OVER_8888_8888 s_8888, \
576  d_8888, \
577  out_8888, \
578  maskLSR, \
579  scratch1, scratch2, scratch3, scratch4
580  not \scratch1, \s_8888
581  srl \scratch1, \scratch1, 24 /* scratch1 = 255 - source alpha */
582 
583  MIPS_UN8x4_MUL_UN8 \d_8888, \scratch1, \
584  \out_8888, \maskLSR, \
585  \scratch2, \scratch3, \scratch4 /* out_8888 = destination * (255 - source alpha) */
586 
587  addu_s.qb \out_8888, \out_8888, \s_8888 /* per-byte saturating add of the source */
588 .endm
589 
590 /*
591  * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
592  * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR
593  * needed for rounding process. maskLSR must have following value:
594  * li maskLSR, 0x00ff00ff (results are written to out1_8888 and out2_8888; d1_8888, d2_8888 and scratch1..scratch6 are overwritten)
595  */
596 .macro OVER_2x8888_2x8888 s1_8888, \
597  s2_8888, \
598  d1_8888, \
599  d2_8888, \
600  out1_8888, \
601  out2_8888, \
602  maskLSR, \
603  scratch1, scratch2, scratch3, \
604  scratch4, scratch5, scratch6
605  not \scratch1, \s1_8888
606  srl \scratch1, \scratch1, 24 /* 255 - alpha of source 1 */
607  not \scratch2, \s2_8888
608  srl \scratch2, \scratch2, 24 /* 255 - alpha of source 2 */
609  MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \
610  \scratch1, \scratch2, \
611  \out1_8888, \out2_8888, \
612  \maskLSR, \
613  \scratch3, \scratch4, \scratch5, \
614  \scratch6, \d1_8888, \d2_8888 /* d1_8888 and d2_8888 double as the last two scratch registers and are overwritten */
615 
616  addu_s.qb \out1_8888, \out1_8888, \s1_8888 /* per-byte saturating add of the source */
617  addu_s.qb \out2_8888, \out2_8888, \s2_8888
618 .endm
619 /* out_8888 = saturating per-byte sum of (s_8888 multiplied by m_8) and d_8888. maskLSR must hold 0x00ff00ff, as required by MIPS_UN8x4_MUL_UN8; m_8 and scratch1..scratch3 are overwritten. */
620 .macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \
621  m_8, \
622  d_8888, \
623  out_8888, \
624  maskLSR, \
625  scratch1, scratch2, scratch3
626  MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \
627  \out_8888, \maskLSR, \
628  \scratch1, \scratch2, \scratch3
629 
630  addu_s.qb \out_8888, \out_8888, \d_8888
631 .endm
632 /* Two-pixel form: outN_8888 = saturating per-byte sum of (sN_8888 multiplied by mN_8) and dN_8888. maskLSR must hold 0x00ff00ff, as required by MIPS_2xUN8x4_MUL_2xUN8; m1_8, m2_8 and scratch1..scratch6 are overwritten. */
633 .macro MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s1_8888, \
634  s2_8888, \
635  m1_8, \
636  m2_8, \
637  d1_8888, \
638  d2_8888, \
639  out1_8888, \
640  out2_8888, \
641  maskLSR, \
642  scratch1, scratch2, scratch3, \
643  scratch4, scratch5, scratch6
644  MIPS_2xUN8x4_MUL_2xUN8 \s1_8888, \s2_8888, \
645  \m1_8, \m2_8, \
646  \out1_8888, \out2_8888, \
647  \maskLSR, \
648  \scratch1, \scratch2, \scratch3, \
649  \scratch4, \scratch5, \scratch6
650 
651  addu_s.qb \out1_8888, \out1_8888, \d1_8888
652  addu_s.qb \out2_8888, \out2_8888, \d2_8888
653 .endm
654 /* Weighted sum of four a8r8g8b8 pixels, computed per 8-bit channel as wt1*tl + wt2*tr + wb1*bl + wb2*br in the DSP accumulators $ac0..$ac3. The result pixel is returned in tl (overwritten). scratch1, scratch2, alpha, red, green and blue are all overwritten. Each result byte is taken from bits 23..16 of its channel sum, so the weights are presumably expected to add up to 1 << 16 - TODO confirm with callers. */
655 .macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br, \
656  scratch1, scratch2, \
657  alpha, red, green, blue, \
658  wt1, wt2, wb1, wb2
659  andi \scratch1, \tl, 0xff /* byte 0 of each pixel; alpha and red are used as plain temporaries until the end */
660  andi \scratch2, \tr, 0xff
661  andi \alpha, \bl, 0xff
662  andi \red, \br, 0xff
663 
664  multu $ac0, \wt1, \scratch1 /* $ac0 = weighted sum of byte 0 */
665  maddu $ac0, \wt2, \scratch2
666  maddu $ac0, \wb1, \alpha
667  maddu $ac0, \wb2, \red
668 
669  ext \scratch1, \tl, 8, 8 /* byte 1 of each pixel */
670  ext \scratch2, \tr, 8, 8
671  ext \alpha, \bl, 8, 8
672  ext \red, \br, 8, 8
673 
674  multu $ac1, \wt1, \scratch1 /* $ac1 = weighted sum of byte 1 */
675  maddu $ac1, \wt2, \scratch2
676  maddu $ac1, \wb1, \alpha
677  maddu $ac1, \wb2, \red
678 
679  ext \scratch1, \tl, 16, 8 /* byte 2 of each pixel */
680  ext \scratch2, \tr, 16, 8
681  ext \alpha, \bl, 16, 8
682  ext \red, \br, 16, 8
683 
684  mflo \blue, $ac0 /* blue = sum for byte 0 */
685 
686  multu $ac2, \wt1, \scratch1 /* $ac2 = weighted sum of byte 2 */
687  maddu $ac2, \wt2, \scratch2
688  maddu $ac2, \wb1, \alpha
689  maddu $ac2, \wb2, \red
690 
691  ext \scratch1, \tl, 24, 8 /* byte 3 of each pixel */
692  ext \scratch2, \tr, 24, 8
693  ext \alpha, \bl, 24, 8
694  ext \red, \br, 24, 8
695 
696  mflo \green, $ac1 /* green = sum for byte 1 */
697 
698  multu $ac3, \wt1, \scratch1 /* $ac3 = weighted sum of byte 3 */
699  maddu $ac3, \wt2, \scratch2
700  maddu $ac3, \wb1, \alpha
701  maddu $ac3, \wb2, \red
702 
703  mflo \red, $ac2 /* red = sum for byte 2 */
704  mflo \alpha, $ac3 /* alpha = sum for byte 3 */
705 
706  precr.qb.ph \alpha, \alpha, \red /* bytes 2 and 0 of the alpha sum | bytes 2 and 0 of the red sum */
707  precr.qb.ph \scratch1, \green, \blue /* bytes 2 and 0 of the green sum | bytes 2 and 0 of the blue sum */
708  precrq.qb.ph \tl, \alpha, \scratch1 /* tl = byte 2 of the alpha | red | green | blue sums */
709 .endm
710 
711 #endif //PIXMAN_MIPS_DSPR2_ASM_H
#define blue
Definition: backend_eps.c:37
#define green
Definition: backend_eps.c:36
#define red
Definition: backend_eps.c:35
#define r1
#define r7
#define r3
#define r5
#define r2
#define r4
#define r6
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif 
if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld endif[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld_src SRC pixld MASK if DST_R else pixld DST_R endif if
#define sp
macro CHECK_STACK_OFFSET regs_num
static enum alpha_handling alpha
Definition: pngtopnm.c:70
#define r8(n)
Definition: pts_fax.c:3137