w32tex
About: TeX Live provides a comprehensive TeX system, including all the major TeX-related programs, macro packages, and fonts that are free software. This package contains the Windows sources.
  Fossies Dox: w32tex-src.tar.xz (unofficial and experimental doxygen-generated source code documentation)

pixman-vmx.c
1 /*
2  * Copyright © 2007 Luca Barbato
3  *
4  * Permission to use, copy, modify, distribute, and sell this software and its
5  * documentation for any purpose is hereby granted without fee, provided that
6  * the above copyright notice appear in all copies and that both that
7  * copyright notice and this permission notice appear in supporting
8  * documentation, and that the name of Luca Barbato not be used in advertising or
9  * publicity pertaining to distribution of the software without specific,
10  * written prior permission. Luca Barbato makes no representations about the
11  * suitability of this software for any purpose. It is provided "as is"
12  * without express or implied warranty.
13  *
14  * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
15  * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
16  * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
17  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
20  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
21  * SOFTWARE.
22  *
23  * Author: Luca Barbato (lu_zero@gentoo.org)
24  *
25  * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
26  */
27 
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31 #include "pixman-private.h"
32 #include "pixman-combine32.h"
33 #include "pixman-inlines.h"
34 #include <altivec.h>
35 
36 #define AVV(x...) {x}
37 
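/* AVV() expands a variadic argument list into a brace initializer, so
 * AltiVec vector literals can be written inline next to vector casts. */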
38 static vector unsigned int mask_ff000000;
39 static vector unsigned int mask_red;
40 static vector unsigned int mask_green;
41 static vector unsigned int mask_blue;
42 static vector unsigned int mask_565_fix_rb;
43 static vector unsigned int mask_565_fix_g;
44 
45 static force_inline vector unsigned int
46 splat_alpha (vector unsigned int pix)
47 {
48 #ifdef WORDS_BIGENDIAN
49  return vec_perm (pix, pix,
50  (vector unsigned char)AVV (
51  0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
52  0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
53 #else
54  return vec_perm (pix, pix,
55  (vector unsigned char)AVV (
56  0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07,
57  0x0B, 0x0B, 0x0B, 0x0B, 0x0F, 0x0F, 0x0F, 0x0F));
58 #endif
59 }
60 
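/* Broadcast the four channel bytes of the first pixel so that each
 * channel fills one 32-bit lane (used to expand a repeated mask pixel). */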
61 static force_inline vector unsigned int
62 splat_pixel (vector unsigned int pix)
63 {
64  return vec_perm (pix, pix,
65  (vector unsigned char)AVV (
66  0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
67  0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03));
68 }
69 
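/* Per-channel multiply of two 8888 vectors. The sequence
 * t = p * a + 0x80; t += t >> 8; t >>= 8 is the usual exact
 * substitute for t / 255 on 8-bit channel data. */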
70 static force_inline vector unsigned int
71 pix_multiply (vector unsigned int p, vector unsigned int a)
72 {
73  vector unsigned short hi, lo, mod;
74 
75  /* unpack to short */
76  hi = (vector unsigned short)
77 #ifdef WORDS_BIGENDIAN
78  vec_mergeh ((vector unsigned char)AVV (0),
79  (vector unsigned char)p);
80 #else
81  vec_mergeh ((vector unsigned char) p,
82  (vector unsigned char) AVV (0));
83 #endif
84 
85  mod = (vector unsigned short)
86 #ifdef WORDS_BIGENDIAN
87  vec_mergeh ((vector unsigned char)AVV (0),
88  (vector unsigned char)a);
89 #else
90  vec_mergeh ((vector unsigned char) a,
91  (vector unsigned char) AVV (0));
92 #endif
93 
94  hi = vec_mladd (hi, mod, (vector unsigned short)
95  AVV (0x0080, 0x0080, 0x0080, 0x0080,
96  0x0080, 0x0080, 0x0080, 0x0080));
97 
98  hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
99 
100  hi = vec_sr (hi, vec_splat_u16 (8));
101 
102  /* unpack to short */
103  lo = (vector unsigned short)
104 #ifdef WORDS_BIGENDIAN
105  vec_mergel ((vector unsigned char)AVV (0),
106  (vector unsigned char)p);
107 #else
108  vec_mergel ((vector unsigned char) p,
109  (vector unsigned char) AVV (0));
110 #endif
111 
112  mod = (vector unsigned short)
113 #ifdef WORDS_BIGENDIAN
114  vec_mergel ((vector unsigned char)AVV (0),
115  (vector unsigned char)a);
116 #else
117  vec_mergel ((vector unsigned char) a,
118  (vector unsigned char) AVV (0));
119 #endif
120 
121  lo = vec_mladd (lo, mod, (vector unsigned short)
122  AVV (0x0080, 0x0080, 0x0080, 0x0080,
123  0x0080, 0x0080, 0x0080, 0x0080));
124 
125  lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
126 
127  lo = vec_sr (lo, vec_splat_u16 (8));
128 
129  return (vector unsigned int)vec_packsu (hi, lo);
130 }
131 
132 static force_inline vector unsigned int
133 pix_add (vector unsigned int a, vector unsigned int b)
134 {
135  return (vector unsigned int)vec_adds ((vector unsigned char)a,
136  (vector unsigned char)b);
137 }
138 
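/* Computes x * a + y * b per channel with saturating adds; this is the
 * building block for the ATOP- and XOR-style operators below. */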
139 static force_inline vector unsigned int
140 pix_add_mul (vector unsigned int x,
141  vector unsigned int a,
142  vector unsigned int y,
143  vector unsigned int b)
144 {
145  vector unsigned int t1, t2;
146 
147  t1 = pix_multiply (x, a);
148  t2 = pix_multiply (y, b);
149 
150  return pix_add (t1, t2);
151 }
152 
153 static force_inline vector unsigned int
154 negate (vector unsigned int src)
155 {
156  return vec_nor (src, src);
157 }
158 
159 /* dest*~srca + src */
160 static force_inline vector unsigned int
161 over (vector unsigned int src,
162  vector unsigned int srca,
163  vector unsigned int dest)
164 {
165  vector unsigned char tmp = (vector unsigned char)
166  pix_multiply (dest, negate (srca));
167 
168  tmp = vec_adds ((vector unsigned char)src, tmp);
169  return (vector unsigned int)tmp;
170 }
171 
172 /* in == pix_multiply */
173 #define in_over(src, srca, mask, dest) \
174  over (pix_multiply (src, mask), \
175  pix_multiply (srca, mask), dest)
176 
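/* in_over applies the mask to both the source and the source alpha
 * before compositing, i.e. over (src IN mask, srca IN mask, dest). */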
177 #ifdef WORDS_BIGENDIAN
178 
179 #define COMPUTE_SHIFT_MASK(source) \
180  source ## _mask = vec_lvsl (0, source);
181 
182 #define COMPUTE_SHIFT_MASKS(dest, source) \
183  source ## _mask = vec_lvsl (0, source);
184 
185 #define COMPUTE_SHIFT_MASKC(dest, source, mask) \
186  mask ## _mask = vec_lvsl (0, mask); \
187  source ## _mask = vec_lvsl (0, source);
188 
189 #define LOAD_VECTOR(source) \
190 do \
191 { \
192  vector unsigned char tmp1, tmp2; \
193  tmp1 = (typeof(tmp1))vec_ld (0, source); \
194  tmp2 = (typeof(tmp2))vec_ld (15, source); \
195  v ## source = (typeof(v ## source)) \
196  vec_perm (tmp1, tmp2, source ## _mask); \
197 } while (0)
198 
199 #define LOAD_VECTORS(dest, source) \
200 do \
201 { \
202  LOAD_VECTOR(source); \
203  v ## dest = (typeof(v ## dest))vec_ld (0, dest); \
204 } while (0)
205 
206 #define LOAD_VECTORSC(dest, source, mask) \
207 do \
208 { \
209  LOAD_VECTORS(dest, source); \
210  LOAD_VECTOR(mask); \
211 } while (0)
212 
213 #define DECLARE_SRC_MASK_VAR vector unsigned char src_mask
214 #define DECLARE_MASK_MASK_VAR vector unsigned char mask_mask
215 
216 #else
217 
218 /* Now the COMPUTE_SHIFT_{MASK, MASKS, MASKC} below are just no-op.
219  * They are defined that way because little endian altivec can do unaligned
220  * reads natively and have no need for constructing the permutation pattern
221  * variables.
222  */
223 #define COMPUTE_SHIFT_MASK(source)
224 
225 #define COMPUTE_SHIFT_MASKS(dest, source)
226 
227 #define COMPUTE_SHIFT_MASKC(dest, source, mask)
228 
229 # define LOAD_VECTOR(source) \
230  v ## source = (typeof(v ## source))vec_xl(0, source);
231 
232 # define LOAD_VECTORS(dest, source) \
233  LOAD_VECTOR(source); \
234  LOAD_VECTOR(dest); \
235 
236 # define LOAD_VECTORSC(dest, source, mask) \
237  LOAD_VECTORS(dest, source); \
238  LOAD_VECTOR(mask); \
239 
240 #define DECLARE_SRC_MASK_VAR
241 #define DECLARE_MASK_MASK_VAR
242 
243 #endif /* WORDS_BIGENDIAN */
244 
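/* LOAD_VECTORSM additionally multiplies the loaded source by the
 * alpha channel of the loaded mask (a unified-mask load). */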
245 #define LOAD_VECTORSM(dest, source, mask) \
246  LOAD_VECTORSC (dest, source, mask); \
247  v ## source = pix_multiply (v ## source, \
248  splat_alpha (v ## mask));
249 
250 #define STORE_VECTOR(dest) \
251  vec_st ((vector unsigned int) v ## dest, 0, dest);
252 
253 /* load 4 pixels from a 16-byte aligned address */
254 static force_inline vector unsigned int
255 load_128_aligned (const uint32_t* src)
256 {
257  return *((vector unsigned int *) src);
258 }
259 
260 /* load 4 pixels from an unaligned address */
261 static force_inline vector unsigned int
262 load_128_unaligned (const uint32_t* src)
263 {
264  vector unsigned int vsrc;
265  DECLARE_SRC_MASK_VAR;
266 
267  COMPUTE_SHIFT_MASK (src);
268  LOAD_VECTOR (src);
269 
270  return vsrc;
271 }
272 
273 /* save 4 pixels to a 16-byte aligned address */
274 static force_inline void
275 save_128_aligned (uint32_t* data,
276  vector unsigned int vdata)
277 {
278  STORE_VECTOR(data)
279 }
280 
281 static force_inline vector unsigned int
282 create_mask_1x32_128 (const uint32_t *src)
283 {
284  vector unsigned int vsrc;
285  DECLARE_SRC_MASK_VAR;
286 
287  COMPUTE_SHIFT_MASK (src);
288  LOAD_VECTOR (src);
289  return vec_splat(vsrc, 0);
290 }
291 
292 static force_inline vector unsigned int
293 create_mask_32_128 (uint32_t mask)
294 {
295  return create_mask_1x32_128(&mask);
296 }
297 
298 static force_inline vector unsigned int
299 unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2)
300 {
301  vector unsigned char lo;
302 
303  /* unpack to short */
304  lo = (vector unsigned char)
305 #ifdef WORDS_BIGENDIAN
306  vec_mergel ((vector unsigned char) data2,
307  (vector unsigned char) data1);
308 #else
309  vec_mergel ((vector unsigned char) data1,
310  (vector unsigned char) data2);
311 #endif
312 
313  return (vector unsigned int) lo;
314 }
315 
316 static force_inline vector unsigned int
317 unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2)
318 {
319  vector unsigned char hi;
320 
321  /* unpack to short */
322  hi = (vector unsigned char)
323 #ifdef WORDS_BIGENDIAN
324  vec_mergeh ((vector unsigned char) data2,
325  (vector unsigned char) data1);
326 #else
327  vec_mergeh ((vector unsigned char) data1,
328  (vector unsigned char) data2);
329 #endif
330 
331  return (vector unsigned int) hi;
332 }
333 
334 static force_inline vector unsigned int
335 unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2)
336 {
337  vector unsigned short lo;
338 
339  /* unpack to char */
340  lo = (vector unsigned short)
341 #ifdef WORDS_BIGENDIAN
342  vec_mergel ((vector unsigned short) data2,
343  (vector unsigned short) data1);
344 #else
345  vec_mergel ((vector unsigned short) data1,
346  (vector unsigned short) data2);
347 #endif
348 
349  return (vector unsigned int) lo;
350 }
351 
352 static force_inline vector unsigned int
353 unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2)
354 {
355  vector unsigned short hi;
356 
357  /* unpack to char */
358  hi = (vector unsigned short)
359 #ifdef WORDS_BIGENDIAN
360  vec_mergeh ((vector unsigned short) data2,
361  (vector unsigned short) data1);
362 #else
363  vec_mergeh ((vector unsigned short) data1,
364  (vector unsigned short) data2);
365 #endif
366 
367  return (vector unsigned int) hi;
368 }
369 
370 static force_inline void
371 unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2,
372  vector unsigned int* data_lo, vector unsigned int* data_hi)
373 {
374  *data_lo = unpacklo_128_16x8(data1, data2);
375  *data_hi = unpackhi_128_16x8(data1, data2);
376 }
377 
378 static force_inline void
379 unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2,
380  vector unsigned int* data_lo, vector unsigned int* data_hi)
381 {
382  *data_lo = unpacklo_128_8x16(data1, data2);
383  *data_hi = unpackhi_128_8x16(data1, data2);
384 }
385 
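/* Expand r5g6b5 to 8888: shift each field into its byte position and
 * replicate the top bits into the low bits so that 0x1f maps to 0xff. */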
386 static force_inline vector unsigned int
387 unpack_565_to_8888 (vector unsigned int lo)
388 {
389  vector unsigned int r, g, b, rb, t;
390 
391  r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red);
392  g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green);
393  b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue);
394 
395  rb = vec_or (r, b);
396  t = vec_and (rb, mask_565_fix_rb);
397  t = vec_sr (t, create_mask_32_128(5));
398  rb = vec_or (rb, t);
399 
400  t = vec_and (g, mask_565_fix_g);
401  t = vec_sr (t, create_mask_32_128(6));
402  g = vec_or (g, t);
403 
404  return vec_or (rb, g);
405 }
406 
407 static force_inline int
408 is_opaque (vector unsigned int x)
409 {
410  uint32_t cmp_result;
411  vector bool int ffs = vec_cmpeq(x, x);
412 
413  cmp_result = vec_all_eq(x, ffs);
414 
415  return (cmp_result & 0x8888) == 0x8888;
416 }
417 
418 static force_inline int
419 is_zero (vector unsigned int x)
420 {
421  uint32_t cmp_result;
422 
423  cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));
424 
425  return cmp_result == 0xffff;
426 }
427 
428 static force_inline int
429 is_transparent (vector unsigned int x)
430 {
431  uint32_t cmp_result;
432 
433  cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));
434  return (cmp_result & 0x8888) == 0x8888;
435 }
436 
437 static force_inline uint32_t
438 core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst)
439 {
440  uint32_t a;
441 
442  a = ALPHA_8(src);
443 
444  if (a == 0xff)
445  {
446  return src;
447  }
448  else if (src)
449  {
450  UN8x4_MUL_UN8_ADD_UN8x4 (dst, ALPHA_8 (~src), src);
451  }
452 
453  return dst;
454 }
455 
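/* combine1 fetches one source pixel and applies the mask's alpha to it;
 * combine4 loads four pixels, applies the mask vector, and returns zero
 * early when the mask is fully transparent. */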
456 static force_inline uint32_t
457 combine1 (const uint32_t *ps, const uint32_t *pm)
458 {
459  uint32_t s = *ps;
460 
461  if (pm)
462  UN8x4_MUL_UN8(s, ALPHA_8(*pm));
463 
464  return s;
465 }
466 
467 static force_inline vector unsigned int
468 combine4 (const uint32_t* ps, const uint32_t* pm)
469 {
470  vector unsigned int src, msk;
471 
472  if (pm)
473  {
474  msk = load_128_unaligned(pm);
475 
476  if (is_transparent(msk))
477  return (vector unsigned int) AVV(0);
478  }
479 
480  src = load_128_unaligned(ps);
481 
482  if (pm)
483  src = pix_multiply(src, msk);
484 
485  return src;
486 }
487 
488 static void
489 vmx_combine_over_u_no_mask (uint32_t * dest,
490  const uint32_t *src,
491  int width)
492 {
493  int i;
494  vector unsigned int vdest, vsrc;
495  DECLARE_SRC_MASK_VAR;
496 
497  while (width && ((uintptr_t)dest & 15))
498  {
499  uint32_t s = *src++;
500  uint32_t d = *dest;
501  uint32_t ia = ALPHA_8 (~s);
502 
503  UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
504 
505  *dest++ = d;
506  width--;
507  }
508 
509  COMPUTE_SHIFT_MASKS (dest, src);
510 
511  /* printf ("%s\n",__PRETTY_FUNCTION__); */
512  for (i = width / 4; i > 0; i--)
513  {
514 
515  LOAD_VECTORS (dest, src);
516 
517  vdest = over (vsrc, splat_alpha (vsrc), vdest);
518 
519  STORE_VECTOR (dest);
520 
521  src += 4;
522  dest += 4;
523  }
524 
525  for (i = width % 4; --i >= 0;)
526  {
527  uint32_t s = src[i];
528  uint32_t d = dest[i];
529  uint32_t ia = ALPHA_8 (~s);
530 
531  UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
532 
533  dest[i] = d;
534  }
535 }
536 
537 static void
538 vmx_combine_over_u_mask (uint32_t * dest,
539  const uint32_t *src,
540  const uint32_t *mask,
541  int width)
542 {
543  int i;
544  vector unsigned int vdest, vsrc, vmask;
545  DECLARE_SRC_MASK_VAR;
546  DECLARE_MASK_MASK_VAR;
547 
548  while (width && ((uintptr_t)dest & 15))
549  {
550  uint32_t m = ALPHA_8 (*mask++);
551  uint32_t s = *src++;
552  uint32_t d = *dest;
553  uint32_t ia;
554 
555  UN8x4_MUL_UN8 (s, m);
556 
557  ia = ALPHA_8 (~s);
558 
559  UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
560  *dest++ = d;
561  width--;
562  }
563 
564  COMPUTE_SHIFT_MASKC (dest, src, mask);
565 
566  /* printf ("%s\n",__PRETTY_FUNCTION__); */
567  for (i = width / 4; i > 0; i--)
568  {
569  LOAD_VECTORSM (dest, src, mask);
570 
571  vdest = over (vsrc, splat_alpha (vsrc), vdest);
572 
573  STORE_VECTOR (dest);
574 
575  src += 4;
576  dest += 4;
577  mask += 4;
578  }
579 
580  for (i = width % 4; --i >= 0;)
581  {
582  uint32_t m = ALPHA_8 (mask[i]);
583  uint32_t s = src[i];
584  uint32_t d = dest[i];
585  uint32_t ia;
586 
587  UN8x4_MUL_UN8 (s, m);
588 
589  ia = ALPHA_8 (~s);
590 
591  UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
592  dest[i] = d;
593  }
594 }
595 
596 static void
597 vmx_combine_over_u (pixman_implementation_t *imp,
598  pixman_op_t op,
599  uint32_t * dest,
600  const uint32_t * src,
601  const uint32_t * mask,
602  int width)
603 {
604  if (mask)
605  vmx_combine_over_u_mask (dest, src, mask, width);
606  else
607  vmx_combine_over_u_no_mask (dest, src, width);
608 }
609 
610 static void
611 vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
612  const uint32_t *src,
613  int width)
614 {
615  int i;
616  vector unsigned int vdest, vsrc;
617  DECLARE_SRC_MASK_VAR;
618 
619  while (width && ((uintptr_t)dest & 15))
620  {
621  uint32_t s = *src++;
622  uint32_t d = *dest;
623  uint32_t ia = ALPHA_8 (~d);
624 
625  UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
626  *dest++ = s;
627  width--;
628  }
629 
630  COMPUTE_SHIFT_MASKS (dest, src);
631 
632  /* printf ("%s\n",__PRETTY_FUNCTION__); */
633  for (i = width / 4; i > 0; i--)
634  {
635 
636  LOAD_VECTORS (dest, src);
637 
638  vdest = over (vdest, splat_alpha (vdest), vsrc);
639 
640  STORE_VECTOR (dest);
641 
642  src += 4;
643  dest += 4;
644  }
645 
646  for (i = width % 4; --i >= 0;)
647  {
648  uint32_t s = src[i];
649  uint32_t d = dest[i];
650  uint32_t ia = ALPHA_8 (~dest[i]);
651 
652  UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
653  dest[i] = s;
654  }
655 }
656 
657 static void
658 vmx_combine_over_reverse_u_mask (uint32_t * dest,
659  const uint32_t *src,
660  const uint32_t *mask,
661  int width)
662 {
663  int i;
664  vector unsigned int vdest, vsrc, vmask;
665  DECLARE_SRC_MASK_VAR;
666  DECLARE_MASK_MASK_VAR;
667 
668  while (width && ((uintptr_t)dest & 15))
669  {
670  uint32_t m = ALPHA_8 (*mask++);
671  uint32_t s = *src++;
672  uint32_t d = *dest;
673  uint32_t ia = ALPHA_8 (~d);
674 
675  UN8x4_MUL_UN8 (s, m);
676 
677  UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
678  *dest++ = s;
679  width--;
680  }
681 
682  COMPUTE_SHIFT_MASKC (dest, src, mask);
683 
684  /* printf ("%s\n",__PRETTY_FUNCTION__); */
685  for (i = width / 4; i > 0; i--)
686  {
687 
688  LOAD_VECTORSM (dest, src, mask);
689 
690  vdest = over (vdest, splat_alpha (vdest), vsrc);
691 
692  STORE_VECTOR (dest);
693 
694  src += 4;
695  dest += 4;
696  mask += 4;
697  }
698 
699  for (i = width % 4; --i >= 0;)
700  {
701  uint32_t m = ALPHA_8 (mask[i]);
702  uint32_t s = src[i];
703  uint32_t d = dest[i];
704  uint32_t ia = ALPHA_8 (~dest[i]);
705 
706  UN8x4_MUL_UN8 (s, m);
707 
708  UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
709  dest[i] = s;
710  }
711 }
712 
713 static void
714 vmx_combine_over_reverse_u (pixman_implementation_t *imp,
715  pixman_op_t op,
716  uint32_t * dest,
717  const uint32_t * src,
718  const uint32_t * mask,
719  int width)
720 {
721  if (mask)
722  vmx_combine_over_reverse_u_mask (dest, src, mask, width);
723  else
724  vmx_combine_over_reverse_u_no_mask (dest, src, width);
725 }
726 
727 static void
728 vmx_combine_in_u_no_mask (uint32_t * dest,
729  const uint32_t *src,
730  int width)
731 {
732  int i;
733  vector unsigned int vdest, vsrc;
734  DECLARE_SRC_MASK_VAR;
735 
736  while (width && ((uintptr_t)dest & 15))
737  {
738  uint32_t s = *src++;
739  uint32_t a = ALPHA_8 (*dest);
740 
741  UN8x4_MUL_UN8 (s, a);
742  *dest++ = s;
743  width--;
744  }
745 
746  COMPUTE_SHIFT_MASKS (dest, src);
747 
748  /* printf ("%s\n",__PRETTY_FUNCTION__); */
749  for (i = width / 4; i > 0; i--)
750  {
751  LOAD_VECTORS (dest, src);
752 
753  vdest = pix_multiply (vsrc, splat_alpha (vdest));
754 
755  STORE_VECTOR (dest);
756 
757  src += 4;
758  dest += 4;
759  }
760 
761  for (i = width % 4; --i >= 0;)
762  {
763  uint32_t s = src[i];
764  uint32_t a = ALPHA_8 (dest[i]);
765 
766  UN8x4_MUL_UN8 (s, a);
767  dest[i] = s;
768  }
769 }
770 
771 static void
772 vmx_combine_in_u_mask (uint32_t * dest,
773  const uint32_t *src,
774  const uint32_t *mask,
775  int width)
776 {
777  int i;
778  vector unsigned int vdest, vsrc, vmask;
779  DECLARE_SRC_MASK_VAR;
780  DECLARE_MASK_MASK_VAR;
781 
782  while (width && ((uintptr_t)dest & 15))
783  {
784  uint32_t m = ALPHA_8 (*mask++);
785  uint32_t s = *src++;
786  uint32_t a = ALPHA_8 (*dest);
787 
788  UN8x4_MUL_UN8 (s, m);
789  UN8x4_MUL_UN8 (s, a);
790 
791  *dest++ = s;
792  width--;
793  }
794 
795  COMPUTE_SHIFT_MASKC (dest, src, mask);
796 
797  /* printf ("%s\n",__PRETTY_FUNCTION__); */
798  for (i = width / 4; i > 0; i--)
799  {
800  LOAD_VECTORSM (dest, src, mask);
801 
802  vdest = pix_multiply (vsrc, splat_alpha (vdest));
803 
804  STORE_VECTOR (dest);
805 
806  src += 4;
807  dest += 4;
808  mask += 4;
809  }
810 
811  for (i = width % 4; --i >= 0;)
812  {
813  uint32_t m = ALPHA_8 (mask[i]);
814  uint32_t s = src[i];
815  uint32_t a = ALPHA_8 (dest[i]);
816 
817  UN8x4_MUL_UN8 (s, m);
818  UN8x4_MUL_UN8 (s, a);
819 
820  dest[i] = s;
821  }
822 }
823 
824 static void
825 vmx_combine_in_u (pixman_implementation_t *imp,
826  pixman_op_t op,
827  uint32_t * dest,
828  const uint32_t * src,
829  const uint32_t * mask,
830  int width)
831 {
832  if (mask)
833  vmx_combine_in_u_mask (dest, src, mask, width);
834  else
835  vmx_combine_in_u_no_mask (dest, src, width);
836 }
837 
838 static void
839 vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
840  const uint32_t *src,
841  int width)
842 {
843  int i;
844  vector unsigned int vdest, vsrc;
845  DECLARE_SRC_MASK_VAR;
846 
847  while (width && ((uintptr_t)dest & 15))
848  {
849  uint32_t d = *dest;
850  uint32_t a = ALPHA_8 (*src++);
851 
852  UN8x4_MUL_UN8 (d, a);
853 
854  *dest++ = d;
855  width--;
856  }
857 
858  COMPUTE_SHIFT_MASKS (dest, src);
859 
860  /* printf ("%s\n",__PRETTY_FUNCTION__); */
861  for (i = width / 4; i > 0; i--)
862  {
863  LOAD_VECTORS (dest, src);
864 
865  vdest = pix_multiply (vdest, splat_alpha (vsrc));
866 
867  STORE_VECTOR (dest);
868 
869  src += 4;
870  dest += 4;
871  }
872 
873  for (i = width % 4; --i >= 0;)
874  {
875  uint32_t d = dest[i];
876  uint32_t a = ALPHA_8 (src[i]);
877 
878  UN8x4_MUL_UN8 (d, a);
879 
880  dest[i] = d;
881  }
882 }
883 
884 static void
885 vmx_combine_in_reverse_u_mask (uint32_t * dest,
886  const uint32_t *src,
887  const uint32_t *mask,
888  int width)
889 {
890  int i;
891  vector unsigned int vdest, vsrc, vmask;
892  DECLARE_SRC_MASK_VAR;
893  DECLARE_MASK_MASK_VAR;
894 
895  while (width && ((uintptr_t)dest & 15))
896  {
897  uint32_t m = ALPHA_8 (*mask++);
898  uint32_t d = *dest;
899  uint32_t a = *src++;
900 
901  UN8x4_MUL_UN8 (a, m);
902  a = ALPHA_8 (a);
903  UN8x4_MUL_UN8 (d, a);
904 
905  *dest++ = d;
906  width--;
907  }
908 
909  COMPUTE_SHIFT_MASKC (dest, src, mask);
910 
911  /* printf ("%s\n",__PRETTY_FUNCTION__); */
912  for (i = width / 4; i > 0; i--)
913  {
914  LOAD_VECTORSM (dest, src, mask);
915 
916  vdest = pix_multiply (vdest, splat_alpha (vsrc));
917 
918  STORE_VECTOR (dest);
919 
920  src += 4;
921  dest += 4;
922  mask += 4;
923  }
924 
925  for (i = width % 4; --i >= 0;)
926  {
927  uint32_t m = ALPHA_8 (mask[i]);
928  uint32_t d = dest[i];
929  uint32_t a = src[i];
930 
931  UN8x4_MUL_UN8 (a, m);
932  a = ALPHA_8 (a);
933  UN8x4_MUL_UN8 (d, a);
934 
935  dest[i] = d;
936  }
937 }
938 
939 static void
940 vmx_combine_in_reverse_u (pixman_implementation_t *imp,
941  pixman_op_t op,
942  uint32_t * dest,
943  const uint32_t * src,
944  const uint32_t * mask,
945  int width)
946 {
947  if (mask)
948  vmx_combine_in_reverse_u_mask (dest, src, mask, width);
949  else
950  vmx_combine_in_reverse_u_no_mask (dest, src, width);
951 }
952 
953 static void
954 vmx_combine_out_u_no_mask (uint32_t * dest,
955  const uint32_t *src,
956  int width)
957 {
958  int i;
959  vector unsigned int vdest, vsrc;
960  DECLARE_SRC_MASK_VAR;
961 
962  while (width && ((uintptr_t)dest & 15))
963  {
964  uint32_t s = *src++;
965  uint32_t a = ALPHA_8 (~(*dest));
966 
967  UN8x4_MUL_UN8 (s, a);
968 
969  *dest++ = s;
970  width--;
971  }
972 
973  COMPUTE_SHIFT_MASKS (dest, src);
974 
975  /* printf ("%s\n",__PRETTY_FUNCTION__); */
976  for (i = width / 4; i > 0; i--)
977  {
978  LOAD_VECTORS (dest, src);
979 
980  vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
981 
982  STORE_VECTOR (dest);
983 
984  src += 4;
985  dest += 4;
986  }
987 
988  for (i = width % 4; --i >= 0;)
989  {
990  uint32_t s = src[i];
991  uint32_t a = ALPHA_8 (~dest[i]);
992 
993  UN8x4_MUL_UN8 (s, a);
994 
995  dest[i] = s;
996  }
997 }
998 
999 static void
1000 vmx_combine_out_u_mask (uint32_t * dest,
1001  const uint32_t *src,
1002  const uint32_t *mask,
1003  int width)
1004 {
1005  int i;
1006  vector unsigned int vdest, vsrc, vmask;
1007  DECLARE_SRC_MASK_VAR;
1008  DECLARE_MASK_MASK_VAR;
1009 
1010  while (width && ((uintptr_t)dest & 15))
1011  {
1012  uint32_t m = ALPHA_8 (*mask++);
1013  uint32_t s = *src++;
1014  uint32_t a = ALPHA_8 (~(*dest));
1015 
1016  UN8x4_MUL_UN8 (s, m);
1017  UN8x4_MUL_UN8 (s, a);
1018 
1019  *dest++ = s;
1020  width--;
1021  }
1022 
1023  COMPUTE_SHIFT_MASKC (dest, src, mask);
1024 
1025  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1026  for (i = width / 4; i > 0; i--)
1027  {
1028  LOAD_VECTORSM (dest, src, mask);
1029 
1030  vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
1031 
1032  STORE_VECTOR (dest);
1033 
1034  src += 4;
1035  dest += 4;
1036  mask += 4;
1037  }
1038 
1039  for (i = width % 4; --i >= 0;)
1040  {
1041  uint32_t m = ALPHA_8 (mask[i]);
1042  uint32_t s = src[i];
1043  uint32_t a = ALPHA_8 (~dest[i]);
1044 
1045  UN8x4_MUL_UN8 (s, m);
1046  UN8x4_MUL_UN8 (s, a);
1047 
1048  dest[i] = s;
1049  }
1050 }
1051 
1052 static void
1053 vmx_combine_out_u (pixman_implementation_t *imp,
1054  pixman_op_t op,
1055  uint32_t * dest,
1056  const uint32_t * src,
1057  const uint32_t * mask,
1058  int width)
1059 {
1060  if (mask)
1061  vmx_combine_out_u_mask (dest, src, mask, width);
1062  else
1063  vmx_combine_out_u_no_mask (dest, src, width);
1064 }
1065 
1066 static void
1067 vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
1068  const uint32_t *src,
1069  int width)
1070 {
1071  int i;
1072  vector unsigned int vdest, vsrc;
1073  DECLARE_SRC_MASK_VAR;
1074 
1075  while (width && ((uintptr_t)dest & 15))
1076  {
1077  uint32_t d = *dest;
1078  uint32_t a = ALPHA_8 (~(*src++));
1079 
1080  UN8x4_MUL_UN8 (d, a);
1081 
1082  *dest++ = d;
1083  width--;
1084  }
1085 
1086  COMPUTE_SHIFT_MASKS (dest, src);
1087 
1088  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1089  for (i = width / 4; i > 0; i--)
1090  {
1091 
1092  LOAD_VECTORS (dest, src);
1093 
1094  vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
1095 
1096  STORE_VECTOR (dest);
1097 
1098  src += 4;
1099  dest += 4;
1100  }
1101 
1102  for (i = width % 4; --i >= 0;)
1103  {
1104  uint32_t d = dest[i];
1105  uint32_t a = ALPHA_8 (~src[i]);
1106 
1107  UN8x4_MUL_UN8 (d, a);
1108 
1109  dest[i] = d;
1110  }
1111 }
1112 
1113 static void
1114 vmx_combine_out_reverse_u_mask (uint32_t * dest,
1115  const uint32_t *src,
1116  const uint32_t *mask,
1117  int width)
1118 {
1119  int i;
1120  vector unsigned int vdest, vsrc, vmask;
1121  DECLARE_SRC_MASK_VAR;
1122  DECLARE_MASK_MASK_VAR;
1123 
1124  while (width && ((uintptr_t)dest & 15))
1125  {
1126  uint32_t m = ALPHA_8 (*mask++);
1127  uint32_t d = *dest;
1128  uint32_t a = *src++;
1129 
1130  UN8x4_MUL_UN8 (a, m);
1131  a = ALPHA_8 (~a);
1132  UN8x4_MUL_UN8 (d, a);
1133 
1134  *dest++ = d;
1135  width--;
1136  }
1137 
1138  COMPUTE_SHIFT_MASKC (dest, src, mask);
1139 
1140  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1141  for (i = width / 4; i > 0; i--)
1142  {
1143  LOAD_VECTORSM (dest, src, mask);
1144 
1145  vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
1146 
1147  STORE_VECTOR (dest);
1148 
1149  src += 4;
1150  dest += 4;
1151  mask += 4;
1152  }
1153 
1154  for (i = width % 4; --i >= 0;)
1155  {
1156  uint32_t m = ALPHA_8 (mask[i]);
1157  uint32_t d = dest[i];
1158  uint32_t a = src[i];
1159 
1160  UN8x4_MUL_UN8 (a, m);
1161  a = ALPHA_8 (~a);
1162  UN8x4_MUL_UN8 (d, a);
1163 
1164  dest[i] = d;
1165  }
1166 }
1167 
1168 static void
1169 vmx_combine_out_reverse_u (pixman_implementation_t *imp,
1170  pixman_op_t op,
1171  uint32_t * dest,
1172  const uint32_t * src,
1173  const uint32_t * mask,
1174  int width)
1175 {
1176  if (mask)
1177  vmx_combine_out_reverse_u_mask (dest, src, mask, width);
1178  else
1179  vmx_combine_out_reverse_u_no_mask (dest, src, width);
1180 }
1181 
1182 static void
1183 vmx_combine_atop_u_no_mask (uint32_t * dest,
1184  const uint32_t *src,
1185  int width)
1186 {
1187  int i;
1188  vector unsigned int vdest, vsrc;
1189  DECLARE_SRC_MASK_VAR;
1190 
1191  while (width && ((uintptr_t)dest & 15))
1192  {
1193  uint32_t s = *src++;
1194  uint32_t d = *dest;
1195  uint32_t dest_a = ALPHA_8 (d);
1196  uint32_t src_ia = ALPHA_8 (~s);
1197 
1198  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
1199 
1200  *dest++ = s;
1201  width--;
1202  }
1203 
1204  COMPUTE_SHIFT_MASKS (dest, src);
1205 
1206  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1207  for (i = width / 4; i > 0; i--)
1208  {
1209  LOAD_VECTORS (dest, src);
1210 
1211  vdest = pix_add_mul (vsrc, splat_alpha (vdest),
1212  vdest, splat_alpha (negate (vsrc)));
1213 
1214  STORE_VECTOR (dest);
1215 
1216  src += 4;
1217  dest += 4;
1218  }
1219 
1220  for (i = width % 4; --i >= 0;)
1221  {
1222  uint32_t s = src[i];
1223  uint32_t d = dest[i];
1224  uint32_t dest_a = ALPHA_8 (d);
1225  uint32_t src_ia = ALPHA_8 (~s);
1226 
1227  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
1228 
1229  dest[i] = s;
1230  }
1231 }
1232 
1233 static void
1234 vmx_combine_atop_u_mask (uint32_t * dest,
1235  const uint32_t *src,
1236  const uint32_t *mask,
1237  int width)
1238 {
1239  int i;
1240  vector unsigned int vdest, vsrc, vmask;
1241  DECLARE_SRC_MASK_VAR;
1242  DECLARE_MASK_MASK_VAR;
1243 
1244  while (width && ((uintptr_t)dest & 15))
1245  {
1246  uint32_t m = ALPHA_8 (*mask++);
1247  uint32_t s = *src++;
1248  uint32_t d = *dest;
1249  uint32_t dest_a = ALPHA_8 (d);
1250  uint32_t src_ia;
1251 
1252  UN8x4_MUL_UN8 (s, m);
1253 
1254  src_ia = ALPHA_8 (~s);
1255 
1256  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
1257 
1258  *dest++ = s;
1259  width--;
1260  }
1261 
1262  COMPUTE_SHIFT_MASKC (dest, src, mask);
1263 
1264  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1265  for (i = width / 4; i > 0; i--)
1266  {
1267  LOAD_VECTORSM (dest, src, mask);
1268 
1269  vdest = pix_add_mul (vsrc, splat_alpha (vdest),
1270  vdest, splat_alpha (negate (vsrc)));
1271 
1272  STORE_VECTOR (dest);
1273 
1274  src += 4;
1275  dest += 4;
1276  mask += 4;
1277  }
1278 
1279  for (i = width % 4; --i >= 0;)
1280  {
1281  uint32_t m = ALPHA_8 (mask[i]);
1282  uint32_t s = src[i];
1283  uint32_t d = dest[i];
1284  uint32_t dest_a = ALPHA_8 (d);
1285  uint32_t src_ia;
1286 
1287  UN8x4_MUL_UN8 (s, m);
1288 
1289  src_ia = ALPHA_8 (~s);
1290 
1291  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
1292 
1293  dest[i] = s;
1294  }
1295 }
1296 
1297 static void
1298 vmx_combine_atop_u (pixman_implementation_t *imp,
1299  pixman_op_t op,
1300  uint32_t * dest,
1301  const uint32_t * src,
1302  const uint32_t * mask,
1303  int width)
1304 {
1305  if (mask)
1306  vmx_combine_atop_u_mask (dest, src, mask, width);
1307  else
1308  vmx_combine_atop_u_no_mask (dest, src, width);
1309 }
1310 
1311 static void
1312 vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
1313  const uint32_t *src,
1314  int width)
1315 {
1316  int i;
1317  vector unsigned int vdest, vsrc;
1318  DECLARE_SRC_MASK_VAR;
1319 
1320  while (width && ((uintptr_t)dest & 15))
1321  {
1322  uint32_t s = *src++;
1323  uint32_t d = *dest;
1324  uint32_t src_a = ALPHA_8 (s);
1325  uint32_t dest_ia = ALPHA_8 (~d);
1326 
1327  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
1328 
1329  *dest++ = s;
1330  width--;
1331  }
1332 
1333  COMPUTE_SHIFT_MASKS (dest, src);
1334 
1335  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1336  for (i = width / 4; i > 0; i--)
1337  {
1338  LOAD_VECTORS (dest, src);
1339 
1340  vdest = pix_add_mul (vdest, splat_alpha (vsrc),
1341  vsrc, splat_alpha (negate (vdest)));
1342 
1343  STORE_VECTOR (dest);
1344 
1345  src += 4;
1346  dest += 4;
1347  }
1348 
1349  for (i = width % 4; --i >= 0;)
1350  {
1351  uint32_t s = src[i];
1352  uint32_t d = dest[i];
1353  uint32_t src_a = ALPHA_8 (s);
1354  uint32_t dest_ia = ALPHA_8 (~d);
1355 
1356  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
1357 
1358  dest[i] = s;
1359  }
1360 }
1361 
1362 static void
1363 vmx_combine_atop_reverse_u_mask (uint32_t * dest,
1364  const uint32_t *src,
1365  const uint32_t *mask,
1366  int width)
1367 {
1368  int i;
1369  vector unsigned int vdest, vsrc, vmask;
1370  DECLARE_SRC_MASK_VAR;
1371  DECLARE_MASK_MASK_VAR;
1372 
1373  while (width && ((uintptr_t)dest & 15))
1374  {
1375  uint32_t m = ALPHA_8 (*mask++);
1376  uint32_t s = *src++;
1377  uint32_t d = *dest;
1378  uint32_t src_a;
1379  uint32_t dest_ia = ALPHA_8 (~d);
1380 
1381  UN8x4_MUL_UN8 (s, m);
1382 
1383  src_a = ALPHA_8 (s);
1384 
1385  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
1386 
1387  *dest++ = s;
1388  width--;
1389  }
1390 
1391  COMPUTE_SHIFT_MASKC (dest, src, mask);
1392 
1393  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1394  for (i = width / 4; i > 0; i--)
1395  {
1396  LOAD_VECTORSM (dest, src, mask);
1397 
1398  vdest = pix_add_mul (vdest, splat_alpha (vsrc),
1399  vsrc, splat_alpha (negate (vdest)));
1400 
1401  STORE_VECTOR (dest);
1402 
1403  src += 4;
1404  dest += 4;
1405  mask += 4;
1406  }
1407 
1408  for (i = width % 4; --i >= 0;)
1409  {
1410  uint32_t m = ALPHA_8 (mask[i]);
1411  uint32_t s = src[i];
1412  uint32_t d = dest[i];
1413  uint32_t src_a;
1414  uint32_t dest_ia = ALPHA_8 (~d);
1415 
1416  UN8x4_MUL_UN8 (s, m);
1417 
1418  src_a = ALPHA_8 (s);
1419 
1420  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
1421 
1422  dest[i] = s;
1423  }
1424 }
1425 
1426 static void
1428  pixman_op_t op,
1429  uint32_t * dest,
1430  const uint32_t * src,
1431  const uint32_t * mask,
1432  int width)
1433 {
1434  if (mask)
1435  vmx_combine_atop_reverse_u_mask (dest, src, mask, width);
1436  else
1437  vmx_combine_atop_reverse_u_no_mask (dest, src, width);
1438 }
1439 
1440 static void
1441 vmx_combine_xor_u_no_mask (uint32_t * dest,
1442  const uint32_t *src,
1443  int width)
1444 {
1445  int i;
1446  vector unsigned int vdest, vsrc;
1447  DECLARE_SRC_MASK_VAR;
1448 
1449  while (width && ((uintptr_t)dest & 15))
1450  {
1451  uint32_t s = *src++;
1452  uint32_t d = *dest;
1453  uint32_t src_ia = ALPHA_8 (~s);
1454  uint32_t dest_ia = ALPHA_8 (~d);
1455 
1456  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
1457 
1458  *dest++ = s;
1459  width--;
1460  }
1461 
1462  COMPUTE_SHIFT_MASKS (dest, src);
1463 
1464  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1465  for (i = width / 4; i > 0; i--)
1466  {
1467  LOAD_VECTORS (dest, src);
1468 
1469  vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
1470  vdest, splat_alpha (negate (vsrc)));
1471 
1472  STORE_VECTOR (dest);
1473 
1474  src += 4;
1475  dest += 4;
1476  }
1477 
1478  for (i = width % 4; --i >= 0;)
1479  {
1480  uint32_t s = src[i];
1481  uint32_t d = dest[i];
1482  uint32_t src_ia = ALPHA_8 (~s);
1483  uint32_t dest_ia = ALPHA_8 (~d);
1484 
1485  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
1486 
1487  dest[i] = s;
1488  }
1489 }
1490 
1491 static void
1492 vmx_combine_xor_u_mask (uint32_t * dest,
1493  const uint32_t *src,
1494  const uint32_t *mask,
1495  int width)
1496 {
1497  int i;
1498  vector unsigned int vdest, vsrc, vmask;
1499  DECLARE_SRC_MASK_VAR;
1500  DECLARE_MASK_MASK_VAR;
1501 
1502  while (width && ((uintptr_t)dest & 15))
1503  {
1504  uint32_t m = ALPHA_8 (*mask++);
1505  uint32_t s = *src++;
1506  uint32_t d = *dest;
1507  uint32_t src_ia;
1508  uint32_t dest_ia = ALPHA_8 (~d);
1509 
1510  UN8x4_MUL_UN8 (s, m);
1511 
1512  src_ia = ALPHA_8 (~s);
1513 
1514  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
1515 
1516  *dest++ = s;
1517  width--;
1518  }
1519 
1520  COMPUTE_SHIFT_MASKC (dest, src, mask);
1521 
1522  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1523  for (i = width / 4; i > 0; i--)
1524  {
1525  LOAD_VECTORSM (dest, src, mask);
1526 
1527  vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
1528  vdest, splat_alpha (negate (vsrc)));
1529 
1530  STORE_VECTOR (dest);
1531 
1532  src += 4;
1533  dest += 4;
1534  mask += 4;
1535  }
1536 
1537  for (i = width % 4; --i >= 0;)
1538  {
1539  uint32_t m = ALPHA_8 (mask[i]);
1540  uint32_t s = src[i];
1541  uint32_t d = dest[i];
1542  uint32_t src_ia;
1543  uint32_t dest_ia = ALPHA_8 (~d);
1544 
1545  UN8x4_MUL_UN8 (s, m);
1546 
1547  src_ia = ALPHA_8 (~s);
1548 
1549  UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
1550 
1551  dest[i] = s;
1552  }
1553 }
1554 
1555 static void
1556 vmx_combine_xor_u (pixman_implementation_t *imp,
1557  pixman_op_t op,
1558  uint32_t * dest,
1559  const uint32_t * src,
1560  const uint32_t * mask,
1561  int width)
1562 {
1563  if (mask)
1564  vmx_combine_xor_u_mask (dest, src, mask, width);
1565  else
1566  vmx_combine_xor_u_no_mask (dest, src, width);
1567 }
1568 
1569 static void
1570 vmx_combine_add_u_no_mask (uint32_t * dest,
1571  const uint32_t *src,
1572  int width)
1573 {
1574  int i;
1575  vector unsigned int vdest, vsrc;
1576  DECLARE_SRC_MASK_VAR;
1577 
1578  while (width && ((uintptr_t)dest & 15))
1579  {
1580  uint32_t s = *src++;
1581  uint32_t d = *dest;
1582 
1583  UN8x4_ADD_UN8x4 (d, s);
1584 
1585  *dest++ = d;
1586  width--;
1587  }
1588 
1589  COMPUTE_SHIFT_MASKS (dest, src);
1590  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1591  for (i = width / 4; i > 0; i--)
1592  {
1593  LOAD_VECTORS (dest, src);
1594 
1595  vdest = pix_add (vsrc, vdest);
1596 
1597  STORE_VECTOR (dest);
1598 
1599  src += 4;
1600  dest += 4;
1601  }
1602 
1603  for (i = width % 4; --i >= 0;)
1604  {
1605  uint32_t s = src[i];
1606  uint32_t d = dest[i];
1607 
1608  UN8x4_ADD_UN8x4 (d, s);
1609 
1610  dest[i] = d;
1611  }
1612 }
1613 
1614 static void
1615 vmx_combine_add_u_mask (uint32_t * dest,
1616  const uint32_t *src,
1617  const uint32_t *mask,
1618  int width)
1619 {
1620  int i;
1621  vector unsigned int vdest, vsrc, vmask;
1622  DECLARE_SRC_MASK_VAR;
1623  DECLARE_MASK_MASK_VAR;
1624 
1625  while (width && ((uintptr_t)dest & 15))
1626  {
1627  uint32_t m = ALPHA_8 (*mask++);
1628  uint32_t s = *src++;
1629  uint32_t d = *dest;
1630 
1631  UN8x4_MUL_UN8 (s, m);
1632  UN8x4_ADD_UN8x4 (d, s);
1633 
1634  *dest++ = d;
1635  width--;
1636  }
1637 
1638  COMPUTE_SHIFT_MASKC (dest, src, mask);
1639 
1640  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1641  for (i = width / 4; i > 0; i--)
1642  {
1643  LOAD_VECTORSM (dest, src, mask);
1644 
1645  vdest = pix_add (vsrc, vdest);
1646 
1647  STORE_VECTOR (dest);
1648 
1649  src += 4;
1650  dest += 4;
1651  mask += 4;
1652  }
1653 
1654  for (i = width % 4; --i >= 0;)
1655  {
1656  uint32_t m = ALPHA_8 (mask[i]);
1657  uint32_t s = src[i];
1658  uint32_t d = dest[i];
1659 
1660  UN8x4_MUL_UN8 (s, m);
1661  UN8x4_ADD_UN8x4 (d, s);
1662 
1663  dest[i] = d;
1664  }
1665 }
1666 
1667 static void
1668 vmx_combine_add_u (pixman_implementation_t *imp,
1669  pixman_op_t op,
1670  uint32_t * dest,
1671  const uint32_t * src,
1672  const uint32_t * mask,
1673  int width)
1674 {
1675  if (mask)
1676  vmx_combine_add_u_mask (dest, src, mask, width);
1677  else
1678  vmx_combine_add_u_no_mask (dest, src, width);
1679 }
1680 
1681 static void
1682 vmx_combine_src_ca (pixman_implementation_t *imp,
1683  pixman_op_t op,
1684  uint32_t * dest,
1685  const uint32_t * src,
1686  const uint32_t * mask,
1687  int width)
1688 {
1689  int i;
1690  vector unsigned int vdest, vsrc, vmask;
1691  DECLARE_SRC_MASK_VAR;
1692  DECLARE_MASK_MASK_VAR;
1693 
1694  while (width && ((uintptr_t)dest & 15))
1695  {
1696  uint32_t a = *mask++;
1697  uint32_t s = *src++;
1698 
1699  UN8x4_MUL_UN8x4 (s, a);
1700 
1701  *dest++ = s;
1702  width--;
1703  }
1704 
1705  COMPUTE_SHIFT_MASKC (dest, src, mask);
1706 
1707  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1708  for (i = width / 4; i > 0; i--)
1709  {
1710  LOAD_VECTORSC (dest, src, mask);
1711 
1712  vdest = pix_multiply (vsrc, vmask);
1713 
1714  STORE_VECTOR (dest);
1715 
1716  mask += 4;
1717  src += 4;
1718  dest += 4;
1719  }
1720 
1721  for (i = width % 4; --i >= 0;)
1722  {
1723  uint32_t a = mask[i];
1724  uint32_t s = src[i];
1725 
1726  UN8x4_MUL_UN8x4 (s, a);
1727 
1728  dest[i] = s;
1729  }
1730 }
1731 
1732 static void
1733 vmx_combine_over_ca (pixman_implementation_t *imp,
1734  pixman_op_t op,
1735  uint32_t * dest,
1736  const uint32_t * src,
1737  const uint32_t * mask,
1738  int width)
1739 {
1740  int i;
1741  vector unsigned int vdest, vsrc, vmask;
1742  DECLARE_SRC_MASK_VAR;
1743  DECLARE_MASK_MASK_VAR;
1744 
1745  while (width && ((uintptr_t)dest & 15))
1746  {
1747  uint32_t a = *mask++;
1748  uint32_t s = *src++;
1749  uint32_t d = *dest;
1750  uint32_t sa = ALPHA_8 (s);
1751 
1752  UN8x4_MUL_UN8x4 (s, a);
1753  UN8x4_MUL_UN8 (a, sa);
1754  UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
1755 
1756  *dest++ = d;
1757  width--;
1758  }
1759 
1760  COMPUTE_SHIFT_MASKC (dest, src, mask);
1761 
1762  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1763  for (i = width / 4; i > 0; i--)
1764  {
1765  LOAD_VECTORSC (dest, src, mask);
1766 
1767  vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);
1768 
1769  STORE_VECTOR (dest);
1770 
1771  mask += 4;
1772  src += 4;
1773  dest += 4;
1774  }
1775 
1776  for (i = width % 4; --i >= 0;)
1777  {
1778  uint32_t a = mask[i];
1779  uint32_t s = src[i];
1780  uint32_t d = dest[i];
1781  uint32_t sa = ALPHA_8 (s);
1782 
1783  UN8x4_MUL_UN8x4 (s, a);
1784  UN8x4_MUL_UN8 (a, sa);
1785  UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
1786 
1787  dest[i] = d;
1788  }
1789 }
1790 
1791 static void
1792 vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
1793  pixman_op_t op,
1794  uint32_t * dest,
1795  const uint32_t * src,
1796  const uint32_t * mask,
1797  int width)
1798 {
1799  int i;
1800  vector unsigned int vdest, vsrc, vmask;
1801  DECLARE_SRC_MASK_VAR;
1802  DECLARE_MASK_MASK_VAR;
1803 
1804  while (width && ((uintptr_t)dest & 15))
1805  {
1806  uint32_t a = *mask++;
1807  uint32_t s = *src++;
1808  uint32_t d = *dest;
1809  uint32_t ida = ALPHA_8 (~d);
1810 
1811  UN8x4_MUL_UN8x4 (s, a);
1812  UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);
1813 
1814  *dest++ = s;
1815  width--;
1816  }
1817 
1818  COMPUTE_SHIFT_MASKC (dest, src, mask);
1819 
1820  /* printf("%s\n",__PRETTY_FUNCTION__); */
1821  for (i = width / 4; i > 0; i--)
1822  {
1823  LOAD_VECTORSC (dest, src, mask);
1824 
1825  vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));
1826 
1827  STORE_VECTOR (dest);
1828 
1829  mask += 4;
1830  src += 4;
1831  dest += 4;
1832  }
1833 
1834  for (i = width % 4; --i >= 0;)
1835  {
1836  uint32_t a = mask[i];
1837  uint32_t s = src[i];
1838  uint32_t d = dest[i];
1839  uint32_t ida = ALPHA_8 (~d);
1840 
1841  UN8x4_MUL_UN8x4 (s, a);
1842  UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);
1843 
1844  dest[i] = s;
1845  }
1846 }
1847 
1848 static void
1849 vmx_combine_in_ca (pixman_implementation_t *imp,
1850  pixman_op_t op,
1851  uint32_t * dest,
1852  const uint32_t * src,
1853  const uint32_t * mask,
1854  int width)
1855 {
1856  int i;
1857  vector unsigned int vdest, vsrc, vmask;
1858  DECLARE_SRC_MASK_VAR;
1859  DECLARE_MASK_MASK_VAR;
1860 
1861  while (width && ((uintptr_t)dest & 15))
1862  {
1863  uint32_t a = *mask++;
1864  uint32_t s = *src++;
1865  uint32_t da = ALPHA_8 (*dest);
1866 
1867  UN8x4_MUL_UN8x4 (s, a);
1868  UN8x4_MUL_UN8 (s, da);
1869 
1870  *dest++ = s;
1871  width--;
1872  }
1873 
1874  COMPUTE_SHIFT_MASKC (dest, src, mask);
1875 
1876  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1877  for (i = width / 4; i > 0; i--)
1878  {
1879  LOAD_VECTORSC (dest, src, mask);
1880 
1881  vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
1882 
1883  STORE_VECTOR (dest);
1884 
1885  src += 4;
1886  dest += 4;
1887  mask += 4;
1888  }
1889 
1890  for (i = width % 4; --i >= 0;)
1891  {
1892  uint32_t a = mask[i];
1893  uint32_t s = src[i];
1894  uint32_t da = ALPHA_8 (dest[i]);
1895 
1896  UN8x4_MUL_UN8x4 (s, a);
1897  UN8x4_MUL_UN8 (s, da);
1898 
1899  dest[i] = s;
1900  }
1901 }
1902 
1903 static void
1904 vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
1905  pixman_op_t op,
1906  uint32_t * dest,
1907  const uint32_t * src,
1908  const uint32_t * mask,
1909  int width)
1910 {
1911  int i;
1912  vector unsigned int vdest, vsrc, vmask;
1913  DECLARE_SRC_MASK_VAR;
1914  DECLARE_MASK_MASK_VAR;
1915 
1916  while (width && ((uintptr_t)dest & 15))
1917  {
1918  uint32_t a = *mask++;
1919  uint32_t d = *dest;
1920  uint32_t sa = ALPHA_8 (*src++);
1921 
1922  UN8x4_MUL_UN8 (a, sa);
1923  UN8x4_MUL_UN8x4 (d, a);
1924 
1925  *dest++ = d;
1926  width--;
1927  }
1928 
1929  COMPUTE_SHIFT_MASKC (dest, src, mask);
1930 
1931  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1932  for (i = width / 4; i > 0; i--)
1933  {
1934 
1935  LOAD_VECTORSC (dest, src, mask);
1936 
1937  vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));
1938 
1939  STORE_VECTOR (dest);
1940 
1941  src += 4;
1942  dest += 4;
1943  mask += 4;
1944  }
1945 
1946  for (i = width % 4; --i >= 0;)
1947  {
1948  uint32_t a = mask[i];
1949  uint32_t d = dest[i];
1950  uint32_t sa = ALPHA_8 (src[i]);
1951 
1952  UN8x4_MUL_UN8 (a, sa);
1953  UN8x4_MUL_UN8x4 (d, a);
1954 
1955  dest[i] = d;
1956  }
1957 }
1958 
1959 static void
1960 vmx_combine_out_ca (pixman_implementation_t *imp,
1961  pixman_op_t op,
1962  uint32_t * dest,
1963  const uint32_t * src,
1964  const uint32_t * mask,
1965  int width)
1966 {
1967  int i;
1968  vector unsigned int vdest, vsrc, vmask;
1969  DECLARE_SRC_MASK_VAR;
1970  DECLARE_MASK_MASK_VAR;
1971 
1972  while (width && ((uintptr_t)dest & 15))
1973  {
1974  uint32_t a = *mask++;
1975  uint32_t s = *src++;
1976  uint32_t d = *dest;
1977  uint32_t da = ALPHA_8 (~d);
1978 
1979  UN8x4_MUL_UN8x4 (s, a);
1980  UN8x4_MUL_UN8 (s, da);
1981 
1982  *dest++ = s;
1983  width--;
1984  }
1985 
1986  COMPUTE_SHIFT_MASKC (dest, src, mask);
1987 
1988  /* printf ("%s\n",__PRETTY_FUNCTION__); */
1989  for (i = width / 4; i > 0; i--)
1990  {
1991  LOAD_VECTORSC (dest, src, mask);
1992 
1993  vdest = pix_multiply (
1994  pix_multiply (vsrc, vmask), splat_alpha (negate (vdest)));
1995 
1996  STORE_VECTOR (dest);
1997 
1998  src += 4;
1999  dest += 4;
2000  mask += 4;
2001  }
2002 
2003  for (i = width % 4; --i >= 0;)
2004  {
2005  uint32_t a = mask[i];
2006  uint32_t s = src[i];
2007  uint32_t d = dest[i];
2008  uint32_t da = ALPHA_8 (~d);
2009 
2010  UN8x4_MUL_UN8x4 (s, a);
2011  UN8x4_MUL_UN8 (s, da);
2012 
2013  dest[i] = s;
2014  }
2015 }
2016 
2017 static void
2018 vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
2019  pixman_op_t op,
2020  uint32_t * dest,
2021  const uint32_t * src,
2022  const uint32_t * mask,
2023  int width)
2024 {
2025  int i;
2026  vector unsigned int vdest, vsrc, vmask;
2027  DECLARE_SRC_MASK_VAR;
2028  DECLARE_MASK_MASK_VAR;
2029 
2030  while (width && ((uintptr_t)dest & 15))
2031  {
2032  uint32_t a = *mask++;
2033  uint32_t s = *src++;
2034  uint32_t d = *dest;
2035  uint32_t sa = ALPHA_8 (s);
2036 
2037  UN8x4_MUL_UN8 (a, sa);
2038  UN8x4_MUL_UN8x4 (d, ~a);
2039 
2040  *dest++ = d;
2041  width--;
2042  }
2043 
2044  COMPUTE_SHIFT_MASKC (dest, src, mask);
2045 
2046  /* printf ("%s\n",__PRETTY_FUNCTION__); */
2047  for (i = width / 4; i > 0; i--)
2048  {
2049  LOAD_VECTORSC (dest, src, mask);
2050 
2051  vdest = pix_multiply (
2052  vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));
2053 
2054  STORE_VECTOR (dest);
2055 
2056  src += 4;
2057  dest += 4;
2058  mask += 4;
2059  }
2060 
2061  for (i = width % 4; --i >= 0;)
2062  {
2063  uint32_t a = mask[i];
2064  uint32_t s = src[i];
2065  uint32_t d = dest[i];
2066  uint32_t sa = ALPHA_8 (s);
2067 
2068  UN8x4_MUL_UN8 (a, sa);
2069  UN8x4_MUL_UN8x4 (d, ~a);
2070 
2071  dest[i] = d;
2072  }
2073 }
2074 
2075 static void
2076 vmx_combine_atop_ca (pixman_implementation_t *imp,
2077  pixman_op_t op,
2078  uint32_t * dest,
2079  const uint32_t * src,
2080  const uint32_t * mask,
2081  int width)
2082 {
2083  int i;
2084  vector unsigned int vdest, vsrc, vmask, vsrca;
2085  DECLARE_SRC_MASK_VAR;
2086  DECLARE_MASK_MASK_VAR;
2087 
2088  while (width && ((uintptr_t)dest & 15))
2089  {
2090  uint32_t a = *mask++;
2091  uint32_t s = *src++;
2092  uint32_t d = *dest;
2093  uint32_t sa = ALPHA_8 (s);
2094  uint32_t da = ALPHA_8 (d);
2095 
2096  UN8x4_MUL_UN8x4 (s, a);
2097  UN8x4_MUL_UN8 (a, sa);
2098  UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
2099 
2100  *dest++ = d;
2101  width--;
2102  }
2103 
2104  COMPUTE_SHIFT_MASKC (dest, src, mask);
2105 
2106  /* printf ("%s\n",__PRETTY_FUNCTION__); */
2107  for (i = width / 4; i > 0; i--)
2108  {
2109  LOAD_VECTORSC (dest, src, mask);
2110 
2111  vsrca = splat_alpha (vsrc);
2112 
2113  vsrc = pix_multiply (vsrc, vmask);
2114  vmask = pix_multiply (vmask, vsrca);
2115 
2116  vdest = pix_add_mul (vsrc, splat_alpha (vdest),
2117  negate (vmask), vdest);
2118 
2119  STORE_VECTOR (dest);
2120 
2121  src += 4;
2122  dest += 4;
2123  mask += 4;
2124  }
2125 
2126  for (i = width % 4; --i >= 0;)
2127  {
2128  uint32_t a = mask[i];
2129  uint32_t s = src[i];
2130  uint32_t d = dest[i];
2131  uint32_t sa = ALPHA_8 (s);
2132  uint32_t da = ALPHA_8 (d);
2133 
2134  UN8x4_MUL_UN8x4 (s, a);
2135  UN8x4_MUL_UN8 (a, sa);
2136  UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
2137 
2138  dest[i] = d;
2139  }
2140 }
2141 
2142 static void
2143 vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
2144  pixman_op_t op,
2145  uint32_t * dest,
2146  const uint32_t * src,
2147  const uint32_t * mask,
2148  int width)
2149 {
2150  int i;
2151  vector unsigned int vdest, vsrc, vmask;
2152  DECLARE_SRC_MASK_VAR;
2153  DECLARE_MASK_MASK_VAR;
2154 
2155  while (width && ((uintptr_t)dest & 15))
2156  {
2157  uint32_t a = *mask++;
2158  uint32_t s = *src++;
2159  uint32_t d = *dest;
2160  uint32_t sa = ALPHA_8 (s);
2161  uint32_t da = ALPHA_8 (~d);
2162 
2163  UN8x4_MUL_UN8x4 (s, a);
2164  UN8x4_MUL_UN8 (a, sa);
2165  UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
2166 
2167  *dest++ = d;
2168  width--;
2169  }
2170 
2171  COMPUTE_SHIFT_MASKC (dest, src, mask);
2172 
2173  /* printf ("%s\n",__PRETTY_FUNCTION__); */
2174  for (i = width / 4; i > 0; i--)
2175  {
2176  LOAD_VECTORSC (dest, src, mask);
2177 
2178  vdest = pix_add_mul (vdest,
2179  pix_multiply (vmask, splat_alpha (vsrc)),
2180  pix_multiply (vsrc, vmask),
2181  negate (splat_alpha (vdest)));
2182 
2183  STORE_VECTOR (dest);
2184 
2185  src += 4;
2186  dest += 4;
2187  mask += 4;
2188  }
2189 
2190  for (i = width % 4; --i >= 0;)
2191  {
2192  uint32_t a = mask[i];
2193  uint32_t s = src[i];
2194  uint32_t d = dest[i];
2195  uint32_t sa = ALPHA_8 (s);
2196  uint32_t da = ALPHA_8 (~d);
2197 
2198  UN8x4_MUL_UN8x4 (s, a);
2199  UN8x4_MUL_UN8 (a, sa);
2200  UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
2201 
2202  dest[i] = d;
2203  }
2204 }
2205 
2206 static void
2207 vmx_combine_xor_ca (pixman_implementation_t *imp,
2208  pixman_op_t op,
2209  uint32_t * dest,
2210  const uint32_t * src,
2211  const uint32_t * mask,
2212  int width)
2213 {
2214  int i;
2215  vector unsigned int vdest, vsrc, vmask;
2216  DECLARE_SRC_MASK_VAR;
2217  DECLARE_MASK_MASK_VAR;
2218 
2219  while (width && ((uintptr_t)dest & 15))
2220  {
2221  uint32_t a = *mask++;
2222  uint32_t s = *src++;
2223  uint32_t d = *dest;
2224  uint32_t sa = ALPHA_8 (s);
2225  uint32_t da = ALPHA_8 (~d);
2226 
2227  UN8x4_MUL_UN8x4 (s, a);
2228  UN8x4_MUL_UN8 (a, sa);
2229  UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
2230 
2231  *dest++ = d;
2232  width--;
2233  }
2234 
2235  COMPUTE_SHIFT_MASKC (dest, src, mask);
2236 
2237  /* printf ("%s\n",__PRETTY_FUNCTION__); */
2238  for (i = width / 4; i > 0; i--)
2239  {
2240  LOAD_VECTORSC (dest, src, mask);
2241 
2242  vdest = pix_add_mul (vdest,
2243  negate (pix_multiply (vmask, splat_alpha (vsrc))),
2244  pix_multiply (vsrc, vmask),
2245  negate (splat_alpha (vdest)));
2246 
2247  STORE_VECTOR (dest);
2248 
2249  src += 4;
2250  dest += 4;
2251  mask += 4;
2252  }
2253 
2254  for (i = width % 4; --i >= 0;)
2255  {
2256  uint32_t a = mask[i];
2257  uint32_t s = src[i];
2258  uint32_t d = dest[i];
2259  uint32_t sa = ALPHA_8 (s);
2260  uint32_t da = ALPHA_8 (~d);
2261 
2262  UN8x4_MUL_UN8x4 (s, a);
2263  UN8x4_MUL_UN8 (a, sa);
2264  UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
2265 
2266  dest[i] = d;
2267  }
2268 }
2269 
2270 static void
2271 vmx_combine_add_ca (pixman_implementation_t *imp,
2272  pixman_op_t op,
2273  uint32_t * dest,
2274  const uint32_t * src,
2275  const uint32_t * mask,
2276  int width)
2277 {
2278  int i;
2279  vector unsigned int vdest, vsrc, vmask;
2280  DECLARE_SRC_MASK_VAR;
2281  DECLARE_MASK_MASK_VAR;
2282 
2283  while (width && ((uintptr_t)dest & 15))
2284  {
2285  uint32_t a = *mask++;
2286  uint32_t s = *src++;
2287  uint32_t d = *dest;
2288 
2289  UN8x4_MUL_UN8x4 (s, a);
2290  UN8x4_ADD_UN8x4 (s, d);
2291 
2292  *dest++ = s;
2293  width--;
2294  }
2295 
2296  COMPUTE_SHIFT_MASKC (dest, src, mask);
2297 
2298  /* printf ("%s\n",__PRETTY_FUNCTION__); */
2299  for (i = width / 4; i > 0; i--)
2300  {
2301  LOAD_VECTORSC (dest, src, mask);
2302 
2303  vdest = pix_add (pix_multiply (vsrc, vmask), vdest);
2304 
2305  STORE_VECTOR (dest);
2306 
2307  src += 4;
2308  dest += 4;
2309  mask += 4;
2310  }
2311 
2312  for (i = width % 4; --i >= 0;)
2313  {
2314  uint32_t a = mask[i];
2315  uint32_t s = src[i];
2316  uint32_t d = dest[i];
2317 
2318  UN8x4_MUL_UN8x4 (s, a);
2319  UN8x4_ADD_UN8x4 (s, d);
2320 
2321  dest[i] = s;
2322  }
2323 }
2324 
2325 static void
2326 vmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
2327  pixman_composite_info_t *info)
2328 {
2329  PIXMAN_COMPOSITE_ARGS (info);
2330  uint32_t src, srca;
2331  uint32_t *dst_line, *dst;
2332  uint8_t *mask_line;
2333  int dst_stride, mask_stride;
2334  int32_t w;
2335  uint32_t m, d, s, ia;
2336 
2337  vector unsigned int vsrc, valpha, vmask, vdst;
2338 
2339  src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2340 
2341  srca = ALPHA_8(src);
2342  if (src == 0)
2343  return;
2344 
2345  PIXMAN_IMAGE_GET_LINE (
2346  dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2347  PIXMAN_IMAGE_GET_LINE (
2348  mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
2349 
2350  vsrc = (vector unsigned int) {src, src, src, src};
2351  valpha = splat_alpha(vsrc);
2352 
2353  while (height--)
2354  {
2355  const uint8_t *pm = mask_line;
2356  dst = dst_line;
2357  dst_line += dst_stride;
2358  mask_line += mask_stride;
2359  w = width;
2360 
2361  while (w && (uintptr_t)dst & 15)
2362  {
2363  s = src;
2364  m = *pm++;
2365 
2366  if (m)
2367  {
2368  d = *dst;
2369  UN8x4_MUL_UN8 (s, m);
2370  ia = ALPHA_8 (~s);
2371  UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
2372  *dst = d;
2373  }
2374 
2375  w--;
2376  dst++;
2377  }
2378 
2379  while (w >= 4)
2380  {
2381  m = *((uint32_t*)pm);
2382 
2383  if (srca == 0xff && m == 0xffffffff)
2384  {
2385  save_128_aligned(dst, vsrc);
2386  }
2387  else if (m)
2388  {
2389  vmask = splat_pixel((vector unsigned int) {m, m, m, m});
2390 
2391  /* dst is 16-byte aligned */
2392  vdst = in_over (vsrc, valpha, vmask, load_128_aligned (dst));
2393 
2394  save_128_aligned(dst, vdst);
2395  }
2396 
2397  w -= 4;
2398  dst += 4;
2399  pm += 4;
2400  }
2401 
2402  while (w)
2403  {
2404  s = src;
2405  m = *pm++;
2406 
2407  if (m)
2408  {
2409  d = *dst;
2410  UN8x4_MUL_UN8 (s, m);
2411  ia = ALPHA_8 (~s);
2412  UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
2413  *dst = d;
2414  }
2415 
2416  w--;
2417  dst++;
2418  }
2419  }
2420 
2421 }
2422 
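/* vmx_fill: replicate the fill value to a full vector, align the
 * destination with scalar stores, then write 128/64/32/16-byte runs
 * with vec_st before finishing the tail a word at a time. */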
2423 static pixman_bool_t
2424 vmx_fill (pixman_implementation_t *imp,
2425  uint32_t * bits,
2426  int stride,
2427  int bpp,
2428  int x,
2429  int y,
2430  int width,
2431  int height,
2432  uint32_t filler)
2433 {
2434  uint32_t byte_width;
2435  uint8_t *byte_line;
2436 
2437  vector unsigned int vfiller;
2438 
2439  if (bpp == 8)
2440  {
2441  uint8_t b;
2442  uint16_t w;
2443 
2444  stride = stride * (int) sizeof (uint32_t) / 1;
2445  byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
2446  byte_width = width;
2447  stride *= 1;
2448 
2449  b = filler & 0xff;
2450  w = (b << 8) | b;
2451  filler = (w << 16) | w;
2452  }
2453  else if (bpp == 16)
2454  {
2455  stride = stride * (int) sizeof (uint32_t) / 2;
2456  byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
2457  byte_width = 2 * width;
2458  stride *= 2;
2459 
2460  filler = (filler & 0xffff) * 0x00010001;
2461  }
2462  else if (bpp == 32)
2463  {
2464  stride = stride * (int) sizeof (uint32_t) / 4;
2465  byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
2466  byte_width = 4 * width;
2467  stride *= 4;
2468  }
2469  else
2470  {
2471  return FALSE;
2472  }
2473 
2474  vfiller = create_mask_1x32_128(&filler);
2475 
2476  while (height--)
2477  {
2478  int w;
2479  uint8_t *d = byte_line;
2480  byte_line += stride;
2481  w = byte_width;
2482 
2483  if (w >= 1 && ((uintptr_t)d & 1))
2484  {
2485  *(uint8_t *)d = filler;
2486  w -= 1;
2487  d += 1;
2488  }
2489 
2490  while (w >= 2 && ((uintptr_t)d & 3))
2491  {
2492  *(uint16_t *)d = filler;
2493  w -= 2;
2494  d += 2;
2495  }
2496 
2497  while (w >= 4 && ((uintptr_t)d & 15))
2498  {
2499  *(uint32_t *)d = filler;
2500 
2501  w -= 4;
2502  d += 4;
2503  }
2504 
2505  while (w >= 128)
2506  {
2507  vec_st(vfiller, 0, (uint32_t *) d);
2508  vec_st(vfiller, 0, (uint32_t *) d + 4);
2509  vec_st(vfiller, 0, (uint32_t *) d + 8);
2510  vec_st(vfiller, 0, (uint32_t *) d + 12);
2511  vec_st(vfiller, 0, (uint32_t *) d + 16);
2512  vec_st(vfiller, 0, (uint32_t *) d + 20);
2513  vec_st(vfiller, 0, (uint32_t *) d + 24);
2514  vec_st(vfiller, 0, (uint32_t *) d + 28);
2515 
2516  d += 128;
2517  w -= 128;
2518  }
2519 
2520  if (w >= 64)
2521  {
2522  vec_st(vfiller, 0, (uint32_t *) d);
2523  vec_st(vfiller, 0, (uint32_t *) d + 4);
2524  vec_st(vfiller, 0, (uint32_t *) d + 8);
2525  vec_st(vfiller, 0, (uint32_t *) d + 12);
2526 
2527  d += 64;
2528  w -= 64;
2529  }
2530 
2531  if (w >= 32)
2532  {
2533  vec_st(vfiller, 0, (uint32_t *) d);
2534  vec_st(vfiller, 0, (uint32_t *) d + 4);
2535 
2536  d += 32;
2537  w -= 32;
2538  }
2539 
2540  if (w >= 16)
2541  {
2542  vec_st(vfiller, 0, (uint32_t *) d);
2543 
2544  d += 16;
2545  w -= 16;
2546  }
2547 
2548  while (w >= 4)
2549  {
2550  *(uint32_t *)d = filler;
2551 
2552  w -= 4;
2553  d += 4;
2554  }
2555 
2556  if (w >= 2)
2557  {
2558  *(uint16_t *)d = filler;
2559  w -= 2;
2560  d += 2;
2561  }
2562 
2563  if (w >= 1)
2564  {
2565  *(uint8_t *)d = filler;
2566  w -= 1;
2567  d += 1;
2568  }
2569  }
2570 
2571  return TRUE;
2572 }
2573 
2574 static void
2575 vmx_composite_src_x888_8888 (pixman_implementation_t *imp,
2576  pixman_composite_info_t *info)
2577 {
2578  PIXMAN_COMPOSITE_ARGS (info);
2579  uint32_t *dst_line, *dst;
2580  uint32_t *src_line, *src;
2581  int32_t w;
2582  int dst_stride, src_stride;
2583 
2584  PIXMAN_IMAGE_GET_LINE (
2585  dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2586  PIXMAN_IMAGE_GET_LINE (
2587  src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
2588 
2589  while (height--)
2590  {
2591  dst = dst_line;
2592  dst_line += dst_stride;
2593  src = src_line;
2594  src_line += src_stride;
2595  w = width;
2596 
2597  while (w && (uintptr_t)dst & 15)
2598  {
2599  *dst++ = *src++ | 0xff000000;
2600  w--;
2601  }
2602 
2603  while (w >= 16)
2604  {
2605  vector unsigned int vmx_src1, vmx_src2, vmx_src3, vmx_src4;
2606 
2607  vmx_src1 = load_128_unaligned (src);
2608  vmx_src2 = load_128_unaligned (src + 4);
2609  vmx_src3 = load_128_unaligned (src + 8);
2610  vmx_src4 = load_128_unaligned (src + 12);
2611 
2612  save_128_aligned (dst, vec_or (vmx_src1, mask_ff000000));
2613  save_128_aligned (dst + 4, vec_or (vmx_src2, mask_ff000000));
2614  save_128_aligned (dst + 8, vec_or (vmx_src3, mask_ff000000));
2615  save_128_aligned (dst + 12, vec_or (vmx_src4, mask_ff000000));
2616 
2617  dst += 16;
2618  src += 16;
2619  w -= 16;
2620  }
2621 
2622  while (w)
2623  {
2624  *dst++ = *src++ | 0xff000000;
2625  w--;
2626  }
2627  }
2628 }
2629 
2630 static void
2631 vmx_composite_over_n_8888 (pixman_implementation_t *imp,
2632  pixman_composite_info_t *info)
2633 {
2634  PIXMAN_COMPOSITE_ARGS (info);
2635  uint32_t *dst_line, *dst;
2636  uint32_t src, ia;
2637  int i, w, dst_stride;
2638  vector unsigned int vdst, vsrc, via;
2639 
2640  src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2641 
2642  if (src == 0)
2643  return;
2644 
2645  PIXMAN_IMAGE_GET_LINE (
2646  dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2647 
2648  vsrc = (vector unsigned int){src, src, src, src};
2649  via = negate (splat_alpha (vsrc));
2650  ia = ALPHA_8 (~src);
2651 
2652  while (height--)
2653  {
2654  dst = dst_line;
2655  dst_line += dst_stride;
2656  w = width;
2657 
2658  while (w && ((uintptr_t)dst & 15))
2659  {
2660  uint32_t d = *dst;
2661  UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
2662  *dst++ = d;
2663  w--;
2664  }
2665 
2666  for (i = w / 4; i > 0; i--)
2667  {
2668  vdst = pix_multiply (load_128_aligned (dst), via);
2669  save_128_aligned (dst, pix_add (vsrc, vdst));
2670  dst += 4;
2671  }
2672 
2673  for (i = w % 4; --i >= 0;)
2674  {
2675  uint32_t d = dst[i];
2676  UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
2677  dst[i] = d;
2678  }
2679  }
2680 }
2681 
2682 static void
2683 vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
2684  pixman_composite_info_t *info)
2685 {
2686  PIXMAN_COMPOSITE_ARGS (info);
2687  int dst_stride, src_stride;
2688  uint32_t *dst_line, *dst;
2689  uint32_t *src_line, *src;
2690 
2691  PIXMAN_IMAGE_GET_LINE (
2692  dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2693  PIXMAN_IMAGE_GET_LINE (
2694  src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
2695 
2696  dst = dst_line;
2697  src = src_line;
2698 
2699  while (height--)
2700  {
2701  vmx_combine_over_u (imp, op, dst, src, NULL, width);
2702 
2703  dst += dst_stride;
2704  src += src_stride;
2705  }
2706 }
2707 
2708 static void
2709 vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
2710  pixman_composite_info_t *info)
2711 {
2712  PIXMAN_COMPOSITE_ARGS (info);
2713  uint32_t src, ia;
2714  uint32_t *dst_line, d;
2715  uint32_t *mask_line, m;
2716  uint32_t pack_cmp;
2717  int dst_stride, mask_stride;
2718 
2719  vector unsigned int vsrc, valpha, vmask, vdest;
2720 
2721  src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2722 
2723  if (src == 0)
2724  return;
2725 
2726  PIXMAN_IMAGE_GET_LINE (
2727  dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2728  PIXMAN_IMAGE_GET_LINE (
2729  mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
2730 
2731  vsrc = (vector unsigned int) {src, src, src, src};
2732  valpha = splat_alpha(vsrc);
2733  ia = ALPHA_8 (src);
2734 
2735  while (height--)
2736  {
2737  int w = width;
2738  const uint32_t *pm = (uint32_t *)mask_line;
2739  uint32_t *pd = (uint32_t *)dst_line;
2740  uint32_t s;
2741 
2742  dst_line += dst_stride;
2743  mask_line += mask_stride;
2744 
2745  while (w && (uintptr_t)pd & 15)
2746  {
2747  s = src;
2748  m = *pm++;
2749 
2750  if (m)
2751  {
2752  d = *pd;
2753  UN8x4_MUL_UN8x4 (s, m);
2754  UN8x4_MUL_UN8 (m, ia);
2755  m = ~m;
2756  UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s);
2757  *pd = d;
2758  }
2759 
2760  pd++;
2761  w--;
2762  }
2763 
2764  while (w >= 4)
2765  {
2766  /* pm is NOT necessarily 16-byte aligned */
2767  vmask = load_128_unaligned (pm);
2768 
2769  pack_cmp = vec_all_eq(vmask, (vector unsigned int) AVV(0));
2770 
2771  /* if all bits in mask are zero, pack_cmp is not 0 */
2772  if (pack_cmp == 0)
2773  {
2774  /* pd is 16-byte aligned */
2775  vdest = in_over (vsrc, valpha, vmask, load_128_aligned (pd));
2776 
2777  save_128_aligned(pd, vdest);
2778  }
2779 
2780  pd += 4;
2781  pm += 4;
2782  w -= 4;
2783  }
2784 
2785  while (w)
2786  {
2787  s = src;
2788  m = *pm++;
2789 
2790  if (m)
2791  {
2792  d = *pd;
2793  UN8x4_MUL_UN8x4 (s, m);
2794  UN8x4_MUL_UN8 (m, ia);
2795  m = ~m;
2796  UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s);
2797  *pd = d;
2798  }
2799 
2800  pd++;
2801  w--;
2802  }
2803  }
2804 }
2805 
2806 static void
2807 vmx_composite_add_8_8 (pixman_implementation_t *imp,
2808  pixman_composite_info_t *info)
2809 {
2810  PIXMAN_COMPOSITE_ARGS (info);
2811  uint8_t *dst_line, *dst;
2812  uint8_t *src_line, *src;
2813  int dst_stride, src_stride;
2814  int32_t w;
2815  uint16_t t;
2816 
2817  PIXMAN_IMAGE_GET_LINE (
2818  src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
2819  PIXMAN_IMAGE_GET_LINE (
2820  dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
2821 
2822  while (height--)
2823  {
2824  dst = dst_line;
2825  src = src_line;
2826 
2827  dst_line += dst_stride;
2828  src_line += src_stride;
2829  w = width;
2830 
2831  /* Small head */
2832  while (w && (uintptr_t)dst & 3)
2833  {
2834  t = (*dst) + (*src++);
2835  *dst++ = t | (0 - (t >> 8));
2836  w--;
2837  }
2838 
2839  vmx_combine_add_u (imp, op,
2840  (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
2841 
2842  /* Small tail */
2843  dst += w & 0xfffc;
2844  src += w & 0xfffc;
2845 
2846  w &= 3;
2847 
2848  while (w)
2849  {
2850  t = (*dst) + (*src++);
2851  *dst++ = t | (0 - (t >> 8));
2852  w--;
2853  }
2854  }
2855 }
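
The head and tail loops above use a branch-free byte saturation: after adding two 8-bit values into a 16-bit temporary, t >> 8 is 1 exactly when the sum overflowed, so OR-ing with 0 - (t >> 8) (all ones on overflow) clamps the result to 0xff. A standalone sketch:

    #include <stdint.h>

    /* Standalone sketch of the trick used above. */
    static uint8_t
    add_sat_u8 (uint8_t a, uint8_t b)
    {
        uint16_t t = (uint16_t) (a + b);      /* sum fits in 9 bits */

        return (uint8_t) (t | (0 - (t >> 8)));
    }

    /* add_sat_u8 (200, 100) == 255; add_sat_u8 (10, 20) == 30 */
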
2856 
2857 static void
2858 vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
2859                              pixman_composite_info_t *info)
2860 {
2861  PIXMAN_COMPOSITE_ARGS (info);
2862  uint32_t *dst_line, *dst;
2863  uint32_t *src_line, *src;
2864  int dst_stride, src_stride;
2865 
2866  PIXMAN_IMAGE_GET_LINE (
2867  src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
2868  PIXMAN_IMAGE_GET_LINE (
2869  dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2870 
2871  while (height--)
2872  {
2873  dst = dst_line;
2874  dst_line += dst_stride;
2875  src = src_line;
2876  src_line += src_stride;
2877 
2878  vmx_combine_add_u (imp, op, dst, src, NULL, width);
2879  }
2880 }
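
vmx_composite_add_8888_8888 again delegates per scanline, to vmx_combine_add_u, whose vector core uses a saturating byte add across all 16 lanes of the register. A scalar sketch of the per-pixel equivalent for one a8r8g8b8 pixel pair (illustrative helper):

    #include <stdint.h>

    /* Illustrative scalar version; the vector path saturates all 16
     * bytes of a 128-bit register at once. */
    static uint32_t
    add_8888_scalar (uint32_t s, uint32_t d)
    {
        uint32_t result = 0;
        int shift;

        for (shift = 0; shift < 32; shift += 8)
        {
            uint32_t t = ((s >> shift) & 0xff) + ((d >> shift) & 0xff);

            if (t > 0xff)
                t = 0xff;                 /* per-channel saturation */
            result |= t << shift;
        }
        return result;
    }
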
2881 
2882 static force_inline void
2883 scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t* pd,
2884  const uint32_t* ps,
2885  int32_t w,
2886  pixman_fixed_t vx,
2887  pixman_fixed_t unit_x,
2888  pixman_fixed_t src_width_fixed,
2889  pixman_bool_t fully_transparent_src)
2890 {
2891  uint32_t s, d;
2892  const uint32_t* pm = NULL;
2893 
2894  vector unsigned int vsrc, vdst;
2895 
2896  if (fully_transparent_src)
2897  return;
2898 
2899  /* Align dst on a 16-byte boundary */
2900  while (w && ((uintptr_t)pd & 15))
2901  {
2902  d = *pd;
2903  s = combine1 (ps + pixman_fixed_to_int (vx), pm);
2904  vx += unit_x;
2905  while (vx >= 0)
2906  vx -= src_width_fixed;
2907 
2908  *pd++ = core_combine_over_u_pixel_vmx (s, d);
2909  if (pm)
2910  pm++;
2911  w--;
2912  }
2913 
2914  while (w >= 4)
2915  {
2916  vector unsigned int tmp;
2917  uint32_t tmp1, tmp2, tmp3, tmp4;
2918 
2919  tmp1 = *(ps + pixman_fixed_to_int (vx));
2920  vx += unit_x;
2921  while (vx >= 0)
2922  vx -= src_width_fixed;
2923  tmp2 = *(ps + pixman_fixed_to_int (vx));
2924  vx += unit_x;
2925  while (vx >= 0)
2926  vx -= src_width_fixed;
2927  tmp3 = *(ps + pixman_fixed_to_int (vx));
2928  vx += unit_x;
2929  while (vx >= 0)
2930  vx -= src_width_fixed;
2931  tmp4 = *(ps + pixman_fixed_to_int (vx));
2932  vx += unit_x;
2933  while (vx >= 0)
2934  vx -= src_width_fixed;
2935 
2936  tmp[0] = tmp1;
2937  tmp[1] = tmp2;
2938  tmp[2] = tmp3;
2939  tmp[3] = tmp4;
2940 
2941  vsrc = combine4 ((const uint32_t *) &tmp, pm);
2942 
2943  if (is_opaque (vsrc))
2944  {
2945  save_128_aligned (pd, vsrc);
2946  }
2947  else if (!is_zero (vsrc))
2948  {
2949  vdst = over(vsrc, splat_alpha(vsrc), load_128_aligned (pd));
2950 
2951  save_128_aligned (pd, vdst);
2952  }
2953 
2954  w -= 4;
2955  pd += 4;
2956  if (pm)
2957  pm += 4;
2958  }
2959 
2960  while (w)
2961  {
2962  d = *pd;
2963  s = combine1 (ps + pixman_fixed_to_int (vx), pm);
2964  vx += unit_x;
2965  while (vx >= 0)
2966  vx -= src_width_fixed;
2967 
2968  *pd++ = core_combine_over_u_pixel_vmx (s, d);
2969  if (pm)
2970  pm++;
2971 
2972  w--;
2973  }
2974 }
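
The scanline walker above steps through the source in 16.16 fixed point: pixman_fixed_to_int truncates vx to a pixel index, unit_x advances it, and for NORMAL repeat vx is wrapped by repeatedly subtracting src_width_fixed (the caller biases ps so the resulting indices land in range). A sketch of that stepping, with illustrative names:

    #include <stdint.h>

    typedef int32_t fixed_16_16;                 /* stand-in for pixman_fixed_t */

    #define FIXED_TO_INT(f) ((int) ((f) >> 16))  /* truncate the fraction */

    /* Illustrative only: return the current source index, then advance
     * vx and wrap it below zero for NORMAL repeat. */
    static int
    next_src_index (fixed_16_16 *vx, fixed_16_16 unit_x,
                    fixed_16_16 src_width_fixed)
    {
        int idx = FIXED_TO_INT (*vx);

        *vx += unit_x;
        while (*vx >= 0)
            *vx -= src_width_fixed;

        return idx;
    }
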
2975 
2976 FAST_NEAREST_MAINLOOP (vmx_8888_8888_cover_OVER,
2977                        scaled_nearest_scanline_vmx_8888_8888_OVER,
2978                        uint32_t, uint32_t, COVER)
2979 FAST_NEAREST_MAINLOOP (vmx_8888_8888_none_OVER,
2980                        scaled_nearest_scanline_vmx_8888_8888_OVER,
2981                        uint32_t, uint32_t, NONE)
2982 FAST_NEAREST_MAINLOOP (vmx_8888_8888_pad_OVER,
2983                        scaled_nearest_scanline_vmx_8888_8888_OVER,
2984                        uint32_t, uint32_t, PAD)
2985 FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER,
2986                        scaled_nearest_scanline_vmx_8888_8888_OVER,
2987                        uint32_t, uint32_t, NORMAL)
2988 
2989 static const pixman_fast_path_t vmx_fast_paths[] =
2990 {
2991  PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, vmx_composite_over_n_8888),
2992  PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, vmx_composite_over_n_8888),
2993  PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
2994  PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
2995  PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
2996  PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, vmx_composite_over_8888_8888),
2997  PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, vmx_composite_over_n_8_8888),
2998  PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, vmx_composite_over_n_8_8888),
2999  PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, vmx_composite_over_n_8_8888),
3000  PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, vmx_composite_over_n_8_8888),
3001  PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, vmx_composite_over_n_8888_8888_ca),
3002  PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, vmx_composite_over_n_8888_8888_ca),
3003  PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, vmx_composite_over_n_8888_8888_ca),
3004  PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, vmx_composite_over_n_8888_8888_ca),
3005 
3006  /* PIXMAN_OP_ADD */
3007  PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8),
3008  PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, vmx_composite_add_8888_8888),
3009  PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888),
3010 
3011  /* PIXMAN_OP_SRC */
3012  PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888),
3013  PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888),
3014 
3015  SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, vmx_8888_8888),
3016  SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, vmx_8888_8888),
3017  SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, vmx_8888_8888),
3018  SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, vmx_8888_8888),
3019 
3020  { PIXMAN_OP_NONE },
3021 };
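
At composite time, pixman scans a table like this from top to bottom and takes the first entry whose operator and source/mask/destination formats match, falling back to a more general implementation when it reaches the terminator. An illustrative matcher (the real lookup in pixman's implementation layer also compares image flags; all names below are stand-ins):

    #include <stddef.h>

    /* All names are stand-ins; pixman's real tables use
     * pixman_fast_path_t and also check per-image flag requirements. */
    typedef struct
    {
        int op;
        int src_format;
        int mask_format;
        int dest_format;
        void (*func) (void);
    } toy_fast_path_t;

    static const toy_fast_path_t *
    toy_lookup (const toy_fast_path_t *table, int op_none,
                int op, int src, int mask, int dest)
    {
        for (; table->op != op_none; table++)
        {
            if (table->op == op &&
                table->src_format == src &&
                table->mask_format == mask &&
                table->dest_format == dest)
                return table;            /* first match wins */
        }
        return NULL;                     /* fall back to the delegate */
    }
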
3022 
3023 static uint32_t *
3024 vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
3025 {
3026  int w = iter->width;
3027  vector unsigned int ff000000 = mask_ff000000;
3028  uint32_t *dst = iter->buffer;
3029  uint32_t *src = (uint32_t *)iter->bits;
3030 
3031  iter->bits += iter->stride;
3032 
3033  while (w && ((uintptr_t)dst) & 0x0f)
3034  {
3035  *dst++ = (*src++) | 0xff000000;
3036  w--;
3037  }
3038 
3039  while (w >= 4)
3040  {
3041  save_128_aligned(dst, vec_or(load_128_unaligned(src), ff000000));
3042 
3043  dst += 4;
3044  src += 4;
3045  w -= 4;
3046  }
3047 
3048  while (w)
3049  {
3050  *dst++ = (*src++) | 0xff000000;
3051  w--;
3052  }
3053 
3054  return iter->buffer;
3055 }
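
This fetcher exists because x8r8g8b8 leaves its top byte undefined while downstream combiners expect valid a8r8g8b8 scanlines, so every pixel gets its alpha forced to 255. Scalar equivalent of the vec_or loop (illustrative name):

    #include <stdint.h>

    /* Scalar equivalent of the loops above. */
    static uint32_t
    x888_to_8888 (uint32_t pixel)
    {
        return pixel | 0xff000000;      /* force alpha to 255, keep rgb */
    }
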
3056 
3057 static uint32_t *
3058 vmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
3059 {
3060  int w = iter->width;
3061  uint32_t *dst = iter->buffer;
3062  uint8_t *src = iter->bits;
3063  vector unsigned int vmx0, vmx1, vmx2, vmx3, vmx4, vmx5, vmx6;
3064 
3065  iter->bits += iter->stride;
3066 
3067  while (w && (((uintptr_t)dst) & 15))
3068  {
3069  *dst++ = *(src++) << 24;
3070  w--;
3071  }
3072 
3073  while (w >= 16)
3074  {
3075  vmx0 = load_128_unaligned((uint32_t *) src);
3076 
3077  unpack_128_2x128((vector unsigned int) AVV(0), vmx0, &vmx1, &vmx2);
3078  unpack_128_2x128_16((vector unsigned int) AVV(0), vmx1, &vmx3, &vmx4);
3079  unpack_128_2x128_16((vector unsigned int) AVV(0), vmx2, &vmx5, &vmx6);
3080 
3081  save_128_aligned(dst, vmx6);
3082  save_128_aligned((dst + 4), vmx5);
3083  save_128_aligned((dst + 8), vmx4);
3084  save_128_aligned((dst + 12), vmx3);
3085 
3086  dst += 16;
3087  src += 16;
3088  w -= 16;
3089  }
3090 
3091  while (w)
3092  {
3093  *dst++ = *(src++) << 24;
3094  w--;
3095  }
3096 
3097  return iter->buffer;
3098 }
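
Likewise, the a8 fetcher widens each 8-bit alpha value into an a8r8g8b8 pixel with zero color channels, which is the valid premultiplied form of black at that alpha. Scalar equivalent of the shift in the head and tail loops (illustrative name):

    #include <stdint.h>

    /* Scalar equivalent of the loops above. */
    static uint32_t
    a8_to_8888 (uint8_t alpha)
    {
        return (uint32_t) alpha << 24;  /* alpha in the top byte, rgb = 0 */
    }
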
3099 
3100 #define IMAGE_FLAGS \
3101  (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
3102  FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
3103 
3104 static const pixman_iter_info_t vmx_iters[] =
3105 {
3106  { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
3107  _pixman_iter_init_bits_stride, vmx_fetch_x8r8g8b8, NULL
3108  },
3109  { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
3110  _pixman_iter_init_bits_stride, vmx_fetch_a8, NULL
3111  },
3112  { PIXMAN_null },
3113 };
3114 
3115 pixman_implementation_t *
3116 _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
3117 {
3118  pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
3119 
3120  /* VMX constants */
3121  mask_ff000000 = create_mask_32_128 (0xff000000);
3122  mask_red = create_mask_32_128 (0x00f80000);
3123  mask_green = create_mask_32_128 (0x0000fc00);
3124  mask_blue = create_mask_32_128 (0x000000f8);
3125  mask_565_fix_rb = create_mask_32_128 (0x00e000e0);
3126  mask_565_fix_g = create_mask_32_128 (0x0000c000);
3127 
3128  /* Set up function pointers */
3129 
3130  imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
3131  imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u;
3132  imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
3133  imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u;
3134  imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u;
3135  imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u;
3136  imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
3137  imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
3138  imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;
3139 
3140  imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;
3141 
3142  imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
3143  imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
3144  imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
3145  imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca;
3146  imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca;
3147  imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca;
3148  imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca;
3149  imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca;
3150  imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca;
3151  imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
3152  imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;
3153 
3154  imp->fill = vmx_fill;
3155 
3156  imp->iter_info = vmx_iters;
3157 
3158  return imp;
3159 }
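
For context, this constructor is one link in pixman's implementation chain: a CPU-specific implementation wraps a fallback and delegates whatever it does not accelerate. A hedged sketch of how a caller might assemble the chain; have_vmx() is a hypothetical stand-in for the runtime AltiVec detection pixman performs elsewhere:

    #include "pixman-private.h"

    /* have_vmx() is hypothetical; real detection lives outside this file. */
    extern pixman_bool_t have_vmx (void);

    pixman_implementation_t *
    choose_implementation (pixman_implementation_t *general_imp)
    {
        pixman_implementation_t *imp = general_imp;

    #ifdef USE_VMX
        if (have_vmx ())
            imp = _pixman_implementation_create_vmx (imp);
    #endif

        return imp;
    }
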