w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

pixman-ssse3.c
Go to the documentation of this file.
1 /*
2  * Copyright © 2013 Soren Sandmann Pedersen
3  * Copyright © 2013 Red Hat, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Author: Soren Sandmann (soren.sandmann@gmail.com)
25  */
26 #ifdef HAVE_CONFIG_H
27 #include <config.h>
28 #endif
29 
30 #include <stdlib.h>
31 #include <mmintrin.h>
32 #include <xmmintrin.h>
33 #include <emmintrin.h>
34 #include <tmmintrin.h>
35 #include "pixman-private.h"
36 #include "pixman-inlines.h"
37 
/* One cached, horizontally interpolated source row. */
typedef struct
{
    /* Source row this buffer was computed from.  It is initialised to -1
     * ("nothing cached") and set to the row number once the buffer has
     * been filled.
     */
    int        y;

    /* Interpolated pixels, one 64-bit entry (four 16-bit channel values)
     * per pixel.  It is read and written with aligned 128-bit loads and
     * stores, so it must be 16-byte aligned.
     */
    uint64_t * buffer;
} line_t;
43 
44 typedef struct
45 {
46  line_t lines[2];
49  uint64_t data[1];
51 
52 static void
54  int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
55 {
56  uint32_t *bits = image->bits + y * image->rowstride;
57  __m128i vx = _mm_set_epi16 (
58  - (x + 1), x, - (x + 1), x,
59  - (x + ux + 1), x + ux, - (x + ux + 1), x + ux);
60  __m128i vux = _mm_set_epi16 (
61  - 2 * ux, 2 * ux, - 2 * ux, 2 * ux,
62  - 2 * ux, 2 * ux, - 2 * ux, 2 * ux);
63  __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0);
64  __m128i *b = (__m128i *)line->buffer;
65  __m128i vrl0, vrl1;
66 
67  while ((n -= 2) >= 0)
68  {
69  __m128i vw, vr, s;
70 
71  vrl1 = _mm_loadl_epi64 (
72  (__m128i *)(bits + pixman_fixed_to_int (x + ux)));
73  /* vrl1: R1, L1 */
74 
75  final_pixel:
76  vrl0 = _mm_loadl_epi64 (
77  (__m128i *)(bits + pixman_fixed_to_int (x)));
78  /* vrl0: R0, L0 */
79 
80  /* The weights are based on vx which is a vector of
81  *
82  * - (x + 1), x, - (x + 1), x,
83  * - (x + ux + 1), x + ux, - (x + ux + 1), x + ux
84  *
85  * so the 16 bit weights end up like this:
86  *
87  * iw0, w0, iw0, w0, iw1, w1, iw1, w1
88  *
89  * and after shifting and packing, we get these bytes:
90  *
91  * iw0, w0, iw0, w0, iw1, w1, iw1, w1,
92  * iw0, w0, iw0, w0, iw1, w1, iw1, w1,
93  *
94  * which means the first and the second input pixel
95  * have to be interleaved like this:
96  *
97  * la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
98  * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
99  *
100  * before maddubsw can be used.
101  */
102 
103  vw = _mm_add_epi16 (
104  vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS));
105  /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1
106  */
107 
108  vw = _mm_packus_epi16 (vw, vw);
109  /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1,
110  * iw0, w0, iw0, w0, iw1, w1, iw1, w1
111  */
112  vx = _mm_add_epi16 (vx, vux);
113 
114  x += 2 * ux;
115 
116  vr = _mm_unpacklo_epi16 (vrl1, vrl0);
117  /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */
118 
119  s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2));
120  /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */
121 
122  vr = _mm_unpackhi_epi8 (vr, s);
123  /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
124  * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
125  */
126 
127  vr = _mm_maddubs_epi16 (vr, vw);
128 
129  /* When the weight is 0, the inverse weight is
130  * 128 which can't be represented in a signed byte.
131  * As a result maddubsw computes the following:
132  *
133  * r = l * -128 + r * 0
134  *
135  * rather than the desired
136  *
137  * r = l * 128 + r * 0
138  *
139  * We fix this by taking the absolute value of the
140  * result.
141  */
142  vr = _mm_abs_epi16 (vr);
143 
144  /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */
145  _mm_store_si128 (b++, vr);
146  }
147 
148  if (n == -1)
149  {
150  vrl1 = _mm_setzero_si128();
151  goto final_pixel;
152  }
153 
154  line->y = y;
155 }
156 
157 static uint32_t *
159 {
160  pixman_fixed_t fx, ux;
161  bilinear_info_t *info = iter->data;
162  line_t *line0, *line1;
163  int y0, y1;
164  int32_t dist_y;
165  __m128i vw;
166  int i;
167 
168  fx = info->x;
169  ux = iter->image->common.transform->matrix[0][0];
170 
171  y0 = pixman_fixed_to_int (info->y);
172  y1 = y0 + 1;
173 
174  line0 = &info->lines[y0 & 0x01];
175  line1 = &info->lines[y1 & 0x01];
176 
177  if (line0->y != y0)
178  {
180  &iter->image->bits, line0, y0, fx, ux, iter->width);
181  }
182 
183  if (line1->y != y1)
184  {
186  &iter->image->bits, line1, y1, fx, ux, iter->width);
187  }
188 
190  dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS);
191 
192  vw = _mm_set_epi16 (
193  dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
194 
195  for (i = 0; i + 3 < iter->width; i += 4)
196  {
197  __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
198  __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
199  __m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2));
200  __m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2));
201  __m128i r0, r1, tmp, p;
202 
203  r0 = _mm_mulhi_epu16 (
204  _mm_sub_epi16 (bot0, top0), vw);
205  tmp = _mm_cmplt_epi16 (bot0, top0);
206  tmp = _mm_and_si128 (tmp, vw);
207  r0 = _mm_sub_epi16 (r0, tmp);
208  r0 = _mm_add_epi16 (r0, top0);
209  r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
210  /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */
211  r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
212  /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */
213 
214  r1 = _mm_mulhi_epu16 (
215  _mm_sub_epi16 (bot1, top1), vw);
216  tmp = _mm_cmplt_epi16 (bot1, top1);
217  tmp = _mm_and_si128 (tmp, vw);
218  r1 = _mm_sub_epi16 (r1, tmp);
219  r1 = _mm_add_epi16 (r1, top1);
220  r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS);
221  r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1));
222  /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */
223 
224  p = _mm_packus_epi16 (r0, r1);
225 
226  _mm_storeu_si128 ((__m128i *)(iter->buffer + i), p);
227  }
228 
229  while (i < iter->width)
230  {
231  __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
232  __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
233  __m128i r0, tmp, p;
234 
235  r0 = _mm_mulhi_epu16 (
236  _mm_sub_epi16 (bot0, top0), vw);
237  tmp = _mm_cmplt_epi16 (bot0, top0);
238  tmp = _mm_and_si128 (tmp, vw);
239  r0 = _mm_sub_epi16 (r0, tmp);
240  r0 = _mm_add_epi16 (r0, top0);
241  r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
242  /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */
243  r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
244  /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */
245 
246  p = _mm_packus_epi16 (r0, r0);
247 
248  if (iter->width - i == 1)
249  {
250  *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p);
251  i++;
252  }
253  else
254  {
255  _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p);
256  i += 2;
257  }
258  }
259 
260  info->y += iter->image->common.transform->matrix[1][1];
261 
262  return iter->buffer;
263 }
264 
265 static void
267 {
268  free (iter->data);
269 }
270 
271 static void
273 {
274  int width = iter->width;
277 
278  /* Reference point is the center of the pixel */
279  v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
280  v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
281  v.vector[2] = pixman_fixed_1;
282 
284  goto fail;
285 
286  info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64);
287  if (!info)
288  goto fail;
289 
290  info->x = v.vector[0] - pixman_fixed_1 / 2;
291  info->y = v.vector[1] - pixman_fixed_1 / 2;
292 
293 #define ALIGN(addr) \
294  ((void *)((((uintptr_t)(addr)) + 15) & (~15)))
295 
296  /* It is safe to set the y coordinates to -1 initially
297  * because COVER_CLIP_BILINEAR ensures that we will only
298  * be asked to fetch lines in the [0, height) interval
299  */
300  info->lines[0].y = -1;
301  info->lines[0].buffer = ALIGN (&(info->data[0]));
302  info->lines[1].y = -1;
303  info->lines[1].buffer = ALIGN (info->lines[0].buffer + width);
304 
307 
308  iter->data = info;
309  return;
310 
311 fail:
312  /* Something went wrong, either a bad matrix or OOM; in such cases,
313  * we don't guarantee any particular rendering.
314  */
316  FUNC, "Allocation failure or bad matrix, skipping rendering\n");
317 
319  iter->fini = NULL;
320 }
321 
323 {
324  { PIXMAN_a8r8g8b8,
331  NULL, NULL
332  },
333 
334  { PIXMAN_null },
335 };
336 
338 {
339  { PIXMAN_OP_NONE },
340 };
341 
344 {
347 
348  imp->iter_info = ssse3_iters;
349 
350  return imp;
351 }
#define y0
#define width(a)
Definition: aptex-macros.h:198
#define n
Definition: t4ht.c:1290
#define b
Definition: jpegint.h:372
#define free(a)
Definition: decNumber.cpp:310
int v
Definition: dviconv.c:10
#define info
Definition: dviinfo.c:42
struct rect data
Definition: dvipdfm.c:64
#define s
Definition: afcover.h:80
#define r1
#define r0
#define NULL
Definition: ftobjs.h:61
small capitals from c petite p
Definition: afcover.h:72
small capitals from c petite p scientific i
Definition: afcover.h:80
kerning y
Definition: ttdriver.c:212
unsigned int uint32_t
Definition: stdint.h:80
signed int int32_t
Definition: stdint.h:77
unsigned __int64 uint64_t
Definition: stdint.h:90
int lines
Definition: var.h:5
#define malloc
Definition: alloca.c:91
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
float x
Definition: cordic.py:15
#define FUNC
pixman_implementation_t * _pixman_implementation_create(pixman_implementation_t *fallback, const pixman_fast_path_t *fast_paths)
static int pixman_fixed_to_bilinear_weight(pixman_fixed_t x)
pixman_bool_t pixman_transform_point_3d(const struct pixman_transform *transform, struct pixman_vector *vector)
#define FAST_PATH_STANDARD_FLAGS
@ ITER_SRC
@ ITER_NARROW
void _pixman_log_error(const char *function, const char *message)
Definition: pixman-utils.c:316
#define FAST_PATH_SCALE_TRANSFORM
#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR
#define FAST_PATH_BILINEAR_FILTER
#define BILINEAR_INTERPOLATION_BITS
Definition: pixman-private.h:9
uint32_t * _pixman_iter_get_scanline_noop(pixman_iter_t *iter, const uint32_t *mask)
Definition: pixman-utils.c:221
#define PIXMAN_null
static void ssse3_bilinear_cover_iter_fini(pixman_iter_t *iter)
Definition: pixman-ssse3.c:266
pixman_implementation_t * _pixman_implementation_create_ssse3(pixman_implementation_t *fallback)
Definition: pixman-ssse3.c:343
static const pixman_iter_info_t ssse3_iters[]
Definition: pixman-ssse3.c:322
static const pixman_fast_path_t ssse3_fast_paths[]
Definition: pixman-ssse3.c:337
static void ssse3_fetch_horizontal(bits_image_t *image, line_t *line, int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
Definition: pixman-ssse3.c:53
static void ssse3_bilinear_cover_iter_init(pixman_iter_t *iter, const pixman_iter_info_t *iter_info)
Definition: pixman-ssse3.c:272
#define ALIGN(addr)
static uint32_t * ssse3_fetch_bilinear_cover(pixman_iter_t *iter, const uint32_t *mask)
Definition: pixman-ssse3.c:158
#define pixman_int_to_fixed(i)
Definition: pixman.h:130
pixman_fixed_16_16_t pixman_fixed_t
Definition: pixman.h:123
#define pixman_fixed_1
Definition: pixman.h:126
#define pixman_fixed_to_int(f)
Definition: pixman.h:129
@ PIXMAN_a8r8g8b8
Definition: pixman.h:878
#define y1
#define mask(n)
Definition: lbitlib.c:93
Definition: namelist.c:170
Definition: drvpic.cpp:36
pixman_transform_t * transform
Definition: sd.h:76
uint64_t * buffer
Definition: bdf.c:133
const pixman_iter_info_t * iter_info
pixman_iter_get_scanline_t get_scanline
pixman_iter_fini_t fini
pixman_image_t * image
uint32_t * buffer
pixman_fixed_t matrix[3][3]
Definition: pixman.h:180
Definition: splinefont.h:579
while(temp)
Definition: t4ht.c:858
static UBool fallback(char *loc)
Definition: ucurr.cpp:604
image_common_t common
bits_image_t bits
#define buffer
Definition: xmlparse.c:611