FFmpeg  4.3.6
swscale_altivec.c
Go to the documentation of this file.
1 /*
2  * AltiVec-enhanced yuv2yuvX
3  *
4  * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  * based on the equivalent C code in swscale.c
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
#include <inttypes.h>

#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "yuv2rgb_altivec.h"
#include "libavutil/ppc/util_altivec.h"
33 
34 #if HAVE_ALTIVEC
35 #if HAVE_BIGENDIAN
36 #define vzero vec_splat_s32(0)
37 
/*
 * Produce the next vector of 16-bit samples from `line` in `ls`.
 *
 * Loads the vector found at byte offset ((idx) << 1) + 16 from `line`,
 * permutes it together with the previously loaded vector `prev` through the
 * control vector `ctl`, and stores the result in `ls`. `ls` is not a
 * parameter: it must be declared where the macro is expanded. `prev` is then
 * overwritten with the freshly loaded vector so the next expansion can reuse
 * it.
 */
#define GET_LS(prev, idx, ctl, line) {\
    vector signed short fresh = vec_ld(((idx) << 1) + 16, line);\
    ls = vec_perm(prev, fresh, ctl);\
    prev = fresh;\
}
43 
/*
 * Multiply one vector of source samples by a filter vector and accumulate the
 * 32-bit products into two accumulators.
 *
 *   acc_hi, acc_lo  accumulators, updated in place
 *   carry           previously loaded source vector, advanced by GET_LS
 *   line, pos, ctl  source pointer, sample index and permute control that are
 *                   handed to GET_LS
 *   coef            filter coefficients (vector signed short)
 *
 * vec_mule/vec_mulo yield the products of the even/odd elements; merging them
 * high and low interleaves the two product vectors again before they are
 * added to acc_hi and acc_lo respectively.
 */
#define yuv2planeX_8(acc_hi, acc_lo, carry, line, pos, ctl, coef) do {\
    vector signed short ls;\
    vector signed int prod_even, prod_odd;\
    GET_LS(carry, pos, ctl, line);\
    prod_even = vec_mule(coef, ls);\
    prod_odd  = vec_mulo(coef, ls);\
    acc_hi = vec_add(acc_hi, vec_mergeh(prod_even, prod_odd));\
    acc_lo = vec_add(acc_lo, vec_mergel(prod_even, prod_odd));\
} while (0)
55 
/*
 * Load the filter vector located at byte offset `joffset` from `coefs` into
 * `dst` and rotate it with the vec_lvsl() control vector for that same
 * address. `joffset` is not a parameter; it is taken from the scope in which
 * the macro is expanded.
 */
#define LOAD_FILTER(dst, coefs) {\
    vector unsigned char align_ctl = vec_lvsl(joffset, coefs);\
    dst = vec_ld(joffset, coefs);\
    dst = vec_perm(dst, dst, align_ctl);\
}
/*
 * Prime a source line: store the vec_lvsl() permute control for byte offset
 * `xoffset` of `line` in `ctl` and the vector loaded from that offset in
 * `first`. `xoffset` comes from the scope in which the macro is expanded.
 */
#define LOAD_L1(first, line, ctl) {\
    ctl = vec_lvsl(xoffset, line);\
    first = vec_ld(xoffset, line);\
}
65 
/*
 * Load the coefficients for output index `idx` of a 4-tap filter into `dst`.
 *
 * The shift by 3 used below is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
 *
 * The neat trick: only half of the loaded elements matter, the high or the
 * low half depending on (idx << 3) % 16 (which is 0 or 8 here). Because the
 * result is going to be fed to vec_mule, the wanted half is "unpacked" into
 * the even slots by merging it with zeros. `vzero` is taken from the
 * enclosing scope.
 */
#define GET_VF4(idx, dst, coefs) {\
    dst = vec_ld(idx<< 3, coefs);\
    dst = ((idx << 3) % 16)\
        ? vec_mergel(dst, (vector signed short)vzero)\
        : vec_mergeh(dst, (vector signed short)vzero);\
}
/*
 * Start reading at byte offset `off` of `base`: load the vector at that
 * offset into `first` and store the matching vec_lvsl() permute control in
 * `ctl`.
 */
#define FIRST_LOAD(first, off, base, ctl) {\
    first = vec_ld(off, base);\
    ctl = vec_lvsl(off, base);\
}
/*
 * Carry two values over to the next iteration: `to_a` receives `from_a` and
 * `to_b` receives `from_b`. Note the argument order: each source comes before
 * its destination.
 */
#define UPDATE_PTR(from_a, to_a, from_b, to_b) {\
    to_a = from_a;\
    to_b = from_b;\
}
/*
 * Load the vector 16 bytes past byte offset (off + step) of `base` into
 * `next`, then combine `cur` and `next` through the permute control `ctl`
 * and store the result in `dst`.
 */
#define LOAD_SRCV(off, step, base, ctl, cur, next, dst) {\
    next = vec_ld(off + step + 16, base);\
    dst = vec_perm(cur, next, ctl);\
}
/*
 * Variant of LOAD_SRCV for 8-byte reads.
 *
 * The second vector `v1` is only loaded when the address (s + pos) lies more
 * than 8 bytes into its 16-byte block, i.e. when an 8-byte read starting
 * there runs into the following block. Otherwise `v1` is left untouched and
 * is still passed to vec_perm(); presumably the permute control `per` then
 * selects bytes from `v0` only.
 *
 *   pos, a   byte offsets relative to `s`
 *   s        source pointer
 *   per      permute control vector
 *   v0, v1   first / second source vector (v1 is conditionally written)
 *   vf       receives the permuted result
 *
 * The permute uses the `v1` parameter rather than a hard-coded variable name
 * from the caller's scope, so the macro works whatever the caller names its
 * vectors.
 */
#define LOAD_SRCV8(pos, a, s, per, v0, v1, vf) {\
    if ((((uintptr_t)(s) + (pos)) % 16) > 8) {\
        (v1) = vec_ld((pos) + (a) + 16, (s));\
    }\
    (vf) = vec_perm((v0), (v1), (per));\
}
/*
 * Fetch filter coefficients for output index `row`, tap `tap`.
 *
 * Loads into `next` the vector 16 bytes past byte offset
 * (row * 2 * filterSize) + (tap * 2) + extra of `coefs`, then combines `cur`
 * and `next` through the permute control `ctl` and stores the result in
 * `dst`. `filterSize` is taken from the scope in which the macro is expanded.
 */
#define GET_VFD(row, tap, coefs, cur, next, ctl, dst, extra) {\
    next = vec_ld((row * 2 * filterSize) + (tap * 2) + 16 + extra, coefs);\
    dst = vec_perm(cur, next, ctl);\
}
101 
102 #define FUNC(name) name ## _altivec
103 #include "swscale_ppc_template.c"
104 #undef FUNC
105 
106 #undef vzero
107 
108 #endif /* HAVE_BIGENDIAN */
109 
/*
 * Store one clipped 16-bit sample at `pos`.
 *
 * The value written is bias + av_clip_<signedness>16(val >> shift), stored
 * with AV_WB16 when `big_endian` is non-zero and with AV_WL16 otherwise.
 * `big_endian` and `shift` are not parameters; they must be visible where the
 * macro is expanded.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe inside an unbraced if/else), and `val`/`bias` are
 * parenthesised so compound expressions can be passed.
 */
#define output_pixel(pos, val, bias, signedness) \
    do { \
        if (big_endian) { \
            AV_WB16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } else { \
            AV_WL16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } \
    } while (0)
116 
117 static void
118 yuv2plane1_float_u(const int32_t *src, float *dest, int dstW, int start)
119 {
120  static const int big_endian = HAVE_BIGENDIAN;
121  static const int shift = 3;
122  static const float float_mult = 1.0f / 65535.0f;
123  int i, val;
124  uint16_t val_uint;
125 
126  for (i = start; i < dstW; ++i){
127  val = src[i] + (1 << (shift - 1));
128  output_pixel(&val_uint, val, 0, uint);
129  dest[i] = float_mult * (float)val_uint;
130  }
131 }
132 
133 static void
134 yuv2plane1_float_bswap_u(const int32_t *src, uint32_t *dest, int dstW, int start)
135 {
136  static const int big_endian = HAVE_BIGENDIAN;
137  static const int shift = 3;
138  static const float float_mult = 1.0f / 65535.0f;
139  int i, val;
140  uint16_t val_uint;
141 
142  for (i = start; i < dstW; ++i){
143  val = src[i] + (1 << (shift - 1));
144  output_pixel(&val_uint, val, 0, uint);
145  dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint));
146  }
147 }
148 
149 static void yuv2plane1_float_altivec(const int32_t *src, float *dest, int dstW)
150 {
151  const int dst_u = -(uintptr_t)dest & 3;
152  const int shift = 3;
153  const int add = (1 << (shift - 1));
154  const int clip = (1 << 16) - 1;
155  const float fmult = 1.0f / 65535.0f;
156  const vec_u32 vadd = (vec_u32) {add, add, add, add};
157  const vec_u32 vshift = (vec_u32) vec_splat_u32(shift);
158  const vec_u32 vlargest = (vec_u32) {clip, clip, clip, clip};
159  const vec_f vmul = (vec_f) {fmult, fmult, fmult, fmult};
160  const vec_f vzero = (vec_f) {0, 0, 0, 0};
161  vec_u32 v;
162  vec_f vd;
163  int i;
164 
165  yuv2plane1_float_u(src, dest, dst_u, 0);
166 
167  for (i = dst_u; i < dstW - 3; i += 4) {
168  v = vec_ld(0, (const uint32_t *) &src[i]);
169  v = vec_add(v, vadd);
170  v = vec_sr(v, vshift);
171  v = vec_min(v, vlargest);
172 
173  vd = vec_ctf(v, 0);
174  vd = vec_madd(vd, vmul, vzero);
175 
176  vec_st(vd, 0, &dest[i]);
177  }
178 
179  yuv2plane1_float_u(src, dest, dstW, i);
180 }
181 
182 static void yuv2plane1_float_bswap_altivec(const int32_t *src, uint32_t *dest, int dstW)
183 {
184  const int dst_u = -(uintptr_t)dest & 3;
185  const int shift = 3;
186  const int add = (1 << (shift - 1));
187  const int clip = (1 << 16) - 1;
188  const float fmult = 1.0f / 65535.0f;
189  const vec_u32 vadd = (vec_u32) {add, add, add, add};
190  const vec_u32 vshift = (vec_u32) vec_splat_u32(shift);
191  const vec_u32 vlargest = (vec_u32) {clip, clip, clip, clip};
192  const vec_f vmul = (vec_f) {fmult, fmult, fmult, fmult};
193  const vec_f vzero = (vec_f) {0, 0, 0, 0};
194  const vec_u32 vswapbig = (vec_u32) {16, 16, 16, 16};
195  const vec_u16 vswapsmall = vec_splat_u16(8);
196  vec_u32 v;
197  vec_f vd;
198  int i;
199 
200  yuv2plane1_float_bswap_u(src, dest, dst_u, 0);
201 
202  for (i = dst_u; i < dstW - 3; i += 4) {
203  v = vec_ld(0, (const uint32_t *) &src[i]);
204  v = vec_add(v, vadd);
205  v = vec_sr(v, vshift);
206  v = vec_min(v, vlargest);
207 
208  vd = vec_ctf(v, 0);
209  vd = vec_madd(vd, vmul, vzero);
210 
211  vd = (vec_f) vec_rl((vec_u32) vd, vswapbig);
212  vd = (vec_f) vec_rl((vec_u16) vd, vswapsmall);
213 
214  vec_st(vd, 0, (float *) &dest[i]);
215  }
216 
217  yuv2plane1_float_bswap_u(src, dest, dstW, i);
218 }
219 
/*
 * Generate yuv2plane1_float<BE_LE>_altivec(), a wrapper that forwards to
 * `impl` after casting src to const int32_t * and dest to out_type *.
 * The dither and offset arguments are accepted and ignored.
 */
#define yuv2plane1_float(impl, out_type, BE_LE) \
static void yuv2plane1_float ## BE_LE ## _altivec(const int16_t *src, uint8_t *dest, \
                                                  int dstW, \
                                                  const uint8_t *dither, int offset) \
{ \
    impl((const int32_t *)src, (out_type *)dest, dstW); \
}

/* The format matching the host byte order uses the plain routine, the other
 * one the byte-swapping routine. */
#if !HAVE_BIGENDIAN
yuv2plane1_float(yuv2plane1_float_altivec,       float,    LE)
yuv2plane1_float(yuv2plane1_float_bswap_altivec, uint32_t, BE)
#else
yuv2plane1_float(yuv2plane1_float_altivec,       float,    BE)
yuv2plane1_float(yuv2plane1_float_bswap_altivec, uint32_t, LE)
#endif
235 
236 #endif /* HAVE_ALTIVEC */
237 
239 {
240 #if HAVE_ALTIVEC
241  enum AVPixelFormat dstFormat = c->dstFormat;
242 
244  return;
245 
246 #if HAVE_BIGENDIAN
247  if (c->srcBpc == 8 && c->dstBpc <= 14) {
248  c->hyScale = c->hcScale = hScale_real_altivec;
249  }
250  if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat) &&
251  dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE &&
252  !c->needAlpha) {
253  c->yuv2planeX = yuv2planeX_altivec;
254  }
255 #endif
256 
257  if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
258  c->yuv2plane1 = yuv2plane1_floatBE_altivec;
259  } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
260  c->yuv2plane1 = yuv2plane1_floatLE_altivec;
261  }
262 
263  /* The following list of supported dstFormat values should
264  * match what's found in the body of ff_yuv2packedX_altivec() */
265  if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->needAlpha) {
266  switch (c->dstFormat) {
267  case AV_PIX_FMT_ABGR:
268  c->yuv2packedX = ff_yuv2abgr_X_altivec;
269  break;
270  case AV_PIX_FMT_BGRA:
271  c->yuv2packedX = ff_yuv2bgra_X_altivec;
272  break;
273  case AV_PIX_FMT_ARGB:
274  c->yuv2packedX = ff_yuv2argb_X_altivec;
275  break;
276  case AV_PIX_FMT_RGBA:
277  c->yuv2packedX = ff_yuv2rgba_X_altivec;
278  break;
279  case AV_PIX_FMT_BGR24:
280  c->yuv2packedX = ff_yuv2bgr24_X_altivec;
281  break;
282  case AV_PIX_FMT_RGB24:
283  c->yuv2packedX = ff_yuv2rgb24_X_altivec;
284  break;
285  }
286  }
287 #endif /* HAVE_ALTIVEC */
288 
290 }
IEEE-754 single precision Y, 32bpp, big-endian.
Definition: pixfmt.h:340
void(* hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
#define vec_f
Definition: util_altivec.h:40
#define AV_CPU_FLAG_ALTIVEC
standard
Definition: cpu.h:60
static int shift(int a, int b)
Definition: sonic.c:82
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:68
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
void(* hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Scale one horizontal line of input data using a filter over the input lines, to produce one (differen...
Macro definitions for various function/variable attributes.
#define av_cold
Definition: attributes.h:88
#define SWS_FULL_CHR_H_INT
Definition: swscale.h:79
packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
Definition: pixfmt.h:94
external API header
enum AVPixelFormat dstFormat
Destination pixel format.
yuv2packedX_fn yuv2packedX
#define src
Definition: vp8dsp.c:254
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
yuv2plane1_float(yuv2plane1_float_c_template, yuv2plane1_float(float, LE)
Definition: output.c:304
packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
Definition: pixfmt.h:95
static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt)
packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
Definition: pixfmt.h:92
packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
Definition: pixfmt.h:93
av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
yuv2planar1_fn yuv2plane1
int32_t
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:69
#define av_bswap32
Definition: bswap.h:33
#define vec_u32
Definition: util_altivec.h:38
yuv2planarX_fn yuv2planeX
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
static av_always_inline uint32_t av_float2int(float f)
Reinterpret a float as a 32-bit integer.
Definition: intfloat.h:50
Contains misc utility macros and inline functions.
#define SWS_BITEXACT
Definition: swscale.h:84
static double clip(void *opaque, double val)
Clip value val in the minval - maxval range.
Definition: vf_lut.c:162
static double c[64]
#define output_pixel(pos, val, bias, signedness)
Definition: output.c:887
av_cold void ff_sws_init_swscale_vsx(SwsContext *c)
Definition: swscale_vsx.c:2076
#define vec_u16
Definition: util_altivec.h:36
#define HAVE_BIGENDIAN
Definition: config.h:199
IEEE-754 single precision Y, 32bpp, little-endian.
Definition: pixfmt.h:341
int flags
Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
static double val(void *priv, double ch)
Definition: aeval.c:76
static av_always_inline int isNBPS(enum AVPixelFormat pix_fmt)