Files
ffmpeg/libswscale/x86/swscale.c
Ramiro Polla 384fe39623 swscale/range_convert: fix mpeg ranges in yuv range conversion for non-8-bit pixel formats
There is an issue with the constants used in YUV to YUV range conversion,
where the upper bound is not respected when converting to mpeg range.

With this commit, the constants are calculated at runtime, depending on
the bit depth. This approach also allows us to more easily understand how
the constants are derived.

For bit depths <= 14, the number of fixed point bits has been set to 14
for all conversions, to simplify the code.
For bit depths > 14, the number of fixed points bits has been raised and
set to 18, to allow for the conversion to be accurate enough for the mpeg
range to be respected.

The convert functions now take the conversion constants (coeff and offset)
as function arguments.
For bit depths <= 14, coeff is unsigned 16-bit and offset is 32-bit.
For bit depths > 14, coeff is unsigned 32-bit and offset is 64-bit.

x86_64:
chrRangeFromJpeg8_1920_c:    2127.4   2125.0  (1.00x)
chrRangeFromJpeg16_1920_c:   2325.2   2127.2  (1.09x)
chrRangeToJpeg8_1920_c:      3166.9   3168.7  (1.00x)
chrRangeToJpeg16_1920_c:     2152.4   3164.8  (0.68x)
lumRangeFromJpeg8_1920_c:    1263.0   1302.5  (0.97x)
lumRangeFromJpeg16_1920_c:   1080.5   1299.2  (0.83x)
lumRangeToJpeg8_1920_c:      1886.8   2112.2  (0.89x)
lumRangeToJpeg16_1920_c:     1077.0   1906.5  (0.56x)

aarch64 A55:
chrRangeFromJpeg8_1920_c:   28835.2  28835.6  (1.00x)
chrRangeFromJpeg16_1920_c:  28839.8  32680.8  (0.88x)
chrRangeToJpeg8_1920_c:     23074.7  23075.4  (1.00x)
chrRangeToJpeg16_1920_c:    17318.9  24996.0  (0.69x)
lumRangeFromJpeg8_1920_c:   15389.7  15384.5  (1.00x)
lumRangeFromJpeg16_1920_c:  15388.2  17306.7  (0.89x)
lumRangeToJpeg8_1920_c:     19227.8  19226.6  (1.00x)
lumRangeToJpeg16_1920_c:    15387.0  21146.3  (0.73x)

aarch64 A76:
chrRangeFromJpeg8_1920_c:    6324.4   6268.1  (1.01x)
chrRangeFromJpeg16_1920_c:   6339.9  11521.5  (0.55x)
chrRangeToJpeg8_1920_c:      9656.0   9612.8  (1.00x)
chrRangeToJpeg16_1920_c:     6340.4  11651.8  (0.54x)
lumRangeFromJpeg8_1920_c:    4422.0   4420.8  (1.00x)
lumRangeFromJpeg16_1920_c:   4420.9   5762.0  (0.77x)
lumRangeToJpeg8_1920_c:      5949.1   5977.5  (1.00x)
lumRangeToJpeg16_1920_c:     4446.8   5946.2  (0.75x)

NOTE: all simd optimizations for range_convert have been disabled.
      they will be re-enabled when they are fixed for each architecture.

NOTE2: the same issue still exists in rgb2yuv conversions, which is not
       addressed in this commit.
2024-12-05 21:10:29 +01:00

855 lines
35 KiB
C

/*
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <inttypes.h>
#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/pixdesc.h"
const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
0x0103010301030103LL,
0x0200020002000200LL,};
const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
0x0602060206020602LL,
0x0004000400040004LL,};
#if HAVE_INLINE_ASM
DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
// MMXEXT versions
#if HAVE_MMXEXT_INLINE
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _mmxext
#include "swscale_template.c"
#endif
void ff_updateMMXDitherTables(SwsInternal *c, int dstY)
{
const int dstH= c->opts.dst_h;
const int flags= c->opts.flags;
SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
int hasAlpha = c->needAlpha;
int32_t *vLumFilterPos= c->vLumFilterPos;
int32_t *vChrFilterPos= c->vChrFilterPos;
int16_t *vLumFilter= c->vLumFilter;
int16_t *vChrFilter= c->vChrFilter;
int32_t *lumMmxFilter= c->lumMmxFilter;
int32_t *chrMmxFilter= c->chrMmxFilter;
int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
const int vLumFilterSize= c->vLumFilterSize;
const int vChrFilterSize= c->vChrFilterSize;
const int chrDstY= dstY>>c->chrDstVSubSample;
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
c->blueDither= ff_dither8[dstY&1];
if (c->opts.dst_format == AV_PIX_FMT_RGB555 || c->opts.dst_format == AV_PIX_FMT_BGR555)
c->greenDither= ff_dither8[dstY&1];
else
c->greenDither= ff_dither4[dstY&1];
c->redDither= ff_dither8[(dstY+1)&1];
if (dstY < dstH - 2) {
const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
int i;
if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->opts.src_h) {
const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
int neg = -firstLumSrcY, i, end = FFMIN(c->opts.src_h - firstLumSrcY, vLumFilterSize);
for (i = 0; i < neg; i++)
tmpY[i] = lumSrcPtr[neg];
for ( ; i < end; i++)
tmpY[i] = lumSrcPtr[i];
for ( ; i < vLumFilterSize; i++)
tmpY[i] = tmpY[i-1];
lumSrcPtr = tmpY;
if (alpSrcPtr) {
const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
for (i = 0; i < neg; i++)
tmpA[i] = alpSrcPtr[neg];
for ( ; i < end; i++)
tmpA[i] = alpSrcPtr[i];
for ( ; i < vLumFilterSize; i++)
tmpA[i] = tmpA[i - 1];
alpSrcPtr = tmpA;
}
}
if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
for (i = 0; i < neg; i++) {
tmpU[i] = chrUSrcPtr[neg];
}
for ( ; i < end; i++) {
tmpU[i] = chrUSrcPtr[i];
}
for ( ; i < vChrFilterSize; i++) {
tmpU[i] = tmpU[i - 1];
}
chrUSrcPtr = tmpU;
}
if (flags & SWS_ACCURATE_RND) {
int s= APCK_SIZE / 8;
for (i=0; i<vLumFilterSize; i+=2) {
*(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
*(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
lumMmxFilter[s*i+APCK_COEF/4 ]=
lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
*(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
*(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
alpMmxFilter[s*i+APCK_COEF/4 ]=
alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
}
}
for (i=0; i<vChrFilterSize; i+=2) {
*(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
*(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
chrMmxFilter[s*i+APCK_COEF/4 ]=
chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
}
} else {
for (i=0; i<vLumFilterSize; i++) {
*(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]=
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
*(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
alpMmxFilter[4*i+2]=
alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
}
}
for (i=0; i<vChrFilterSize; i++) {
*(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
chrMmxFilter[4*i+2]=
chrMmxFilter[4*i+3]=
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
}
}
}
}
#endif /* HAVE_INLINE_ASM */
#define YUV2YUVX_FUNC_MMX(opt, step) \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
uint8_t *dest, int dstW, \
const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
const int16_t **src, uint8_t *dest, int dstW, \
const uint8_t *dither, int offset) \
{ \
if(dstW > 0) \
ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, dstW + offset, dither, offset); \
return; \
}
#define YUV2YUVX_FUNC(opt, step) \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
uint8_t *dest, int dstW, \
const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
const int16_t **src, uint8_t *dest, int dstW, \
const uint8_t *dither, int offset) \
{ \
int remainder = (dstW % step); \
int pixelsProcessed = dstW - remainder; \
if(((uintptr_t)dest) & 15){ \
yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset); \
return; \
} \
if(pixelsProcessed > 0) \
ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, pixelsProcessed + offset, dither, offset); \
if(remainder > 0){ \
ff_yuv2yuvX_mmxext(filter, filterSize - 1, pixelsProcessed, dest - offset, pixelsProcessed + remainder + offset, dither, offset); \
} \
return; \
}
#if HAVE_MMXEXT_EXTERNAL
YUV2YUVX_FUNC_MMX(mmxext, 16)
#endif
#if HAVE_SSE3_EXTERNAL
YUV2YUVX_FUNC(sse3, 32)
#endif
#if HAVE_AVX2_EXTERNAL
YUV2YUVX_FUNC(avx2, 64)
#endif
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
SwsInternal *c, int16_t *data, \
int dstW, const uint8_t *src, \
const int16_t *filter, \
const int32_t *filterPos, int filterSize)
#define SCALE_FUNCS(filter_n, opt) \
SCALE_FUNC(filter_n, 8, 15, opt); \
SCALE_FUNC(filter_n, 9, 15, opt); \
SCALE_FUNC(filter_n, 10, 15, opt); \
SCALE_FUNC(filter_n, 12, 15, opt); \
SCALE_FUNC(filter_n, 14, 15, opt); \
SCALE_FUNC(filter_n, 16, 15, opt); \
SCALE_FUNC(filter_n, 8, 19, opt); \
SCALE_FUNC(filter_n, 9, 19, opt); \
SCALE_FUNC(filter_n, 10, 19, opt); \
SCALE_FUNC(filter_n, 12, 19, opt); \
SCALE_FUNC(filter_n, 14, 19, opt); \
SCALE_FUNC(filter_n, 16, 19, opt)
#define SCALE_FUNCS_MMX(opt) \
SCALE_FUNCS(4, opt); \
SCALE_FUNCS(8, opt); \
SCALE_FUNCS(X, opt)
#define SCALE_FUNCS_SSE(opt) \
SCALE_FUNCS(4, opt); \
SCALE_FUNCS(8, opt); \
SCALE_FUNCS(X4, opt); \
SCALE_FUNCS(X8, opt)
SCALE_FUNCS_SSE(sse2);
SCALE_FUNCS_SSE(ssse3);
SCALE_FUNCS_SSE(sse4);
SCALE_FUNC(4, 8, 15, avx2);
SCALE_FUNC(X4, 8, 15, avx2);
#define VSCALEX_FUNC(size, opt) \
void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
const int16_t **src, uint8_t *dest, int dstW, \
const uint8_t *dither, int offset)
#define VSCALEX_FUNCS(opt) \
VSCALEX_FUNC(8, opt); \
VSCALEX_FUNC(9, opt); \
VSCALEX_FUNC(10, opt)
VSCALEX_FUNC(8, mmxext);
VSCALEX_FUNCS(sse2);
VSCALEX_FUNCS(sse4);
VSCALEX_FUNC(16, sse4);
VSCALEX_FUNCS(avx);
#define VSCALE_FUNC(size, opt) \
void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
const uint8_t *dither, int offset)
#define VSCALE_FUNCS(opt1, opt2) \
VSCALE_FUNC(8, opt1); \
VSCALE_FUNC(9, opt2); \
VSCALE_FUNC(10, opt2); \
VSCALE_FUNC(16, opt1)
VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNC(16, sse4);
VSCALE_FUNCS(avx, avx);
#define INPUT_Y_FUNC(fmt, opt) \
void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
const uint8_t *unused1, const uint8_t *unused2, \
int w, uint32_t *unused, void *opq)
#define INPUT_UV_FUNC(fmt, opt) \
void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
const uint8_t *unused0, \
const uint8_t *src1, \
const uint8_t *src2, \
int w, uint32_t *unused, void *opq)
#define INPUT_FUNC(fmt, opt) \
INPUT_Y_FUNC(fmt, opt); \
INPUT_UV_FUNC(fmt, opt)
#define INPUT_FUNCS(opt) \
INPUT_FUNC(uyvy, opt); \
INPUT_FUNC(yuyv, opt); \
INPUT_UV_FUNC(nv12, opt); \
INPUT_UV_FUNC(nv21, opt); \
INPUT_FUNC(rgba, opt); \
INPUT_FUNC(bgra, opt); \
INPUT_FUNC(argb, opt); \
INPUT_FUNC(abgr, opt); \
INPUT_FUNC(rgb24, opt); \
INPUT_FUNC(bgr24, opt)
INPUT_FUNCS(sse2);
INPUT_FUNCS(ssse3);
INPUT_FUNCS(avx);
INPUT_FUNC(rgba, avx2);
INPUT_FUNC(bgra, avx2);
INPUT_FUNC(argb, avx2);
INPUT_FUNC(abgr, avx2);
INPUT_FUNC(rgb24, avx2);
INPUT_FUNC(bgr24, avx2);
#if ARCH_X86_64
#define YUV2NV_DECL(fmt, opt) \
void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
const int16_t *filter, int filterSize, \
const int16_t **u, const int16_t **v, \
uint8_t *dst, int dstWidth)
YUV2NV_DECL(nv12, avx2);
YUV2NV_DECL(nv21, avx2);
#define YUV2GBRP_FN_DECL(fmt, opt) \
void ff_yuv2##fmt##_full_X_ ##opt(SwsInternal *c, const int16_t *lumFilter, \
const int16_t **lumSrcx, int lumFilterSize, \
const int16_t *chrFilter, const int16_t **chrUSrcx, \
const int16_t **chrVSrcx, int chrFilterSize, \
const int16_t **alpSrcx, uint8_t **dest, \
int dstW, int y)
#define YUV2GBRP_DECL(opt) \
YUV2GBRP_FN_DECL(gbrp, opt); \
YUV2GBRP_FN_DECL(gbrap, opt); \
YUV2GBRP_FN_DECL(gbrp9le, opt); \
YUV2GBRP_FN_DECL(gbrp10le, opt); \
YUV2GBRP_FN_DECL(gbrap10le, opt); \
YUV2GBRP_FN_DECL(gbrp12le, opt); \
YUV2GBRP_FN_DECL(gbrap12le, opt); \
YUV2GBRP_FN_DECL(gbrp14le, opt); \
YUV2GBRP_FN_DECL(gbrp16le, opt); \
YUV2GBRP_FN_DECL(gbrap16le, opt); \
YUV2GBRP_FN_DECL(gbrpf32le, opt); \
YUV2GBRP_FN_DECL(gbrapf32le, opt); \
YUV2GBRP_FN_DECL(gbrp9be, opt); \
YUV2GBRP_FN_DECL(gbrp10be, opt); \
YUV2GBRP_FN_DECL(gbrap10be, opt); \
YUV2GBRP_FN_DECL(gbrp12be, opt); \
YUV2GBRP_FN_DECL(gbrap12be, opt); \
YUV2GBRP_FN_DECL(gbrp14be, opt); \
YUV2GBRP_FN_DECL(gbrp16be, opt); \
YUV2GBRP_FN_DECL(gbrap16be, opt); \
YUV2GBRP_FN_DECL(gbrpf32be, opt); \
YUV2GBRP_FN_DECL(gbrapf32be, opt)
YUV2GBRP_DECL(sse2);
YUV2GBRP_DECL(sse4);
YUV2GBRP_DECL(avx2);
#define INPUT_PLANAR_RGB_Y_FN_DECL(fmt, opt) \
void ff_planar_##fmt##_to_y_##opt(uint8_t *dst, \
const uint8_t *src[4], int w, int32_t *rgb2yuv, \
void *opq)
#define INPUT_PLANAR_RGB_UV_FN_DECL(fmt, opt) \
void ff_planar_##fmt##_to_uv_##opt(uint8_t *dstU, uint8_t *dstV, \
const uint8_t *src[4], int w, int32_t *rgb2yuv, \
void *opq)
#define INPUT_PLANAR_RGB_A_FN_DECL(fmt, opt) \
void ff_planar_##fmt##_to_a_##opt(uint8_t *dst, \
const uint8_t *src[4], int w, int32_t *rgb2yuv, \
void *opq)
#define INPUT_PLANAR_RGBXX_A_DECL(fmt, opt) \
INPUT_PLANAR_RGB_A_FN_DECL(fmt##le, opt); \
INPUT_PLANAR_RGB_A_FN_DECL(fmt##be, opt)
#define INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt) \
INPUT_PLANAR_RGB_Y_FN_DECL(fmt##le, opt); \
INPUT_PLANAR_RGB_Y_FN_DECL(fmt##be, opt)
#define INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt) \
INPUT_PLANAR_RGB_UV_FN_DECL(fmt##le, opt); \
INPUT_PLANAR_RGB_UV_FN_DECL(fmt##be, opt)
#define INPUT_PLANAR_RGBXX_YUVA_DECL(fmt, opt) \
INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
#define INPUT_PLANAR_RGBXX_YUV_DECL(fmt, opt) \
INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt)
#define INPUT_PLANAR_RGBXX_UVA_DECL(fmt, opt) \
INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
#define INPUT_PLANAR_RGB_A_ALL_DECL(opt) \
INPUT_PLANAR_RGB_A_FN_DECL(rgb, opt); \
INPUT_PLANAR_RGBXX_A_DECL(rgb10, opt); \
INPUT_PLANAR_RGBXX_A_DECL(rgb12, opt); \
INPUT_PLANAR_RGBXX_A_DECL(rgb16, opt); \
INPUT_PLANAR_RGBXX_A_DECL(rgbf32, opt)
#define INPUT_PLANAR_RGB_Y_ALL_DECL(opt) \
INPUT_PLANAR_RGB_Y_FN_DECL(rgb, opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgb9, opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgb10, opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgb12, opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgb14, opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgb16, opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, opt)
#define INPUT_PLANAR_RGB_UV_ALL_DECL(opt) \
INPUT_PLANAR_RGB_UV_FN_DECL(rgb, opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgb9, opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgb10, opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgb12, opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgb14, opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgb16, opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgbf32, opt)
INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, sse2);
INPUT_PLANAR_RGB_UV_ALL_DECL(sse2);
INPUT_PLANAR_RGB_A_ALL_DECL(sse2);
INPUT_PLANAR_RGB_Y_ALL_DECL(sse4);
INPUT_PLANAR_RGB_UV_ALL_DECL(sse4);
INPUT_PLANAR_RGBXX_A_DECL(rgbf32, sse4);
INPUT_PLANAR_RGB_Y_ALL_DECL(avx2);
INPUT_PLANAR_RGB_UV_ALL_DECL(avx2);
INPUT_PLANAR_RGB_A_ALL_DECL(avx2);
#endif
#define RANGE_CONVERT_FUNCS(opt) do { \
if (c->dstBpc <= 14) { \
if (c->opts.src_range) { \
c->lumConvertRange = ff_lumRangeFromJpeg_ ##opt; \
c->chrConvertRange = ff_chrRangeFromJpeg_ ##opt; \
} else { \
c->lumConvertRange = ff_lumRangeToJpeg_ ##opt; \
c->chrConvertRange = ff_chrRangeToJpeg_ ##opt; \
} \
} \
} while (0)
#define RANGE_CONVERT_FUNCS_DECL(opt) \
void ff_lumRangeFromJpeg_ ##opt(int16_t *dst, int width); \
void ff_chrRangeFromJpeg_ ##opt(int16_t *dstU, int16_t *dstV, int width); \
void ff_lumRangeToJpeg_ ##opt(int16_t *dst, int width); \
void ff_chrRangeToJpeg_ ##opt(int16_t *dstU, int16_t *dstV, int width); \
RANGE_CONVERT_FUNCS_DECL(sse2);
RANGE_CONVERT_FUNCS_DECL(avx2);
av_cold void ff_sws_init_range_convert_x86(SwsInternal *c)
{
/* This code is currently disabled because of changes in the base
* implementation of these functions. This code should be enabled
* again once those changes are ported to this architecture. */
#if 0
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
RANGE_CONVERT_FUNCS(avx2);
} else if (EXTERNAL_SSE2(cpu_flags)) {
RANGE_CONVERT_FUNCS(sse2);
}
#endif
}
av_cold void ff_sws_init_swscale_x86(SwsInternal *c)
{
int cpu_flags = av_get_cpu_flags();
#if HAVE_MMXEXT_INLINE
if (INLINE_MMXEXT(cpu_flags))
sws_init_swscale_mmxext(c);
#endif
if(c->use_mmx_vfilter && !(c->opts.flags & SWS_ACCURATE_RND)) {
#if HAVE_MMXEXT_EXTERNAL
if (EXTERNAL_MMXEXT(cpu_flags))
c->yuv2planeX = yuv2yuvX_mmxext;
#endif
#if HAVE_SSE3_EXTERNAL
if (EXTERNAL_SSE3(cpu_flags))
c->yuv2planeX = yuv2yuvX_sse3;
#endif
#if HAVE_AVX2_EXTERNAL
if (EXTERNAL_AVX2_FAST(cpu_flags))
c->yuv2planeX = yuv2yuvX_avx2;
#endif
}
#if ARCH_X86_32 && !HAVE_ALIGNED_STACK
// The better yuv2planeX_8 functions need aligned stack on x86-32,
// so we use MMXEXT in this case if they are not available.
if (EXTERNAL_MMXEXT(cpu_flags)) {
if (c->dstBpc == 8 && !c->use_mmx_vfilter)
c->yuv2planeX = ff_yuv2planeX_8_mmxext;
}
#endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
if (c->srcBpc == 8) { \
hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
ff_hscale8to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 9) { \
hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
ff_hscale9to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 10) { \
hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
ff_hscale10to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 12) { \
hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
ff_hscale12to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 14 || ((c->opts.src_format==AV_PIX_FMT_PAL8||isAnyRGB(c->opts.src_format)) && av_pix_fmt_desc_get(c->opts.src_format)->comp[0].depth<16)) { \
hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
ff_hscale14to19_ ## filtersize ## _ ## opt1; \
} else { /* c->srcBpc == 16 */ \
av_assert0(c->srcBpc == 16);\
hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
ff_hscale16to19_ ## filtersize ## _ ## opt1; \
} \
} while (0)
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
switch(c->dstBpc){ \
case 16: do_16_case; break; \
case 10: if (!isBE(c->opts.dst_format) && !isSemiPlanarYUV(c->opts.dst_format)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
case 9: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
}
#define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
switch(c->dstBpc){ \
case 16: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2plane1_16_ ## opt; break; \
case 10: if (!isBE(c->opts.dst_format) && !isSemiPlanarYUV(c->opts.dst_format)) vscalefn = ff_yuv2plane1_10_ ## opt; break; \
case 9: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2plane1_9_ ## opt; break; \
case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \
default: av_assert0(c->dstBpc>8); \
}
#define case_rgb(x, X, opt) \
case AV_PIX_FMT_ ## X: \
c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
if (!c->chrSrcHSubSample) \
c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
break
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
switch (filtersize) { \
case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
break; \
}
if (EXTERNAL_SSE2(cpu_flags)) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
HAVE_ALIGNED_STACK || ARCH_X86_64);
if (!(c->opts.flags & SWS_ACCURATE_RND))
ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2);
switch (c->opts.src_format) {
case AV_PIX_FMT_YA8:
c->lumToYV12 = ff_yuyvToY_sse2;
if (c->needAlpha)
c->alpToYV12 = ff_uyvyToY_sse2;
break;
case AV_PIX_FMT_YUYV422:
c->lumToYV12 = ff_yuyvToY_sse2;
c->chrToYV12 = ff_yuyvToUV_sse2;
break;
case AV_PIX_FMT_UYVY422:
c->lumToYV12 = ff_uyvyToY_sse2;
c->chrToYV12 = ff_uyvyToUV_sse2;
break;
case AV_PIX_FMT_NV12:
c->chrToYV12 = ff_nv12ToUV_sse2;
break;
case AV_PIX_FMT_NV21:
c->chrToYV12 = ff_nv21ToUV_sse2;
break;
case_rgb(rgb24, RGB24, sse2);
case_rgb(bgr24, BGR24, sse2);
case_rgb(bgra, BGRA, sse2);
case_rgb(rgba, RGBA, sse2);
case_rgb(abgr, ABGR, sse2);
case_rgb(argb, ARGB, sse2);
default:
break;
}
}
if (EXTERNAL_SSSE3(cpu_flags)) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
switch (c->opts.src_format) {
case_rgb(rgb24, RGB24, ssse3);
case_rgb(bgr24, BGR24, ssse3);
default:
break;
}
}
if (EXTERNAL_SSE4(cpu_flags)) {
/* Xto15 don't need special sse4 functions */
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
if (!isBE(c->opts.dst_format)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
HAVE_ALIGNED_STACK || ARCH_X86_64);
if (c->dstBpc == 16 && !isBE(c->opts.dst_format) && !(c->opts.flags & SWS_ACCURATE_RND))
c->yuv2plane1 = ff_yuv2plane1_16_sse4;
}
if (EXTERNAL_AVX(cpu_flags)) {
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
HAVE_ALIGNED_STACK || ARCH_X86_64);
if (!(c->opts.flags & SWS_ACCURATE_RND))
ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx);
switch (c->opts.src_format) {
case AV_PIX_FMT_YUYV422:
c->chrToYV12 = ff_yuyvToUV_avx;
break;
case AV_PIX_FMT_UYVY422:
c->chrToYV12 = ff_uyvyToUV_avx;
break;
case AV_PIX_FMT_NV12:
c->chrToYV12 = ff_nv12ToUV_avx;
break;
case AV_PIX_FMT_NV21:
c->chrToYV12 = ff_nv21ToUV_avx;
break;
case_rgb(rgb24, RGB24, avx);
case_rgb(bgr24, BGR24, avx);
case_rgb(bgra, BGRA, avx);
case_rgb(rgba, RGBA, avx);
case_rgb(abgr, ABGR, avx);
case_rgb(argb, ARGB, avx);
default:
break;
}
}
#if ARCH_X86_64
#define ASSIGN_AVX2_SCALE_FUNC(hscalefn, filtersize) \
switch (filtersize) { \
case 4: hscalefn = ff_hscale8to15_4_avx2; break; \
default: hscalefn = ff_hscale8to15_X4_avx2; break; \
break; \
}
if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
}
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
if (ARCH_X86_64)
switch (c->opts.src_format) {
case_rgb(rgb24, RGB24, avx2);
case_rgb(bgr24, BGR24, avx2);
case_rgb(bgra, BGRA, avx2);
case_rgb(rgba, RGBA, avx2);
case_rgb(abgr, ABGR, avx2);
case_rgb(argb, ARGB, avx2);
}
if (!(c->opts.flags & SWS_ACCURATE_RND)) // FIXME
switch (c->opts.dst_format) {
case AV_PIX_FMT_NV12:
case AV_PIX_FMT_NV24:
c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
break;
case AV_PIX_FMT_NV21:
case AV_PIX_FMT_NV42:
c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
break;
default:
break;
}
}
#define INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(fmt, name, opt) \
case fmt: \
c->readAlpPlanar = ff_planar_##name##_to_a_##opt;
#define INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
case rgba_fmt: \
case rgb_fmt: \
c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
break;
#define INPUT_PLANER_RGB_YUV_FUNC_CASE(fmt, name, opt) \
case fmt: \
c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
break;
#define INPUT_PLANER_RGB_UV_FUNC_CASE(fmt, name, opt) \
case fmt: \
c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
break;
#define INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
#define INPUT_PLANER_RGBAXX_UVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
#define INPUT_PLANER_RGBAXX_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##LE, rgba_fmt##LE, name##le, opt) \
INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##BE, rgba_fmt##BE, name##be, opt)
#define INPUT_PLANER_RGBXX_YUV_FUNC_CASE(rgb_fmt, name, opt) \
INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
#define INPUT_PLANER_RGBXX_UV_FUNC_CASE(rgb_fmt, name, opt) \
INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
#define INPUT_PLANER_RGB_YUVA_ALL_CASES(opt) \
INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, opt) \
INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, opt) \
INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, opt) \
INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, opt) \
INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, opt) \
INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, opt) \
INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, opt) \
INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, opt)
if (EXTERNAL_SSE2(cpu_flags)) {
switch (c->opts.src_format) {
INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, sse2);
INPUT_PLANER_RGB_UV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse2);
INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse2);
INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse2);
INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse2);
INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse2);
INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse2);
INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse2);
default:
break;
}
}
if (EXTERNAL_SSE4(cpu_flags)) {
switch (c->opts.src_format) {
case AV_PIX_FMT_GBRAP:
INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse4);
INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse4);
INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse4);
INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse4);
INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse4);
INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse4);
INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse4);
default:
break;
}
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
switch (c->opts.src_format) {
INPUT_PLANER_RGB_YUVA_ALL_CASES(avx2)
default:
break;
}
}
if(c->opts.flags & SWS_FULL_CHR_H_INT) {
#define YUV2ANYX_FUNC_CASE(fmt, name, opt) \
case fmt: \
c->yuv2anyX = ff_yuv2##name##_full_X_##opt; \
break;
#define YUV2ANYX_GBRAP_CASES(opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP, gbrp, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP, gbrap, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9LE, gbrp9le, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10LE, gbrp10le, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10LE, gbrap10le, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12LE, gbrp12le, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12LE, gbrap12le, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14LE, gbrp14le, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16LE, gbrp16le, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16LE, gbrap16le, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32LE, gbrpf32le, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32LE, gbrapf32le, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9BE, gbrp9be, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10BE, gbrp10be, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10BE, gbrap10be, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12BE, gbrp12be, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12BE, gbrap12be, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14BE, gbrp14be, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16BE, gbrp16be, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16BE, gbrap16be, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32BE, gbrpf32be, opt) \
YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32BE, gbrapf32be, opt)
if (EXTERNAL_SSE2(cpu_flags)) {
switch (c->opts.dst_format) {
YUV2ANYX_GBRAP_CASES(sse2)
default:
break;
}
}
if (EXTERNAL_SSE4(cpu_flags)) {
switch (c->opts.dst_format) {
YUV2ANYX_GBRAP_CASES(sse4)
default:
break;
}
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
switch (c->opts.dst_format) {
YUV2ANYX_GBRAP_CASES(avx2)
default:
break;
}
}
}
#endif
}