mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-12-05 14:30:00 +01:00
Add optimized Neon code path for the little endian case of the xyz12Torgb48 function. The innermost loop processes the data in 4x2 pixel blocks using software gathers with the matrix multiplication and clipping done by Neon. Relative runtime of micro benchmarks after this patch on some Cortex and Neoverse CPU cores: xyz12le_rgb48le X1 X3 X4 X925 V2 16x4_neon: 2.55x 4.34x 3.84x 3.31x 3.22x 32x4_neon: 2.39x 3.63x 3.22x 3.35x 3.29x 64x4_neon: 2.37x 3.31x 2.91x 3.33x 3.27x 128x4_neon: 2.34x 3.28x 2.91x 3.35x 3.24x 256x4_neon: 2.30x 3.17x 2.91x 3.32x 3.10x 512x4_neon: 2.26x 3.10x 2.91x 3.30x 3.07x 1024x4_neon: 2.26x 3.07x 2.96x 3.30x 3.05x 1920x4_neon: 2.26x 3.06x 2.93x 3.28x 3.04x xyz12le_rgb48le A76 A78 A715 A720 A725 16x4_neon: 2.33x 2.28x 2.53x 3.33x 3.19x 32x4_neon: 2.35x 2.18x 2.45x 3.23x 3.24x 64x4_neon: 2.35x 2.16x 2.42x 3.15x 3.21x 128x4_neon: 2.35x 2.13x 2.39x 3.00x 3.09x 256x4_neon: 2.36x 2.12x 2.35x 2.85x 2.99x 512x4_neon: 2.35x 2.14x 2.35x 2.78x 2.95x 1024x4_neon: 2.31x 2.09x 2.33x 2.80x 2.91x 1920x4_neon: 2.30x 2.07x 2.32x 2.81x 2.94x xyz12le_rgb48le A55 A510 A520 16x4_neon: 2.09x 1.92x 2.36x 32x4_neon: 2.05x 1.89x 2.38x 64x4_neon: 2.02x 1.77x 2.35x 128x4_neon: 1.96x 1.74x 2.25x 256x4_neon: 1.90x 1.72x 2.19x 512x4_neon: 1.83x 1.75x 2.16x 1024x4_neon: 1.83x 1.62x 2.15x 1920x4_neon: 1.82x 1.60x 2.15x Signed-off-by: Arpad Panyik <Arpad.Panyik@arm.com>
437 lines
19 KiB
C
437 lines
19 KiB
C
/*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include "config.h"
|
|
#include "libavutil/attributes.h"
|
|
#include "libswscale/swscale.h"
|
|
#include "libswscale/swscale_internal.h"
|
|
#include "libavutil/aarch64/cpu.h"
|
|
#include "asm-offsets.h"
|
|
|
|
/* sizeof of a struct member without needing an object instance. */
#define SIZEOF_MEMBER(type, member) \
    sizeof(((type*)0)->member)
|
|
|
|
/* Verify that the offsets hard-coded for the assembly (asm-offsets.h) match
 * the current C struct layouts; a mismatch would silently corrupt data. */
static_assert(offsetof(SwsLuts, in) == SL_IN, "struct layout mismatch");
static_assert(offsetof(SwsLuts, out) == SL_OUT, "struct layout mismatch");

static_assert(offsetof(SwsColorXform, gamma) == SCX_GAMMA,
              "struct layout mismatch");
static_assert(offsetof(SwsColorXform, mat) == SCX_MAT,
              "struct layout mismatch");

/* Offset of matrix element mat[2][2], addressed directly from assembly. */
static_assert(offsetof(SwsColorXform, mat) +
              2 * SIZEOF_MEMBER(SwsColorXform, mat[0]) +
              2 * SIZEOF_MEMBER(SwsColorXform, mat[0][0]) == SCX_MAT_22,
              "struct layout mismatch");
|
|
|
/* Neon assembly: convert 12-bit XYZ (little endian) to 48-bit little-endian
 * RGB using the color transform in *c. Strides are in bytes; w/h in pixels. */
void ff_xyz12Torgb48le_neon_asm(const SwsColorXform *c, uint8_t *dst,
                                int dst_stride, const uint8_t *src,
                                int src_stride, int w, int h);
|
|
|
|
/* C wrapper matching the SwsInternal xyz12Torgb48 function-pointer signature;
 * forwards the context's XYZ->RGB transform to the Neon assembly. */
static void xyz12Torgb48le_neon(const SwsInternal *c, uint8_t *dst,
                                int dst_stride, const uint8_t *src,
                                int src_stride, int w, int h)
{
    ff_xyz12Torgb48le_neon_asm(&c->xyz2rgb, dst, dst_stride, src, src_stride,
                               w, h);
}
|
|
|
|
/* Neon assembly horizontal scalers for 16 bpc input. The suffix selects the
 * supported filter size: "_4" exactly 4 taps, "_X8" a multiple of 8 taps,
 * "_X4" a multiple of 4 taps. "shift" is the per-format right shift computed
 * by the C wrappers below. */
void ff_hscale16to15_4_neon_asm(int shift, int16_t *_dst, int dstW,
                                const uint8_t *_src, const int16_t *filter,
                                const int32_t *filterPos, int filterSize);
void ff_hscale16to15_X8_neon_asm(int shift, int16_t *_dst, int dstW,
                                 const uint8_t *_src, const int16_t *filter,
                                 const int32_t *filterPos, int filterSize);
void ff_hscale16to15_X4_neon_asm(int shift, int16_t *_dst, int dstW,
                                 const uint8_t *_src, const int16_t *filter,
                                 const int32_t *filterPos, int filterSize);
void ff_hscale16to19_4_neon_asm(int shift, int16_t *_dst, int dstW,
                                const uint8_t *_src, const int16_t *filter,
                                const int32_t *filterPos, int filterSize);
void ff_hscale16to19_X8_neon_asm(int shift, int16_t *_dst, int dstW,
                                 const uint8_t *_src, const int16_t *filter,
                                 const int32_t *filterPos, int filterSize);
void ff_hscale16to19_X4_neon_asm(int shift, int16_t *_dst, int dstW,
                                 const uint8_t *_src, const int16_t *filter,
                                 const int32_t *filterPos, int filterSize);
|
|
|
|
static void ff_hscale16to15_4_neon(SwsInternal *c, int16_t *_dst, int dstW,
|
|
const uint8_t *_src, const int16_t *filter,
|
|
const int32_t *filterPos, int filterSize)
|
|
{
|
|
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->opts.src_format);
|
|
int sh = desc->comp[0].depth - 1;
|
|
|
|
if (sh<15) {
|
|
sh = isAnyRGB(c->opts.src_format) || c->opts.src_format==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
|
|
} else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
|
|
sh = 16 - 1;
|
|
}
|
|
ff_hscale16to15_4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
|
|
|
|
}
|
|
|
|
static void ff_hscale16to15_X8_neon(SwsInternal *c, int16_t *_dst, int dstW,
|
|
const uint8_t *_src, const int16_t *filter,
|
|
const int32_t *filterPos, int filterSize)
|
|
{
|
|
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->opts.src_format);
|
|
int sh = desc->comp[0].depth - 1;
|
|
|
|
if (sh<15) {
|
|
sh = isAnyRGB(c->opts.src_format) || c->opts.src_format==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
|
|
} else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
|
|
sh = 16 - 1;
|
|
}
|
|
ff_hscale16to15_X8_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
|
|
|
|
}
|
|
|
|
static void ff_hscale16to15_X4_neon(SwsInternal *c, int16_t *_dst, int dstW,
|
|
const uint8_t *_src, const int16_t *filter,
|
|
const int32_t *filterPos, int filterSize)
|
|
{
|
|
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->opts.src_format);
|
|
int sh = desc->comp[0].depth - 1;
|
|
|
|
if (sh<15) {
|
|
sh = isAnyRGB(c->opts.src_format) || c->opts.src_format==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
|
|
} else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
|
|
sh = 16 - 1;
|
|
}
|
|
ff_hscale16to15_X4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
|
|
}
|
|
|
|
static void ff_hscale16to19_4_neon(SwsInternal *c, int16_t *_dst, int dstW,
|
|
const uint8_t *_src, const int16_t *filter,
|
|
const int32_t *filterPos, int filterSize)
|
|
{
|
|
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->opts.src_format);
|
|
int bits = desc->comp[0].depth - 1;
|
|
int sh = bits - 4;
|
|
|
|
if ((isAnyRGB(c->opts.src_format) || c->opts.src_format==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
|
|
sh = 9;
|
|
} else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
|
|
sh = 16 - 1 - 4;
|
|
}
|
|
|
|
ff_hscale16to19_4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
|
|
|
|
}
|
|
|
|
static void ff_hscale16to19_X8_neon(SwsInternal *c, int16_t *_dst, int dstW,
|
|
const uint8_t *_src, const int16_t *filter,
|
|
const int32_t *filterPos, int filterSize)
|
|
{
|
|
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->opts.src_format);
|
|
int bits = desc->comp[0].depth - 1;
|
|
int sh = bits - 4;
|
|
|
|
if ((isAnyRGB(c->opts.src_format) || c->opts.src_format==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
|
|
sh = 9;
|
|
} else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
|
|
sh = 16 - 1 - 4;
|
|
}
|
|
|
|
ff_hscale16to19_X8_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
|
|
|
|
}
|
|
|
|
static void ff_hscale16to19_X4_neon(SwsInternal *c, int16_t *_dst, int dstW,
|
|
const uint8_t *_src, const int16_t *filter,
|
|
const int32_t *filterPos, int filterSize)
|
|
{
|
|
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->opts.src_format);
|
|
int bits = desc->comp[0].depth - 1;
|
|
int sh = bits - 4;
|
|
|
|
if ((isAnyRGB(c->opts.src_format) || c->opts.src_format==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
|
|
sh = 9;
|
|
} else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
|
|
sh = 16 - 1 - 4;
|
|
}
|
|
|
|
ff_hscale16to19_X4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
|
|
|
|
}
|
|
|
|
/* Generate prototypes for the 8 bpc assembly horizontal scalers
 * (ff_hscale8to15_* and ff_hscale8to19_* for each filter-size variant). */
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
                                             SwsInternal *c, int16_t *data, \
                                             int dstW, const uint8_t *src, \
                                             const int16_t *filter, \
                                             const int32_t *filterPos, int filterSize)
#define SCALE_FUNCS(filter_n, opt) \
    SCALE_FUNC(filter_n, 8, 15, opt); \
    SCALE_FUNC(filter_n, 8, 19, opt);
#define ALL_SCALE_FUNCS(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(X8, opt); \
    SCALE_FUNCS(X4, opt)

ALL_SCALE_FUNCS(neon);
|
|
|
|
/* Neon assembly vertical scaler template shared by the 9/10/12/14-bit
 * outputs; output_bits and big_endian select the concrete variant. */
void ff_yuv2planeX_10_neon(const int16_t *filter, int filterSize,
                           const int16_t **src, uint16_t *dest, int dstW,
                           int big_endian, int output_bits);
|
|
|
|
/* Generate a yuv2planeX wrapper for each bit depth / endianness pair, all
 * forwarding to the shared 10-bit-template assembly routine above. */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2planeX_ ## bits ## BE_LE ## _neon(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset) \
{ \
    ff_yuv2planeX_## template_size ## _neon(filter, \
                        filterSize, (const typeX_t **) src, \
                        (uint16_t *) dest, dstW, is_be, bits); \
}

yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(12, BE, 1, 10, int16_t)
yuv2NBPS(12, LE, 0, 10, int16_t)
yuv2NBPS(14, BE, 1, 10, int16_t)
yuv2NBPS(14, LE, 0, 10, int16_t)
|
|
|
|
/* Neon assembly 8-bit vertical scalers: multi-tap (planeX) and single-tap
 * (plane1) variants. */
void ff_yuv2planeX_8_neon(const int16_t *filter, int filterSize,
                          const int16_t **src, uint8_t *dest, int dstW,
                          const uint8_t *dither, int offset);
void ff_yuv2plane1_8_neon(
        const int16_t *src,
        uint8_t *dest,
        int dstW,
        const uint8_t *dither,
        int offset);

/* Neon assembly semi-planar chroma writer; the C wrapper below passes
 * isSwapped = 1 when the destination's chroma is NOT swapped. */
void ff_yuv2nv12cX_neon_asm(int isSwapped, const uint8_t *chrDither,
                            const int16_t *chrFilter, int chrFilterSize,
                            const int16_t **chrUSrc, const int16_t **chrVSrc,
                            uint8_t *dest, int chrDstW);
|
|
|
|
/* Wrapper matching the yuv2nv12cX function-pointer signature: translate the
 * destination format into the isSwapped flag expected by the assembly
 * (1 when chroma order is not swapped, 0 when it is). */
static void ff_yuv2nv12cX_neon(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
                               const int16_t *chrFilter, int chrFilterSize,
                               const int16_t **chrUSrc, const int16_t **chrVSrc,
                               uint8_t *dest, int chrDstW)
{
    const int not_swapped = isSwappedChroma(dstFormat) ? 0 : 1;

    ff_yuv2nv12cX_neon_asm(not_swapped, chrDither, chrFilter, chrFilterSize,
                           chrUSrc, chrVSrc, dest, chrDstW);
}
|
|
|
|
/* Select a horizontal scale function for a fixed filter-size variant:
 * 8-bit sources use the 8toN functions, everything else the 16toN ones;
 * destinations of at most 14 bpc use 15-bit intermediates, wider ones 19. */
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt) do { \
    if (c->srcBpc == 8) { \
        if(c->dstBpc <= 14) { \
            hscalefn = \
                ff_hscale8to15_ ## filtersize ## _ ## opt; \
        } else \
            hscalefn = \
                ff_hscale8to19_ ## filtersize ## _ ## opt; \
    } else { \
        if (c->dstBpc <= 14) \
            hscalefn = \
                ff_hscale16to15_ ## filtersize ## _ ## opt; \
        else \
            hscalefn = \
                ff_hscale16to19_ ## filtersize ## _ ## opt; \
    } \
} while (0)

/* Dispatch on the filter size: exactly 4 taps, a multiple of 8 taps, or a
 * multiple of 4 taps. (The "filtersize % 8 != 0" term is redundant — the
 * previous branch already failed — but harmless.) Any other size leaves
 * hscalefn untouched, keeping the C fallback. */
#define ASSIGN_SCALE_FUNC(hscalefn, filtersize, opt) do { \
    if (filtersize == 4) \
        ASSIGN_SCALE_FUNC2(hscalefn, 4, opt); \
    else if (filtersize % 8 == 0) \
        ASSIGN_SCALE_FUNC2(hscalefn, X8, opt); \
    else if (filtersize % 4 == 0 && filtersize % 8 != 0) \
        ASSIGN_SCALE_FUNC2(hscalefn, X4, opt); \
} while (0)

/* Install the single-tap vertical scaler; only the 8-bit destination has a
 * Neon implementation here. */
#define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
    switch (c->dstBpc) { \
    case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \
    default: break; \
    }
|
|
|
|
/* Generate prototypes for the Neon packed-pixel input readers: ToY (luma),
 * ToUV (chroma) and ToUV_half (chroma with horizontal subsampling). */
#define NEON_INPUT(name) \
void ff_##name##ToY_neon(uint8_t *dst, const uint8_t *src, const uint8_t *, \
                         const uint8_t *, int w, uint32_t *coeffs, void *); \
void ff_##name##ToUV_neon(uint8_t *, uint8_t *, const uint8_t *, \
                          const uint8_t *, const uint8_t *, int w, \
                          uint32_t *coeffs, void *); \
void ff_##name##ToUV_half_neon(uint8_t *, uint8_t *, const uint8_t *, \
                               const uint8_t *, const uint8_t *, int w, \
                               uint32_t *coeffs, void *)
/* Same, for the DotProd-instruction luma-only variants. */
#define NEON_INPUT_DOTPROD(name) \
void ff_##name##ToY_neon_dotprod(uint8_t *dst, const uint8_t *src, const uint8_t *, \
                                 const uint8_t *, int w, uint32_t *coeffs, void *);

NEON_INPUT(abgr32);
NEON_INPUT(argb32);
NEON_INPUT(bgr24);
NEON_INPUT(bgra32);
NEON_INPUT(rgb24);
NEON_INPUT(rgba32);
NEON_INPUT_DOTPROD(bgra32);
NEON_INPUT_DOTPROD(rgba32);
|
|
|
|
/* Neon assembly range converters between full (JPEG) and limited range.
 * The *8 variants are installed for dstBpc <= 14, the *16 variants for wider
 * destinations (see ff_sws_init_range_convert_aarch64()). */
void ff_lumRangeFromJpeg8_neon(int16_t *dst, int width,
                               uint32_t coeff, int64_t offset);
void ff_chrRangeFromJpeg8_neon(int16_t *dstU, int16_t *dstV, int width,
                               uint32_t coeff, int64_t offset);
void ff_lumRangeToJpeg8_neon(int16_t *dst, int width,
                             uint32_t coeff, int64_t offset);
void ff_chrRangeToJpeg8_neon(int16_t *dstU, int16_t *dstV, int width,
                             uint32_t coeff, int64_t offset);
void ff_lumRangeFromJpeg16_neon(int16_t *dst, int width,
                                uint32_t coeff, int64_t offset);
void ff_chrRangeFromJpeg16_neon(int16_t *dstU, int16_t *dstV, int width,
                                uint32_t coeff, int64_t offset);
void ff_lumRangeToJpeg16_neon(int16_t *dst, int width,
                              uint32_t coeff, int64_t offset);
void ff_chrRangeToJpeg16_neon(int16_t *dstU, int16_t *dstV, int width,
                              uint32_t coeff, int64_t offset);
|
|
|
|
/* Install Neon range-conversion functions on the context. The 8-suffixed
 * converters serve destinations up to 14 bpc, the 16-suffixed ones anything
 * wider; src_range selects the conversion direction. */
av_cold void ff_sws_init_range_convert_aarch64(SwsInternal *c)
{
    if (!have_neon(av_get_cpu_flags()))
        return;

    if (c->dstBpc <= 14) {
        c->lumConvertRange = c->opts.src_range ? ff_lumRangeFromJpeg8_neon
                                               : ff_lumRangeToJpeg8_neon;
        c->chrConvertRange = c->opts.src_range ? ff_chrRangeFromJpeg8_neon
                                               : ff_chrRangeToJpeg8_neon;
    } else {
        c->lumConvertRange = c->opts.src_range ? ff_lumRangeFromJpeg16_neon
                                               : ff_lumRangeToJpeg16_neon;
        c->chrConvertRange = c->opts.src_range ? ff_chrRangeFromJpeg16_neon
                                               : ff_chrRangeToJpeg16_neon;
    }
}
|
|
|
|
/* Install the Neon XYZ->RGB converter; only the little-endian source layout
 * has an assembly implementation. */
av_cold void ff_sws_init_xyzdsp_aarch64(SwsInternal *c)
{
    const int cpu_flags = av_get_cpu_flags();

    if (have_neon(cpu_flags) && !isBE(c->opts.src_format))
        c->xyz12Torgb48 = xyz12Torgb48le_neon;
}
|
|
|
|
/* Install all aarch64 Neon swscale function pointers on the context:
 * horizontal/vertical scalers and packed RGB input readers, selected by
 * source/destination format, filter sizes and bit depths. */
av_cold void ff_sws_init_swscale_aarch64(SwsInternal *c)
{
    int cpu_flags = av_get_cpu_flags();
    enum AVPixelFormat dstFormat = c->opts.dst_format;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);

    if (have_neon(cpu_flags)) {
        /* Horizontal luma/chroma scalers, chosen by filter size and bpc. */
        ASSIGN_SCALE_FUNC(c->hyScale, c->hLumFilterSize, neon);
        ASSIGN_SCALE_FUNC(c->hcScale, c->hChrFilterSize, neon);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, neon);
        if (c->dstBpc == 8) {
            c->yuv2planeX = ff_yuv2planeX_8_neon;
            /* Semi-planar chroma interleaving, data in low bits only. */
            if (isSemiPlanarYUV(dstFormat) && !isDataInHighBits(dstFormat))
                c->yuv2nv12cX = ff_yuv2nv12cX_neon;
        }

        /* NBPS (9/10/12/14-bit) planar outputs: pick the wrapper matching
         * the component depth and endianness. */
        if (isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat) && !isDataInHighBits(dstFormat)) {
            if (desc->comp[0].depth == 9) {
                c->yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_neon : yuv2planeX_9LE_neon;
            } else if (desc->comp[0].depth == 10) {
                c->yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_neon : yuv2planeX_10LE_neon;
            } else if (desc->comp[0].depth == 12) {
                c->yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_neon : yuv2planeX_12LE_neon;
            } else if (desc->comp[0].depth == 14) {
                c->yuv2planeX = isBE(dstFormat) ? yuv2planeX_14BE_neon : yuv2planeX_14LE_neon;
            } else
                av_assert0(0); /* isNBPS() should only admit the depths above */
        }
        /* Packed RGB/BGR input readers; the *_half chroma variant is used
         * when chroma is horizontally subsampled. */
        switch (c->opts.src_format) {
        case AV_PIX_FMT_ABGR:
            c->lumToYV12 = ff_abgr32ToY_neon;
            if (c->chrSrcHSubSample)
                c->chrToYV12 = ff_abgr32ToUV_half_neon;
            else
                c->chrToYV12 = ff_abgr32ToUV_neon;
            break;

        case AV_PIX_FMT_ARGB:
            c->lumToYV12 = ff_argb32ToY_neon;
            if (c->chrSrcHSubSample)
                c->chrToYV12 = ff_argb32ToUV_half_neon;
            else
                c->chrToYV12 = ff_argb32ToUV_neon;
            break;
        case AV_PIX_FMT_BGR24:
            c->lumToYV12 = ff_bgr24ToY_neon;
            if (c->chrSrcHSubSample)
                c->chrToYV12 = ff_bgr24ToUV_half_neon;
            else
                c->chrToYV12 = ff_bgr24ToUV_neon;
            break;
        case AV_PIX_FMT_BGRA:
            c->lumToYV12 = ff_bgra32ToY_neon;
#if HAVE_DOTPROD
            /* Prefer the DotProd luma reader when the CPU supports it. */
            if (have_dotprod(cpu_flags)) {
                c->lumToYV12 = ff_bgra32ToY_neon_dotprod;
            }
#endif
            if (c->chrSrcHSubSample)
                c->chrToYV12 = ff_bgra32ToUV_half_neon;
            else
                c->chrToYV12 = ff_bgra32ToUV_neon;
            break;
        case AV_PIX_FMT_RGB24:
            c->lumToYV12 = ff_rgb24ToY_neon;
            if (c->chrSrcHSubSample)
                c->chrToYV12 = ff_rgb24ToUV_half_neon;
            else
                c->chrToYV12 = ff_rgb24ToUV_neon;
            break;
        case AV_PIX_FMT_RGBA:
            c->lumToYV12 = ff_rgba32ToY_neon;
#if HAVE_DOTPROD
            /* Prefer the DotProd luma reader when the CPU supports it. */
            if (have_dotprod(cpu_flags)) {
                c->lumToYV12 = ff_rgba32ToY_neon_dotprod;
            }
#endif
            if (c->chrSrcHSubSample)
                c->chrToYV12 = ff_rgba32ToUV_half_neon;
            else
                c->chrToYV12 = ff_rgba32ToUV_neon;
            break;
        default:
            break;
        }
    }
}
|