mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-12-05 14:30:00 +01:00
Add optimized Neon code path for the little endian case of the xyz12Torgb48 function. The innermost loop processes the data in 4x2 pixel blocks using software gathers with the matrix multiplication and clipping done by Neon.

Relative runtime of micro benchmarks after this patch on some Cortex and Neoverse CPU cores:

xyz12le_rgb48le   X1      X3      X4      X925    V2
  16x4_neon:      2.55x   4.34x   3.84x   3.31x   3.22x
  32x4_neon:      2.39x   3.63x   3.22x   3.35x   3.29x
  64x4_neon:      2.37x   3.31x   2.91x   3.33x   3.27x
  128x4_neon:     2.34x   3.28x   2.91x   3.35x   3.24x
  256x4_neon:     2.30x   3.17x   2.91x   3.32x   3.10x
  512x4_neon:     2.26x   3.10x   2.91x   3.30x   3.07x
  1024x4_neon:    2.26x   3.07x   2.96x   3.30x   3.05x
  1920x4_neon:    2.26x   3.06x   2.93x   3.28x   3.04x

xyz12le_rgb48le   A76     A78     A715    A720    A725
  16x4_neon:      2.33x   2.28x   2.53x   3.33x   3.19x
  32x4_neon:      2.35x   2.18x   2.45x   3.23x   3.24x
  64x4_neon:      2.35x   2.16x   2.42x   3.15x   3.21x
  128x4_neon:     2.35x   2.13x   2.39x   3.00x   3.09x
  256x4_neon:     2.36x   2.12x   2.35x   2.85x   2.99x
  512x4_neon:     2.35x   2.14x   2.35x   2.78x   2.95x
  1024x4_neon:    2.31x   2.09x   2.33x   2.80x   2.91x
  1920x4_neon:    2.30x   2.07x   2.32x   2.81x   2.94x

xyz12le_rgb48le   A55     A510    A520
  16x4_neon:      2.09x   1.92x   2.36x
  32x4_neon:      2.05x   1.89x   2.38x
  64x4_neon:      2.02x   1.77x   2.35x
  128x4_neon:     1.96x   1.74x   2.25x
  256x4_neon:     1.90x   1.72x   2.19x
  512x4_neon:     1.83x   1.75x   2.16x
  1024x4_neon:    1.83x   1.62x   2.15x
  1920x4_neon:    1.82x   1.60x   2.15x

Signed-off-by: Arpad Panyik <Arpad.Panyik@arm.com>
37 lines
1.2 KiB
C
37 lines
1.2 KiB
C
/*
 * Copyright (c) 2025 Arpad Panyik <Arpad.Panyik@arm.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef SWSCALE_AARCH64_ASM_OFFSETS_H
#define SWSCALE_AARCH64_ASM_OFFSETS_H

/*
 * Numeric offset constants for the SwsLuts and SwsColorXform layouts.
 *
 * NOTE(review): these are presumably byte offsets of struct members, meant
 * to be used from AArch64 assembly where offsetof() is unavailable. The
 * struct definitions are not visible from this header, so the values must
 * be kept in sync with them by hand -- confirm against the C declarations
 * whenever either side changes.
 *
 * Only preprocessor directives and C-style comments are used here so that
 * the header stays includable from preprocessed assembly sources.
 */

/* SwsLuts */
#define SL_IN               0x00
#define SL_OUT              0x08

/* SwsColorXform */
#define SCX_GAMMA           0x00
#define SCX_MAT             0x10

/* Offsets of the SwsLuts members reached through SCX_GAMMA. */
#define SCX_GAMMA_IN        (SCX_GAMMA + SL_IN)
#define SCX_GAMMA_OUT       (SCX_GAMMA + SL_OUT)

/*
 * First and last entries addressed relative to SCX_MAT.
 * NOTE(review): "8 * 2" presumably means flat element index 8 (row 2,
 * column 2 of a 3x3 matrix) times a 2-byte element size -- confirm against
 * the declaration of the matrix member.
 */
#define SCX_MAT_00          SCX_MAT
#define SCX_MAT_22          (SCX_MAT + 8 * 2)

#endif /* SWSCALE_AARCH64_ASM_OFFSETS_H */