mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-12-11 17:30:00 +01:00
avfilter/vf_fsppdsp: Use restrict
It is possible because the requirements of restrict are fulfilled; it is also beneficial both performance-wise and code-size-wise. For GCC 14 (with -O3), this reduced code size by 26750B here; for Clang 20, it was 432B.

Old benchmarks:
mul_thrmat_c:         4.3 ( 1.00x)
mul_thrmat_sse2:      4.3 ( 1.00x)
store_slice_c:     2810.8 ( 1.00x)
store_slice_sse2:   542.5 ( 5.18x)
store_slice2_c:    3817.0 ( 1.00x)
store_slice2_sse2:  410.4 ( 9.30x)

New benchmarks:
mul_thrmat_c:         4.3 ( 1.00x)
mul_thrmat_sse2:      4.3 ( 1.00x)
store_slice_c:     1510.1 ( 1.00x)
store_slice_sse2:   545.2 ( 2.77x)
store_slice2_c:    1763.5 ( 1.00x)
store_slice2_sse2:  408.3 ( 4.32x)

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -64,7 +64,7 @@ DECLARE_ALIGNED(8, const uint8_t, ff_fspp_dither)[8][8] = {
|
||||
};
|
||||
|
||||
// This function reads from one slice (slice 1) and clears slices 0 and 1
|
||||
void ff_store_slice_c(uint8_t *dst, int16_t *src,
|
||||
void ff_store_slice_c(uint8_t *restrict dst, int16_t *restrict src,
|
||||
ptrdiff_t dst_stride, ptrdiff_t src_stride,
|
||||
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
|
||||
{
|
||||
@@ -93,7 +93,7 @@ void ff_store_slice_c(uint8_t *dst, int16_t *src,
|
||||
}
|
||||
|
||||
// This function reads from two slices (0 and 2) and clears the 2nd
|
||||
void ff_store_slice2_c(uint8_t *dst, int16_t *src,
|
||||
void ff_store_slice2_c(uint8_t *restrict dst, int16_t *restrict src,
|
||||
ptrdiff_t dst_stride, ptrdiff_t src_stride,
|
||||
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
|
||||
{
|
||||
@@ -121,13 +121,14 @@ void ff_store_slice2_c(uint8_t *dst, int16_t *src,
|
||||
}
|
||||
}
|
||||
|
||||
void ff_mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
|
||||
void ff_mul_thrmat_c(int16_t *restrict thr_adr_noq, int16_t *restrict thr_adr, int q)
|
||||
{
|
||||
for (int a = 0; a < 64; a++)
|
||||
thr_adr[a] = q * thr_adr_noq[a];
|
||||
}
|
||||
|
||||
void ff_column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
|
||||
void ff_column_fidct_c(int16_t *restrict thr_adr, int16_t *restrict data,
|
||||
int16_t *restrict output, int cnt)
|
||||
{
|
||||
int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
int_simd16_t tmp10, tmp11, tmp12, tmp13;
|
||||
@@ -249,7 +250,8 @@ void ff_column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt
|
||||
}
|
||||
}
|
||||
|
||||
void ff_row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
|
||||
void ff_row_idct_c(int16_t *restrict workspace, int16_t *restrict output_adr,
|
||||
ptrdiff_t output_stride, int cnt)
|
||||
{
|
||||
int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
int_simd16_t tmp10, tmp11, tmp12, tmp13;
|
||||
@@ -311,7 +313,8 @@ void ff_row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_str
|
||||
}
|
||||
}
|
||||
|
||||
void ff_row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
|
||||
void ff_row_fdct_c(int16_t *restrict data, const uint8_t *restrict pixels,
|
||||
ptrdiff_t line_size, int cnt)
|
||||
{
|
||||
int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
int_simd16_t tmp10, tmp11, tmp12, tmp13;
|
||||
|
||||
@@ -31,40 +31,43 @@
|
||||
#include "libavutil/attributes_internal.h"
|
||||
|
||||
typedef struct FSPPDSPContext {
|
||||
void (*store_slice)(uint8_t *dst, int16_t *src /* align 16 */,
|
||||
void (*store_slice)(uint8_t *restrict dst, int16_t *restrict src /* align 16 */,
|
||||
ptrdiff_t dst_stride, ptrdiff_t src_stride,
|
||||
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale);
|
||||
|
||||
void (*store_slice2)(uint8_t *dst, int16_t *src /* align 16 */,
|
||||
void (*store_slice2)(uint8_t *restrict dst, int16_t *restrict src /* align 16 */,
|
||||
ptrdiff_t dst_stride, ptrdiff_t src_stride,
|
||||
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale);
|
||||
|
||||
void (*mul_thrmat)(int16_t *thr_adr_noq /* align 16 */,
|
||||
int16_t *thr_adr /* align 16 */, int q);
|
||||
void (*mul_thrmat)(int16_t *restrict thr_adr_noq /* align 16 */,
|
||||
int16_t *restrict thr_adr /* align 16 */, int q);
|
||||
|
||||
void (*column_fidct)(int16_t *thr_adr, int16_t *data,
|
||||
int16_t *output, int cnt);
|
||||
void (*column_fidct)(int16_t *restrict thr_adr, int16_t *data,
|
||||
int16_t *restrict output, int cnt);
|
||||
|
||||
void (*row_idct)(int16_t *workspace, int16_t *output_adr,
|
||||
void (*row_idct)(int16_t *restrict workspace, int16_t *restrict output_adr,
|
||||
ptrdiff_t output_stride, int cnt);
|
||||
|
||||
void (*row_fdct)(int16_t *data, const uint8_t *pixels,
|
||||
void (*row_fdct)(int16_t *restrict data, const uint8_t *restrict pixels,
|
||||
ptrdiff_t line_size, int cnt);
|
||||
} FSPPDSPContext;
|
||||
|
||||
FF_VISIBILITY_PUSH_HIDDEN
|
||||
extern const uint8_t ff_fspp_dither[8][8];
|
||||
|
||||
void ff_store_slice_c(uint8_t *dst, int16_t *src,
|
||||
void ff_store_slice_c(uint8_t *restrict dst, int16_t *restrict src,
|
||||
ptrdiff_t dst_stride, ptrdiff_t src_stride,
|
||||
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale);
|
||||
void ff_store_slice2_c(uint8_t *dst, int16_t *src,
|
||||
void ff_store_slice2_c(uint8_t *restrict dst, int16_t *restrict src,
|
||||
ptrdiff_t dst_stride, ptrdiff_t src_stride,
|
||||
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale);
|
||||
void ff_mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q);
|
||||
void ff_column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt);
|
||||
void ff_row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt);
|
||||
void ff_row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt);
|
||||
void ff_mul_thrmat_c(int16_t *restrict thr_adr_noq, int16_t *restrict thr_adr, int q);
|
||||
void ff_column_fidct_c(int16_t *restrict thr_adr, int16_t *restrict data,
|
||||
int16_t *restrict output, int cnt);
|
||||
void ff_row_idct_c(int16_t *restrict workspace, int16_t *restrict output_adr,
|
||||
ptrdiff_t output_stride, int cnt);
|
||||
void ff_row_fdct_c(int16_t *restrict data, const uint8_t *restrict pixels,
|
||||
ptrdiff_t line_size, int cnt);
|
||||
|
||||
void ff_fsppdsp_init_x86(FSPPDSPContext *fspp);
|
||||
FF_VISIBILITY_POP_HIDDEN
|
||||
|
||||
Reference in New Issue
Block a user