These functions are currently always called with a height that is either
equal to the block size or to the block size + 1, and height is a
compile-time constant at every callsite. This makes it possible to split
each function in two, avoiding the run-time check inside the function for
whether height is odd or even.
The corresponding avg function is only used with height == block size and
therefore does not have a height parameter at all. Removing the parameter
from the put_l2 functions as well thus simplifies the C code.
The new functions increase the size of .text of qpel{dsp}.o by 32B here,
yet they save 464B of C code here.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
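
To illustrate the split in plain C (a minimal sketch only; the real functions are MMXEXT assembly declared in the file below, and the names here are hypothetical stand-ins, not the actual FFmpeg helpers): instead of one l2-averaging helper that takes a run-time height and has to cope with a possible odd extra row, a small macro can stamp out one fixed-height variant per needed height, so the odd/even check disappears and the height parameter can be dropped.

#include <stddef.h>
#include <stdint.h>

/* Sketch: generate a fixed-height "put ... l2" averaging helper for an
 * 8-pixel-wide block.  H is a compile-time constant, so there is no
 * run-time check for an odd height and no height parameter to pass. */
#define DEF_PUT_PIXELS8_L2(H)                                               \
static void put_pixels8x ## H ## _l2_c(uint8_t *dst, const uint8_t *src1,   \
                                       const uint8_t *src2,                 \
                                       ptrdiff_t dst_stride,                \
                                       ptrdiff_t src1_stride)               \
{                                                                           \
    for (int y = 0; y < H; y++) {                                           \
        for (int x = 0; x < 8; x++)                                         \
            dst[x] = (src1[x] + src2[x] + 1) >> 1; /* rounded average */    \
        dst  += dst_stride;                                                 \
        src1 += src1_stride;                                                \
        src2 += 8; /* second source is a packed 8-byte-wide temp buffer */  \
    }                                                                       \
}

DEF_PUT_PIXELS8_L2(8) /* height == block size     */
DEF_PUT_PIXELS8_L2(9) /* height == block size + 1 */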
/*
 * quarterpel DSP functions
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stddef.h>
#include <stdint.h>

#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/qpeldsp.h"
#include "fpel.h"
#include "qpel.h"

void ff_put_pixels8x9_l2_mmxext(uint8_t *dst,
                                const uint8_t *src1, const uint8_t *src2,
                                ptrdiff_t dstStride, ptrdiff_t src1Stride);
void ff_put_pixels16x17_l2_mmxext(uint8_t *dst,
                                  const uint8_t *src1, const uint8_t *src2,
                                  ptrdiff_t dstStride, ptrdiff_t src1Stride);
void ff_put_no_rnd_pixels8x8_l2_mmxext(uint8_t *dst,
                                       const uint8_t *src1, const uint8_t *src2,
                                       ptrdiff_t dstStride, ptrdiff_t src1Stride);
void ff_put_no_rnd_pixels8x9_l2_mmxext(uint8_t *dst,
                                       const uint8_t *src1, const uint8_t *src2,
                                       ptrdiff_t dstStride, ptrdiff_t src1Stride);
void ff_put_no_rnd_pixels16x16_l2_mmxext(uint8_t *dst,
                                         const uint8_t *src1, const uint8_t *src2,
                                         ptrdiff_t dstStride, ptrdiff_t src1Stride);
void ff_put_no_rnd_pixels16x17_l2_mmxext(uint8_t *dst,
                                         const uint8_t *src1, const uint8_t *src2,
                                         ptrdiff_t dstStride, ptrdiff_t src1Stride);
void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          ptrdiff_t dstStride, ptrdiff_t srcStride, int h);
void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          ptrdiff_t dstStride, ptrdiff_t srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
                                                 const uint8_t *src,
                                                 ptrdiff_t dstStride, ptrdiff_t srcStride,
                                                 int h);
void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         ptrdiff_t dstStride, ptrdiff_t srcStride, int h);
void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         ptrdiff_t dstStride, ptrdiff_t srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
                                                const uint8_t *src,
                                                ptrdiff_t dstStride, ptrdiff_t srcStride,
                                                int h);
void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          ptrdiff_t dstStride, ptrdiff_t srcStride);
void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          ptrdiff_t dstStride, ptrdiff_t srcStride);
void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
                                                 const uint8_t *src,
                                                 ptrdiff_t dstStride, ptrdiff_t srcStride);
void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         ptrdiff_t dstStride, ptrdiff_t srcStride);
void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         ptrdiff_t dstStride, ptrdiff_t srcStride);
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
                                                const uint8_t *src,
                                                ptrdiff_t dstStride, ptrdiff_t srcStride);

#if HAVE_X86ASM

#define QPEL_OP(OPNAME, RND, MMX) \
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
                                                   stride, 8); \
    ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src, half, \
                                          stride, stride); \
} \
    \
static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
                                                   stride, 8); \
} \
    \
static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
                                                   stride, 8); \
    ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src + 1, half, \
                                          stride, stride); \
} \
    \
static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
                                                   8, stride); \
    ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src, half, \
                                          stride, stride); \
} \
    \
static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
                                                   stride, stride); \
} \
    \
static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
                                                   8, stride); \
    ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src + stride, half, \
                                          stride, stride); \
} \
    \
static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src, halfH, 8, \
                                          stride); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH, halfHV, \
                                          stride, 8); \
} \
    \
static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                          stride); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH, halfHV, \
                                          stride, 8); \
} \
    \
static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src, halfH, 8, \
                                          stride); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                          stride, 8); \
} \
    \
static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                          stride); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                          stride, 8); \
} \
    \
static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH, halfHV, \
                                          stride, 8); \
} \
    \
static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                          stride, 8); \
} \
    \
static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src, halfH, \
                                          8, stride); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
    \
static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                          stride); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
    \
static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
    \
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
                                                    stride, 16); \
    ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src, half, \
                                            stride, stride); \
} \
    \
static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
                                                    stride, stride, 16); \
} \
    \
static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
                                                    stride, 16); \
    ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src + 1, half, \
                                            stride, stride); \
} \
    \
static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
                                                    stride); \
    ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src, half, \
                                            stride, stride); \
} \
    \
static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
                                                    stride, stride); \
} \
    \
static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
                                                    stride); \
    ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src + stride, half, \
                                            stride, stride); \
} \
    \
static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src, halfH, 16, \
                                            stride); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH, halfHV, \
                                            stride, 16); \
} \
    \
static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                            stride); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH, halfHV, \
                                            stride, 16); \
} \
    \
static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src, halfH, 16, \
                                            stride); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                            stride, 16); \
} \
    \
static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                            stride); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                            stride, 16); \
} \
    \
static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH, halfHV, \
                                            stride, 16); \
} \
    \
static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                            stride, 16); \
} \
    \
static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src, halfH, 16, \
                                            stride); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
} \
    \
static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                            stride); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
} \
    \
static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
}

QPEL_OP(put_,        _,        mmxext)
QPEL_OP(avg_,        _,        mmxext)
QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)

#endif /* HAVE_X86ASM */

#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                           \
    do {                                                                      \
    c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU;  \
    c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU;  \
    } while (0)

av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (X86_MMXEXT(cpu_flags)) {
#if HAVE_MMXEXT_EXTERNAL
        SET_QPEL_FUNCS(avg_qpel,        0, 16, mmxext, );
        c->avg_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
        SET_QPEL_FUNCS(avg_qpel,        1,  8, mmxext, );

        SET_QPEL_FUNCS(put_qpel,        0, 16, mmxext, );
        SET_QPEL_FUNCS(put_qpel,        1,  8, mmxext, );
        SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
        SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, mmxext, );
#endif /* HAVE_MMXEXT_EXTERNAL */
    }
#if HAVE_SSE2_EXTERNAL
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->put_no_rnd_qpel_pixels_tab[0][0] =
        c->put_qpel_pixels_tab[0][0]        = ff_put_pixels16x16_sse2;
        c->put_no_rnd_qpel_pixels_tab[1][0] =
        c->put_qpel_pixels_tab[1][0]        = ff_put_pixels8x8_sse2;
        c->avg_qpel_pixels_tab[0][0]        = ff_avg_pixels16x16_sse2;
    }
#endif
}
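
For context on how these tables are consumed (a sketch for illustration only, not code from this file): the [IDX][dxy] entries filled in by SET_QPEL_FUNCS are indexed by the quarter-sample fraction of the motion vector, and the mc<x><y> suffix names the horizontal and vertical quarter-pel offset. A caller in a decoder would select an entry roughly as below; mc_block8 is a hypothetical helper, not an FFmpeg function.

#include <stddef.h>
#include <stdint.h>
#include "libavcodec/qpeldsp.h"

/* Hypothetical caller: quarter-pel motion compensation for one 8x8 block.
 * dxy packs the vertical quarter-pel offset into bits 2-3 and the
 * horizontal offset into bits 0-1, matching the [IDX][dxy] layout used by
 * SET_QPEL_FUNCS above ([1] selects the 8x8 table, [0] the 16x16 table). */
static void mc_block8(const QpelDSPContext *c, uint8_t *dst,
                      const uint8_t *src, ptrdiff_t stride, int mx, int my)
{
    int dxy = ((my & 3) << 2) | (mx & 3);
    c->put_qpel_pixels_tab[1][dxy](dst, src, stride);
}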