Files
veejay/veejay-current/veejay-server/libyuv/subsample.c
niels e5ac37a5b8 fix reloaded build against newer ffmpeg, add large file support (avilib still limited to 4gb), workaround bug in lzo compressor,
refactored all memcpy into (multithreaded) frame copy, refactored all memset into (multithreaded) frame clear, multithreaded super and sub sampling (only tested 4:2:2 ), added new mode to pencil sketch, fixed bug in lens correction, multithreaded 36 effects, updated minilzo to newer version, multithreaded yuv functions for format conversion and pixel scaling, multithreaded fx chain fader, multithreaded fx apply in libvje. Fix bug in encoder when recording in high resolution,
2012-11-25 08:29:07 +01:00

991 lines
25 KiB
C

/*
* subsample.c: Routines to do chroma subsampling. ("Work In Progress")
*
*
* Copyright (C) 2001 Matthew J. Marjanovic <maddog@mir.com>
* 2004 Niels Elburg <nwelburg@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
#include <config.h>
#ifdef HAVE_ASM_MMX
#include "mmx.h"
#endif
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <mjpegtools/mjpeg_types.h>
#include <libvjmem/vjmem.h>
#include <libvjmsg/vj-msg.h>
#include <libvje/vje.h>
#include <libyuv/yuvconv.h>
#include <veejay/vj-task.h>
/*
 * Short identifier strings for the subsampling modes.
 * Only the first three slots are initialised; if SSM_COUNT is larger, the
 * remaining slots are implicitly NULL. The DV entries are compiled out.
 * NOTE(review): presumably indexed by subsample_mode_t -- confirm against the
 * enum declaration, which is not part of this file.
 */
const char *ssm_id[SSM_COUNT] = {
"unknown",
"420_jpeg",
"420_mpeg2",
#if 0
"420_dv_pal",
"411_dv_ntsc"
#endif
};
/*
 * Human readable descriptions, parallel to ssm_id.
 * Only the first three slots are initialised; if SSM_COUNT is larger, the
 * remaining slots are implicitly NULL.
 * NOTE(review): in the compiled-out section "4:1:1, DV-NTSC" has no trailing
 * comma, so it would be concatenated with "4:2:2" if that section were ever
 * enabled.
 */
const char *ssm_description[SSM_COUNT] = {
"unknown/illegal",
"4:2:0, JPEG/MPEG-1, interstitial siting",
"4:2:0, MPEG-2, horizontal cositing",
#if 0
"4:2:0, DV-PAL, cosited, Cb/Cr line alternating",
"4:1:1, DV-NTSC"
"4:2:2",
#endif
};
/* Round num up to the next multiple of 8; multiples of 8 are returned unchanged. */
#define RUP8(num) (((num) + 7) & ~7)
// Forward declarations of the two 4:2:0 <-> 4:2:2 converters defined further
// down in this file (both are currently empty placeholders).
void ss_420_to_422(uint8_t *buffer, int width, int height);
void ss_422_to_420(uint8_t *buffer, int width, int height);
/*************************************************************************
* Chroma Subsampling
*************************************************************************/
/* vertical/horizontal interstitial siting
*
* Y Y Y Y
* C C
* Y Y Y Y
*
* Y Y Y Y
* C C
* Y Y Y Y
*
*/
/*
static void ss_444_to_420jpeg(uint8_t *buffer, int width, int height)
{
uint8_t *in0, *in1, *out;
int x, y;
in0 = buffer;
in1 = buffer + width;
out = buffer;
for (y = 0; y < height; y += 2) {
for (x = 0; x < width; x += 2) {
*out = (in0[0] + in0[1] + in1[0] + in1[1]) >> 2;
in0 += 2;
in1 += 2;
out++;
}
in0 += width;
in1 += width;
}
}
*/
/*
using weighted averaging for subsampling 2x2 -> 1x1
here, 4 pixels are filled in each inner loop, (weighting
16 source pixels)
*/
/*
 * In-place 2x2 -> 1 reduction of one plane (4:4:4 -> 4:2:0).
 *
 * buffer  plane of width x height samples; on return its first
 *         (width/2) * (height/2) bytes hold the reduced plane, packed.
 * width   samples per input line
 * height  number of input lines
 *
 * Each output sample is the rounded weighted sum of one 2x2 block with the
 * weights 1 (top-left), 3 (top-right), 3 (bottom-left), 9 (bottom-right),
 * divided by 16.
 *
 * Lines are consumed strictly in pairs (0+1, 2+3, ...). The earlier unrolled
 * loop advanced through 2*width bytes per pass, which paired line 1 with
 * line 2 for every second output line and read past the plane whenever
 * width or height was not a multiple of 4. A trailing odd line or column is
 * ignored.
 *
 * Writing in place is safe: output index i is never ahead of the input
 * samples still to be read.
 */
static void ss_444_to_420jpeg(uint8_t *buffer, int width, int height)
{
    uint8_t *out = buffer;
    int x, y;

    for (y = 0; y + 1 < height; y += 2) {
        const uint8_t *in0 = buffer + (y * width);  /* upper line of the pair */
        const uint8_t *in1 = in0 + width;           /* lower line of the pair */

        for (x = 0; x + 1 < width; x += 2) {
            *out++ = (in0[x] + 3 * (in0[x + 1] + in1[x]) + (9 * in1[x + 1]) + 8) >> 4;
        }
    }
}
/*
 * 2x2 -> 1 reduction of one plane (4:4:4 -> 4:2:0) into a separate buffer.
 *
 * buffer  source plane of width x height samples (not modified)
 * dest    receives (width/2) * (height/2) samples, packed
 * width   samples per input line
 * height  number of input lines
 *
 * Each output sample is the rounded weighted sum of one 2x2 block with the
 * weights 1 (top-left), 3 (top-right), 3 (bottom-left), 9 (bottom-right),
 * divided by 16.
 *
 * Lines are consumed strictly in pairs (0+1, 2+3, ...). The earlier unrolled
 * loop advanced through 2*width bytes per pass, which paired line 1 with
 * line 2 for every second output line and read past the plane whenever
 * width or height was not a multiple of 4. A trailing odd line or column is
 * ignored.
 */
static void ss_444_to_420jpeg_cp(uint8_t *buffer,uint8_t *dest, int width, int height)
{
    uint8_t *out = dest;
    int x, y;

    for (y = 0; y + 1 < height; y += 2) {
        const uint8_t *in0 = buffer + (y * width);  /* upper line of the pair */
        const uint8_t *in1 = in0 + width;           /* lower line of the pair */

        for (x = 0; x + 1 < width; x += 2) {
            *out++ = (in0[x] + 3 * (in0[x + 1] + in1[x]) + (9 * in1[x + 1]) + 8) >> 4;
        }
    }
}
/* horizontal interstitial siting
*
* Y Y Y Y
* C C C C in0
* Y Y Y Y
* C C C C
*
* Y Y Y Y
* C C out0
* Y Y Y Y
* C C
*
*
*/
/* vertical/horizontal interstitial siting
*
* Y Y Y Y
* C C C inm
* Y Y Y Y
*
* Y Y Y - Y out0
* C | C | C in0
* Y Y Y - Y out1
*
*
* C C C inp
*
*
* Each iteration through the loop reconstitutes one 2x2 block of
* pixels from the "surrounding" 3x3 block of samples...
* Boundary conditions are handled by cheap reflection; i.e. the
* center sample is simply reused.
*
*/
/*
 * Stand-in for a neighbouring sample that lies outside the plane: the centre
 * sample of the current position. Expands to an expression on a pointer named
 * in0, so it can only be used where such a variable is in scope.
 */
#define BLANK_CRB in0[1]
/* The same sample shifted left by one (doubled); not referenced in this file's visible code. */
#define BLANK_CRB_2 (in0[1] << 1)
/*
 * In-place 4:2:0 -> 4:4:4 expansion of one plane.
 *
 * data    scratch area; the first `width` bytes of buffer are copied into it
 *         before anything is overwritten
 * buffer  on entry (width/2) x (height/2) packed samples, on return
 *         width x height samples
 *
 * The plane is walked from its last sample towards its first. Every source
 * sample yields a 2x2 output block; each of the four outputs blends the
 * centre sample (weight 9), its two nearest neighbours (weight 3 each) and
 * the diagonal neighbour (weight 1), rounded and divided by 16. Neighbours
 * outside the plane are replaced by the centre sample (BLANK_CRB).
 *
 * For the last pass (y == 2) the source lines are taken from the saved copy
 * in `data`, because the output has reached the start of buffer by then.
 */
static void tr_420jpeg_to_444(uint8_t *data, uint8_t *buffer, int width, int height)
{
    const int half = width / 2;
    uint8_t *saveme = data;
    uint8_t *inm, *in0, *inp;
    uint8_t *out0, *out1;
    int x, y;

    veejay_memcpy(saveme, buffer, width);

    in0 = buffer + ( width * height /4) - 2;
    inm = in0 - half;
    inp = in0 + half;
    out1 = buffer + (width * height) - 1;
    out0 = out1 - width;

    for (y = height; y > 0; y -= 2) {
        const int at_top = (y == 2);
        const int at_bottom = (y == height);

        if (at_top) {
            /* switch to the saved copy of the first source lines */
            in0 = saveme + half - 2;
            inp = in0 + half;
        }

        for (x = width; x > 0; x -= 2) {
            const int at_left = (x == 2);
            const int at_right = (x == width);

            /* 3x3 neighbourhood: first letter = line (m/0/p), second = column */
            const uint8_t c00 = in0[1];
            const uint8_t cmm = (at_left || at_top) ? BLANK_CRB : inm[0];
            const uint8_t cm0 = at_top ? BLANK_CRB : inm[1];
            const uint8_t cmp = (at_right || at_top) ? BLANK_CRB : inm[2];
            const uint8_t c0m = at_left ? BLANK_CRB : in0[0];
            const uint8_t c0p = at_right ? BLANK_CRB : in0[2];
            const uint8_t cpm = (at_left || at_bottom) ? BLANK_CRB : inp[0];
            const uint8_t cp0 = at_bottom ? BLANK_CRB : inp[1];
            const uint8_t cpp = (at_right || at_bottom) ? BLANK_CRB : inp[2];

            inm--;
            in0--;
            inp--;

            /* lower output line: right sample first, then left */
            *(out1--) = (1*cpp + 3*(cp0+c0p) + 9*c00 + 8) >> 4;
            *(out1--) = (1*cpm + 3*(cp0+c0m) + 9*c00 + 8) >> 4;
            /* upper output line: right sample first, then left */
            *(out0--) = (1*cmp + 3*(cm0+c0p) + 9*c00 + 8) >> 4;
            *(out0--) = (1*cmm + 3*(cm0+c0m) + 9*c00 + 8) >> 4;
        }

        /* each pointer covered one line; skip the line written by the other */
        out1 -= width;
        out0 -= width;
    }
}
// lame box filter
// the dampening of high frequencies depends
// on the direction in which these frequencies occur in the
// image, resulting in visible edges between certain
// groups of pixels.
/*
 * In-place 4:2:0 -> 4:4:4 expansion of one plane by sample replication.
 *
 * buffer  on entry (width/2) x (height/2) packed samples, on return
 *         width x height samples
 * width   samples per output line (expected to be even)
 * height  number of output lines (expected to be even)
 *
 * The plane is filled from its last byte towards its first so that source
 * samples are always read before the growing output overwrites them; every
 * source sample is written to a 2x2 block.
 *
 * The former HAVE_ASM_MMX variant has been removed: it began its 8-byte
 * stores at the final byte of the plane and then moved forward, i.e. it
 * wrote outside the plane, and it stored the source bytes unchanged instead
 * of duplicating them. The portable loop is used for every build.
 */
static void ss_420jpeg_to_444(uint8_t *buffer, int width, int height)
{
    uint8_t *in = buffer + (width * height / 4) - 1;
    uint8_t *out1 = buffer + (width * height) - 1;   /* lower line of the pair */
    uint8_t *out0 = out1 - width;                    /* upper line of the pair */
    int x, y;

    for (y = height - 1; y >= 0; y -= 2) {
        for (x = width - 1; x >= 0; x -= 2) {
            const uint8_t val = *(in--);

            *(out1--) = val;
            *(out1--) = val;
            *(out0--) = val;
            *(out0--) = val;
        }
        /* each pointer covered one line; skip the line written by the other */
        out0 -= width;
        out1 -= width;
    }
}
/*
 * Placeholder for 4:2:0 -> 4:2:2 conversion (1x2 box super sampling).
 * Not implemented yet: the plane is left untouched.
 */
void ss_420_to_422(uint8_t *buffer, int width, int height)
{
    /* TODO: 1x2 super sampling (box) */
    (void) buffer;
    (void) width;
    (void) height;
}
/*
 * Placeholder for 4:2:2 -> 4:2:0 conversion (2x1 box down sampling).
 * Not implemented yet: the plane is left untouched.
 */
void ss_422_to_420(uint8_t *buffer, int width, int height )
{
    /* TODO: 2x1 down sampling (box) */
    (void) buffer;
    (void) width;
    (void) height;
}
/*
 * Select the instruction string used to leave MMX state (_EMMS).
 * HAVE_K6_2PLUS is (re)defined here when 3DNow! is available without MMX2;
 * in that case "femms" is used, otherwise "emms".
 */
#ifdef HAVE_ASM_MMX
#undef HAVE_K6_2PLUS
#if !defined( HAVE_ASM_MMX2) && defined( HAVE_ASM_3DNOW )
#define HAVE_K6_2PLUS
#endif
#undef _EMMS
#ifdef HAVE_K6_2PLUS
/* On K6, femms is faster than emms. On K7, femms is mapped directly onto emms. */
#define _EMMS "femms"
#else
#define _EMMS "emms"
#endif
#endif
#ifdef HAVE_ASM_MMX
/*
 * Byte-wise copy of n bytes from `from` to `to` using "rep; movsb"; the
 * original author notes this is the faster choice for small blocks
 * (<256 bytes).
 *
 * `to` and `from` are bound to the destination/source index registers as asm
 * OUTPUTS as well as inputs, so both arguments must be modifiable lvalues and
 * are overwritten with the final register values after the copy.
 * The expansion is a brace block, not a do/while(0) statement.
 */
#define small_memcpy(to,from,n)\
{\
register unsigned long int dummy;\
__asm__ __volatile__(\
"rep; movsb"\
:"=&D"(to), "=&S"(from), "=&c"(dummy)\
:"0" (to), "1" (from),"2" (n)\
: "memory");\
}
/*
 * Copy 8 bytes from `in` to `dst` through MMX register mm0.
 * mm0 is not declared as clobbered and no emms is issued here; leaving MMX
 * state is up to the caller.
 */
static inline void copy8( uint8_t *dst, uint8_t *in )
{
__asm__ __volatile__ (
"movq (%0), %%mm0\n"
"movq %%mm0, (%1)\n"
:: "r" (in), "r" (dst) : "memory" );
}
/*
 * Copy 16 bytes from `in` to `dst` through MMX registers mm0 and mm1.
 * Both 8-byte halves are loaded before either is stored. The registers are
 * not declared as clobbered and no emms is issued here; leaving MMX state is
 * up to the caller.
 */
static inline void copy16( uint8_t *dst, uint8_t *in)
{
__asm__ __volatile__ (
"movq (%0), %%mm0\n"
"movq 8(%0), %%mm1\n"
"movq %%mm0, (%1)\n"
"movq %%mm1, 8(%1)\n"
:: "r" (in), "r" (dst) : "memory" );
}
/*
 * Copy `width` bytes from `in` to `dst`: whole 16-byte groups go through
 * copy16(), the remaining (width % 16) bytes through small_memcpy().
 */
static inline void copy_width( uint8_t *dst, uint8_t *in, int width )
{
    uint8_t *to = dst;
    uint8_t *from = in;
    int groups = width >> 4;
    int rest;

    while (groups-- > 0) {
        copy16( to, from );
        to += 16;
        from += 16;
    }

    rest = (width % 16);
    if( rest )
        small_memcpy( to, from, rest);
}
/*
 * Load the constant 0x00ff00ff00ff00ff into MMX register mm4, where
 * down_sample16to8() expects to find it.
 *
 * The constant is handed to the asm as a memory operand. Previously only its
 * address was passed in a general register, with neither a memory operand
 * nor a "memory" clobber, so the compiler was not told that the asm reads the
 * object and was free to drop or delay the store that initialises it.
 *
 * mm4 is not declared as clobbered; the value stays valid only until some
 * other code changes that register.
 */
static inline void load_mask16to8(void)
{
    static const uint64_t mask = 0x00ff00ff00ff00ffULL;

    __asm__ __volatile__(
        "movq %0, %%mm4\n\t"
        :: "m" (mask)
    );
}
/*
 * Reduce 16 bytes at `in` to 8 bytes at `out` by keeping every even-indexed
 * byte (in[0], in[2], ... in[14]) and dropping the byte to its right.
 *
 * Requires mm4 to hold 0x00ff00ff00ff00ff (see load_mask16to8()).
 * Uses mm1, mm2, mm3 and mm5 as scratch. mm2 is not initialised on purpose:
 * its old contents end up in the low half of the first pack and are shifted
 * out again by the psrlq.
 *
 * The "memory" clobber tells the compiler that the asm reads *in and writes
 * *out; without it accesses to those buffers could be reordered around the
 * asm statement or cached in registers.
 */
static inline void down_sample16to8( uint8_t *out, uint8_t *in )
{
    __asm__ __volatile__(
        "movq (%0), %%mm1\n\t"
        "movq 8(%0),%%mm3\n\t"
        "pxor %%mm5,%%mm5\n\t"
        "pand %%mm4,%%mm1\n\t"
        "pand %%mm4,%%mm3\n\t"
        "packuswb %%mm1,%%mm2\n\t"
        "packuswb %%mm3,%%mm5\n\t"
        "psrlq $32, %%mm2\n\t"
        "por %%mm5,%%mm2\n\t"
        "movq %%mm2, (%1)\n\t"
        :: "r" (in), "r" (out)
        : "memory"
    );
}
#endif
/*
 * Horizontal 2 -> 1 reduction of one plane (4:4:4 -> 4:2:2) into a separate
 * buffer.
 *
 * data    unused; kept so existing callers need not change
 * buffer  source plane, width x height samples (not modified)
 * dest    receives (width/2) x height samples, packed
 *
 * Portable build: every output sample is the rounded mean of a horizontal
 * pair. MMX build: groups of 16 source samples go through
 * down_sample16to8(), which keeps the left sample of each pair; the
 * remaining (width/2) % 8 outputs of a line are averaged like the portable
 * build.
 *
 * Changes from the previous version: the MMX section is guarded by
 * HAVE_ASM_MMX only, matching the guard on the declarations it needs (it
 * could not compile with HAVE_ASM_MMX2 alone); the copy of each line onto
 * itself and the dead initial assignment of `data` are gone.
 */
static void ss_444_to_422_cp(uint8_t *data, uint8_t *buffer, uint8_t *dest, int width, int height)
{
    const int dst_stride = width >> 1;
    int x, y;
#ifdef HAVE_ASM_MMX
    const int mmxdst_stride = dst_stride >> 3;
    const int left = dst_stride % 8;

    load_mask16to8();
#endif

    (void) data;

    for (y = 0; y < height; y++) {
        uint8_t *src = buffer + (y * width);
        uint8_t *dst = dest + (y * dst_stride);

#ifdef HAVE_ASM_MMX
        for (x = 0; x < mmxdst_stride; x++) {
            down_sample16to8( dst, src );
            src += 16;
            dst += 8;
        }
        for (x = 0; x < left; x++) {
            *(dst++) = ( src[0] + src[1] + 1 ) >> 1;
            src += 2;
        }
#else
        for (x = 0; x < dst_stride; x++) {
            *(dst++) = ( src[0] + src[1] + 1 ) >> 1;
            src += 2;
        }
#endif
    }
}
/*
 * In-place horizontal 2 -> 1 reduction of one plane (4:4:4 -> 4:2:2).
 *
 * data    scratch line of at least `width` bytes; only the MMX build uses it
 * buffer  on entry width x height samples, on return (width/2) x height
 *         samples, packed
 *
 * Portable build: each line is read straight from the plane and every output
 * sample is the rounded mean of a horizontal pair. This is safe in place
 * because an output position is never ahead of the pair it was computed
 * from. (The previous version read the pairs from `data`, which nothing had
 * filled on this path, so the result did not depend on the plane at all.)
 *
 * MMX build: the line is first copied into `data`; groups of 16 samples then
 * go through down_sample16to8(), which keeps the left sample of each pair,
 * and the remaining (width/2) % 8 outputs are averaged. The section is
 * guarded by HAVE_ASM_MMX only, matching the guard on the declarations it
 * needs.
 */
static void ss_444_to_422(uint8_t *data, uint8_t *buffer, int width, int height)
{
    const int dst_stride = width >> 1;
    int x, y;
#ifdef HAVE_ASM_MMX
    const int mmxdst_stride = dst_stride >> 3;
    const int left = dst_stride % 8;

    load_mask16to8();
#else
    (void) data;
#endif

    for (y = 0; y < height; y++) {
        uint8_t *dst = buffer + (y * dst_stride);
#ifdef HAVE_ASM_MMX
        uint8_t *src = data;

        copy_width( src, buffer + (y*width), width );
        for (x = 0; x < mmxdst_stride; x++) {
            down_sample16to8( dst, src );
            src += 16;
            dst += 8;
        }
        for (x = 0; x < left; x++) {
            *(dst++) = ( src[0] + src[1] + 1 ) >> 1;
            src += 2;
        }
#else
        const uint8_t *src = buffer + (y * width);

        for (x = 0; x < dst_stride; x++) {
            *(dst++) = ( src[0] + src[1] + 1 ) >> 1;
            src += 2;
        }
#endif
    }
}
#ifdef HAVE_ASM_MMX
/*
 * Expand 8 bytes at `in` to 16 bytes at `out` by writing every byte twice
 * (out[2i] = out[2i+1] = in[i]).
 *
 * All 8 source bytes are loaded before the first store. Uses mm1, mm2, mm4,
 * mm5 and mm6 as scratch, so a mask previously placed in mm4 is lost.
 *
 * The "memory" clobber tells the compiler that the asm reads *in and writes
 * *out; without it accesses to those buffers could be reordered around the
 * asm statement or cached in registers.
 */
static inline void super_sample8to16( uint8_t *in, uint8_t *out )
{
    __asm__ __volatile__ (
        "\n\tpxor %%mm2,%%mm2"
        "\n\tpxor %%mm4,%%mm4"
        "\n\tmovq (%0), %%mm1"
        "\n\tpunpcklbw %%mm1,%%mm2"
        "\n\tpunpckhbw %%mm1,%%mm4"
        "\n\tmovq %%mm2,%%mm5"
        "\n\tmovq %%mm4,%%mm6"
        "\n\tpsrlq $8, %%mm5"
        "\n\tpsrlq $8, %%mm6"
        "\n\tpor %%mm5,%%mm2"
        "\n\tpor %%mm6,%%mm4"
        "\n\tmovq %%mm2, (%1)"
        "\n\tmovq %%mm4, 8(%1)"
        :: "r" (in), "r" (out)
        : "memory"
    );
}
#endif
/*
 * In-place horizontal 1 -> 2 expansion of one plane (4:2:2 -> 4:4:4) by
 * sample replication.
 *
 * data    unused; kept so existing callers need not change
 * buffer  on entry (width/2) x height packed samples, on return
 *         width x height samples
 *
 * Lines are processed last to first and every line right to left. That
 * order keeps all unread source samples intact, including those of line 0,
 * whose source and destination start at the same address. The previous
 * version stopped at line 1 and therefore never expanded line 0.
 *
 * MMX build: the (width/2) % 8 rightmost samples of a line are expanded in
 * C, the rest in groups of 8 through super_sample8to16(); the previous
 * version left those rightmost samples unexpanded.
 *
 * The per-call veejay_msg() trace at level 0 has been removed.
 */
static void tr_422_to_444(uint8_t *data, uint8_t *buffer, int width, int height)
{
    const int stride = width >> 1;
    int x, y;

    (void) data;

    for (y = height - 1; y >= 0; y--) {
        uint8_t *src = buffer + (y * stride);
        uint8_t *dst = buffer + (y * width);
#ifdef HAVE_ASM_MMX
        /* first sample index that does not belong to a full group of 8 */
        const int tail = stride & ~7;

        for (x = stride - 1; x >= tail; x--) {
            const uint8_t val = src[x];

            dst[2 * x + 1] = val;
            dst[2 * x] = val;
        }
        for (x = tail - 8; x >= 0; x -= 8) {
            super_sample8to16( src + x, dst + 2 * x );
        }
#else
        for (x = stride - 1; x >= 0; x--) {
            const uint8_t val = src[x];

            dst[2 * x + 1] = val;
            dst[2 * x] = val;
        }
#endif
    }
}
/*
 * Horizontal 1 -> 2 expansion of one plane (4:2:2 -> 4:4:4) by sample
 * replication, from `src` into a separate buffer `dst`.
 *
 * dst     receives width x height samples
 * src     (width/2) x height packed samples (not modified)
 *
 * Lines 0 .. height-1 are converted. The previous version iterated over
 * lines 1 .. height, which skipped the first line and read and wrote one
 * line past the end of both buffers.
 *
 * MMX build: full groups of 8 source samples go through
 * super_sample8to16(); the remaining (width/2) % 8 samples of each line are
 * expanded in C. The previous tail loop indexed `src` instead of the current
 * line, advanced `dst` instead of the line pointer, and derived its length
 * from the group count rather than from the line length.
 */
static void tr_422_to_444t(uint8_t *dst, uint8_t *src, int width, int height)
{
    const int stride = width >> 1;
    int x, y;

    for (y = 0; y < height; y++) {
        uint8_t *s = src + (y * stride);
        uint8_t *d = dst + (y * width);

        x = 0;
#ifdef HAVE_ASM_MMX
        for (; x + 8 <= stride; x += 8) {
            super_sample8to16( s + x, d + 2 * x );
        }
#endif
        for (; x < stride; x++) {
            d[2 * x] = s[x];
            d[2 * x + 1] = s[x];
        }
    }
}
/* vertical interstitial siting; horizontal cositing
*
* Y Y Y Y
* C C
* Y Y Y Y
*
* Y Y Y Y
* C C
* Y Y Y Y
*
* [1,2,1] kernel for horizontal subsampling:
*
* inX[0] [1] [2]
* | | |
* C C C C
* \ | /
* \ | /
* C
*/
/*
 * In-place 4:4:4 -> 4:2:0 reduction of one plane with a horizontal [1,2,1]
 * kernel applied to two lines at once.
 *
 * For every pair of lines the first output uses the first column twice
 * (it stands in for the missing left neighbour); every further output is
 * centred on an even column and sums columns c-1, c, c+1 of both lines.
 * The six-tap sum (total weight 8) is divided by 8 without rounding.
 * Results are packed at the start of buffer.
 */
static void ss_444_to_420mpeg2(uint8_t *buffer, int width, int height)
{
    uint8_t *dst = buffer;
    int start = 0;   /* offset of the upper line of the current pair */
    int row, col;

    for (row = 0; row < height; row += 2) {
        const uint8_t *upper = buffer + start;
        const uint8_t *lower = upper + width;
        int pos = 1;   /* index of the left tap for the next output */

        /* first column: repeat it to stand in for the missing left tap */
        *dst++ = (upper[0] + (2 * upper[0]) + upper[1] +
                  lower[0] + (2 * lower[0]) + lower[1]) >> 3;

        for (col = 2; col < width; col += 2) {
            *dst++ = (upper[pos] + (2 * upper[pos + 1]) + upper[pos + 2] +
                      lower[pos] + (2 * lower[pos + 1]) + lower[pos + 2]) >> 3;
            pos += 2;
        }

        /* same net advance as stepping the line pointers by pos, then width + 1 */
        start += pos + width + 1;
    }
}
/*
 * Task entry point for subsampling: applies the converter selected by
 * f->iparam to planes 1 and 2 of the task argument, using f->width and
 * f->subhei as the dimensions.
 *
 * ptr is interpreted as a vj_task_arg_t. For SSM_422_444 both planes use
 * temp[1] as the scratch argument, read from output[] and write to input[].
 * Unknown modes are ignored.
 */
static void chroma_subsample_task( void *ptr )
{
    vj_task_arg_t *job = (vj_task_arg_t*) ptr;
    int plane;

    switch (job->iparam) {
    case SSM_420_JPEG_BOX:
    case SSM_420_JPEG_TR:
        for (plane = 1; plane <= 2; plane++) {
            ss_444_to_420jpeg(job->input[plane], job->width, job->subhei);
        }
#ifdef HAVE_ASM_MMX
        __asm__ __volatile__ ( _EMMS:::"memory");
#endif
        break;

    case SSM_420_MPEG2:
        for (plane = 1; plane <= 2; plane++) {
            ss_444_to_420mpeg2(job->input[plane], job->width, job->subhei);
        }
        break;

    case SSM_422_444:
        /* arguments: scratch, source, destination */
        for (plane = 1; plane <= 2; plane++) {
            ss_444_to_422_cp(job->temp[1], job->output[plane], job->input[plane], job->width, job->subhei);
        }
#ifdef HAVE_ASM_MMX
        __asm__ __volatile__ ( _EMMS:::"memory");
#endif
        break;

    case SSM_420_422:
        for (plane = 1; plane <= 2; plane++) {
            ss_422_to_420(job->input[plane], job->width, job->subhei);
        }
        break;

    default:
        break;
    }
}
/*
 * Task entry point for supersampling: applies the converter selected by
 * f->iparam to planes 1 and 2 of the task argument, using f->width and
 * f->subhei as the dimensions.
 *
 * ptr is interpreted as a vj_task_arg_t. SSM_420_JPEG_TR passes f->priv as
 * the scratch argument; SSM_422_444 writes to input[] and reads from
 * output[]. Unknown modes are ignored.
 */
static void chroma_supersample_task( void *ptr )
{
    vj_task_arg_t *job = (vj_task_arg_t*) ptr;
    int plane;

    switch (job->iparam) {
    case SSM_420_JPEG_BOX:
        for (plane = 1; plane <= 2; plane++) {
            ss_420jpeg_to_444(job->input[plane], job->width, job->subhei);
        }
#ifdef HAVE_ASM_MMX
        __asm__ __volatile__ ( _EMMS:::"memory");
#endif
        break;

    case SSM_420_JPEG_TR:
        for (plane = 1; plane <= 2; plane++) {
            tr_420jpeg_to_444(job->priv, job->input[plane], job->width, job->subhei);
        }
        break;

    case SSM_422_444:
        /* arguments: destination, source */
        for (plane = 1; plane <= 2; plane++) {
            tr_422_to_444t(job->input[plane], job->output[plane], job->width, job->subhei);
        }
#ifdef HAVE_ASM_MMX
        __asm__ __volatile__ ( _EMMS:::"memory");
#endif
        break;

    case SSM_420_422:
        for (plane = 1; plane <= 2; plane++) {
            ss_420_to_422( job->input[plane], job->width, job->subhei );
        }
        break;

    default:
        break;
    }
}
/*
 * Subsample the chroma planes ycbcr[1] and ycbcr[2] of `frame`.
 *
 * mode    selects the conversion
 * frame   supplies width, height and len
 * ycbcr   source planes
 * dcbcr   destination planes (used on the single-threaded path only)
 *
 * When vj_task_available() reports true the work is handed to
 * chroma_subsample_task through vj_task_run(), with ycbcr as the task input
 * and an internal buffer as its output.
 * NOTE(review): dcbcr is not referenced on that path -- confirm that this is
 * intended.
 *
 * Otherwise only SSM_420_JPEG_BOX / SSM_420_JPEG_TR do any work, writing the
 * reduced planes to dcbcr[1] and dcbcr[2]; SSM_420_MPEG2 and SSM_422_444 are
 * not wired up here and leave dcbcr untouched.
 *
 * The scratch line that used to be allocated and freed on every call has
 * been dropped: nothing on this path used it.
 */
void chroma_subsample_cp(subsample_mode_t mode,VJFrame *frame, uint8_t *ycbcr[], uint8_t *dcbcr[])
{
    if( vj_task_available() ) {
        void *data = vj_task_alloc_internal_buf( frame->len * 2 + (frame->width*2) );
        uint8_t *plane = (uint8_t*) data;
        uint8_t *vplane = plane + frame->len;
        uint8_t *buffer = vplane + frame->len;
        uint8_t *planes[3] = { NULL, plane,vplane };
        uint8_t *temp[3] = { NULL, buffer, NULL };
        int strides[4] = { 0, frame->len, frame->len, 0 };

        vj_frame_copy( ycbcr, planes,strides );
        vj_task_set_from_frame( frame );
        vj_task_set_int( mode );
        vj_task_set_sampling( 1 );
        vj_task_run( ycbcr, planes, temp, NULL, 3, (performer_job_routine ) &chroma_subsample_task );
        vj_task_free_internal_buf();
        vj_task_set_sampling(0);
        return;
    }

    switch (mode) {
    case SSM_420_JPEG_BOX:
    case SSM_420_JPEG_TR:
        ss_444_to_420jpeg_cp(ycbcr[1],dcbcr[1], frame->width, frame->height);
        ss_444_to_420jpeg_cp(ycbcr[2],dcbcr[2], frame->width, frame->height);
        break;
    case SSM_420_MPEG2:
        /* not implemented on this path */
        break;
    case SSM_422_444:
        /* the copying 4:4:4 -> 4:2:2 conversion is not wired up on this path */
#ifdef HAVE_ASM_MMX
        __asm__ __volatile__ ( _EMMS:::"memory");
#endif
        break;
    default:
        break;
    }
}
/*
 * Subsample the chroma planes ycbcr[1] and ycbcr[2] of `frame` in place.
 *
 * mode    selects the conversion
 * frame   supplies width, height and len
 * ycbcr   planes to convert
 *
 * When vj_task_available() reports true the work is handed to
 * chroma_subsample_task through vj_task_run(); otherwise the converter for
 * `mode` runs directly on both planes. Unknown modes do nothing.
 *
 * Only SSM_422_444 needs a scratch line (2 * width bytes). It is now
 * allocated for that mode alone and the allocation is checked: on failure an
 * error is logged and the planes are left unconverted, instead of handing a
 * NULL scratch pointer to the converter.
 */
void chroma_subsample(subsample_mode_t mode, VJFrame *frame, uint8_t *ycbcr[] )
{
    if( vj_task_available() ) {
        void *data = vj_task_alloc_internal_buf( frame->len * 2 + (frame->width*2) );
        uint8_t *plane = (uint8_t*) data;
        uint8_t *vplane = plane + frame->len;
        uint8_t *buffer = vplane + frame->len;
        uint8_t *planes[3] = { NULL, plane,vplane };
        uint8_t *temp[3] = { NULL, buffer, NULL };
        int strides[4] = { 0, frame->len, frame->len, 0 };

        vj_frame_copy( ycbcr, planes,strides );
        vj_task_set_from_frame( frame );
        vj_task_set_int( mode );
        vj_task_set_sampling( 1 );
        vj_task_run( ycbcr, planes, temp, NULL, 3, (performer_job_routine ) &chroma_subsample_task );
        vj_task_free_internal_buf();
        vj_task_set_sampling(0);
        return;
    }

    uint8_t *data = NULL;

    if (mode == SSM_422_444) {
        data = (uint8_t*) vj_malloc( sizeof(uint8_t) * frame->width * 2 );
        if (data == NULL) {
            veejay_msg(0, "%s: unable to allocate scratch line", __func__);
            return;
        }
    }

    switch (mode) {
    case SSM_420_JPEG_BOX:
    case SSM_420_JPEG_TR:
        ss_444_to_420jpeg(ycbcr[1], frame->width, frame->height);
        ss_444_to_420jpeg(ycbcr[2], frame->width, frame->height);
#ifdef HAVE_ASM_MMX
        __asm__ __volatile__ ( _EMMS:::"memory");
#endif
        break;
    case SSM_420_MPEG2:
        ss_444_to_420mpeg2(ycbcr[1], frame->width, frame->height);
        ss_444_to_420mpeg2(ycbcr[2], frame->width, frame->height);
        break;
    case SSM_422_444:
        ss_444_to_422(data,ycbcr[1],frame->width,frame->height);
        ss_444_to_422(data,ycbcr[2],frame->width,frame->height);
#ifdef HAVE_ASM_MMX
        __asm__ __volatile__ ( _EMMS:::"memory");
#endif
        break;
    case SSM_420_422:
        ss_422_to_420(ycbcr[1],frame->width,frame->height);
        ss_422_to_420(ycbcr[2],frame->width,frame->height);
        break;
    default:
        break;
    }

    free(data);   /* free(NULL) is a no-op for modes without scratch */
}
/*
 * Supersample the chroma planes ycbcr[1] and ycbcr[2] of `frame` in place.
 *
 * mode    selects the conversion
 * frame   supplies width, height, uv_len and data
 * ycbcr   planes to convert
 *
 * When vj_task_available() reports true the work is handed to
 * chroma_supersample_task through vj_task_run(), with frame->data as the
 * task input and an internal buffer as its output; otherwise the converter
 * for `mode` runs directly on both planes. Unknown modes do nothing.
 *
 * SSM_420_JPEG_TR and SSM_422_444 take a scratch line (2 * width bytes). It
 * is now allocated for those modes alone and the allocation is checked: on
 * failure an error is logged and the planes are left unconverted, instead of
 * handing a NULL scratch pointer to the converter.
 */
void chroma_supersample(subsample_mode_t mode,VJFrame *frame, uint8_t *ycbcr[] )
{
    if( vj_task_available() ) {
        void *data = vj_task_alloc_internal_buf( frame->uv_len * 2 );
        uint8_t *plane = (uint8_t*) data;
        uint8_t *vplane = plane + frame->uv_len;
        uint8_t *planes[3] = { NULL, plane,vplane };
        int strides[4] = { 0, frame->uv_len, frame->uv_len, 0 };

        vj_task_set_sampling( 0 );
        vj_frame_copy( ycbcr, planes,strides);
        vj_task_set_from_frame( frame );
        vj_task_set_int( mode );
        vj_task_set_sampling( 1 );
        vj_task_run( frame->data,planes, NULL, NULL,3, (performer_job_routine) &chroma_supersample_task );
        vj_task_free_internal_buf();
        vj_task_set_sampling(0);
        return;
    }

    uint8_t *data = NULL;

    if (mode == SSM_420_JPEG_TR || mode == SSM_422_444) {
        data = (uint8_t*) vj_malloc( sizeof(uint8_t) * frame->width * 2 );
        if (data == NULL) {
            veejay_msg(0, "%s: unable to allocate scratch line", __func__);
            return;
        }
    }

    switch (mode) {
    case SSM_420_JPEG_BOX:
        ss_420jpeg_to_444(ycbcr[1], frame->width, frame->height);
        ss_420jpeg_to_444(ycbcr[2], frame->width, frame->height);
#ifdef HAVE_ASM_MMX
        __asm__ __volatile__ ( _EMMS:::"memory");
#endif
        break;
    case SSM_420_JPEG_TR:
        tr_420jpeg_to_444(data,ycbcr[1], frame->width, frame->height);
        tr_420jpeg_to_444(data,ycbcr[2], frame->width, frame->height);
        break;
    case SSM_422_444:
        tr_422_to_444(data,ycbcr[1],frame->width,frame->height);
        tr_422_to_444(data,ycbcr[2],frame->width,frame->height);
#ifdef HAVE_ASM_MMX
        __asm__ __volatile__ ( _EMMS:::"memory");
#endif
        break;
    case SSM_420_422:
        ss_420_to_422( ycbcr[1], frame->width, frame->height );
        ss_420_to_422( ycbcr[2], frame->width, frame->height );
        break;
    default:
        break;
    }

    free( data );   /* free(NULL) is a no-op for modes without scratch */
}