Initial checkin of veejay 1.4

git-svn-id: svn://code.dyne.org/veejay/trunk@1172 eb8d1916-c9e9-0310-b8de-cf0c9472ead5
Niels Elburg
2008-11-10 20:16:24 +00:00
parent d81258c54c
commit d8e6f98d53
793 changed files with 244409 additions and 0 deletions


@@ -0,0 +1,9 @@
# Makefile for veejay
MAINTAINERCLEANFILES = Makefile.in
AM_CFLAGS=$(YUVCFLAGS)
INCLUDES = -I$(top_srcdir) -I$(includedir) -I$(top_srcdir)/aclib -I$(top_srcdir)/vjmem -I$(top_srcdir)/vjmsg -I$(top_srcdir)/libpostproc
YUV_LIB_FILE = libyuv.la
pkginclude_HEADERS = yuvconv.h
noinst_LTLIBRARIES = $(YUV_LIB_FILE)
libyuv_la_SOURCES = subsample.c yuvconv.c


@@ -0,0 +1,265 @@
/*
* mmx.h
* Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org>
* Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
*
* This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
* See http://libmpeg2.sourceforge.net/ for updates.
*
* mpeg2dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* mpeg2dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
 * The type of a value that fits in an MMX register (note that long
* long constant values MUST be suffixed by LL and unsigned long long
* values by ULL, lest they be truncated by the compiler)
*/
#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
typedef union {
long long q; /* Quadword (64-bit) value */
unsigned long long uq; /* Unsigned Quadword */
int d[2]; /* 2 Doubleword (32-bit) values */
unsigned int ud[2]; /* 2 Unsigned Doubleword */
short w[4]; /* 4 Word (16-bit) values */
unsigned short uw[4]; /* 4 Unsigned Word */
char b[8]; /* 8 Byte (8-bit) values */
unsigned char ub[8]; /* 8 Unsigned Byte */
float s[2]; /* 2 Single-precision (32-bit) values */
} ATTR_ALIGN(8) mmx_t; /* On an 8-byte (64-bit) boundary */
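/* For instance, constants of this type can be written as below (an
 * illustrative sketch, not part of the original header -- note the ULL
 * suffix rule from the comment above):
 */
#if 0
static const mmx_t lsb_mask = { .uq = 0x00ff00ff00ff00ffULL }; /* word-LSB mask */
static const mmx_t round_w = { .uw = { 8, 8, 8, 8 } }; /* per-word rounding term */
#endif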
#define mmx_i2r(op,imm,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "i" (imm) )
#define mmx_m2r(op,mem,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "m" (mem))
#define mmx_r2m(op,reg,mem) \
__asm__ __volatile__ (#op " %%" #reg ", %0" \
: "=m" (mem) \
: /* nothing */ )
#define mmx_r2r(op,regs,regd) \
__asm__ __volatile__ (#op " %" #regs ", %" #regd)
#define emms() __asm__ __volatile__ ("emms")
#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
#define movd_v2r(var,reg) __asm__ __volatile__ ("movd %0, %%" #reg \
: /* nothing */ \
: "rm" (var))
#define movd_r2v(reg,var) __asm__ __volatile__ ("movd %%" #reg ", %0" \
: "=rm" (var) \
: /* nothing */ )
#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
#define por_m2r(var,reg) mmx_m2r (por, var, reg)
#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
/* 3DNOW extensions */
#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
/* AMD MMX extensions - also available in Intel SSE */
#define mmx_m2ri(op,mem,reg,imm) \
__asm__ __volatile__ (#op " %1, %0, %%" #reg \
: /* nothing */ \
: "m" (mem), "i" (imm))
#define mmx_r2ri(op,regs,regd,imm) \
__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
: /* nothing */ \
: "i" (imm) )
#define mmx_fetch(mem,hint) \
__asm__ __volatile__ ("prefetch" #hint " %0" \
: /* nothing */ \
: "m" (mem))
#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
#define pmovmskb(mmreg,reg) \
__asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg)
#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
#define prefetcht0(mem) mmx_fetch (mem, t0)
#define prefetcht1(mem) mmx_fetch (mem, t1)
#define prefetcht2(mem) mmx_fetch (mem, t2)
#define prefetchnta(mem) mmx_fetch (mem, nta)
#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
#define sfence() __asm__ __volatile__ ("sfence\n\t")
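/* Usage sketch (illustrative only, not part of the original header):
 * averaging two blocks of 8 pixels with the wrappers above. pavgb is an
 * MMX-extension/SSE instruction, so a CPU with that support is assumed;
 * emms() must be issued before returning to floating-point code.
 */
#if 0
static inline void average8( unsigned char *dst, const unsigned char *a, const unsigned char *b )
{
movq_m2r( *a, mm0 ); /* mm0 = 8 pixels from a */
movq_m2r( *b, mm1 ); /* mm1 = 8 pixels from b */
pavgb_r2r( mm1, mm0 ); /* mm0 = rounded byte-wise average */
movq_r2m( mm0, *dst ); /* store 8 averaged pixels */
emms();
}
#endif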


@@ -0,0 +1,46 @@
#include <stdlib.h> /* size_t */
#include <config.h>
/* MMX memcpy stuff taken from MPlayer (http://www.mplayerhq.hu) */
#define BLOCK_SIZE 4096
#define CONFUSION_FACTOR 0
// Feel free to fine-tune the above two; it might be possible to get some speedup with them :)
#undef HAVE_MMX1
#ifndef MMXEXT
/* means: mmx v.1. Note: since we added alignment of the destination, it
speeds up memory copying on Pentium MMX, Celeron-1 and P2 by up to 12%
versus the standard (non MMX-optimized) version.
Note: on K6-2+ it speeds up memory copying by up to 25%, and
on K7 and P3 by about 500% (5 times). */
#define HAVE_MMX1
#endif
#undef MMREG_SIZE
#define MMREG_SIZE 64 //8
#undef PREFETCH
#undef EMMS
#ifdef MMXEXT
#define PREFETCH "prefetchnta"
#else
#define PREFETCH "/nop"
#endif
#define EMMS "emms"
#undef MOVNTQ
#ifdef MMXEXT
#define MOVNTQ "movntq"
#else
#define MOVNTQ "movq"
#endif
#undef MIN_LEN
#ifdef HAVE_MMX1
#define MIN_LEN 0x800 /* 2K blocks */
#else
#define MIN_LEN 0x40 /* 64-byte blocks */
#endif
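/* A minimal sketch of the copy loop these macros are meant to drive
* (illustrative only -- the real MPlayer routine additionally aligns the
* destination and applies the BLOCK_SIZE/CONFUSION_FACTOR tuning; the
* function name is hypothetical).
*/
#if 0
#include <string.h> /* memcpy for the tail */
static void *fast_memcpy_sketch( void *to, const void *from, size_t len )
{
void *retval = to;
if( len >= MIN_LEN )
{
size_t i = len >> 6; /* 64 bytes per iteration */
len &= 63;
for( ; i > 0; i -- )
{
__asm__ __volatile__ (
#ifdef MMXEXT
PREFETCH" 320(%0)\n"
#endif
"movq (%0), %%mm0\n"
"movq 8(%0), %%mm1\n"
"movq 16(%0), %%mm2\n"
"movq 24(%0), %%mm3\n"
MOVNTQ" %%mm0, (%1)\n"
MOVNTQ" %%mm1, 8(%1)\n"
MOVNTQ" %%mm2, 16(%1)\n"
MOVNTQ" %%mm3, 24(%1)\n"
"movq 32(%0), %%mm0\n"
"movq 40(%0), %%mm1\n"
"movq 48(%0), %%mm2\n"
"movq 56(%0), %%mm3\n"
MOVNTQ" %%mm0, 32(%1)\n"
MOVNTQ" %%mm1, 40(%1)\n"
MOVNTQ" %%mm2, 48(%1)\n"
MOVNTQ" %%mm3, 56(%1)\n"
:: "r" (from), "r" (to) : "memory" );
from = (const unsigned char*) from + 64;
to = (unsigned char*) to + 64;
}
#ifdef MMXEXT
__asm__ __volatile__ ("sfence":::"memory"); /* flush the movntq stores */
#endif
__asm__ __volatile__ (EMMS:::"memory");
}
if( len )
memcpy( to, from, len ); /* plain copy for the tail */
return retval;
}
#endif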


@@ -0,0 +1,856 @@
/*
* subsample.c: Routines to do chroma subsampling. ("Work In Progress")
*
*
* Copyright (C) 2001 Matthew J. Marjanovic <maddog@mir.com>
* 2004 Niels Elburg <nwelburg@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
#include <config.h>
#ifdef HAVE_ASM_MMX
#include "mmx.h"
#endif
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <mjpegtools/mjpeg_types.h>
#include <libvjmem/vjmem.h>
#include <libvjmsg/vj-msg.h>
#include <libvje/vje.h>
#include <libyuv/yuvconv.h>
const char *ssm_id[SSM_COUNT] = {
"unknown",
"420_jpeg",
"420_mpeg2",
#if 0
"420_dv_pal",
"411_dv_ntsc"
#endif
};
const char *ssm_description[SSM_COUNT] = {
"unknown/illegal",
"4:2:0, JPEG/MPEG-1, interstitial siting",
"4:2:0, MPEG-2, horizontal cositing",
#if 0
"4:2:0, DV-PAL, cosited, Cb/Cr line alternating",
"4:1:1, DV-NTSC"
"4:2:2",
#endif
};
#define RUP8(num) (((num)+7)&~7) /* round up to a multiple of 8 */
// forward decl
void ss_420_to_422(uint8_t *buffer, int width, int height);
void ss_422_to_420(uint8_t *buffer, int width, int height);
typedef struct
{
uint8_t *buf;
} yuv_sampler_t;
void *subsample_init(int len)
{
yuv_sampler_t *s = (yuv_sampler_t*) vj_malloc(sizeof(yuv_sampler_t) );
if(!s)
return NULL;
s->buf = (uint8_t*) vj_malloc(sizeof(uint8_t) * RUP8(len*2) );
if(!s->buf)
{
free(s); /* don't leak the sampler struct itself */
return NULL;
}
return (void*) s;
}
void subsample_free(void *data)
{
yuv_sampler_t *sampler = (yuv_sampler_t*) data;
if(sampler)
{
if(sampler->buf)
free(sampler->buf);
free(sampler);
}
sampler = NULL;
}
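/* Typical lifecycle (an illustrative sketch; 'planes' is a hypothetical
* uint8_t*[3] holding the Y/Cb/Cr planes, error handling omitted):
*
* void *s = subsample_init( width );
* chroma_subsample( SSM_422_444, s, planes, width, height );
* ... work on the 4:2:2 chroma ...
* chroma_supersample( SSM_422_444, s, planes, width, height );
* subsample_free( s );
*/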
/*************************************************************************
* Chroma Subsampling
*************************************************************************/
/* vertical/horizontal interstitial siting
*
* Y Y Y Y
* C C
* Y Y Y Y
*
* Y Y Y Y
* C C
* Y Y Y Y
*
*/
/*
static void ss_444_to_420jpeg(uint8_t *buffer, int width, int height)
{
uint8_t *in0, *in1, *out;
int x, y;
in0 = buffer;
in1 = buffer + width;
out = buffer;
for (y = 0; y < height; y += 2) {
for (x = 0; x < width; x += 2) {
*out = (in0[0] + in0[1] + in1[0] + in1[1]) >> 2;
in0 += 2;
in1 += 2;
out++;
}
in0 += width;
in1 += width;
}
}
*/
/*
Weighted averaging for subsampling 2x2 -> 1x1:
four output pixels are filled per inner-loop iteration,
weighting 16 source pixels in total.
*/
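/* The 1-3-3-9 weights below are the bilinear products of (1/4, 3/4) in
each direction, i.e. each output chroma sample is taken 3/4 of the way
toward in1[odd]; the +8 term rounds before the >>4 divide by 16. */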
static void ss_444_to_420jpeg(uint8_t *buffer, int width, int height)
{
const uint8_t *in0, *in1;
uint8_t *out;
int x, y;
in0 = buffer;
in1 = buffer + width;
out = buffer;
for (y = 0; y < height; y += 4) {
for (x = 0; x < width; x += 4) {
out[0] = (in0[0] + 3 * (in0[1] + in1[0]) + (9 * in1[1]) + 8) >> 4;
out[1] = (in0[2] + 3 * (in0[3] + in1[2]) + (9 * in1[3]) + 8) >> 4;
out[2] = (in0[4] + 3 * (in0[5] + in1[4]) + (9 * in1[5]) + 8) >> 4;
out[3] = (in0[6] + 3 * (in0[7] + in1[6]) + (9 * in1[7]) + 8) >> 4;
in0 += 8;
in1 += 8;
out += 4;
}
for ( ; x < width; x +=2 )
{
out[0] = (in0[0] + 3 * (in0[1] + in1[0]) + (9 * in1[1]) + 8) >> 4;
in0 += 2;
in1 += 2;
out++;
}
in0 += width*2;
in1 += width*2;
}
}
static void ss_444_to_420jpeg_cp(uint8_t *buffer,uint8_t *dest, int width, int height)
{
const uint8_t *in0, *in1;
uint8_t *out;
int x, y;
in0 = buffer;
in1 = buffer + width;
out = dest;
for (y = 0; y < height; y += 4) {
for (x = 0; x < width; x += 4) {
out[0] = (in0[0] + 3 * (in0[1] + in1[0]) + (9 * in1[1]) + 8) >> 4;
out[1] = (in0[2] + 3 * (in0[3] + in1[2]) + (9 * in1[3]) + 8) >> 4;
out[2] = (in0[4] + 3 * (in0[5] + in1[4]) + (9 * in1[5]) + 8) >> 4;
out[3] = (in0[6] + 3 * (in0[7] + in1[6]) + (9 * in1[7]) + 8) >> 4;
in0 += 8;
in1 += 8;
out += 4;
}
for ( ; x < width; x +=2 )
{
out[0] = (in0[0] + 3 * (in0[1] + in1[0]) + (9 * in1[1]) + 8) >> 4;
in0 += 2;
in1 += 2;
out++;
}
in0 += width*2;
in1 += width*2;
}
}
/* horizontal interstitial siting
*
* Y Y Y Y
* C C C C in0
* Y Y Y Y
* C C C C
*
* Y Y Y Y
* C C out0
* Y Y Y Y
* C C
*
*
*/
/* vertical/horizontal interstitial siting
*
* Y Y Y Y
* C C C inm
* Y Y Y Y
*
* Y Y Y - Y out0
* C | C | C in0
* Y Y Y - Y out1
*
*
* C C C inp
*
*
* Each iteration through the loop reconstitutes one 2x2 block of
* pixels from the "surrounding" 3x3 block of samples...
* Boundary conditions are handled by cheap reflection; i.e. the
* center sample is simply reused.
*
*/
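/* Concretely, each output pixel is a bilinear blend of the four nearest
* chroma samples with weights 9/16, 3/16, 3/16 and 1/16 -- see the
* (9*c00 + 3*(...) + 1*...) expressions below; +8 rounds before the >>4.
*/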
#define BLANK_CRB in0[1]
#define BLANK_CRB_2 (in0[1] << 1)
static void tr_420jpeg_to_444(void *data, uint8_t *buffer, int width, int height)
{
uint8_t *inm, *in0, *inp, *out0, *out1;
uint8_t cmm, cm0, cmp, c0m, c00, c0p, cpm, cp0, cpp;
int x, y;
yuv_sampler_t *sampler = (yuv_sampler_t*) data;
uint8_t *saveme = sampler->buf;
veejay_memcpy(saveme, buffer, width);
in0 = buffer + ( width * height /4) - 2;
inm = in0 - width/2;
inp = in0 + width/2;
out1 = buffer + (width * height) - 1;
out0 = out1 - width;
for (y = height; y > 0; y -= 2) {
if (y == 2) {
in0 = saveme + width/2 - 2;
inp = in0 + width/2;
}
for (x = width; x > 0; x -= 2) {
#if 0
if ((x == 2) && (y == 2)) {
cmm = in0[1];
cm0 = in0[1];
cmp = in0[2];
c0m = in0[1];
c0p = in0[2];
cpm = inp[1];
cp0 = inp[1];
cpp = inp[2];
} else if ((x == 2) && (y == height)) {
cmm = inm[1];
cm0 = inm[1];
cmp = inm[2];
c0m = in0[1];
c0p = in0[2];
cpm = in0[1];
cp0 = in0[1];
cpp = in0[2];
} else if ((x == width) && (y == height)) {
cmm = inm[0];
cm0 = inm[1];
cmp = inm[1];
c0m = in0[0];
c0p = in0[1];
cpm = in0[0];
cp0 = in0[1];
cpp = in0[1];
} else if ((x == width) && (y == 2)) {
cmm = in0[0];
cm0 = (y == 2) ? BLANK_CRB : inm[1];
cmp = ((x == width) || (y == 2)) ? BLANK_CRB : inm[2];
c0m = (x == 2) ? BLANK_CRB : in0[0];
c0p = (x == width) ? BLANK_CRB : in0[2];
cpm = ((x == 2) || (y == height)) ? BLANK_CRB : inp[0];
cp0 = (y == height) ? BLANK_CRB : inp[1];
cpp = ((x == width) || (y == height)) ? BLANK_CRB : inp[2];
} else if (x == 2) {
cmm = ((x == 2) || (y == 2)) ? BLANK_CRB : inm[0];
cm0 = (y == 2) ? BLANK_CRB : inm[1];
cmp = ((x == width) || (y == 2)) ? BLANK_CRB : inm[2];
c0m = (x == 2) ? BLANK_CRB : in0[0];
c0p = (x == width) ? BLANK_CRB : in0[2];
cpm = ((x == 2) || (y == height)) ? BLANK_CRB : inp[0];
cp0 = (y == height) ? BLANK_CRB : inp[1];
cpp = ((x == width) || (y == height)) ? BLANK_CRB : inp[2];
} else if (y == 2) {
cmm = ((x == 2) || (y == 2)) ? BLANK_CRB : inm[0];
cm0 = (y == 2) ? BLANK_CRB : inm[1];
cmp = ((x == width) || (y == 2)) ? BLANK_CRB : inm[2];
c0m = (x == 2) ? BLANK_CRB : in0[0];
c0p = (x == width) ? BLANK_CRB : in0[2];
cpm = ((x == 2) || (y == height)) ? BLANK_CRB : inp[0];
cp0 = (y == height) ? BLANK_CRB : inp[1];
cpp = ((x == width) || (y == height)) ? BLANK_CRB : inp[2];
} else if (x == width) {
cmm = ((x == 2) || (y == 2)) ? BLANK_CRB : inm[0];
cm0 = (y == 2) ? BLANK_CRB : inm[1];
cmp = ((x == width) || (y == 2)) ? BLANK_CRB : inm[2];
c0m = (x == 2) ? BLANK_CRB : in0[0];
c0p = (x == width) ? BLANK_CRB : in0[2];
cpm = ((x == 2) || (y == height)) ? BLANK_CRB : inp[0];
cp0 = (y == height) ? BLANK_CRB : inp[1];
cpp = ((x == width) || (y == height)) ? BLANK_CRB : inp[2];
} else if (y == height) {
cmm = ((x == 2) || (y == 2)) ? BLANK_CRB : inm[0];
cm0 = (y == 2) ? BLANK_CRB : inm[1];
cmp = ((x == width) || (y == 2)) ? BLANK_CRB : inm[2];
c0m = (x == 2) ? BLANK_CRB : in0[0];
c0p = (x == width) ? BLANK_CRB : in0[2];
cpm = ((x == 2) || (y == height)) ? BLANK_CRB : inp[0];
cp0 = (y == height) ? BLANK_CRB : inp[1];
cpp = ((x == width) || (y == height)) ? BLANK_CRB : inp[2];
} else {
cmm = ((x == 2) || (y == 2)) ? BLANK_CRB : inm[0];
cm0 = (y == 2) ? BLANK_CRB : inm[1];
cmp = ((x == width) || (y == 2)) ? BLANK_CRB : inm[2];
c0m = (x == 2) ? BLANK_CRB : in0[0];
c0p = (x == width) ? BLANK_CRB : in0[2];
cpm = ((x == 2) || (y == height)) ? BLANK_CRB : inp[0];
cp0 = (y == height) ? BLANK_CRB : inp[1];
cpp = ((x == width) || (y == height)) ? BLANK_CRB : inp[2];
}
c00 = in0[1];
cmm = ((x == 2) || (y == 2)) ? BLANK_CRB : inm[0];
cm0 = (y == 2) ? BLANK_CRB : inm[1];
cmp = ((x == width) || (y == 2)) ? BLANK_CRB : inm[2];
c0m = (x == 2) ? BLANK_CRB : in0[0];
c0p = (x == width) ? BLANK_CRB : in0[2];
cpm = ((x == 2) || (y == height)) ? BLANK_CRB : inp[0];
cp0 = (y == height) ? BLANK_CRB : inp[1];
cpp = ((x == width) || (y == height)) ? BLANK_CRB : inp[2];
#else
cmm = ((x == 2) || (y == 2)) ? BLANK_CRB : inm[0];
cm0 = (y == 2) ? BLANK_CRB : inm[1];
cmp = ((x == width) || (y == 2)) ? BLANK_CRB : inm[2];
c0m = (x == 2) ? BLANK_CRB : in0[0];
c00 = in0[1];
c0p = (x == width) ? BLANK_CRB : in0[2];
cpm = ((x == 2) || (y == height)) ? BLANK_CRB : inp[0];
cp0 = (y == height) ? BLANK_CRB : inp[1];
cpp = ((x == width) || (y == height)) ? BLANK_CRB : inp[2];
#endif
inm--;
in0--;
inp--;
*(out1--) = (1*cpp + 3*(cp0+c0p) + 9*c00 + 8) >> 4;
*(out1--) = (1*cpm + 3*(cp0+c0m) + 9*c00 + 8) >> 4;
*(out0--) = (1*cmp + 3*(cm0+c0p) + 9*c00 + 8) >> 4;
*(out0--) = (1*cmm + 3*(cm0+c0m) + 9*c00 + 8) >> 4;
}
out1 -= width;
out0 -= width;
}
}
// lame box filter
// the dampening of high frequencies depends
// on the direction in which these frequencies occur in the
// image, resulting in clear edges between certain
// groups of pixels.
static void ss_420jpeg_to_444(uint8_t *buffer, int width, int height)
{
#ifndef HAVE_ASM_MMX
uint8_t *in, *out0, *out1;
int x, y;
in = buffer + (width * height / 4) - 1;
out1 = buffer + (width * height) - 1;
out0 = out1 - width;
for (y = height - 1; y >= 0; y -= 2) {
for (x = width - 1; x >= 0; x -=2) {
uint8_t val = *(in--);
*(out1--) = val;
*(out1--) = val;
*(out0--) = val;
*(out0--) = val;
}
out0 -= width;
out1 -= width;
}
#else
int x,y;
/* each step expands 8 chroma samples into 16 output pixels; walking
backwards keeps the in-place expansion from overwriting unread input */
const int mmx_stride = width >> 4;
uint8_t *src = buffer + (width * height / 4) - 8;
uint8_t *dst = buffer + (width * height) - 16;
uint8_t *dst2 = dst - width;
for( y = height-1; y >= 0; y -= 2)
{
for( x = 0; x < mmx_stride; x ++ )
{
movq_m2r( *src,mm0 );
movq_r2r( mm0,mm1 );
punpcklbw_r2r( mm0,mm0 ); /* duplicate low 4 pixels: aabbccdd */
punpckhbw_r2r( mm1,mm1 ); /* duplicate high 4 pixels: eeffgghh */
movq_r2m(mm0, *dst );
movq_r2m(mm1, *(dst+8) );
movq_r2m(mm0, *dst2 );
movq_r2m(mm1, *(dst2+8) );
dst -= 16;
dst2 -= 16;
src -= 8;
}
dst -= width;
dst2 -= width;
}
#endif
}
void ss_420_to_422(uint8_t *buffer, int width, int height)
{
//todo, 1x2 super sampling (box)
}
void ss_422_to_420(uint8_t *buffer, int width, int height )
{
//todo 2x1 down sampling (box)
}
#ifdef HAVE_ASM_MMX
#undef HAVE_K6_2PLUS
#if !defined( HAVE_ASM_MMX2) && defined( HAVE_ASM_3DNOW )
#define HAVE_K6_2PLUS
#endif
#undef _EMMS
#ifdef HAVE_K6_2PLUS
/* On K6, femms is faster than emms. On K7, femms maps directly onto emms. */
#define _EMMS "femms"
#else
#define _EMMS "emms"
#endif
#endif
#ifdef HAVE_ASM_MMX
/* for small memory blocks (<256 bytes) this version is faster */
#define small_memcpy(to,from,n)\
{\
register unsigned long int dummy;\
__asm__ __volatile__(\
"rep; movsb"\
:"=&D"(to), "=&S"(from), "=&c"(dummy)\
:"0" (to), "1" (from),"2" (n)\
: "memory");\
}
static inline void copy8( uint8_t *dst, uint8_t *in )
{
__asm__ __volatile__ (
"movq (%0), %%mm0\n"
"movq %%mm0, (%1)\n"
:: "r" (in), "r" (dst) : "memory" );
}
static inline void copy16( uint8_t *dst, uint8_t *in)
{
__asm__ __volatile__ (
"movq (%0), %%mm0\n"
"movq 8(%0), %%mm1\n"
"movq %%mm0, (%1)\n"
"movq %%mm1, 8(%1)\n"
:: "r" (in), "r" (dst) : "memory" );
}
static inline void copy_width( uint8_t *dst, uint8_t *in, int width )
{
int w = width >> 4;
int x;
uint8_t *d = dst;
uint8_t *i = in;
for( x = 0; x < w; x ++ )
{
copy16( d, i );
d += 16;
i += 16;
}
x = (width % 16);
if( x )
small_memcpy( d, i, x);
}
static inline void load_mask16to8()
{
const uint64_t mask = 0x00ff00ff00ff00ffLL;
const uint8_t *m = (uint8_t*)&mask;
__asm __volatile(
"movq (%0), %%mm4\n\t"
:: "r" (m)
);
}
static inline void down_sample16to8( uint8_t *out, uint8_t *in )
{
//@ down sample by dropping the odd (right) pixels; mm4 holds the
//@ 0x00ff word mask loaded by load_mask16to8()
__asm__ __volatile__(
"movq (%0), %%mm1\n\t" /* mm1 = pixels 0..7 */
"movq 8(%0),%%mm3\n\t" /* mm3 = pixels 8..15 */
"pxor %%mm5,%%mm5\n\t"
"pand %%mm4,%%mm1\n\t" /* keep even pixels, widened to words */
"pand %%mm4,%%mm3\n\t"
"packuswb %%mm1,%%mm2\n\t" /* high half of mm2 = packed mm1 (low half is junk) */
"packuswb %%mm3,%%mm5\n\t" /* high half of mm5 = packed mm3, low half = 0 */
"psrlq $32, %%mm2\n\t" /* shift the junk out; packed mm1 now in low half */
"por %%mm5,%%mm2\n\t" /* combine into 8 output pixels */
"movq %%mm2, (%1)\n\t"
:: "r" (in), "r" (out)
);
}
#endif
static void ss_444_to_422_cp(void *data, uint8_t *buffer, uint8_t *dest, int width, int height)
{
const int dst_stride = width >> 1;
int x,y;
#ifdef HAVE_ASM_MMX
int mmxdst_stride=dst_stride >> 3;
int left = dst_stride % 8;
#endif
yuv_sampler_t *sampler = (yuv_sampler_t*) data;
uint8_t *src = sampler->buf;
uint8_t *dst;
#ifdef HAVE_ASM_MMX
load_mask16to8();
#endif
for(y = 0; y < height; y ++)
{
src = buffer + (y*width);
dst = dest + (y*dst_stride);
#ifdef HAVE_ASM_MMX
for( x= 0; x < mmxdst_stride; x++ )
{
down_sample16to8( dst, src );
src += 16;
dst += 8;
}
for(x=0; x < left; x++)
{
*(dst++) = ( src[0] + src[1] + 1 ) >> 1;
src += 2;
}
#else
for(x=0; x < dst_stride; x++)
{
*(dst++) = ( src[0] + src[1] + 1 ) >> 1;
src += 2;
}
#endif
}
}
static void ss_444_to_422(void *data, uint8_t *buffer, int width, int height)
{
const int dst_stride = width >> 1;
int x,y;
#ifdef HAVE_ASM_MMX
int mmxdst_stride=dst_stride >> 3;
int left = dst_stride % 8;
#endif
yuv_sampler_t *sampler = (yuv_sampler_t*) data;
uint8_t *src = sampler->buf;
uint8_t *dst;
#ifdef HAVE_ASM_MMX
load_mask16to8();
#endif
for(y = 0; y < height; y ++)
{
src = sampler->buf;
dst = buffer + (y*dst_stride);
#ifdef HAVE_ASM_MMX
copy_width( src, buffer + (y*width), width );
for( x= 0; x < mmxdst_stride; x++ )
{
down_sample16to8( dst, src );
src += 16;
dst += 8;
}
for(x=0; x < left; x++)
{
*(dst++) = ( src[0] + src[1] + 1 ) >> 1;
src += 2;
}
#else
src = buffer + (y*width); /* non-MMX path reads the row in place */
for( x = 0; x < dst_stride; x ++ )
{
*(dst++) = (src[0] + src[1] + 1 ) >> 1;
src += 2;
}
#endif
}
}
#ifdef HAVE_ASM_MMX
static inline void super_sample8to16( uint8_t *in, uint8_t *out )
{
//@ super sample by duplicating pixels: abcdefgh -> aabbccdd eeffgghh
__asm__ __volatile__ (
"\n\tpxor %%mm2,%%mm2"
"\n\tpxor %%mm4,%%mm4"
"\n\tmovq (%0), %%mm1" /* mm1 = 8 source pixels */
"\n\tpunpcklbw %%mm1,%%mm2" /* mm2 = 0,a,0,b,0,c,0,d */
"\n\tpunpckhbw %%mm1,%%mm4" /* mm4 = 0,e,0,f,0,g,0,h */
"\n\tmovq %%mm2,%%mm5"
"\n\tmovq %%mm4,%%mm6"
"\n\tpsrlq $8, %%mm5" /* mm5 = a,0,b,0,c,0,d,0 */
"\n\tpsrlq $8, %%mm6"
"\n\tpor %%mm5,%%mm2" /* mm2 = a,a,b,b,c,c,d,d */
"\n\tpor %%mm6,%%mm4" /* mm4 = e,e,f,f,g,g,h,h */
"\n\tmovq %%mm2, (%1)"
"\n\tmovq %%mm4, 8(%1)"
:: "r" (in), "r" (out)
);
}
#endif
static void tr_422_to_444(void *data, uint8_t *buffer, int width, int height)
{
int x,y;
const int stride = width >> 1;
#ifndef HAVE_ASM_MMX
for( y = height-1; y >= 0 ; y -- )
{
uint8_t *dst = buffer + (y * width);
uint8_t *src = buffer + (y * stride);
for( x = stride-1; x >= 0; x -- )
{ /* duplicate each chroma sample; walking right to left keeps
the in-place expansion of row 0 from clobbering unread input */
dst[2*x] = src[x];
dst[2*x+1] = src[x];
}
}
#else
const int mmx_stride = stride >> 3;
for( y = height-1; y > 0 ; y -- )
{
uint8_t *src = buffer + (y * stride);
uint8_t *dst = buffer + (y * width);
for(x=0; x < mmx_stride; x++) // 8 chroma samples -> 16 pixels
{
super_sample8to16(src,dst );
src += 8;
dst += 16;
}
}
/* row 0 overlaps itself in-place, so expand it right-to-left in plain C */
for( x = stride-1; x >= 0; x -- )
{
buffer[2*x] = buffer[x];
buffer[2*x+1] = buffer[x];
}
#endif
}
/* vertical interstitial siting; horizontal cositing
*
* Y Y Y Y
* C C
* Y Y Y Y
*
* Y Y Y Y
* C C
* Y Y Y Y
*
* [1,2,1] kernel for horizontal subsampling:
*
* inX[0] [1] [2]
* | | |
* C C C C
* \ | /
* \ | /
* C
*/
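/* i.e. out = (C[x-1] + 2*C[x] + C[x+1] + C'[x-1] + 2*C'[x] + C'[x+1]) >> 3,
where C and C' are the two source rows; the weights sum to 8, and note
the code below truncates (there is no +4 rounding term before the shift). */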
static void ss_444_to_420mpeg2(uint8_t *buffer, int width, int height)
{
uint8_t *in0, *in1, *out;
int x, y;
in0 = buffer; /* first of a pair of lines */
in1 = buffer + width; /* second of the pair */
out = buffer;
for (y = 0; y < height; y += 2) {
/* first column boundary condition -- just repeat it to right */
*out = (in0[0] + (2 * in0[0]) + in0[1] +
in1[0] + (2 * in1[0]) + in1[1]) >> 3;
out++;
in0++;
in1++;
/* rest of columns just loop */
for (x = 2; x < width; x += 2) {
*out = (in0[0] + (2 * in0[1]) + in0[2] +
in1[0] + (2 * in1[1]) + in1[2]) >> 3;
in0 += 2;
in1 += 2;
out++;
}
in0 += width + 1;
in1 += width + 1;
}
}
void chroma_subsample_cp(subsample_mode_t mode, void *data, uint8_t *ycbcr[], uint8_t *dcbcr[],
int width, int height)
{
switch (mode) {
case SSM_420_JPEG_BOX:
case SSM_420_JPEG_TR:
ss_444_to_420jpeg_cp(ycbcr[1],dcbcr[1], width, height);
ss_444_to_420jpeg_cp(ycbcr[2],dcbcr[2], width, height);
break;
case SSM_420_MPEG2:
break;
case SSM_422_444:
ss_444_to_422_cp(data,ycbcr[1],dcbcr[1],width,height);
ss_444_to_422_cp(data,ycbcr[2],dcbcr[2],width,height);
#ifdef HAVE_ASM_MMX
__asm__ __volatile__ ( _EMMS:::"memory");
#endif
break;
case SSM_420_422:
break;
default:
break;
}
}
void chroma_subsample(subsample_mode_t mode, void *data, uint8_t *ycbcr[],
int width, int height)
{
switch (mode) {
case SSM_420_JPEG_BOX:
case SSM_420_JPEG_TR:
ss_444_to_420jpeg(ycbcr[1], width, height);
ss_444_to_420jpeg(ycbcr[2], width, height);
#ifdef HAVE_ASM_MMX
__asm__ __volatile__ ( _EMMS:::"memory");
#endif
break;
case SSM_420_MPEG2:
ss_444_to_420mpeg2(ycbcr[1], width, height);
ss_444_to_420mpeg2(ycbcr[2], width, height);
break;
case SSM_422_444:
ss_444_to_422(data,ycbcr[1],width,height);
ss_444_to_422(data,ycbcr[2],width,height);
#ifdef HAVE_ASM_MMX
__asm__ __volatile__ ( _EMMS:::"memory");
#endif
break;
case SSM_420_422:
ss_422_to_420(ycbcr[1],width,height);
ss_422_to_420(ycbcr[2],width,height);
break;
default:
break;
}
}
void chroma_supersample(subsample_mode_t mode,void *data, uint8_t *ycbcr[],
int width, int height)
{
switch (mode) {
case SSM_420_JPEG_BOX:
ss_420jpeg_to_444(ycbcr[1], width, height);
ss_420jpeg_to_444(ycbcr[2], width, height);
#ifdef HAVE_ASM_MMX
__asm__ __volatile__ ( _EMMS:::"memory");
#endif
break;
case SSM_420_JPEG_TR:
tr_420jpeg_to_444(data,ycbcr[1], width, height);
tr_420jpeg_to_444(data,ycbcr[2], width, height);
break;
case SSM_422_444:
tr_422_to_444(data,ycbcr[1],width,height);
tr_422_to_444(data,ycbcr[2],width,height);
#ifdef HAVE_ASM_MMX
__asm__ __volatile__ ( _EMMS:::"memory");
#endif
break;
case SSM_420_422:
ss_420_to_422( ycbcr[1], width, height );
ss_420_to_422( ycbcr[2], width, height );
break;
case SSM_420_MPEG2:
// ss_420mpeg2_to_444(ycbcr[1], width, height);
// ss_420mpeg2_to_444(ycbcr[2], width, height);
break;
default:
break;
}
}

File diff suppressed because it is too large.


@@ -0,0 +1,139 @@
#ifndef YUVCONF_H
#define YUVCONF_H
/* Veejay - A visual instrument and realtime video sampler
* Copyright (C) 2004 Niels Elburg <nwelburg@gmail.com>
*
* YUV library for veejay.
*
* Mjpegtools, (C) The Mjpegtools Development Team (http://mjpeg.sourceforge.net)
* Copyright (C) 2001 Matthew J. Marjanovic <maddog@mir.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
typedef enum subsample_mode {
SSM_UNKNOWN = 0,
SSM_420_JPEG_TR = 1,
SSM_420_JPEG_BOX = 2,
SSM_420_MPEG2 = 3,
SSM_422_444 = 4,
SSM_420_422 = 5,
SSM_COUNT = 6,
} subsample_mode_t;
extern const char *ssm_id[SSM_COUNT];
extern const char *ssm_description[SSM_COUNT];
void *subsample_init(int buf_len);
void subsample_free(void *sampler);
void chroma_subsample(subsample_mode_t mode, void *sampler, uint8_t * ycbcr[],
int width, int height);
void chroma_subsample_cp(subsample_mode_t mode, void *data, uint8_t *ycbcr[], uint8_t *dcbcr[],
int width, int height);
void chroma_supersample(subsample_mode_t mode, void *sampler, uint8_t * ycbcr[],
int width, int height);
// yuv 4:2:2 packed to yuv 4:2:0 planar
void vj_yuy2toyv12( uint8_t *y, uint8_t *u, uint8_t *v, uint8_t *in, int w, int h);
// yuv 4:2:2 packed to yuv 4:2:2 planar
void yuy2toyv16( uint8_t *y, uint8_t *u, uint8_t *v, uint8_t *in, int w, int h);
// yuv 4:2:2 planar to yuv 4:2:2 packed
void yuv422p_to_yuv422( uint8_t *yuv422[3], uint8_t *dst, int w, int h );
// yuv 4:2:0 planar to yuv 4:2:2 packed
void yuv420p_to_yuv422( uint8_t *yuv420[3], uint8_t *dst, int w, int h );
// yuv 4:2:2 planar to YUYV
void yuv422_to_yuyv( uint8_t *yuv422[3], uint8_t *dst, int w, int h );
// scene detection
int luminance_mean(uint8_t * frame[], int w, int h);
/* software scaler from ffmpeg project: */
typedef struct
{
float lumaGBlur;
float chromaGBlur;
float lumaSarpen;
float chromaSharpen;
float chromaHShift;
float chromaVShift;
int verbose;
int flags;
int use_filter;
} sws_template;
void yuv_init_lib();
void* yuv_init_swscaler(VJFrame *src, VJFrame *dst, sws_template *templ, int cpu_flags);
void yuv_convert_and_scale( void *sws, VJFrame *src, VJFrame *dst );
void yuv_convert_and_scale_rgb( void *sws, VJFrame *src, VJFrame *dst );
void yuv_convert_and_scale_gray_rgb(void *sws,VJFrame *src, VJFrame *dst);
void yuv_convert_and_scale_from_rgb(void *sws , VJFrame *src, VJFrame *dst);
void yuv_convert_and_scale_grey(void *sws , VJFrame *src, VJFrame *dst);
int yuv_sws_get_cpu_flags(void);
void yuv_free_swscaler(void *sws);
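/* Typical scaler lifecycle (an illustrative sketch; the zeroed template,
* the flags value and calling yuv_init_lib() first are assumptions, not
* prescribed by this header):
*
* sws_template t;
* memset( &t, 0, sizeof(t) );
* t.flags = 1;
* yuv_init_lib();
* void *scaler = yuv_init_swscaler( src, dst, &t, yuv_sws_get_cpu_flags() );
* yuv_convert_and_scale( scaler, src, dst );
* yuv_free_swscaler( scaler );
*/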
void yuv_crop(VJFrame *src, VJFrame *dst, VJRectangle *rect );
VJFrame *yuv_allocate_crop_image( VJFrame *src, VJRectangle *rect );
void yuv_deinterlace(
uint8_t *data[3],
const int width,
const int height,
int out_pix_fmt,
int shift,
uint8_t *Y,uint8_t *U, uint8_t *V );
void yuv_free_lib();
void yuv_convert_ac( VJFrame *src, VJFrame *dst, int a, int b );
//void yuv_convert_any( VJFrame *src, VJFrame *dst, int a, int b );
void yuv_convert_any_ac_packed( VJFrame *src, uint8_t *dst, int src_fmt, int dst_fmt );
void yuv_convert_any3( VJFrame *src,int strides[], VJFrame *dst, int a, int b );
VJFrame *yuv_rgb_template( uint8_t *rgb_buffer, int w, int h, int fmt );
VJFrame *yuv_yuv_template( uint8_t *Y, uint8_t *U, uint8_t *V, int w, int h, int fmt );
char *yuv_get_scaler_name(int id);
void yuv_convert_any_ac( VJFrame *src, VJFrame *dst, int src_fmt, int dst_fmt );
void *yuv_fx_context_create( VJFrame *src, VJFrame *dst, int src_fmt, int dst_fmt );
void yuv_fx_context_process( void *ctx, VJFrame *src, VJFrame *dst );
void yuv_fx_context_destroy( void *ctx );
#endif