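Auto-vectorization wins: remove the hand-written MMX paths from the negatechannel and negation effects in favor of plain indexed loops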

This commit is contained in:
niels
2015-06-02 21:21:13 +02:00
parent bc95c72c3f
commit 00c84f21f1
2 changed files with 29 additions and 141 deletions

View File

@@ -23,6 +23,9 @@
#include <libvjmem/vjmem.h> #include <libvjmem/vjmem.h>
#include "negatechannel.h" #include "negatechannel.h"
#include "common.h" #include "common.h"
#undef HAVE_ASM_MMX
vj_effect *negatechannel_init(int w, int h) vj_effect *negatechannel_init(int w, int h)
{ {
vj_effect *ve = (vj_effect *) vj_calloc(sizeof(vj_effect)); vj_effect *ve = (vj_effect *) vj_calloc(sizeof(vj_effect));
@@ -56,92 +59,21 @@ void negatechannel_apply( VJFrame *frame, int width, int height, int chan, int v
uint8_t *Cb = frame->data[1]; uint8_t *Cb = frame->data[1];
uint8_t *Cr = frame->data[2]; uint8_t *Cr = frame->data[2];
#ifndef HAVE_ASM_MMX
switch( chan ) { switch( chan ) {
case 0: case 0:
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
*(Y) = val - *(Y); Y[i] = val - Y[i];
*(Y)++; }
} break;
break; case 1:
case 1: for (i = 0; i < uv_len; i++) {
Cb[i] = val - Cb[i];
for (i = 0; i < uv_len; i++) { }
*(Cb) = val - *(Cb); break;
*(Cb)++; case 2:
} for (i = 0; i < uv_len; i++) {
break; Cr[i] = val - Cr[i];
case 2: }
for (i = 0; i < uv_len; i++) { break;
*(Cr) = val - *(Cr);
*(Cr)++;
}
break;
} }
#else
int left = len % 8;
int work= len >> 3;
vje_load_mask(val);
switch( chan ) {
case 0:
for( i = 0; i < work ; i ++ )
{
vje_mmx_negate( Y, Y );
Y += 8;
}
if (left )
{
for( i = 0; i < left; i ++ )
{
*(Y) = val - *(Y);
*(Y)++;
}
}
break;
case 1:
work = uv_len >> 3;
left = uv_len % 8;
for( i = 0; i < work ; i ++ )
{
vje_mmx_negate( Cb, Cb );
Cr += 8;
}
if(left )
{
for( i = 0; i < left; i ++ )
{
*(Cb) = val - *(Cb);
*(Cb)++;
}
}
break;
case 2:
work = uv_len >> 3;
left = uv_len % 8;
for( i = 0; i < work ; i ++ )
{
vje_mmx_negate( Cr, Cr );
Cr += 8;
}
if(left )
{
for( i = 0; i < left; i ++ )
{
*(Cr) = val - *(Cr);
*(Cr)++;
}
}
break;
}
do_emms;
#endif
} }

View File

@@ -23,6 +23,10 @@
#include <libvjmem/vjmem.h> #include <libvjmem/vjmem.h>
#include "common.h" #include "common.h"
#include "negation.h" #include "negation.h"
//auto vectorization is better
#undef HAVE_ASM_MMX
vj_effect *negation_init(int w, int h) vj_effect *negation_init(int w, int h)
{ {
vj_effect *ve = (vj_effect *) vj_calloc(sizeof(vj_effect)); vj_effect *ve = (vj_effect *) vj_calloc(sizeof(vj_effect));
@@ -54,61 +58,13 @@ void negation_apply( VJFrame *frame, int width, int height, int val)
uint8_t *Cb = frame->data[1]; uint8_t *Cb = frame->data[1];
uint8_t *Cr = frame->data[2]; uint8_t *Cr = frame->data[2];
#ifndef HAVE_ASM_MMX for( i = 0; i < len; i ++ ) {
for (i = 0; i < len; i++) { Y[i] = val - Y[i];
*(Y) = val - *(Y);
*(Y)++;
}
for (i = 0; i < uv_len; i++) {
*(Cb) = val - *(Cb);
*(Cb)++;
*(Cr) = val - *(Cr);
*(Cr)++;
}
#else
int left = len % 8;
int work= len >> 3;
vje_load_mask(val);
for( i = 0; i < work ; i ++ )
{
vje_mmx_negate( Y, Y );
Y += 8;
}
if (left )
{
for( i = 0; i < left; i ++ )
{
*(Y) = val - *(Y);
*(Y)++;
} }
}
work = uv_len >> 3; for( i = 0; i < uv_len; i ++ ) {
left = uv_len % 8; Cb[i] = val - Cb[i];
for( i = 0; i < work ; i ++ ) Cr[i] = val - Cr[i];
{
vje_mmx_negate( Cb, Cb );
vje_mmx_negate( Cr, Cr );
Cb += 8;
Cr += 8;
}
if(left )
{
for( i = 0; i < left; i ++ )
{
*(Cb) = val - *(Cb);
*(Cb)++;
*(Cr) = val - *(Cr);
*(Cr)++;
} }
}
do_emms;
#endif
} }