From 00c84f21f16482941dd96d4f5b75f534bb7fe4ac Mon Sep 17 00:00:00 2001 From: niels Date: Tue, 2 Jun 2015 21:21:13 +0200 Subject: [PATCH] auto vectorization wins --- .../libvje/effects/negatechannel.c | 104 +++--------------- .../veejay-server/libvje/effects/negation.c | 66 ++--------- 2 files changed, 29 insertions(+), 141 deletions(-) diff --git a/veejay-current/veejay-server/libvje/effects/negatechannel.c b/veejay-current/veejay-server/libvje/effects/negatechannel.c index f83db0b8..788e3def 100644 --- a/veejay-current/veejay-server/libvje/effects/negatechannel.c +++ b/veejay-current/veejay-server/libvje/effects/negatechannel.c @@ -23,6 +23,9 @@ #include #include "negatechannel.h" #include "common.h" + +#undef HAVE_ASM_MMX + vj_effect *negatechannel_init(int w, int h) { vj_effect *ve = (vj_effect *) vj_calloc(sizeof(vj_effect)); @@ -56,92 +59,21 @@ void negatechannel_apply( VJFrame *frame, int width, int height, int chan, int v uint8_t *Cb = frame->data[1]; uint8_t *Cr = frame->data[2]; -#ifndef HAVE_ASM_MMX switch( chan ) { - case 0: - for (i = 0; i < len; i++) { - *(Y) = val - *(Y); - *(Y)++; - } - break; - case 1: - - for (i = 0; i < uv_len; i++) { - *(Cb) = val - *(Cb); - *(Cb)++; - } - break; - case 2: - for (i = 0; i < uv_len; i++) { - *(Cr) = val - *(Cr); - *(Cr)++; - } - break; + case 0: + for (i = 0; i < len; i++) { + Y[i] = val - Y[i]; + } + break; + case 1: + for (i = 0; i < uv_len; i++) { + Cb[i] = val - Cb[i]; + } + break; + case 2: + for (i = 0; i < uv_len; i++) { + Cr[i] = val - Cr[i]; + } + break; } - -#else - - int left = len % 8; - int work= len >> 3; - - vje_load_mask(val); - - switch( chan ) { - case 0: - for( i = 0; i < work ; i ++ ) - { - vje_mmx_negate( Y, Y ); - Y += 8; - } - - if (left ) - { - for( i = 0; i < left; i ++ ) - { - *(Y) = val - *(Y); - *(Y)++; - } - } - break; - case 1: - - work = uv_len >> 3; - left = uv_len % 8; - for( i = 0; i < work ; i ++ ) - { - vje_mmx_negate( Cb, Cb ); - Cr += 8; - } - - if(left ) - { - for( i = 0; i < left; i ++ ) - { - *(Cb) = val - *(Cb); - *(Cb)++; - } - } - break; - case 2: - work = uv_len >> 3; - left = uv_len % 8; - for( i = 0; i < work ; i ++ ) - { - vje_mmx_negate( Cr, Cr ); - Cr += 8; - } - - if(left ) - { - for( i = 0; i < left; i ++ ) - { - *(Cr) = val - *(Cr); - *(Cr)++; - } - } - break; - } - - do_emms; -#endif } diff --git a/veejay-current/veejay-server/libvje/effects/negation.c b/veejay-current/veejay-server/libvje/effects/negation.c index e3ad6bd2..3fdd7af8 100644 --- a/veejay-current/veejay-server/libvje/effects/negation.c +++ b/veejay-current/veejay-server/libvje/effects/negation.c @@ -23,6 +23,10 @@ #include #include "common.h" #include "negation.h" + +//auto vectorization is better +#undef HAVE_ASM_MMX + vj_effect *negation_init(int w, int h) { vj_effect *ve = (vj_effect *) vj_calloc(sizeof(vj_effect)); @@ -54,61 +58,13 @@ void negation_apply( VJFrame *frame, int width, int height, int val) uint8_t *Cb = frame->data[1]; uint8_t *Cr = frame->data[2]; -#ifndef HAVE_ASM_MMX - for (i = 0; i < len; i++) { - *(Y) = val - *(Y); - *(Y)++; - } - - for (i = 0; i < uv_len; i++) { - *(Cb) = val - *(Cb); - *(Cb)++; - *(Cr) = val - *(Cr); - *(Cr)++; - } -#else - - int left = len % 8; - int work= len >> 3; - - vje_load_mask(val); - - for( i = 0; i < work ; i ++ ) - { - vje_mmx_negate( Y, Y ); - Y += 8; - } - - if (left ) - { - for( i = 0; i < left; i ++ ) - { - *(Y) = val - *(Y); - *(Y)++; - } - } - - work = uv_len >> 3; - left = uv_len % 8; - for( i = 0; i < work ; i ++ ) - { - vje_mmx_negate( Cb, Cb ); - vje_mmx_negate( Cr, Cr ); - Cb += 8; - Cr += 8; - } - - if(left ) - { - for( i = 0; i < left; i ++ ) - { - *(Cb) = val - *(Cb); - *(Cb)++; - *(Cr) = val - *(Cr); - *(Cr)++; + for( i = 0; i < len; i ++ ) { + Y[i] = val - Y[i]; + } + + for( i = 0; i < uv_len; i ++ ) { + Cb[i] = val - Cb[i]; + Cr[i] = val - Cr[i]; } - } - do_emms; -#endif }