Auto-vectorization wins: replace the hand-written MMX negate paths with plain C loops

2025-12-17 21:30:02 +01:00 · 2015-06-02 21:21:13 +02:00
parent bc95c72c3f
commit 00c84f21f1
2 changed files with 29 additions and 141 deletions
--- a/veejay-current/veejay-server/libvje/effects/negatechannel.c
+++ b/veejay-current/veejay-server/libvje/effects/negatechannel.c
@@ -23,6 +23,9 @@
 #include <libvjmem/vjmem.h>
 #include "negatechannel.h"
 #include "common.h"
+
+#undef HAVE_ASM_MMX
+
 vj_effect *negatechannel_init(int w, int h)
 {
    vj_effect *ve = (vj_effect *) vj_calloc(sizeof(vj_effect));
@@ -56,92 +59,21 @@ void negatechannel_apply( VJFrame *frame, int width, int height, int chan, int v
    uint8_t *Cb = frame->data[1];
    uint8_t *Cr = frame->data[2];

-#ifndef HAVE_ASM_MMX 
    switch( chan ) {
 		case 0:
 		 for (i = 0; i < len; i++) {
-	*(Y) = val - *(Y);
-	*(Y)++;
+			Y[i] = val - Y[i];
 		    }
 		 break;
 		case 1:
-
 			for (i = 0; i < uv_len; i++) {
-	*(Cb) = val - *(Cb);
-        *(Cb)++;
+			Cb[i] = val - Cb[i];
 		    }
 		 break;
 		case 2:
 		for (i = 0; i < uv_len; i++) {
-        *(Cr) = val - *(Cr);
-	*(Cr)++;
+			Cr[i] = val - Cr[i];
 		}
 		 break;
   }
-
-#else
-
-    int left = len % 8;
-    int work=  len >> 3;
-
-    vje_load_mask(val);
-
-    switch( chan ) {
-	case 0:
-    for( i = 0; i < work ; i ++ )
-    {
-	vje_mmx_negate( Y, Y );	
-	Y += 8;
-    }	
-
-    if (left )
-    {
-	for( i = 0; i < left; i ++ )
-	{
-		*(Y) = val - *(Y);
-		*(Y)++;
-	}	
-    }
-	break;
-	case 1:
-
-    work = uv_len >> 3;
-    left = uv_len % 8;
-    for( i = 0; i < work ; i ++ )
-    {
-	vje_mmx_negate( Cb, Cb );
-	Cr += 8;
-    }
-
-    if(left )
-    {
-	for( i = 0; i < left; i ++ )
-	{
-		*(Cb) = val - *(Cb);
-      	 	*(Cb)++;
-	}
-    }
-	break;
-	case 2:
-    work = uv_len >> 3;
-    left = uv_len % 8;
-    for( i = 0; i < work ; i ++ )
-    {
-	vje_mmx_negate( Cr, Cr );
-	Cr += 8;
-    }
-
-    if(left )
-    {
-	for( i = 0; i < left; i ++ )
-	{
-     	  	*(Cr) = val - *(Cr);
-		*(Cr)++;
-	}
-    }
-	break;
- }
-
- do_emms;
-#endif
 }
--- a/veejay-current/veejay-server/libvje/effects/negation.c
+++ b/veejay-current/veejay-server/libvje/effects/negation.c
@@ -23,6 +23,10 @@
 #include <libvjmem/vjmem.h>
 #include "common.h"
 #include "negation.h"
+
+// Compiler auto-vectorization of the plain C loops is better than the hand-written MMX path
+#undef HAVE_ASM_MMX
+
 vj_effect *negation_init(int w, int h)
 {
    vj_effect *ve = (vj_effect *) vj_calloc(sizeof(vj_effect));
@@ -54,61 +58,13 @@ void negation_apply( VJFrame *frame, int width, int height, int val)
    uint8_t *Cb = frame->data[1];
    uint8_t *Cr = frame->data[2];

-#ifndef HAVE_ASM_MMX
-    for (i = 0; i < len; i++) {
-	*(Y) = val - *(Y);
-	*(Y)++;
+	for( i = 0; i < len; i ++ ) {
+		Y[i] = val - Y[i];
 	}

-    for (i = 0; i < uv_len; i++) {
-	*(Cb) = val - *(Cb);
-        *(Cb)++;
-        *(Cr) = val - *(Cr);
-	*(Cr)++;
-    }
-#else
-
-    int left = len % 8;
-    int work=  len >> 3;
-
-    vje_load_mask(val);
-
-    for( i = 0; i < work ; i ++ )
-    {
-	vje_mmx_negate( Y, Y );	
-	Y += 8;
+	for( i = 0; i < uv_len; i ++ ) {
+		Cb[i] = val - Cb[i];
+		Cr[i] = val - Cr[i];
 	}

-    if (left )
-    {
-	for( i = 0; i < left; i ++ )
-	{
-		*(Y) = val - *(Y);
-		*(Y)++;
-	}	
-    }
-
-    work = uv_len >> 3;
-    left = uv_len % 8;
-    for( i = 0; i < work ; i ++ )
-    {
-	vje_mmx_negate( Cb, Cb );
-	vje_mmx_negate( Cr, Cr );
-	Cb += 8;
-	Cr += 8;
-    }
-
-    if(left )
-    {
-	for( i = 0; i < left; i ++ )
-	{
-		*(Cb) = val - *(Cb);
-      	 	*(Cb)++;
-     	  	*(Cr) = val - *(Cr);
-		*(Cr)++;
-	}
-    }
-
-    do_emms;
-#endif
 }