From a2a2f8ca60747e11feeed8a988b80ea3d0b0d3b4 Mon Sep 17 00:00:00 2001
From: veejay <>
Date: Mon, 30 Oct 2023 16:58:54 +0100
Subject: [PATCH] fix softblur

---
Review note: softblur5_apply blurs in place, so iteration c+1 of its inner
loop reads the Y[r + c] that iteration c has just written. That loop-carried
dependence rules out "#pragma omp simd" on the c loop; the pragma line is
now a comment, which keeps the added-line count of the hunk unchanged.

 .../veejay-server/libvje/effects/softblur.c | 237 ++++--------------
 1 file changed, 48 insertions(+), 189 deletions(-)

diff --git a/veejay-current/veejay-server/libvje/effects/softblur.c b/veejay-current/veejay-server/libvje/effects/softblur.c
index 2a7af8ee..2cc3464d 100644
--- a/veejay-current/veejay-server/libvje/effects/softblur.c
+++ b/veejay-current/veejay-server/libvje/effects/softblur.c
@@ -38,19 +38,46 @@ vj_effect *softblur_init(int w,int h)
     ve->defaults[0] = 0;
     ve->limits[0][0] = 0;
-    ve->limits[1][0] = 1; /* 3*/
-    ve->description = "Soft Blur (1x3) and (3x3)";
+    ve->limits[1][0] = 2; /* 0 = 1x3, 1 = 3x3, 2 = 5x5 */
+    ve->description = "Soft Blur";
     ve->sub_format = -1;
     ve->extra_frame = 0;
     ve->has_user = 0;
-    ve->param_description = vje_build_param_list(ve->num_params, "Kernel size");
+    ve->param_description = vje_build_param_list(ve->num_params, "Kernel Size");
+
+    ve->hints = vje_init_value_hint_list( ve->num_params );
+
+    vje_build_value_hint_list( ve->hints, ve->limits[1][0], 0, "1x3", "3x3","5x5");
+
     return ve;
 }
+static void softblur5_apply(VJFrame *frame)
+{
+    int r, c, i, j;
+    uint8_t *restrict Y = frame->data[0];
+    const int len = frame->len;
+    const int width = frame->width;
+    /* in-place 5x5 mean over frame->data[0]; the two outermost rows and columns are not written */
+    for (r = 2 * width; r < len - 2 * width; r += width) {
+        /* no "omp simd" here: iteration c+1 reads the Y[r + c] written by iteration c */
+        for (c = 2; c < width - 2; c++) {
+            int sum = 0;
+            for (i = -2; i <= 2; i++) {
+                for (j = -2; j <= 2; j++) {
+                    sum += Y[r + i * width + c + j];
+                }
+            }
+            Y[r + c] = sum / 25;
+        }
+    }
+}
+
+
 static void softblur3_apply(VJFrame *frame )
 {
     int r,c;
-    uint8_t *Y = frame->data[0];
+    uint8_t *restrict Y = frame->data[0];
     const int len = frame->len;
     const int width = frame->width;
@@ -61,6 +88,7 @@ static void softblur3_apply(VJFrame *frame )
     ) / 3;
     for(r=width; r < (len-width); r+=width) {
+#pragma omp simd
         for(c=1; c < (width-1); c++) {
             Y[r+c] = ( Y[r - width + c - 1]
+ Y[r - width + c ] + @@ -80,188 +108,29 @@ static void softblur3_apply(VJFrame *frame ) Y[c] + Y[c + 1] ) / 3; - - } -#ifdef HAVE_ASM_MMX -/* mmx_blur() taken from libvisual plugins - * - * Libvisual-plugins - Standard plugins for libvisual - * - * Copyright (C) 2002, 2003, 2004, 2005 Dennis Smit - * - * Authors: Dennis Smit - */ - -static void mmx_blur(VJFrame *frame) -{ - __asm __volatile - ("\n\t pxor %%mm6, %%mm6" - ::); - - const int len = frame->len; - const int width = frame->width; - int scrsh = (len) >> 1; - int i; - - uint8_t *buf = frame->data[0]; - /* Prepare substraction register */ - for (i = 0; i < scrsh; i += 4) { - __asm __volatile - ("\n\t movd %[buf], %%mm0" - "\n\t movd %[add1], %%mm1" - "\n\t punpcklbw %%mm6, %%mm0" - "\n\t movd %[add2], %%mm2" - "\n\t punpcklbw %%mm6, %%mm1" - "\n\t movd %[add3], %%mm3" - "\n\t punpcklbw %%mm6, %%mm2" - "\n\t paddw %%mm1, %%mm0" - "\n\t punpcklbw %%mm6, %%mm3" - "\n\t paddw %%mm2, %%mm0" - "\n\t paddw %%mm3, %%mm0" - "\n\t psrlw $2, %%mm0" - "\n\t packuswb %%mm6, %%mm0" - "\n\t movd %%mm0, %[buf]" - :: [buf] "m" (*(buf + i)) - , [add1] "m" (*(buf + i + width)) - , [add2] "m" (*(buf + i + width + 1)) - , [add3] "m" (*(buf + i + width - 1)) - ); - // : "mm0", "mm1", "mm2", "mm3", "mm6"); - } - - for (i = len; i > scrsh; i -= 4) { - __asm __volatile - ("\n\t movd %[buf], %%mm0" - "\n\t movd %[add1], %%mm1" - "\n\t punpcklbw %%mm6, %%mm0" - "\n\t movd %[add2], %%mm2" - "\n\t punpcklbw %%mm6, %%mm1" - "\n\t movd %[add3], %%mm3" - "\n\t punpcklbw %%mm6, %%mm2" - "\n\t paddw %%mm1, %%mm0" - "\n\t punpcklbw %%mm6, %%mm3" - "\n\t paddw %%mm2, %%mm0" - "\n\t paddw %%mm3, %%mm0" - "\n\t psrlw $2, %%mm0" - "\n\t packuswb %%mm6, %%mm0" - "\n\t movd %%mm0, %[buf]" - :: [buf] "m" (*(buf + i)) - , [add1] "m" (*(buf + i + width)) - , [add2] "m" (*(buf + i + 1)) - , [add3] "m" (*(buf + i + width - 1)) - );// : "mm0", "mm1", "mm2", "mm3", "mm6"); - } - - do_emms; -} -#endif -#if !defined(HAVE_ASM_MMX) && !defined(HAVE_ARM) 
static void softblur1_apply( VJFrame *frame) { int r, c; const int len = frame->len; - uint8_t *Y = frame->data[0]; + uint8_t *restrict Y = frame->data[0]; const int width = frame->width; - for (r = 0; r < len; r += width) { - for (c = 1; c < width-1; c++) { - Y[c + r] = (Y[r + c - 1] + - Y[r + c] + - Y[r + c + 1] - ) / 3; +#pragma omp simd + for(r=0; r < len; r+=width) { + for(c=1; c < (width-1); c++) { + Y[r+c] = ( + Y[r + c + 1] + + Y[r + c] + + Y[r + c + 1] ) /3; + } - } + } } -#endif -#ifdef HAVE_ARM -static void softblur1_apply(VJFrame *frame) { - const int len = frame->len; - uint8_t *Y = frame->data[0]; - const int width = frame->width; - const int aligned_width = (width / 16) * 16; - - for (int r = 0; r < len; r += width) { - for (int c = 1; c < aligned_width - 1; c += 16) { - uint8x16_t prev = vld1q_u8(&Y[r + c - 1]); - uint8x16_t current = vld1q_u8(&Y[r + c]); - uint8x16_t next = vld1q_u8(&Y[r + c + 1]); - - uint16x8_t sum_low = vaddl_u8(vget_low_u8(prev), vget_low_u8(current)); - uint16x8_t sum_high = vaddl_u8(vget_high_u8(prev), vget_high_u8(current)); - sum_low = vaddw_u8(sum_low, vget_low_u8(next)); - sum_high = vaddw_u8(sum_high, vget_high_u8(next)); - - uint8x16_t result = vcombine_u8(vshrn_n_u16(sum_low, 2), vshrn_n_u16(sum_high, 2)); - vst1q_u8(&Y[r + c], result); - } - - for (int c = aligned_width; c < width - 1; c++) { - Y[r + c] = (Y[r + c - 1] + Y[r + c] + Y[r + c + 1]) / 3; - } - } -} -#endif - -#ifdef HAVE_ASM_SSE2 -static void sse2_blur(VJFrame *frame) { - const int len = frame->len; - const int width = frame->width; - int scrsh = len >> 1; - - uint8_t *buf = frame->data[0]; - - for (int i = 0; i < scrsh; i += 16) { - __m128i mm0 = _mm_load_si128((__m128i *)(buf + i)); - __m128i mm1 = _mm_load_si128((__m128i *)(buf + i + width)); - __m128i mm2 = _mm_load_si128((__m128i *)(buf + i + width + 1)); - __m128i mm3 = _mm_load_si128((__m128i *)(buf + i + width - 1)); - - mm0 = _mm_unpacklo_epi8(mm0, _mm_setzero_si128()); - mm1 = 
_mm_unpacklo_epi8(mm1, _mm_setzero_si128()); - mm2 = _mm_unpacklo_epi8(mm2, _mm_setzero_si128()); - mm3 = _mm_unpacklo_epi8(mm3, _mm_setzero_si128()); - - mm0 = _mm_add_epi16(mm0, mm1); - mm0 = _mm_add_epi16(mm0, mm2); - mm0 = _mm_add_epi16(mm0, mm3); - - mm0 = _mm_srli_epi16(mm0, 2); - - mm0 = _mm_packus_epi16(mm0, mm0); - - _mm_storel_epi64((__m128i *)(buf + i), mm0); - } - - for (int i = len - 16; i > scrsh; i -= 16) { - __m128i mm0 = _mm_load_si128((__m128i *)(buf + i)); - __m128i mm1 = _mm_load_si128((__m128i *)(buf + i + width)); - __m128i mm2 = _mm_load_si128((__m128i *)(buf + i + 1)); - __m128i mm3 = _mm_load_si128((__m128i *)(buf + i + width - 1)); - - mm0 = _mm_unpacklo_epi8(mm0, _mm_setzero_si128()); - mm1 = _mm_unpacklo_epi8(mm1, _mm_setzero_si128()); - mm2 = _mm_unpacklo_epi8(mm2, _mm_setzero_si128()); - mm3 = _mm_unpacklo_epi8(mm3, _mm_setzero_si128()); - - mm0 = _mm_add_epi16(mm0, mm1); - mm0 = _mm_add_epi16(mm0, mm2); - mm0 = _mm_add_epi16(mm0, mm3); - - mm0 = _mm_srli_epi16(mm0, 2); - - mm0 = _mm_packus_epi16(mm0, mm0); - - _mm_storel_epi64((__m128i *)(buf + i), mm0); - } - - _mm_empty(); -} -#endif void softblur_apply(void *ptr, VJFrame *frame, int *args) @@ -271,19 +140,14 @@ void softblur_apply(void *ptr, VJFrame *frame, int *args) switch (type) { case 0: -#ifdef HAVE_ASM_SSE2 - sse2_blur(frame); -#else -#ifdef HAVE_ASM_MMX - mmx_blur(frame); -#else softblur1_apply(frame); -#endif -#endif break; case 1: softblur3_apply(frame); break; + case 2: + softblur5_apply(frame); + break; } } @@ -291,18 +155,13 @@ void softblur_apply_internal(VJFrame *frame, int type) { switch (type) { case 0: -#ifdef HAVE_ASM_SSE2 - sse2_blur(frame); -#else -#ifdef HAVE_ASM_MMX - mmx_blur(frame); -#else softblur1_apply(frame); -#endif -#endif break; case 1: softblur3_apply(frame); break; + case 2: + softblur5_apply(frame); + break; } }