WIP memset neon 32

This commit is contained in:
veejay
2023-09-18 15:01:55 +02:00
parent 055c2d215c
commit c772deadb7

View File

@@ -1945,6 +1945,40 @@ void memset_asimd_64(uint8_t *dst, uint8_t value, size_t size) {
remaining_bytes--;
}
}
/*
 * Fill `size` bytes starting at `dst` with the byte `value` using
 * Advanced SIMD stores, 32 bytes (two 128-bit stores) per main-loop
 * iteration.
 *
 * dst   - destination buffer; must be writable for at least `size` bytes
 * value - byte value to replicate
 * size  - number of bytes to write; 0 is a no-op
 *
 * The tail (size % 32 bytes) is written with at most one 16-byte store,
 * at most one 8-byte store, and then single bytes.
 */
void memset_asimd_32(uint8_t *dst, uint8_t value, size_t size) {
    const uint8x16_t value_v = vdupq_n_u8(value);
    const size_t num_blocks = size / 32;
    size_t remaining_bytes = size % 32;

    for (size_t i = 0; i < num_blocks; i++) {
        vst1q_u8(dst, value_v);
        vst1q_u8(dst + 16, value_v);
        dst += 32;
    }

    /* remaining_bytes < 32 here, so each wide tail store runs at most once. */
    if (remaining_bytes >= 16) {
        vst1q_u8(dst, value_v);
        dst += 16;
        remaining_bytes -= 16;
    }

    if (remaining_bytes >= 8) {
        /*
         * Use the low 64 bits of the already-splatted vector. Do NOT
         * reinterpret &value as a uint64_t*: `value` is a single byte, so
         * that would read 7 bytes past the object and store garbage.
         */
        vst1_u8(dst, vget_low_u8(value_v));
        dst += 8;
        remaining_bytes -= 8;
    }

    while (remaining_bytes > 0) {
        *dst++ = value;
        remaining_bytes--;
    }
}
#endif
static struct {
@@ -2036,7 +2070,7 @@ static struct {
{ "Advanced SIMD memset()", (void*) memset_asimd, 0, AV_CPU_FLAG_ARMV8 },
{ "Advanced SIMD memset() v4", (void*) memset_asimd_v4, 0, AV_CPU_FLAG_ARMV8 },
{ "Advanced SIMD memset() with line size of 64", (void*) memset_asimd_64, 0, AV_CPU_FLAG_ARMV8 },
{ "Advanced SIMD memset() with line size of 32", (void*) memset_asimd_64, 0, AV_CPU_FLAG_ARMV8 },
// { "Advanced SIMD memset() v3", (void*) memset_asimd_v3, 0, AV_CPU_FLAG_ARMV8 },
// { "Advanced SIMD memset() v2", (void*) memset_asimd_v2, 0, AV_CPU_FLAG_ARMV8 },