mirror of
https://github.com/game-stop/veejay.git
synced 2025-12-15 04:10:00 +01:00
WIP memset neon 32
This commit is contained in:
@@ -1945,6 +1945,40 @@ void memset_asimd_64(uint8_t *dst, uint8_t value, size_t size) {
|
||||
remaining_bytes--;
|
||||
}
|
||||
}
|
||||
|
||||
void memset_asimd_32(uint8_t *dst, uint8_t value, size_t size) {
|
||||
uint8x16_t value_v = vdupq_n_u8(value);
|
||||
|
||||
size_t num_blocks = size / 32;
|
||||
size_t remaining_bytes = size % 32;
|
||||
|
||||
for (size_t i = 0; i < num_blocks; i++) {
|
||||
vst1q_u8(dst, value_v);
|
||||
dst += 16;
|
||||
vst1q_u8(dst, value_v);
|
||||
dst += 16;
|
||||
}
|
||||
|
||||
while (remaining_bytes >= 16) {
|
||||
vst1q_u8(dst, value_v);
|
||||
dst += 16;
|
||||
remaining_bytes -= 16;
|
||||
}
|
||||
|
||||
while (remaining_bytes >= 8) {
|
||||
uint64x1_t value_u64 = vdup_n_u64(*((uint64_t*)&value));
|
||||
vst1_u8(dst, vreinterpret_u8_u64(value_u64));
|
||||
dst += 8;
|
||||
remaining_bytes -= 8;
|
||||
}
|
||||
|
||||
while (remaining_bytes > 0) {
|
||||
*dst = value;
|
||||
dst++;
|
||||
remaining_bytes--;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static struct {
|
||||
@@ -2036,7 +2070,7 @@ static struct {
|
||||
{ "Advanced SIMD memset()", (void*) memset_asimd, 0, AV_CPU_FLAG_ARMV8 },
|
||||
{ "Advanced SIMD memset() v4", (void*) memset_asimd_v4, 0, AV_CPU_FLAG_ARMV8 },
|
||||
{ "Advanced SIMD memset() with line size of 64", (void*) memset_asimd_64, 0, AV_CPU_FLAG_ARMV8 },
|
||||
|
||||
{ "Advanced SIMD memset() with line size of 32", (void*) memset_asimd_64, 0, AV_CPU_FLAG_ARMV8 },
|
||||
// { "Advanced SIMD memset() v3", (void*) memset_asimd_v3, 0, AV_CPU_FLAG_ARMV8 },
|
||||
// { "Advanced SIMD memset() v2", (void*) memset_asimd_v2, 0, AV_CPU_FLAG_ARMV8 },
|
||||
|
||||
|
||||
Reference in New Issue
Block a user