mirror of
https://github.com/game-stop/veejay.git
synced 2025-12-16 21:00:00 +01:00
ARMv8-a RPI4 memset, cacheline size WIP
This commit is contained in:
@@ -1756,28 +1756,38 @@ static void fast_memset(void * to, int val, size_t len)
|
|||||||
#ifdef HAVE_ARM_ASIMD
|
#ifdef HAVE_ARM_ASIMD
|
||||||
|
|
||||||
void memset_asimd_v3(void *dst, uint8_t val, size_t len) {
|
void memset_asimd_v3(void *dst, uint8_t val, size_t len) {
|
||||||
uint8x16_t value = vdupq_n_u8(val);
|
uint8x16_t value = vdupq_n_u8(val);
|
||||||
uint8_t *dst_bytes = (uint8_t *)dst;
|
uint8_t *dst_bytes = (uint8_t *)dst;
|
||||||
size_t num_blocks = len / 16;
|
|
||||||
|
const int CACHE_LINE_SIZE = cpu_cache_size();
|
||||||
|
size_t offset = (CACHE_LINE_SIZE - ((uintptr_t)dst_bytes % CACHE_LINE_SIZE)) % CACHE_LINE_SIZE;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < offset; i++) {
|
||||||
|
*dst_bytes++ = val;
|
||||||
|
len--;
|
||||||
|
}
|
||||||
|
|
||||||
value = vld1q_u8(&val);
|
value = vld1q_u8(&val);
|
||||||
|
|
||||||
for (size_t i = 0; i < num_blocks; i++) {
|
size_t num_lines = len / CACHE_LINE_SIZE;
|
||||||
uint8x16_t dst_data = vld1q_u8(dst_bytes);
|
|
||||||
|
|
||||||
dst_data = vorrq_u8(dst_data, value);
|
for (size_t i = 0; i < num_lines; i++) {
|
||||||
|
for (size_t j = 0; j < CACHE_LINE_SIZE / 16; j++) {
|
||||||
vst1q_u8(dst_bytes, dst_data);
|
uint8x16_t dst_data = vld1q_u8(dst_bytes);
|
||||||
dst_bytes += 16;
|
dst_data = vorrq_u8(dst_data, value);
|
||||||
|
vst1q_u8(dst_bytes, dst_data);
|
||||||
|
dst_bytes += 16;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t remaining_bytes = len % 16;
|
size_t remaining_bytes = len % CACHE_LINE_SIZE;
|
||||||
|
|
||||||
for (size_t i = 0; i < remaining_bytes; i++) {
|
for (size_t i = 0; i < remaining_bytes; i++) {
|
||||||
*dst_bytes++ = val;
|
*dst_bytes++ = val;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void memset_asimd(void *dst, uint8_t val, size_t len) {
|
void memset_asimd(void *dst, uint8_t val, size_t len) {
|
||||||
uint8x16_t value = vdupq_n_u8(val);
|
uint8x16_t value = vdupq_n_u8(val);
|
||||||
uint8_t *dst_bytes = (uint8_t *)dst;
|
uint8_t *dst_bytes = (uint8_t *)dst;
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ extern void yuyv_plane_init();
|
|||||||
extern void benchmark_tasks(int n_tasks, long n_frames, int w, int h);
|
extern void benchmark_tasks(int n_tasks, long n_frames, int w, int h);
|
||||||
extern void init_parallel_tasks(int n_tasks);
|
extern void init_parallel_tasks(int n_tasks);
|
||||||
static int MEM_ALIGNMENT_SIZE = 0;
|
static int MEM_ALIGNMENT_SIZE = 0;
|
||||||
/* Externally visible, initialised to 64. Written without the `extern`
 * keyword: an initialised file-scope object is a definition either way, and
 * `extern` combined with an initialiser only draws a compiler warning.
 * NOTE(review): presumably the cache line size in bytes -- confirm at the
 * places that read or update it. */
int CACHE_LINE_SIZE = 64;
|
||||||
|
|
||||||
|
|
||||||
static int has_cpuid(void)
|
static int has_cpuid(void)
|
||||||
|
|||||||
@@ -65,4 +65,5 @@ extern void *vj_simple_pool_alloc( void *ptr, size_t s );
|
|||||||
extern void *vj_simple_pool_init( size_t s );
|
extern void *vj_simple_pool_init( size_t s );
|
||||||
extern void vj_simple_pool_reset( void *ptr );
|
extern void vj_simple_pool_reset( void *ptr );
|
||||||
extern void vj_mem_destroy();
|
extern void vj_mem_destroy();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user