ARMv8-a RPI4 memset, cacheline size WIP

This commit is contained in:
veejay
2023-09-18 01:29:36 +02:00
parent c796c99f00
commit 057ce7a549
3 changed files with 22 additions and 11 deletions

View File

@@ -1758,26 +1758,36 @@ static void fast_memset(void * to, int val, size_t len)
void memset_asimd_v3(void *dst, uint8_t val, size_t len) {
uint8x16_t value = vdupq_n_u8(val);
uint8_t *dst_bytes = (uint8_t *)dst;
size_t num_blocks = len / 16;
const int CACHE_LINE_SIZE = cpu_cache_size();
size_t offset = (CACHE_LINE_SIZE - ((uintptr_t)dst_bytes % CACHE_LINE_SIZE)) % CACHE_LINE_SIZE;
for (size_t i = 0; i < offset; i++) {
*dst_bytes++ = val;
len--;
}
value = vld1q_u8(&val);
for (size_t i = 0; i < num_blocks; i++) {
size_t num_lines = len / CACHE_LINE_SIZE;
for (size_t i = 0; i < num_lines; i++) {
for (size_t j = 0; j < CACHE_LINE_SIZE / 16; j++) {
uint8x16_t dst_data = vld1q_u8(dst_bytes);
dst_data = vorrq_u8(dst_data, value);
vst1q_u8(dst_bytes, dst_data);
dst_bytes += 16;
}
}
size_t remaining_bytes = len % 16;
size_t remaining_bytes = len % CACHE_LINE_SIZE;
for (size_t i = 0; i < remaining_bytes; i++) {
*dst_bytes++ = val;
}
}
void memset_asimd(void *dst, uint8_t val, size_t len) {
uint8x16_t value = vdupq_n_u8(val);
uint8_t *dst_bytes = (uint8_t *)dst;

View File

@@ -33,7 +33,7 @@ extern void yuyv_plane_init();
/* Implemented in other translation units. */
extern void benchmark_tasks(int n_tasks, long n_frames, int w, int h);
extern void init_parallel_tasks(int n_tasks);

/* Private to this file; starts at 0.
 * NOTE(review): presumably assigned during initialisation elsewhere in this
 * file - not visible from here. */
static int MEM_ALIGNMENT_SIZE = 0;

/* Cache-line size in bytes, defaulting to 64. Defined once here with external
 * linkage so other translation units can declare it as
 * `extern int CACHE_LINE_SIZE;`. A definition must not carry both `extern`
 * and an initializer, and must not coexist with a `static` definition of the
 * same name. */
int CACHE_LINE_SIZE = 64;
static int has_cpuid(void)

View File

@@ -65,4 +65,5 @@ extern void *vj_simple_pool_alloc( void *ptr, size_t s );
/* Declarations only; the implementations are not visible in this header. */

/* NOTE(review): presumably creates a pool of s bytes and returns a handle to
 * it - confirm against the implementation. */
extern void *vj_simple_pool_init( size_t s );
/* NOTE(review): presumably resets the pool referred to by ptr so it can be
 * reused - confirm against the implementation. */
extern void vj_simple_pool_reset( void *ptr );
/* NOTE(review): presumably releases what the vj_mem subsystem holds - confirm.
 * The empty parameter list leaves the arguments unspecified before C23. */
extern void vj_mem_destroy();
#endif