mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-12-11 17:30:00 +01:00
This patch adds a fully-featured level 3 and 4 decoder for FFv1, supporting Golomb and all Range coding variants, all pixel formats, and all features, except for the newly added floating-point formats. On a 6000 Ada, for 3840x2160 bgr0 content at 50Mbps (standard desktop recording), it is able to do 400fps. An Alder Lake with 24 threads can barely do 100fps.
1318 lines
51 KiB
C
1318 lines
51 KiB
C
/*
|
|
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
|
*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include "vulkan_decode.h"
|
|
#include "hwaccel_internal.h"
|
|
|
|
#include "ffv1.h"
|
|
#include "ffv1_vulkan.h"
|
|
#include "libavutil/vulkan_spirv.h"
|
|
#include "libavutil/mem.h"
|
|
|
|
extern const char *ff_source_common_comp;
|
|
extern const char *ff_source_rangecoder_comp;
|
|
extern const char *ff_source_ffv1_vlc_comp;
|
|
extern const char *ff_source_ffv1_common_comp;
|
|
extern const char *ff_source_ffv1_dec_setup_comp;
|
|
extern const char *ff_source_ffv1_reset_comp;
|
|
extern const char *ff_source_ffv1_dec_comp;
|
|
extern const char *ff_source_ffv1_dec_rct_comp;
|
|
|
|
const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
|
|
.codec_id = AV_CODEC_ID_FFV1,
|
|
.decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
|
|
.queue_flags = VK_QUEUE_COMPUTE_BIT,
|
|
};
|
|
|
|
#define HOST_MAP
|
|
|
|
typedef struct FFv1VulkanDecodePicture {
|
|
FFVulkanDecodePicture vp;
|
|
|
|
AVBufferRef *tmp_data;
|
|
|
|
AVBufferRef *slice_state;
|
|
uint32_t plane_state_size;
|
|
uint32_t slice_state_size;
|
|
uint32_t slice_data_size;
|
|
uint32_t max_context_count;
|
|
|
|
AVBufferRef *slice_offset_buf;
|
|
uint32_t *slice_offset;
|
|
int slice_num;
|
|
|
|
AVBufferRef *slice_status_buf;
|
|
int crc_checked;
|
|
} FFv1VulkanDecodePicture;
|
|
|
|
typedef struct FFv1VulkanDecodeContext {
|
|
AVBufferRef *intermediate_frames_ref[2]; /* 16/32 bit */
|
|
|
|
FFVulkanShader setup;
|
|
FFVulkanShader reset[2]; /* AC/Golomb */
|
|
FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB */
|
|
FFVulkanShader rct[2]; /* 16/32 bit */
|
|
|
|
FFVkBuffer rangecoder_static_buf;
|
|
FFVkBuffer quant_buf;
|
|
FFVkBuffer crc_tab_buf;
|
|
|
|
AVBufferPool *slice_state_pool;
|
|
AVBufferPool *tmp_data_pool;
|
|
AVBufferPool *slice_offset_pool;
|
|
AVBufferPool *slice_status_pool;
|
|
} FFv1VulkanDecodeContext;
|
|
|
|
typedef struct FFv1VkParameters {
|
|
uint32_t context_count[MAX_QUANT_TABLES];
|
|
|
|
VkDeviceAddress slice_data;
|
|
VkDeviceAddress slice_state;
|
|
VkDeviceAddress scratch_data;
|
|
|
|
uint32_t img_size[2];
|
|
uint32_t chroma_shift[2];
|
|
|
|
uint32_t plane_state_size;
|
|
uint32_t crcref;
|
|
|
|
uint8_t bits_per_raw_sample;
|
|
uint8_t quant_table_count;
|
|
uint8_t version;
|
|
uint8_t micro_version;
|
|
uint8_t key_frame;
|
|
uint8_t planes;
|
|
uint8_t codec_planes;
|
|
uint8_t color_planes;
|
|
uint8_t transparency;
|
|
uint8_t colorspace;
|
|
uint8_t ec;
|
|
uint8_t golomb;
|
|
uint8_t check_crc;
|
|
uint8_t padding[3];
|
|
} FFv1VkParameters;
|
|
|
|
static void add_push_data(FFVulkanShader *shd)
|
|
{
|
|
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
|
GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES);
|
|
GLSLC(0, );
|
|
GLSLC(1, u8buf slice_data; );
|
|
GLSLC(1, u8buf slice_state; );
|
|
GLSLC(1, u8buf scratch_data; );
|
|
GLSLC(0, );
|
|
GLSLC(1, uvec2 img_size; );
|
|
GLSLC(1, uvec2 chroma_shift; );
|
|
GLSLC(0, );
|
|
GLSLC(1, uint plane_state_size; );
|
|
GLSLC(1, uint32_t crcref; );
|
|
GLSLC(0, );
|
|
GLSLC(1, uint8_t bits_per_raw_sample; );
|
|
GLSLC(1, uint8_t quant_table_count; );
|
|
GLSLC(1, uint8_t version; );
|
|
GLSLC(1, uint8_t micro_version; );
|
|
GLSLC(1, uint8_t key_frame; );
|
|
GLSLC(1, uint8_t planes; );
|
|
GLSLC(1, uint8_t codec_planes; );
|
|
GLSLC(1, uint8_t color_planes; );
|
|
GLSLC(1, uint8_t transparency; );
|
|
GLSLC(1, uint8_t colorspace; );
|
|
GLSLC(1, uint8_t ec; );
|
|
GLSLC(1, uint8_t golomb; );
|
|
GLSLC(1, uint8_t check_crc; );
|
|
GLSLC(1, uint8_t padding[3]; );
|
|
GLSLC(0, }; );
|
|
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
|
|
VK_SHADER_STAGE_COMPUTE_BIT);
|
|
}
|
|
|
|
static int vk_ffv1_start_frame(AVCodecContext *avctx,
|
|
const AVBufferRef *buffer_ref,
|
|
av_unused const uint8_t *buffer,
|
|
av_unused uint32_t size)
|
|
{
|
|
int err;
|
|
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
|
|
FFVulkanDecodeShared *ctx = dec->shared_ctx;
|
|
FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
|
|
FFV1Context *f = avctx->priv_data;
|
|
|
|
FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
|
|
FFVulkanDecodePicture *vp = &fp->vp;
|
|
|
|
AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
|
|
enum AVPixelFormat sw_format = hwfc->sw_format;
|
|
|
|
int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
|
|
!(sw_format == AV_PIX_FMT_YA8);
|
|
|
|
fp->slice_num = 0;
|
|
|
|
for (int i = 0; i < f->quant_table_count; i++)
|
|
fp->max_context_count = FFMAX(f->context_count[i], fp->max_context_count);
|
|
|
|
/* Allocate slice buffer data */
|
|
if (f->ac == AC_GOLOMB_RICE)
|
|
fp->plane_state_size = 8;
|
|
else
|
|
fp->plane_state_size = CONTEXT_SIZE;
|
|
|
|
fp->plane_state_size *= fp->max_context_count;
|
|
fp->slice_state_size = fp->plane_state_size*f->plane_count;
|
|
|
|
fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */
|
|
fp->slice_state_size += fp->slice_data_size;
|
|
fp->slice_state_size = FFALIGN(fp->slice_state_size, 8);
|
|
|
|
fp->crc_checked = f->ec && (avctx->err_recognition & AV_EF_CRCCHECK);
|
|
|
|
/* Host map the input slices data if supported */
|
|
if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
|
|
err = ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data,
|
|
buffer_ref,
|
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
|
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
|
|
if (err < 0)
|
|
return err;
|
|
}
|
|
|
|
/* Allocate slice state data */
|
|
if (f->picture.f->flags & AV_FRAME_FLAG_KEY) {
|
|
err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_state_pool,
|
|
&fp->slice_state,
|
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
|
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
|
NULL, f->max_slice_count*fp->slice_state_size,
|
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
|
if (err < 0)
|
|
return err;
|
|
} else {
|
|
FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private;
|
|
fp->slice_state = av_buffer_ref(fpl->slice_state);
|
|
if (!fp->slice_state)
|
|
return AVERROR(ENOMEM);
|
|
}
|
|
|
|
/* Allocate temporary data buffer */
|
|
err = ff_vk_get_pooled_buffer(&ctx->s, &fv->tmp_data_pool,
|
|
&fp->tmp_data,
|
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
|
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
|
NULL, f->max_slice_count*CONTEXT_SIZE,
|
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
/* Allocate slice offsets buffer */
|
|
err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool,
|
|
&fp->slice_offset_buf,
|
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
|
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
|
NULL, 2*f->max_slice_count*sizeof(uint32_t),
|
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
/* Allocate slice status buffer */
|
|
err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_status_pool,
|
|
&fp->slice_status_buf,
|
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
|
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
|
NULL, f->max_slice_count*sizeof(uint32_t),
|
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
/* Prepare frame to be used */
|
|
err = ff_vk_decode_prepare_frame_sdr(dec, f->picture.f, vp, 1,
|
|
FF_VK_REP_NATIVE, 0);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
/* Create a temporaty frame for RGB */
|
|
if (is_rgb) {
|
|
vp->dpb_frame = av_frame_alloc();
|
|
if (!vp->dpb_frame)
|
|
return AVERROR(ENOMEM);
|
|
|
|
err = av_hwframe_get_buffer(fv->intermediate_frames_ref[f->use32bit],
|
|
vp->dpb_frame, 0);
|
|
if (err < 0)
|
|
return err;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int vk_ffv1_decode_slice(AVCodecContext *avctx,
|
|
const uint8_t *data,
|
|
uint32_t size)
|
|
{
|
|
FFV1Context *f = avctx->priv_data;
|
|
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
|
|
FFVulkanDecodeShared *ctx = dec->shared_ctx;
|
|
|
|
FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
|
|
FFVulkanDecodePicture *vp = &fp->vp;
|
|
|
|
FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
|
|
|
|
if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
|
|
FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
|
|
AV_WN32(slice_offset->mapped_mem + (2*fp->slice_num + 0)*sizeof(uint32_t),
|
|
data - slices_buf->mapped_mem);
|
|
AV_WN32(slice_offset->mapped_mem + (2*fp->slice_num + 1)*sizeof(uint32_t),
|
|
size);
|
|
|
|
fp->slice_num++;
|
|
} else {
|
|
int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
|
|
&fp->slice_num,
|
|
(const uint32_t **)&fp->slice_offset);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
AV_WN32(slice_offset->mapped_mem + (2*(fp->slice_num - 1) + 0)*sizeof(uint32_t),
|
|
fp->slice_offset[fp->slice_num - 1]);
|
|
AV_WN32(slice_offset->mapped_mem + (2*(fp->slice_num - 1) + 1)*sizeof(uint32_t),
|
|
size);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
|
{
|
|
int err;
|
|
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
|
|
FFVulkanDecodeShared *ctx = dec->shared_ctx;
|
|
FFVulkanFunctions *vk = &ctx->s.vkfn;
|
|
|
|
FFV1Context *f = avctx->priv_data;
|
|
FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
|
|
FFv1VkParameters pd;
|
|
FFv1VkResetParameters pd_reset;
|
|
|
|
AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
|
|
enum AVPixelFormat sw_format = hwfc->sw_format;
|
|
|
|
int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8;
|
|
int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
|
|
!(sw_format == AV_PIX_FMT_YA8);
|
|
int color_planes = av_pix_fmt_desc_get(avctx->sw_pix_fmt)->nb_components;
|
|
|
|
FFVulkanShader *reset_shader;
|
|
FFVulkanShader *decode_shader;
|
|
|
|
FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
|
|
FFVulkanDecodePicture *vp = &fp->vp;
|
|
|
|
FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
|
|
FFVkBuffer *slice_state = (FFVkBuffer *)fp->slice_state->data;
|
|
FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
|
|
FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data;
|
|
|
|
FFVkBuffer *tmp_data = (FFVkBuffer *)fp->tmp_data->data;
|
|
VkImageView rct_image_views[AV_NUM_DATA_POINTERS];
|
|
|
|
AVFrame *decode_dst = is_rgb ? vp->dpb_frame : f->picture.f;
|
|
VkImageView *decode_dst_view = is_rgb ? rct_image_views : vp->view.out;
|
|
|
|
VkImageMemoryBarrier2 img_bar[37];
|
|
int nb_img_bar = 0;
|
|
VkBufferMemoryBarrier2 buf_bar[8];
|
|
int nb_buf_bar = 0;
|
|
|
|
FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
|
|
ff_vk_exec_start(&ctx->s, exec);
|
|
|
|
/* Prepare deps */
|
|
RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f->picture.f,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
|
|
|
err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
|
|
f->picture.f);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
if (is_rgb) {
|
|
RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
|
RET(ff_vk_create_imageviews(&ctx->s, exec, rct_image_views,
|
|
vp->dpb_frame, FF_VK_REP_NATIVE));
|
|
}
|
|
|
|
if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) {
|
|
FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private;
|
|
FFVulkanDecodePicture *vpl = &fpl->vp;
|
|
|
|
/* Wait on the previous frame */
|
|
RET(ff_vk_exec_add_dep_wait_sem(&ctx->s, exec, vpl->sem, vpl->sem_value,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT));
|
|
}
|
|
|
|
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_state, 1, 1));
|
|
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_status_buf, 1, 1));
|
|
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
|
|
vp->slices_buf = NULL;
|
|
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0));
|
|
fp->slice_offset_buf = NULL;
|
|
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->tmp_data, 1, 0));
|
|
fp->tmp_data = NULL;
|
|
|
|
/* Entry barrier for the slice state */
|
|
if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) {
|
|
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
|
|
.srcStageMask = slice_state->stage,
|
|
.dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
.srcAccessMask = slice_state->access,
|
|
.dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
|
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.buffer = slice_state->buf,
|
|
.offset = 0,
|
|
.size = VK_WHOLE_SIZE,
|
|
};
|
|
}
|
|
|
|
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
.pBufferMemoryBarriers = buf_bar,
|
|
.bufferMemoryBarrierCount = nb_buf_bar,
|
|
});
|
|
if (nb_buf_bar) {
|
|
slice_state->stage = buf_bar[1].dstStageMask;
|
|
slice_state->access = buf_bar[1].dstAccessMask;
|
|
nb_buf_bar = 0;
|
|
}
|
|
|
|
/* Setup shader */
|
|
ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
|
|
1, 0, 0,
|
|
slice_state,
|
|
0, fp->slice_data_size*f->slice_count,
|
|
VK_FORMAT_UNDEFINED);
|
|
ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
|
|
1, 1, 0,
|
|
slice_offset,
|
|
0, 2*f->slice_count*sizeof(uint32_t),
|
|
VK_FORMAT_UNDEFINED);
|
|
ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
|
|
1, 2, 0,
|
|
slice_status,
|
|
0, f->slice_count*sizeof(uint32_t),
|
|
VK_FORMAT_UNDEFINED);
|
|
|
|
ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
|
|
pd = (FFv1VkParameters) {
|
|
/* context_count */
|
|
|
|
.slice_data = slices_buf->address,
|
|
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
|
|
.scratch_data = tmp_data->address,
|
|
|
|
.img_size[0] = f->picture.f->width,
|
|
.img_size[1] = f->picture.f->height,
|
|
.chroma_shift[0] = f->chroma_h_shift,
|
|
.chroma_shift[1] = f->chroma_v_shift,
|
|
|
|
.plane_state_size = fp->plane_state_size,
|
|
.crcref = f->crcref,
|
|
|
|
.bits_per_raw_sample = bits,
|
|
.quant_table_count = f->quant_table_count,
|
|
.version = f->version,
|
|
.micro_version = f->micro_version,
|
|
.key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
|
|
.planes = av_pix_fmt_count_planes(sw_format),
|
|
.codec_planes = f->plane_count,
|
|
.color_planes = color_planes,
|
|
.transparency = f->transparency,
|
|
.colorspace = f->colorspace,
|
|
.ec = f->ec,
|
|
.golomb = f->ac == AC_GOLOMB_RICE,
|
|
.check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK),
|
|
};
|
|
for (int i = 0; i < MAX_QUANT_TABLES; i++)
|
|
pd.context_count[i] = f->context_count[i];
|
|
|
|
ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup,
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
0, sizeof(pd), &pd);
|
|
|
|
vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
|
|
|
|
/* Reset shader */
|
|
reset_shader = &fv->reset[f->ac == AC_GOLOMB_RICE];
|
|
ff_vk_shader_update_desc_buffer(&ctx->s, exec, reset_shader,
|
|
1, 0, 0,
|
|
slice_state,
|
|
0, fp->slice_data_size*f->slice_count,
|
|
VK_FORMAT_UNDEFINED);
|
|
|
|
ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader);
|
|
|
|
pd_reset = (FFv1VkResetParameters) {
|
|
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
|
|
.plane_state_size = fp->plane_state_size,
|
|
.context_count = fp->max_context_count,
|
|
.codec_planes = f->plane_count,
|
|
.key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
|
|
.version = f->version,
|
|
.micro_version = f->micro_version,
|
|
};
|
|
ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader,
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
0, sizeof(pd_reset), &pd_reset);
|
|
|
|
/* Sync between setup and reset shaders */
|
|
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
|
|
.srcStageMask = slice_state->stage,
|
|
.dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
.srcAccessMask = slice_state->access,
|
|
.dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
|
|
VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
|
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.buffer = slice_state->buf,
|
|
.offset = 0,
|
|
.size = fp->slice_data_size*f->slice_count,
|
|
};
|
|
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
.pBufferMemoryBarriers = buf_bar,
|
|
.bufferMemoryBarrierCount = nb_buf_bar,
|
|
});
|
|
slice_state->stage = buf_bar[0].dstStageMask;
|
|
slice_state->access = buf_bar[0].dstAccessMask;
|
|
nb_buf_bar = 0;
|
|
|
|
vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices,
|
|
f->plane_count);
|
|
|
|
/* Decode */
|
|
decode_shader = &fv->decode[f->use32bit][f->ac == AC_GOLOMB_RICE][is_rgb];
|
|
ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
|
|
1, 0, 0,
|
|
slice_state,
|
|
0, fp->slice_data_size*f->slice_count,
|
|
VK_FORMAT_UNDEFINED);
|
|
ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
|
|
decode_dst, decode_dst_view,
|
|
1, 1,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_NULL_HANDLE);
|
|
|
|
ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
|
|
ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
0, sizeof(pd), &pd);
|
|
|
|
/* Sync between reset and decode shaders */
|
|
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
|
|
.srcStageMask = slice_state->stage,
|
|
.dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
.srcAccessMask = slice_state->access,
|
|
.dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
|
|
VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
|
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
.buffer = slice_state->buf,
|
|
.offset = fp->slice_data_size*f->slice_count,
|
|
.size = slice_state->size - fp->slice_data_size*f->slice_count,
|
|
};
|
|
|
|
/* Input frame barrier */
|
|
ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_QUEUE_FAMILY_IGNORED);
|
|
|
|
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
.pImageMemoryBarriers = img_bar,
|
|
.imageMemoryBarrierCount = nb_img_bar,
|
|
.pBufferMemoryBarriers = buf_bar,
|
|
.bufferMemoryBarrierCount = nb_buf_bar,
|
|
});
|
|
slice_state->stage = buf_bar[0].dstStageMask;
|
|
slice_state->access = buf_bar[0].dstAccessMask;
|
|
nb_img_bar = 0;
|
|
nb_buf_bar = 0;
|
|
|
|
vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
|
|
|
|
/* RCT */
|
|
if (is_rgb) {
|
|
FFVulkanShader *rct_shader = &fv->rct[f->use32bit];
|
|
FFv1VkRCTParameters pd_rct;
|
|
|
|
ff_vk_shader_update_desc_buffer(&ctx->s, exec, rct_shader,
|
|
1, 0, 0,
|
|
slice_state,
|
|
0, fp->slice_data_size*f->slice_count,
|
|
VK_FORMAT_UNDEFINED);
|
|
ff_vk_shader_update_img_array(&ctx->s, exec, rct_shader,
|
|
decode_dst, decode_dst_view,
|
|
1, 1,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_NULL_HANDLE);
|
|
ff_vk_shader_update_img_array(&ctx->s, exec, rct_shader,
|
|
f->picture.f, vp->view.out,
|
|
1, 2,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_NULL_HANDLE);
|
|
|
|
ff_vk_exec_bind_shader(&ctx->s, exec, rct_shader);
|
|
|
|
pd_rct = (FFv1VkRCTParameters) {
|
|
.offset = 1 << bits,
|
|
.bits = bits,
|
|
.planar_rgb = ff_vk_mt_is_np_rgb(sw_format) &&
|
|
(ff_vk_count_images((AVVkFrame *)f->picture.f->data[0]) > 1),
|
|
.color_planes = color_planes,
|
|
.transparency = f->transparency,
|
|
};
|
|
|
|
/* For some reason the C FFv1 encoder/decoder treats these differently */
|
|
if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 ||
|
|
sw_format == AV_PIX_FMT_GBRP14)
|
|
memcpy(pd_rct.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int));
|
|
else if (sw_format == AV_PIX_FMT_X2BGR10)
|
|
memcpy(pd_rct.fmt_lut, (int [4]) { 0, 2, 1, 3 }, 4*sizeof(int));
|
|
else
|
|
ff_vk_set_perm(sw_format, pd_rct.fmt_lut, 0);
|
|
|
|
ff_vk_shader_update_push_const(&ctx->s, exec, rct_shader,
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
0, sizeof(pd_rct), &pd_rct);
|
|
|
|
ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_SHADER_READ_BIT,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_QUEUE_FAMILY_IGNORED);
|
|
ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_SHADER_WRITE_BIT,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_QUEUE_FAMILY_IGNORED);
|
|
|
|
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
.pImageMemoryBarriers = img_bar,
|
|
.imageMemoryBarrierCount = nb_img_bar,
|
|
});
|
|
nb_img_bar = 0;
|
|
|
|
vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
|
|
}
|
|
|
|
err = ff_vk_exec_submit(&ctx->s, exec);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
/* We don't need the temporary frame after decoding */
|
|
av_frame_free(&vp->dpb_frame);
|
|
|
|
fail:
|
|
return 0;
|
|
}
|
|
|
|
static void define_shared_code(FFVulkanShader *shd, int use32bit)
|
|
{
|
|
int smp_bits = use32bit ? 32 : 16;
|
|
|
|
GLSLC(0, #define DECODE );
|
|
|
|
av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE);
|
|
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK);
|
|
|
|
GLSLF(0, #define TYPE int%i_t ,smp_bits);
|
|
GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits);
|
|
GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits);
|
|
GLSLD(ff_source_rangecoder_comp);
|
|
GLSLD(ff_source_ffv1_common_comp);
|
|
}
|
|
|
|
static int init_setup_shader(FFV1Context *f, FFVulkanContext *s,
|
|
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
|
|
FFVulkanShader *shd)
|
|
{
|
|
int err;
|
|
FFVulkanDescriptorSetBinding *desc_set;
|
|
|
|
uint8_t *spv_data;
|
|
size_t spv_len;
|
|
void *spv_opaque = NULL;
|
|
|
|
RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup",
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
(const char *[]) { "GL_EXT_buffer_reference",
|
|
"GL_EXT_buffer_reference2" }, 2,
|
|
1, 1, 1,
|
|
0));
|
|
|
|
/* Common codec header */
|
|
GLSLD(ff_source_common_comp);
|
|
|
|
add_push_data(shd);
|
|
|
|
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
|
|
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
|
|
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
|
|
|
|
desc_set = (FFVulkanDescriptorSetBinding []) {
|
|
{
|
|
.name = "rangecoder_static_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.mem_layout = "scalar",
|
|
.buf_content = "uint8_t zero_one_state[512];",
|
|
},
|
|
{
|
|
.name = "crc_ieee_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.mem_layout = "scalar",
|
|
.buf_content = "uint32_t crc_ieee[256];",
|
|
},
|
|
{
|
|
.name = "quant_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.mem_layout = "scalar",
|
|
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
|
|
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
|
|
},
|
|
};
|
|
|
|
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 1, 0));
|
|
|
|
define_shared_code(shd, 0 /* Irrelevant */);
|
|
|
|
desc_set = (FFVulkanDescriptorSetBinding []) {
|
|
{
|
|
.name = "slice_data_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.buf_content = "SliceContext slice_ctx",
|
|
.buf_elems = f->max_slice_count,
|
|
},
|
|
{
|
|
.name = "slice_offsets_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.mem_quali = "readonly",
|
|
.buf_content = "uint32_t slice_offsets",
|
|
.buf_elems = 2*f->max_slice_count,
|
|
},
|
|
{
|
|
.name = "slice_status_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.mem_quali = "writeonly",
|
|
.buf_content = "uint32_t slice_crc_mismatch",
|
|
.buf_elems = f->max_slice_count,
|
|
},
|
|
};
|
|
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0));
|
|
|
|
GLSLD(ff_source_ffv1_dec_setup_comp);
|
|
|
|
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
|
|
&spv_opaque));
|
|
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
|
|
|
|
RET(ff_vk_shader_register_exec(s, pool, shd));
|
|
|
|
fail:
|
|
if (spv_opaque)
|
|
spv->free_shader(spv, &spv_opaque);
|
|
|
|
return err;
|
|
}
|
|
|
|
static int init_reset_shader(FFV1Context *f, FFVulkanContext *s,
|
|
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
|
|
FFVulkanShader *shd, int ac)
|
|
{
|
|
int err;
|
|
FFVulkanDescriptorSetBinding *desc_set;
|
|
|
|
uint8_t *spv_data;
|
|
size_t spv_len;
|
|
void *spv_opaque = NULL;
|
|
int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
|
|
|
|
RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset",
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
(const char *[]) { "GL_EXT_buffer_reference",
|
|
"GL_EXT_buffer_reference2" }, 2,
|
|
wg_dim, 1, 1,
|
|
0));
|
|
|
|
if (ac == AC_GOLOMB_RICE)
|
|
av_bprintf(&shd->src, "#define GOLOMB\n");
|
|
|
|
/* Common codec header */
|
|
GLSLD(ff_source_common_comp);
|
|
|
|
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
|
GLSLC(1, u8buf slice_state; );
|
|
GLSLC(1, uint plane_state_size; );
|
|
GLSLC(1, uint context_count; );
|
|
GLSLC(1, uint8_t codec_planes; );
|
|
GLSLC(1, uint8_t key_frame; );
|
|
GLSLC(1, uint8_t version; );
|
|
GLSLC(1, uint8_t micro_version; );
|
|
GLSLC(1, uint8_t padding[1]; );
|
|
GLSLC(0, }; );
|
|
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
|
|
VK_SHADER_STAGE_COMPUTE_BIT);
|
|
|
|
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
|
|
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
|
|
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
|
|
|
|
desc_set = (FFVulkanDescriptorSetBinding []) {
|
|
{
|
|
.name = "rangecoder_static_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.mem_layout = "scalar",
|
|
.buf_content = "uint8_t zero_one_state[512];",
|
|
},
|
|
{
|
|
.name = "quant_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.mem_layout = "scalar",
|
|
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
|
|
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
|
|
},
|
|
};
|
|
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
|
|
|
|
define_shared_code(shd, 0 /* Bit depth irrelevant for the reset shader */);
|
|
if (ac == AC_GOLOMB_RICE)
|
|
GLSLD(ff_source_ffv1_vlc_comp);
|
|
|
|
desc_set = (FFVulkanDescriptorSetBinding []) {
|
|
{
|
|
.name = "slice_data_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.mem_quali = "readonly",
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.buf_content = "SliceContext slice_ctx",
|
|
.buf_elems = f->max_slice_count,
|
|
},
|
|
};
|
|
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0));
|
|
|
|
GLSLD(ff_source_ffv1_reset_comp);
|
|
|
|
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
|
|
&spv_opaque));
|
|
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
|
|
|
|
RET(ff_vk_shader_register_exec(s, pool, shd));
|
|
|
|
fail:
|
|
if (spv_opaque)
|
|
spv->free_shader(spv, &spv_opaque);
|
|
|
|
return err;
|
|
}
|
|
|
|
static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
|
|
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
|
|
FFVulkanShader *shd, AVHWFramesContext *frames_ctx,
|
|
int use32bit, int ac, int rgb)
|
|
{
|
|
int err;
|
|
FFVulkanDescriptorSetBinding *desc_set;
|
|
|
|
uint8_t *spv_data;
|
|
size_t spv_len;
|
|
void *spv_opaque = NULL;
|
|
|
|
RET(ff_vk_shader_init(s, shd, "ffv1_dec",
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
(const char *[]) { "GL_EXT_buffer_reference",
|
|
"GL_EXT_buffer_reference2" }, 2,
|
|
1, 1, 1,
|
|
0));
|
|
|
|
if (ac == AC_GOLOMB_RICE)
|
|
av_bprintf(&shd->src, "#define GOLOMB\n");
|
|
|
|
if (rgb)
|
|
av_bprintf(&shd->src, "#define RGB\n");
|
|
|
|
/* Common codec header */
|
|
GLSLD(ff_source_common_comp);
|
|
|
|
add_push_data(shd);
|
|
|
|
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
|
|
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
|
|
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
|
|
|
|
desc_set = (FFVulkanDescriptorSetBinding []) {
|
|
{
|
|
.name = "rangecoder_static_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.mem_layout = "scalar",
|
|
.buf_content = "uint8_t zero_one_state[512];",
|
|
},
|
|
{
|
|
.name = "quant_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.mem_layout = "scalar",
|
|
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
|
|
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
|
|
},
|
|
};
|
|
|
|
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
|
|
|
|
define_shared_code(shd, use32bit);
|
|
if (ac == AC_GOLOMB_RICE)
|
|
GLSLD(ff_source_ffv1_vlc_comp);
|
|
|
|
desc_set = (FFVulkanDescriptorSetBinding []) {
|
|
{
|
|
.name = "slice_data_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.buf_content = "SliceContext slice_ctx",
|
|
.buf_elems = f->max_slice_count,
|
|
},
|
|
{
|
|
.name = "dst",
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
|
.dimensions = 2,
|
|
.mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
|
|
FF_VK_REP_NATIVE),
|
|
.elems = av_pix_fmt_count_planes(frames_ctx->sw_format),
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
},
|
|
};
|
|
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
|
|
|
|
GLSLD(ff_source_ffv1_dec_comp);
|
|
|
|
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
|
|
&spv_opaque));
|
|
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
|
|
|
|
RET(ff_vk_shader_register_exec(s, pool, shd));
|
|
|
|
fail:
|
|
if (spv_opaque)
|
|
spv->free_shader(spv, &spv_opaque);
|
|
|
|
return err;
|
|
}
|
|
|
|
static int init_rct_shader(FFV1Context *f, FFVulkanContext *s,
|
|
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
|
|
FFVulkanShader *shd, int use32bit,
|
|
AVHWFramesContext *src_ctx, AVHWFramesContext *dst_ctx)
|
|
{
|
|
int err;
|
|
FFVulkanDescriptorSetBinding *desc_set;
|
|
|
|
uint8_t *spv_data;
|
|
size_t spv_len;
|
|
void *spv_opaque = NULL;
|
|
int wg_count = sqrt(s->props.properties.limits.maxComputeWorkGroupInvocations);
|
|
|
|
RET(ff_vk_shader_init(s, shd, "ffv1_rct",
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
(const char *[]) { "GL_EXT_buffer_reference",
|
|
"GL_EXT_buffer_reference2" }, 2,
|
|
wg_count, wg_count, 1,
|
|
0));
|
|
|
|
/* Common codec header */
|
|
GLSLD(ff_source_common_comp);
|
|
|
|
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
|
GLSLC(1, ivec4 fmt_lut; );
|
|
GLSLC(1, int offset; );
|
|
GLSLC(1, uint8_t bits; );
|
|
GLSLC(1, uint8_t planar_rgb; );
|
|
GLSLC(1, uint8_t color_planes; );
|
|
GLSLC(1, uint8_t transparency; );
|
|
GLSLC(1, uint8_t version; );
|
|
GLSLC(1, uint8_t micro_version; );
|
|
GLSLC(1, uint8_t padding[2]; );
|
|
GLSLC(0, }; );
|
|
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
|
|
VK_SHADER_STAGE_COMPUTE_BIT);
|
|
|
|
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
|
|
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
|
|
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
|
|
|
|
desc_set = (FFVulkanDescriptorSetBinding []) {
|
|
{
|
|
.name = "rangecoder_static_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.mem_layout = "scalar",
|
|
.buf_content = "uint8_t zero_one_state[512];",
|
|
},
|
|
{
|
|
.name = "quant_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.mem_layout = "scalar",
|
|
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
|
|
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
|
|
},
|
|
};
|
|
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
|
|
|
|
define_shared_code(shd, use32bit);
|
|
|
|
desc_set = (FFVulkanDescriptorSetBinding []) {
|
|
{
|
|
.name = "slice_data_buf",
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.mem_quali = "readonly",
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.buf_content = "SliceContext slice_ctx",
|
|
.buf_elems = f->max_slice_count,
|
|
},
|
|
{
|
|
.name = "src",
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
|
.dimensions = 2,
|
|
.mem_layout = ff_vk_shader_rep_fmt(src_ctx->sw_format,
|
|
FF_VK_REP_NATIVE),
|
|
.mem_quali = "readonly",
|
|
.elems = av_pix_fmt_count_planes(src_ctx->sw_format),
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
},
|
|
{
|
|
.name = "dst",
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
|
.dimensions = 2,
|
|
.mem_layout = ff_vk_shader_rep_fmt(dst_ctx->sw_format,
|
|
FF_VK_REP_NATIVE),
|
|
.mem_quali = "writeonly",
|
|
.elems = av_pix_fmt_count_planes(dst_ctx->sw_format),
|
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
},
|
|
};
|
|
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0));
|
|
|
|
GLSLD(ff_source_ffv1_dec_rct_comp);
|
|
|
|
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
|
|
&spv_opaque));
|
|
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
|
|
|
|
RET(ff_vk_shader_register_exec(s, pool, shd));
|
|
|
|
fail:
|
|
if (spv_opaque)
|
|
spv->free_shader(spv, &spv_opaque);
|
|
|
|
return err;
|
|
}
|
|
|
|
static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s,
|
|
AVBufferRef **dst, enum AVPixelFormat sw_format)
|
|
{
|
|
int err;
|
|
AVHWFramesContext *frames_ctx;
|
|
AVVulkanFramesContext *vk_frames;
|
|
|
|
*dst = av_hwframe_ctx_alloc(s->device_ref);
|
|
if (!(*dst))
|
|
return AVERROR(ENOMEM);
|
|
|
|
frames_ctx = (AVHWFramesContext *)((*dst)->data);
|
|
frames_ctx->format = AV_PIX_FMT_VULKAN;
|
|
frames_ctx->sw_format = sw_format;
|
|
frames_ctx->width = FFALIGN(s->frames->width, 32);
|
|
frames_ctx->height = FFALIGN(s->frames->height, 32);
|
|
|
|
vk_frames = frames_ctx->hwctx;
|
|
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
|
|
vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT;
|
|
vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
|
|
|
|
err = av_hwframe_ctx_init(*dst);
|
|
if (err < 0) {
|
|
av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n",
|
|
av_get_pix_fmt_name(sw_format), av_err2str(err));
|
|
av_buffer_unref(dst);
|
|
return err;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx)
|
|
{
|
|
FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
|
|
|
|
ff_vk_shader_free(&ctx->s, &fv->setup);
|
|
|
|
for (int i = 0; i < 2; i++) /* 16/32 bit */
|
|
av_buffer_unref(&fv->intermediate_frames_ref[i]);
|
|
|
|
for (int i = 0; i < 2; i++) /* AC/Golomb */
|
|
ff_vk_shader_free(&ctx->s, &fv->reset[i]);
|
|
|
|
for (int i = 0; i < 2; i++) /* 16/32 bit */
|
|
for (int j = 0; j < 2; j++) /* AC/Golomb */
|
|
for (int k = 0; k < 2; k++) /* Normal/RGB */
|
|
ff_vk_shader_free(&ctx->s, &fv->decode[i][j][k]);
|
|
|
|
for (int i = 0; i < 2; i++) /* 16/32 bit */
|
|
ff_vk_shader_free(&ctx->s, &fv->rct[i]);
|
|
|
|
ff_vk_free_buf(&ctx->s, &fv->quant_buf);
|
|
ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf);
|
|
ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf);
|
|
|
|
av_buffer_pool_uninit(&fv->tmp_data_pool);
|
|
av_buffer_pool_uninit(&fv->slice_state_pool);
|
|
av_buffer_pool_uninit(&fv->slice_offset_pool);
|
|
av_buffer_pool_uninit(&fv->slice_status_pool);
|
|
}
|
|
|
|
static int vk_decode_ffv1_init(AVCodecContext *avctx)
|
|
{
|
|
int err;
|
|
FFV1Context *f = avctx->priv_data;
|
|
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
|
|
FFVulkanDecodeShared *ctx = NULL;
|
|
FFv1VulkanDecodeContext *fv;
|
|
FFVkSPIRVCompiler *spv;
|
|
|
|
if (f->version < 3 ||
|
|
(f->version == 4 && f->micro_version > 3))
|
|
return AVERROR(ENOTSUP);
|
|
|
|
spv = ff_vk_spirv_init();
|
|
if (!spv) {
|
|
av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
|
|
return AVERROR_EXTERNAL;
|
|
}
|
|
|
|
err = ff_vk_decode_init(avctx);
|
|
if (err < 0)
|
|
return err;
|
|
ctx = dec->shared_ctx;
|
|
|
|
switch (ctx->s.driver_props.driverID) {
|
|
case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS:
|
|
case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA:
|
|
if (avctx->strict_std_compliance < FF_COMPLIANCE_UNOFFICIAL) {
|
|
av_log(avctx, AV_LOG_ERROR,
|
|
"Intel's drivers are unsupported, use -strict -1 to enable acceleration.\n");
|
|
return AVERROR(ENOTSUP);
|
|
} else {
|
|
av_log(avctx, AV_LOG_WARNING,
|
|
"Enabling acceleration on Intel's drivers.\n");
|
|
}
|
|
break;
|
|
};
|
|
|
|
fv = ctx->sd_ctx = av_mallocz(sizeof(*fv));
|
|
if (!fv) {
|
|
err = AVERROR(ENOMEM);
|
|
goto fail;
|
|
}
|
|
|
|
ctx->sd_ctx_free = &vk_decode_ffv1_uninit;
|
|
|
|
/* Intermediate frame pool for RCT */
|
|
for (int i = 0; i < 2; i++) { /* 16/32 bit */
|
|
err = init_indirect(avctx, &ctx->s, &fv->intermediate_frames_ref[i],
|
|
i ? AV_PIX_FMT_GBRAP32 : AV_PIX_FMT_GBRAP16);
|
|
if (err < 0)
|
|
return err;
|
|
}
|
|
|
|
/* Setup shader */
|
|
err = init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
/* Reset shaders */
|
|
for (int i = 0; i < 2; i++) { /* AC/Golomb */
|
|
err = init_reset_shader(f, &ctx->s, &ctx->exec_pool,
|
|
spv, &fv->reset[i], !i ? AC_RANGE_CUSTOM_TAB : 0);
|
|
if (err < 0)
|
|
return err;
|
|
}
|
|
|
|
/* Decode shaders */
|
|
for (int i = 0; i < 2; i++) { /* 16/32 bit */
|
|
for (int j = 0; j < 2; j++) { /* AC/Golomb */
|
|
for (int k = 0; k < 2; k++) { /* Normal/RGB */
|
|
AVHWFramesContext *frames_ctx;
|
|
frames_ctx = k ? (AVHWFramesContext *)fv->intermediate_frames_ref[i]->data :
|
|
(AVHWFramesContext *)avctx->hw_frames_ctx->data;
|
|
err = init_decode_shader(f, &ctx->s, &ctx->exec_pool,
|
|
spv, &fv->decode[i][j][k],
|
|
frames_ctx,
|
|
i,
|
|
!j ? AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE,
|
|
k);
|
|
if (err < 0)
|
|
return err;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* RCT shaders */
|
|
for (int i = 0; i < 2; i++) { /* 16/32 bit */
|
|
err = init_rct_shader(f, &ctx->s, &ctx->exec_pool,
|
|
spv, &fv->rct[i], i,
|
|
(AVHWFramesContext *)fv->intermediate_frames_ref[i]->data,
|
|
(AVHWFramesContext *)avctx->hw_frames_ctx->data);
|
|
if (err < 0)
|
|
return err;
|
|
}
|
|
|
|
/* Range coder data */
|
|
err = ff_ffv1_vk_init_state_transition_data(&ctx->s,
|
|
&fv->rangecoder_static_buf,
|
|
f);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
/* Quantization table data */
|
|
err = ff_ffv1_vk_init_quant_table_data(&ctx->s,
|
|
&fv->quant_buf,
|
|
f);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
/* CRC table buffer */
|
|
err = ff_ffv1_vk_init_crc_table_data(&ctx->s,
|
|
&fv->crc_tab_buf,
|
|
f);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
/* Update setup global descriptors */
|
|
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
|
|
&fv->setup, 0, 0, 0,
|
|
&fv->rangecoder_static_buf,
|
|
0, fv->rangecoder_static_buf.size,
|
|
VK_FORMAT_UNDEFINED));
|
|
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
|
|
&fv->setup, 0, 1, 0,
|
|
&fv->crc_tab_buf,
|
|
0, fv->crc_tab_buf.size,
|
|
VK_FORMAT_UNDEFINED));
|
|
|
|
/* Update decode global descriptors */
|
|
for (int i = 0; i < 2; i++) { /* 16/32 bit */
|
|
for (int j = 0; j < 2; j++) { /* AC/Golomb */
|
|
for (int k = 0; k < 2; k++) { /* Normal/RGB */
|
|
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
|
|
&fv->decode[i][j][k], 0, 0, 0,
|
|
&fv->rangecoder_static_buf,
|
|
0, fv->rangecoder_static_buf.size,
|
|
VK_FORMAT_UNDEFINED));
|
|
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
|
|
&fv->decode[i][j][k], 0, 1, 0,
|
|
&fv->quant_buf,
|
|
0, fv->quant_buf.size,
|
|
VK_FORMAT_UNDEFINED));
|
|
}
|
|
}
|
|
}
|
|
|
|
fail:
|
|
return err;
|
|
}
|
|
|
|
static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
|
|
{
|
|
AVHWDeviceContext *hwctx = _hwctx.nc;
|
|
|
|
FFv1VulkanDecodePicture *fp = data;
|
|
FFVulkanDecodePicture *vp = &fp->vp;
|
|
|
|
ff_vk_decode_free_frame(hwctx, vp);
|
|
|
|
if (fp->crc_checked) {
|
|
FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data;
|
|
for (int i = 0; i < fp->slice_num; i++) {
|
|
uint32_t crc_res;
|
|
crc_res = AV_RN32(slice_status->mapped_mem + i*sizeof(uint32_t));
|
|
if (crc_res != 0)
|
|
av_log(hwctx, AV_LOG_ERROR, "CRC mismatch in slice %i, res: 0x%x\n",
|
|
i, crc_res);
|
|
}
|
|
}
|
|
|
|
av_buffer_unref(&vp->slices_buf);
|
|
av_buffer_unref(&fp->slice_state);
|
|
av_buffer_unref(&fp->slice_offset_buf);
|
|
av_buffer_unref(&fp->slice_status_buf);
|
|
av_buffer_unref(&fp->tmp_data);
|
|
}
|
|
|
|
const FFHWAccel ff_ffv1_vulkan_hwaccel = {
|
|
.p.name = "ffv1_vulkan",
|
|
.p.type = AVMEDIA_TYPE_VIDEO,
|
|
.p.id = AV_CODEC_ID_FFV1,
|
|
.p.pix_fmt = AV_PIX_FMT_VULKAN,
|
|
.start_frame = &vk_ffv1_start_frame,
|
|
.decode_slice = &vk_ffv1_decode_slice,
|
|
.end_frame = &vk_ffv1_end_frame,
|
|
.free_frame_priv = &vk_ffv1_free_frame_priv,
|
|
.frame_priv_data_size = sizeof(FFv1VulkanDecodePicture),
|
|
.init = &vk_decode_ffv1_init,
|
|
.update_thread_context = &ff_vk_update_thread_context,
|
|
.decode_params = &ff_vk_params_invalidate,
|
|
.flush = &ff_vk_decode_flush,
|
|
.uninit = &ff_vk_decode_uninit,
|
|
.frame_params = &ff_vk_frame_params,
|
|
.priv_data_size = sizeof(FFVulkanDecodeContext),
|
|
.caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
|
|
};
|