mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-03-14 08:17:41 +01:00
Adds a vulkan implementation of the reference prores kostya encoder. Provides about 3-4x speedup over the CPU code
88 lines
3.0 KiB
GLSL
88 lines
3.0 KiB
GLSL
/*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#version 460
|
|
#pragma shader_stage(compute)
|
|
|
|
#extension GL_EXT_shader_image_load_formatted : require
|
|
#extension GL_EXT_scalar_block_layout : require
|
|
#extension GL_EXT_shader_explicit_arithmetic_types : require
|
|
|
|
layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) in;
|
|
|
|
layout (constant_id = 0) const int alpha_bits = 0;
|
|
layout (constant_id = 1) const int slices_per_row = 0;
|
|
layout (constant_id = 2) const int width_in_mb = 0;
|
|
layout (constant_id = 3) const int max_mbs_per_slice = 0;
|
|
|
|
struct SliceData {
|
|
uint mbs_per_slice;
|
|
int16_t coeffs[4][8 * 256];
|
|
};
|
|
|
|
layout (set = 0, binding = 0, scalar) writeonly buffer SliceBuffer {
|
|
SliceData slices[];
|
|
};
|
|
layout (set = 0, binding = 1) uniform readonly iimage2D plane;
|
|
|
|
/* Table of possible edge slice configurations */
|
|
const uvec3 edge_mps_table[8] = uvec3[](
|
|
uvec3(0, 0, 0),
|
|
uvec3(1, 0, 0),
|
|
uvec3(2, 0, 0),
|
|
uvec3(2, 1, 0),
|
|
uvec3(4, 0, 0),
|
|
uvec3(4, 1, 0),
|
|
uvec3(4, 2, 0),
|
|
uvec3(4, 2, 1)
|
|
);
|
|
|
|
void main()
|
|
{
|
|
ivec2 coord = min(ivec2(gl_GlobalInvocationID.xy), imageSize(plane) - ivec2(1));
|
|
uint16_t alpha = uint16_t(imageLoad(plane, coord).x);
|
|
|
|
if (alpha_bits == 8)
|
|
alpha >>= 2;
|
|
else
|
|
alpha = (alpha << 6) | (alpha >> 4);
|
|
|
|
uint mbs_per_slice = max_mbs_per_slice;
|
|
uint slices_width = width_in_mb / mbs_per_slice;
|
|
uint mb_width = slices_width * mbs_per_slice;
|
|
uint slice_x = gl_WorkGroupID.x / mbs_per_slice;
|
|
uint slice_y = gl_WorkGroupID.y;
|
|
uvec2 slice_base = uvec2(slice_x * mbs_per_slice * 16u, slice_y * 16u);
|
|
|
|
/* Handle slice macroblock size reduction on edge slices */
|
|
if (gl_WorkGroupID.x >= mb_width) {
|
|
uint edge_mb = gl_WorkGroupID.x - mb_width;
|
|
uvec3 table = edge_mps_table[width_in_mb - mb_width];
|
|
uvec3 base = uvec3(0, table.x, table.x + table.y);
|
|
uint edge_slice = edge_mb < base.y ? 0 : (edge_mb < base.z ? 1 : 2);
|
|
slice_x += edge_slice;
|
|
slice_base.x += base[edge_slice] * 16u;
|
|
mbs_per_slice = table[edge_slice];
|
|
}
|
|
|
|
uint slice = slice_y * slices_per_row + slice_x;
|
|
uvec2 coeff_coord = uvec2(coord) - slice_base;
|
|
uint coeff = coeff_coord.y * (mbs_per_slice * 16u) + coeff_coord.x;
|
|
slices[slice].coeffs[3][coeff] = int16_t(alpha);
|
|
}
|