Files
ffmpeg/libavcodec/vulkan/dpx_unpack.comp
Lynne 9b14ea0aa1 vulkan_dpx: fix alignment issue
12-bit images apparently require mod-32 alignment for each line.
Go figure.
2025-12-04 15:08:46 +01:00

87 lines
2.4 KiB
Plaintext

/*
* Copyright (c) 2025 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
 * Fetch one 32-bit word from the input buffer.
 *
 * off - index of the word inside data[] (in 32-bit units, not bytes)
 *
 * When BIG_ENDIAN is defined the word is passed through reverse4() before
 * being returned; otherwise it is returned exactly as stored.
 */
uint32_t read_data(uint off)
{
    uint32_t word = data[off];
#ifdef BIG_ENDIAN
    word = reverse4(word);
#endif
    return word;
}
#ifdef PACKED_10BIT
/**
 * Unpack one pixel stored as three 10-bit components in a single 32-bit word.
 *
 * pos    - pixel coordinates
 * stride - number of 32-bit words per line
 *
 * The components occupy bits 31..22, 21..12 and 11..2 of the word, in that
 * order; the two lowest bits are ignored. The fourth lane is always zero.
 */
i16vec4 parse_packed_in_32(ivec2 pos, int stride)
{
    const uint32_t word = read_data(pos.y*stride + pos.x);

    return i16vec4(int16_t((word >> 22) & 0x3FFu),
                   int16_t((word >> 12) & 0x3FFu),
                   int16_t((word >>  2) & 0x3FFu),
                   int16_t(0));
}
#else
/**
 * Unpack one pixel from a line of tightly bit-packed components
 * (BITS_PER_COMP bits each, COMPONENTS per pixel, no padding between them).
 *
 * pos    - pixel coordinates
 * stride - number of pixels per line
 *
 * Returns four consecutive BITS_PER_COMP-wide fields starting at the pixel,
 * each masked to BITS_PER_COMP bits. Lanes at index >= COMPONENTS therefore
 * contain whatever bits follow the pixel in the stream.
 */
i16vec4 parse_packed_in_32(ivec2 pos, int stride)
{
    /* Size of one line in bits, rounded UP to a whole number of 32-bit
     * words. The previous "line_size += line_size & 31" only produced a
     * multiple of 32 when the remainder was 0 or 16. */
    uint line_size = stride*BITS_PER_COMP*COMPONENTS;
    line_size = (line_size + 31u) & ~31u;

    /* need_align is declared outside this block; it is scaled by 8 here, so
     * it is presumably extra per-line padding in bytes - TODO confirm. */
    line_size += need_align << 3;

    /* Bit offsets of the line start and of the pixel within its line. */
    uint line_off = pos.y*line_size;
    uint pix_off = pos.x*BITS_PER_COMP*COMPONENTS;

    /* Index of the 32-bit word holding the first bit of the pixel. */
    uint off = (line_off + pix_off) >> 5;

    /* Bit position inside that word. NOTE(review): this only looks at
     * pix_off, so it relies on every line starting on a 32-bit boundary
     * (i.e. need_align << 3 being a multiple of 32) - confirm with caller. */
    uint bit = pix_off & 0x1f;

    /* Load two adjacent words so a pixel straddling a word boundary is
     * available in a single 64-bit value, then drop the leading bits. */
    uint32_t d0 = read_data(off + 0);
    uint32_t d1 = read_data(off + 1);
    uint64_t combined = (uint64_t(d1) << 32) | d0;
    combined >>= bit;

    return i16vec4(combined,
                   combined >> (BITS_PER_COMP*1),
                   combined >> (BITS_PER_COMP*2),
                   combined >> (BITS_PER_COMP*3)) &
           int16_t((1 << BITS_PER_COMP) - 1);
}
#endif
/**
 * Shader entry point: each invocation unpacks the pixel at its global
 * invocation ID and stores it to the destination image(s).
 *
 * Invocations for which IS_WITHIN() (defined elsewhere, presumably a bounds
 * check against the image size) is false return without writing anything.
 */
void main(void)
{
    const ivec2 coord = ivec2(gl_GlobalInvocationID.xy);
    const ivec2 dims = imageSize(dst[0]);

    if (!IS_WITHIN(coord, dims))
        return;

    const i16vec4 px = parse_packed_in_32(coord, dims.x);

#if NB_IMAGES == 1
    /* Single destination image: write all lanes at once. */
    imageStore(dst[0], coord, px);
#else
    /* Several destination images: component c is broadcast to all lanes and
     * written to image plane_of[c]. With one component everything goes to
     * image 0; otherwise components 0,1,2,3 go to images 2,0,1,3. */
    const ivec4 plane_of = COMPONENTS == 1 ? ivec4(0) : ivec4(2, 0, 1, 3);

    for (uint c = 0; c < COMPONENTS; c++) {
        imageStore(dst[plane_of[c]], coord, i16vec4(px[c]));
    }
#endif
}