/*
 * FFv1 codec
 *
 * Copyright (c) 2024 Lynne
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* LADDR() maps a slice-local sample position to the position actually used to
 * address dec[]. Without RGB it is the identity. With RGB the row is wrapped
 * with (y & RGB_LBUF), i.e. only RGB_LINECACHE rows are ever addressed. */
#ifndef RGB
#define LADDR(p) (p)
#else
#define RGB_LINECACHE 2
#define RGB_LBUF (RGB_LINECACHE - 1)
#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF)))
#endif

#ifdef RGB
/* Computes the context and the prediction for the sample at slice-local
 * position `off` of plane `p` (RGB variant, rows addressed through LADDR()).
 *
 *   sp              base position added to every dec[] access
 *   off             slice-local sample position (x, y)
 *   sw              width of the line; clamps the top-right neighbour
 *   quant_table_idx quantization table set to use
 *
 * Returns ivec2(context, prediction). No explicit border checks are done
 * here; see the comments below for why they are skipped. */
ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
{
    /* In the first column the "left" neighbours are redirected one column
     * right and one row up */
    const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? ivec2(1, -1) : ivec2(0, 0);

    /* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */
    VTYPE3 top = VTYPE3(TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[0]),
                        TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(0, -1)))[0]),
                        TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[0]));

    /* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must
     * return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous
     * row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */
    TYPE cur = TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-1, 0) + yoff_border1))[0]);

    int base = quant_table[quant_table_idx][0][(cur    - top[0]) & MAX_QUANT_TABLE_MASK] +
               quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
               quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];

    /* Two extra context terms, only when enabled for this table set */
    if (expectEXT(extend_lookup[quant_table_idx] > 0, false)) {
        TYPE cur2 = TYPE(0);
        if (expectEXT(off.x > 0, true)) {
            /* Second column: (x - 2) does not exist, use (x - 1, y - 1) instead */
            const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0);
            cur2 = TYPE(imageLoad(dec[p], sp + LADDR(off + yoff_border2))[0]);
        }
        base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];

        /* top-2 became current upon swap */
        TYPE top2 = TYPE(imageLoad(dec[p], sp + LADDR(off))[0]);
        base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
    }

    /* context, prediction */
    return ivec2(base, predict(cur, VTYPE2(top)));
}
#else
/* Computes the context and the prediction for the sample at slice-local
 * position `off` of plane `p` (non-RGB variant).
 *
 *   sp              position of the slice origin in dec[p]
 *   off             slice-local sample position (x, y)
 *   sw              width of the line; clamps the top-right neighbour
 *   quant_table_idx quantization table set to use
 *
 * Neighbours that the checks below exclude are taken as zero.
 * Returns ivec2(context, prediction). */
ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
{
    /* In the first column the "left" neighbours are redirected one column
     * right and one row up */
    const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
    sp += off;

    /* top[0] = top-left, top[1] = top, top[2] = top-right */
    VTYPE3 top = VTYPE3(TYPE(0), TYPE(0), TYPE(0));
    if (off.y > 0 && off != ivec2(0, 1))
        top[0] = TYPE(imageLoad(dec[p], sp + ivec2(-1, -1) + yoff_border1)[0]);
    if (off.y > 0) {
        top[1] = TYPE(imageLoad(dec[p], sp + ivec2(0, -1))[0]);
        /* min() keeps the top-right load inside the line on the last column */
        top[2] = TYPE(imageLoad(dec[p], sp + ivec2(min(1, sw - off.x - 1), -1))[0]);
    }

    /* Left neighbour; zero for the very first sample of the slice */
    TYPE cur = TYPE(0);
    if (off != ivec2(0, 0))
        cur = TYPE(imageLoad(dec[p], sp + ivec2(-1, 0) + yoff_border1)[0]);

    int base = quant_table[quant_table_idx][0][(cur    - top[0]) & MAX_QUANT_TABLE_MASK] +
               quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
               quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];

    /* Two extra context terms, only when either extended table is in use
     * (detected through a non-zero entry at index 127) */
    if ((quant_table[quant_table_idx][3][127] != 0) ||
        (quant_table[quant_table_idx][4][127] != 0)) {
        TYPE cur2 = TYPE(0);
        if (off.x > 0 && off != ivec2(1, 0)) {
            /* Second column: (x - 2) does not exist, use (x - 1, y - 1) instead */
            const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
            cur2 = TYPE(imageLoad(dec[p], sp + ivec2(-2, 0) + yoff_border2)[0]);
        }
        base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];

        TYPE top2 = TYPE(0);
        if (off.y > 1)
            top2 = TYPE(imageLoad(dec[p], sp + ivec2(0, -2))[0]);
        base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
    }

    /* context, prediction */
    return ivec2(base, predict(cur, VTYPE2(top)));
}
#endif

#ifndef GOLOMB
/* READ(c, off) decodes one binary symbol with the state at index `off` of the
 * current context. With CACHED_SYMBOL_READER the states come from the shared
 * array filled in decode_line(); otherwise they are addressed directly in
 * slice_state at state_off + off (state_off must be in scope at the use site). */
#ifdef CACHED_SYMBOL_READER
shared uint8_t state[CONTEXT_SIZE];
#define READ(c, off) get_rac_direct(c, state[off])
#else
#define READ(c, off) get_rac(c, uint64_t(slice_state) + (state_off + off))
#endif

/* Decodes one signed integer symbol with the range coder.
 *
 *   c          range coder to read from
 *   state_off  offset of the context's states (used by the non-cached READ())
 *
 * State 0 is the zero flag, states 1..10 code the exponent, state 11 (e == 1)
 * or states 12..21 code the sign, states 22..31 code the mantissa bits.
 * Sets the global `corrupt` flag and returns 0 if the exponent reaches 33. */
int get_isymbol(inout RangeCoder c, uint state_off)
{
    if (READ(c, 0))
        return 0;

    /* Exponent: count of consecutive set bits, capped at 33 */
    uint e = 1;
    for (; e < 33; e++)
        if (!READ(c, min(e, 10)))
            break;

    if (expectEXT(e == 1, false)) {
        /* Magnitude 1, no mantissa bits to read */
        return READ(c, 11) ? -1 : 1;
    } else if (expectEXT(e == 33, false)) {
        corrupt = true;
        return 0;
    }

    /* Mantissa: e - 1 bits below an implicit leading one */
    int a = 1;
    for (uint i = e + 20; i >= 22; i--) {
        a <<= 1;
        a |= int(READ(c, min(i, 31)));
    }

    return READ(c, min(e + 10, 21)) ? -a : a;
}

/* Decodes one line of raw (PCM) samples of plane `p` into dec[p].
 * Each sample is read as `bits` equiprobable bits, most significant first.
 *
 *   sp    base position of the slice in dec[p]
 *   w, y  line width (before chroma shift) and slice-local row
 */
void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int bits)
{
#ifndef RGB
    /* Planes 1 and 2 are subsampled by chroma_shift */
    if (p > 0 && p < 3) {
        w >>= chroma_shift.x;
        sp >>= chroma_shift;
    }
#endif

    for (int x = 0; x < w; x++) {
        uint v = 0;
        for (int i = (bits - 1); i >= 0; i--)
            v |= uint(get_rac_equi(sc.c)) << i;

        imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));
    }
}

/* Decodes one range-coded line of plane `p` into dec[p].
 *
 *   sp              base position of the slice in dec[p]
 *   w, y            line width (before chroma shift) and slice-local row
 *   bits            sample width passed to zero_extend()
 *   state_off       offset of this plane's states inside slice_state
 *   quant_table_idx quantization table set for this plane
 *   run_index       unused here; decode_slice() passes the same argument
 *                   list to this and to the Golomb variant
 */
void decode_line(inout SliceContext sc, ivec2 sp, int w,
                 int y, int p, int bits, uint state_off,
                 uint8_t quant_table_idx, const int run_index)
{
#ifndef RGB
    /* Planes 1 and 2 are subsampled by chroma_shift */
    if (p > 0 && p < 3) {
        w >>= chroma_shift.x;
        sp >>= chroma_shift;
    }
#endif

    for (int x = 0; x < w; x++) {
        ivec2 pr = get_pred(sp, ivec2(x, y), p, w, quant_table_idx);

        uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]);
#ifdef CACHED_SYMBOL_READER
        /* Every invocation copies one byte of the context's states into the
         * shared cache.
         * NOTE(review): assumes the workgroup has at most CONTEXT_SIZE
         * invocations in x - confirm against the dispatch setup. */
        u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x);
        state[gl_LocalInvocationID.x] = sb.v;
        barrier();
        if (gl_LocalInvocationID.x == 0) {
#endif

            int diff = get_isymbol(sc.c, context_off);
            if (pr[0] < 0)
                diff = -diff;

            uint v = zero_extend(pr[1] + diff, bits);
            imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));

#ifdef CACHED_SYMBOL_READER
        }

        /* Invocation 0 must be done with state[] before anyone writes its
         * byte back, and before the next iteration refills the cache;
         * otherwise stale states are stored and the cache is overwritten
         * while still being read. */
        barrier();
        sb.v = state[gl_LocalInvocationID.x];
#endif
    }
}

#else /* GOLOMB */

/* Decodes one Golomb-Rice coded line of plane `p` into dec[p].
 *
 *   sp              base position of the slice in dec[p]
 *   w, y            line width (before chroma shift) and slice-local row
 *   bits            sample width passed to read_vlc_symbol()/zero_extend()
 *   state_off       offset of this plane's VLC states inside slice_state
 *   quant_table_idx quantization table set for this plane
 *   run_index       index into log2_run[]; updated and carried over to the
 *                   next call by the caller
 */
void decode_line(inout SliceContext sc, ivec2 sp, int w,
                 int y, int p, int bits, uint state_off,
                 uint8_t quant_table_idx, inout int run_index)
{
#ifndef RGB
    /* Planes 1 and 2 are subsampled by chroma_shift */
    if (p > 0 && p < 3) {
        w >>= chroma_shift.x;
        sp >>= chroma_shift;
    }
#endif

    int run_count = 0;
    int run_mode  = 0; /* 0: no run, 1: in a run, 2: final, partial run */

    for (int x = 0; x < w; x++) {
        int diff;
        ivec2 pr = get_pred(sp, ivec2(x, y), p, w, quant_table_idx);

        VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*abs(pr[0]));

        /* A zero context starts run mode */
        if (pr[0] == 0 && run_mode == 0)
            run_mode = 1;

        if (run_mode != 0) {
            if (run_count == 0 && run_mode == 1) {
                int tmp_idx = int(log2_run[run_index]);
                if (get_bit(sc.gb)) {
                    /* Full run of 1 << tmp_idx samples */
                    run_count = 1 << tmp_idx;
                    if (x + run_count <= w)
                        run_index++;
                } else {
                    /* Partial run; its length is coded explicitly */
                    if (tmp_idx != 0) {
                        run_count = int(get_bits(sc.gb, tmp_idx));
                    } else
                        run_count = 0;

                    if (run_index != 0)
                        run_index--;
                    run_mode = 2;
                }
            }

            run_count--;
            if (run_count < 0) {
                /* Run is over: the terminating sample is coded, and is never
                 * zero, so non-negative values are shifted up by one */
                run_mode  = 0;
                run_count = 0;
                diff = read_vlc_symbol(sc.gb, sb, bits);
                if (diff >= 0)
                    diff++;
            } else {
                diff = 0;
            }
        } else {
            diff = read_vlc_symbol(sc.gb, sb, bits);
        }

        if (pr[0] < 0)
            diff = -diff;

        uint v = zero_extend(pr[1] + diff, bits);
        imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));
    }
}
#endif

#ifdef RGB
/* Applies the inverse colour transform to one decoded pixel and reorders its
 * components through fmt_lut[].
 *
 *   pix       decoded sample values, as assembled by writeout_rgb()
 *   rct_coef  per-slice transform coefficients (x multiplies r, y multiplies b)
 */
ivec4 transform_sample(ivec4 pix, ivec2 rct_coef)
{
    pix.b -= rct_offset;
    pix.r -= rct_offset;
    pix.g -= (pix.b*rct_coef.y + pix.r*rct_coef.x) >> 2;
    pix.b += pix.g;
    pix.r += pix.g;
    return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],
                 pix[fmt_lut[2]], pix[fmt_lut[3]]);
}

/* Writes row `y` of the slice from the dec[] planes to the dst[] image(s).
 * The columns are distributed over the invocations of the workgroup.
 *
 *   sp         base position used for reading dec[] (row wrapped by LADDR())
 *   w, y       line width and slice-local row
 *   apply_rct  whether to run transform_sample() on each pixel
 */
void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
{
    for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) {
        ivec2 lpos = sp + LADDR(ivec2(x, y));
        ivec2 pos = sc.slice_pos + ivec2(x, y);

        /* Plane order in dec[] is g, b, r, (a).
         * NOTE(review): pix.a is left unassigned when transparency == 0 -
         * confirm that no destination format consumes it in that case. */
        ivec4 pix;
        pix.r = int(imageLoad(dec[2], lpos)[0]);
        pix.g = int(imageLoad(dec[0], lpos)[0]);
        pix.b = int(imageLoad(dec[1], lpos)[0]);
        if (transparency != 0)
            pix.a = int(imageLoad(dec[3], lpos)[0]);

        if (expectEXT(apply_rct, true))
            pix = transform_sample(pix, sc.slice_rct_coef);

        imageStore(dst[0], pos, pix);
        if (planar_rgb != 0) {
            /* Planar output: one component per destination image */
            for (int i = 1; i < color_planes; i++)
                imageStore(dst[i], pos, ivec4(pix[i]));
        }
    }
}
#endif

/* Decodes every plane of one slice.
 *
 *   sc         context of the slice to decode
 *   slice_idx  index of the slice; selects its range coder states
 *
 * Without RGB, the planes are decoded one after another. With RGB, all planes
 * of a row are decoded and the row is written out before the next one. */
void decode_slice(inout SliceContext sc, const uint slice_idx)
{
    int run_index = 0;
    int w = sc.slice_dim.x;
    ivec2 sp = sc.slice_pos;

#ifndef RGB
    int bits = bits_per_raw_sample;
#else
    /* NOTE(review): bits is 9 at this point, so `bits != 8` is always true
     * and the assignment below always runs; the test was possibly meant to
     * look at bits_per_raw_sample - confirm. */
    int bits = 9;
    if (bits != 8 || sc.slice_coding_mode != 0)
        bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);

    /* Each workgroup row uses its own RGB_LINECACHE rows of dec[] */
    sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE;
#endif

    /* PCM coding */
#ifndef GOLOMB
    if (sc.slice_coding_mode == 1) {
        /* Only the first invocation decodes (and writes out) PCM slices */
        if (gl_LocalInvocationID.x > 0)
            return;

#ifndef RGB
        for (int p = 0; p < planes; p++) {
            int h = sc.slice_dim.y;
            if (p > 0 && p < 3)
                h >>= chroma_shift.y;

            for (int y = 0; y < h; y++)
                decode_line_pcm(sc, sp, w, y, p, bits);
        }
#else
        for (int y = 0; y < sc.slice_dim.y; y++) {
            for (int p = 0; p < color_planes; p++)
                decode_line_pcm(sc, sp, w, y, p, bits);

            writeout_rgb(sc, sp, w, y, false);
        }
#endif
    } else

    /* Arithmetic coding */
#endif
    {
        /* Planes 1 and 2 share a quantization table set and a state block */
        u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
        u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size;

#ifndef RGB
        for (int p = 0; p < planes; p++) {
            int h = sc.slice_dim.y;
            if (p > 0 && p < 3)
                h >>= chroma_shift.y;

            for (int y = 0; y < h; y++)
                decode_line(sc, sp, w, y, p, bits,
                            slice_state_off[p], quant_table_idx[p], run_index);
        }
#else
        for (int y = 0; y < sc.slice_dim.y; y++) {
            for (int p = 0; p < color_planes; p++)
                decode_line(sc, sp, w, y, p, bits,
                            slice_state_off[p], quant_table_idx[p], run_index);

            writeout_rgb(sc, sp, w, y, true);
        }
#endif
    }
}

/* Entry point: one workgroup decodes one slice; the slice index is the
 * row-major index of the workgroup in the dispatch grid. */
void main(void)
{
    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
    decode_slice(slice_ctx[slice_idx], slice_idx);
}