swscale: Refactor XYZ+RGB state and add function hooks

Prepare for xyz12Torgb48 architecture-specific optimizations in
subsequent patches by:
 - Grouping XYZ+RGB gamma LUTs and 3x3 matrices into SwsColorXform
   (ctx->xyz2rgb and ctx->rgb2xyz), replacing scattered fields.
 - Dropping the unused last matrix column, which keeps SwsInternal the
   same size or makes it smaller.
 - Renaming ff_xyz12Torgb48 and ff_rgb48Toxyz12 to the static
   xyz12Torgb48_c and rgb48Toxyz12_c, and routing calls via the new
   per-context function pointers (ctx->xyz12Torgb48 and
   ctx->rgb48Toxyz12) in graph.c and swscale.c.
 - Adding ff_sws_init_xyzdsp and invoking it in swscale init paths
   (normal and unscaled).
 - Making fill_xyztables public (as ff_sws_fill_xyztables) to ease
   table setup later in checkasm.

These modifications do not introduce any functional changes.

Signed-off-by: Arpad Panyik <Arpad.Panyik@arm.com>
This commit is contained in:
Arpad Panyik
2025-11-26 16:35:11 +00:00
committed by Martin Storsjö
parent 9e038fd959
commit ef651b84ce
5 changed files with 95 additions and 74 deletions

View File

@@ -142,7 +142,8 @@ static void run_rgb0(const SwsImg *out, const SwsImg *in, int y, int h,
static void run_xyz2rgb(const SwsImg *out, const SwsImg *in, int y, int h, static void run_xyz2rgb(const SwsImg *out, const SwsImg *in, int y, int h,
const SwsPass *pass) const SwsPass *pass)
{ {
ff_xyz12Torgb48(pass->priv, out->data[0] + y * out->linesize[0], out->linesize[0], const SwsInternal *c = pass->priv;
c->xyz12Torgb48(c, out->data[0] + y * out->linesize[0], out->linesize[0],
in->data[0] + y * in->linesize[0], in->linesize[0], in->data[0] + y * in->linesize[0], in->linesize[0],
pass->width, h); pass->width, h);
} }
@@ -150,7 +151,8 @@ static void run_xyz2rgb(const SwsImg *out, const SwsImg *in, int y, int h,
static void run_rgb2xyz(const SwsImg *out, const SwsImg *in, int y, int h, static void run_rgb2xyz(const SwsImg *out, const SwsImg *in, int y, int h,
const SwsPass *pass) const SwsPass *pass)
{ {
ff_rgb48Toxyz12(pass->priv, out->data[0] + y * out->linesize[0], out->linesize[0], const SwsInternal *c = pass->priv;
c->rgb48Toxyz12(c, out->data[0] + y * out->linesize[0], out->linesize[0],
in->data[0] + y * in->linesize[0], in->linesize[0], in->data[0] + y * in->linesize[0], in->linesize[0],
pass->width, h); pass->width, h);
} }

View File

@@ -660,6 +660,8 @@ static av_cold void sws_init_swscale(SwsInternal *c)
{ {
enum AVPixelFormat srcFormat = c->opts.src_format; enum AVPixelFormat srcFormat = c->opts.src_format;
ff_sws_init_xyzdsp(c);
ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX, ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
&c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2nv12cX, &c->yuv2packed1,
&c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX); &c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);
@@ -737,8 +739,8 @@ static int check_image_pointers(const uint8_t * const data[4], enum AVPixelForma
return 1; return 1;
} }
void ff_xyz12Torgb48(const SwsInternal *c, uint8_t *dst, int dst_stride, static void xyz12Torgb48_c(const SwsInternal *c, uint8_t *dst, int dst_stride,
const uint8_t *src, int src_stride, int w, int h) const uint8_t *src, int src_stride, int w, int h)
{ {
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->opts.src_format); const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->opts.src_format);
@@ -759,20 +761,20 @@ void ff_xyz12Torgb48(const SwsInternal *c, uint8_t *dst, int dst_stride,
z = AV_RL16(src16 + xp + 2); z = AV_RL16(src16 + xp + 2);
} }
x = c->xyzgamma[x >> 4]; x = c->xyz2rgb.gamma.in[x >> 4];
y = c->xyzgamma[y >> 4]; y = c->xyz2rgb.gamma.in[y >> 4];
z = c->xyzgamma[z >> 4]; z = c->xyz2rgb.gamma.in[z >> 4];
// convert from XYZlinear to sRGBlinear // convert from XYZlinear to sRGBlinear
r = c->xyz2rgb_matrix[0][0] * x + r = c->xyz2rgb.mat[0][0] * x +
c->xyz2rgb_matrix[0][1] * y + c->xyz2rgb.mat[0][1] * y +
c->xyz2rgb_matrix[0][2] * z >> 12; c->xyz2rgb.mat[0][2] * z >> 12;
g = c->xyz2rgb_matrix[1][0] * x + g = c->xyz2rgb.mat[1][0] * x +
c->xyz2rgb_matrix[1][1] * y + c->xyz2rgb.mat[1][1] * y +
c->xyz2rgb_matrix[1][2] * z >> 12; c->xyz2rgb.mat[1][2] * z >> 12;
b = c->xyz2rgb_matrix[2][0] * x + b = c->xyz2rgb.mat[2][0] * x +
c->xyz2rgb_matrix[2][1] * y + c->xyz2rgb.mat[2][1] * y +
c->xyz2rgb_matrix[2][2] * z >> 12; c->xyz2rgb.mat[2][2] * z >> 12;
// limit values to 16-bit depth // limit values to 16-bit depth
r = av_clip_uint16(r); r = av_clip_uint16(r);
@@ -781,13 +783,13 @@ void ff_xyz12Torgb48(const SwsInternal *c, uint8_t *dst, int dst_stride,
// convert from sRGBlinear to RGB and scale from 12bit to 16bit // convert from sRGBlinear to RGB and scale from 12bit to 16bit
if (desc->flags & AV_PIX_FMT_FLAG_BE) { if (desc->flags & AV_PIX_FMT_FLAG_BE) {
AV_WB16(dst16 + xp + 0, c->rgbgamma[r] << 4); AV_WB16(dst16 + xp + 0, c->xyz2rgb.gamma.out[r] << 4);
AV_WB16(dst16 + xp + 1, c->rgbgamma[g] << 4); AV_WB16(dst16 + xp + 1, c->xyz2rgb.gamma.out[g] << 4);
AV_WB16(dst16 + xp + 2, c->rgbgamma[b] << 4); AV_WB16(dst16 + xp + 2, c->xyz2rgb.gamma.out[b] << 4);
} else { } else {
AV_WL16(dst16 + xp + 0, c->rgbgamma[r] << 4); AV_WL16(dst16 + xp + 0, c->xyz2rgb.gamma.out[r] << 4);
AV_WL16(dst16 + xp + 1, c->rgbgamma[g] << 4); AV_WL16(dst16 + xp + 1, c->xyz2rgb.gamma.out[g] << 4);
AV_WL16(dst16 + xp + 2, c->rgbgamma[b] << 4); AV_WL16(dst16 + xp + 2, c->xyz2rgb.gamma.out[b] << 4);
} }
} }
@@ -796,8 +798,8 @@ void ff_xyz12Torgb48(const SwsInternal *c, uint8_t *dst, int dst_stride,
} }
} }
void ff_rgb48Toxyz12(const SwsInternal *c, uint8_t *dst, int dst_stride, static void rgb48Toxyz12_c(const SwsInternal *c, uint8_t *dst, int dst_stride,
const uint8_t *src, int src_stride, int w, int h) const uint8_t *src, int src_stride, int w, int h)
{ {
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->opts.dst_format); const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->opts.dst_format);
@@ -818,20 +820,20 @@ void ff_rgb48Toxyz12(const SwsInternal *c, uint8_t *dst, int dst_stride,
b = AV_RL16(src16 + xp + 2); b = AV_RL16(src16 + xp + 2);
} }
r = c->rgbgammainv[r>>4]; r = c->rgb2xyz.gamma.in[r >> 4];
g = c->rgbgammainv[g>>4]; g = c->rgb2xyz.gamma.in[g >> 4];
b = c->rgbgammainv[b>>4]; b = c->rgb2xyz.gamma.in[b >> 4];
// convert from sRGBlinear to XYZlinear // convert from sRGBlinear to XYZlinear
x = c->rgb2xyz_matrix[0][0] * r + x = c->rgb2xyz.mat[0][0] * r +
c->rgb2xyz_matrix[0][1] * g + c->rgb2xyz.mat[0][1] * g +
c->rgb2xyz_matrix[0][2] * b >> 12; c->rgb2xyz.mat[0][2] * b >> 12;
y = c->rgb2xyz_matrix[1][0] * r + y = c->rgb2xyz.mat[1][0] * r +
c->rgb2xyz_matrix[1][1] * g + c->rgb2xyz.mat[1][1] * g +
c->rgb2xyz_matrix[1][2] * b >> 12; c->rgb2xyz.mat[1][2] * b >> 12;
z = c->rgb2xyz_matrix[2][0] * r + z = c->rgb2xyz.mat[2][0] * r +
c->rgb2xyz_matrix[2][1] * g + c->rgb2xyz.mat[2][1] * g +
c->rgb2xyz_matrix[2][2] * b >> 12; c->rgb2xyz.mat[2][2] * b >> 12;
// limit values to 16-bit depth // limit values to 16-bit depth
x = av_clip_uint16(x); x = av_clip_uint16(x);
@@ -840,13 +842,13 @@ void ff_rgb48Toxyz12(const SwsInternal *c, uint8_t *dst, int dst_stride,
// convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit // convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit
if (desc->flags & AV_PIX_FMT_FLAG_BE) { if (desc->flags & AV_PIX_FMT_FLAG_BE) {
AV_WB16(dst16 + xp + 0, c->xyzgammainv[x] << 4); AV_WB16(dst16 + xp + 0, c->rgb2xyz.gamma.out[x] << 4);
AV_WB16(dst16 + xp + 1, c->xyzgammainv[y] << 4); AV_WB16(dst16 + xp + 1, c->rgb2xyz.gamma.out[y] << 4);
AV_WB16(dst16 + xp + 2, c->xyzgammainv[z] << 4); AV_WB16(dst16 + xp + 2, c->rgb2xyz.gamma.out[z] << 4);
} else { } else {
AV_WL16(dst16 + xp + 0, c->xyzgammainv[x] << 4); AV_WL16(dst16 + xp + 0, c->rgb2xyz.gamma.out[x] << 4);
AV_WL16(dst16 + xp + 1, c->xyzgammainv[y] << 4); AV_WL16(dst16 + xp + 1, c->rgb2xyz.gamma.out[y] << 4);
AV_WL16(dst16 + xp + 2, c->xyzgammainv[z] << 4); AV_WL16(dst16 + xp + 2, c->rgb2xyz.gamma.out[z] << 4);
} }
} }
@@ -855,6 +857,12 @@ void ff_rgb48Toxyz12(const SwsInternal *c, uint8_t *dst, int dst_stride,
} }
} }
/**
 * Install the XYZ <-> RGB conversion routines on a context.
 *
 * Both per-context hooks are pointed at the plain C implementations
 * defined in this file (xyz12Torgb48_c and rgb48Toxyz12_c). Callers then
 * invoke the conversions through c->xyz12Torgb48 / c->rgb48Toxyz12 rather
 * than calling a fixed function directly.
 *
 * @param c context whose conversion function pointers are set
 */
av_cold void ff_sws_init_xyzdsp(SwsInternal *c)
{
c->xyz12Torgb48 = xyz12Torgb48_c;
c->rgb48Toxyz12 = rgb48Toxyz12_c;
}
void ff_update_palette(SwsInternal *c, const uint32_t *pal) void ff_update_palette(SwsInternal *c, const uint32_t *pal)
{ {
for (int i = 0; i < 256; i++) { for (int i = 0; i < 256; i++) {
@@ -1110,7 +1118,7 @@ static int scale_internal(SwsContext *sws,
base = srcStride[0] < 0 ? c->xyz_scratch - srcStride[0] * (srcSliceH-1) : base = srcStride[0] < 0 ? c->xyz_scratch - srcStride[0] * (srcSliceH-1) :
c->xyz_scratch; c->xyz_scratch;
ff_xyz12Torgb48(c, base, srcStride[0], src2[0], srcStride[0], sws->src_w, srcSliceH); c->xyz12Torgb48(c, base, srcStride[0], src2[0], srcStride[0], sws->src_w, srcSliceH);
src2[0] = base; src2[0] = base;
} }
@@ -1182,7 +1190,7 @@ static int scale_internal(SwsContext *sws,
} }
/* replace on the same data */ /* replace on the same data */
ff_rgb48Toxyz12(c, dst, dstStride2[0], dst, dstStride2[0], sws->dst_w, ret); c->rgb48Toxyz12(c, dst, dstStride2[0], dst, dstStride2[0], sws->dst_w, ret);
} }
/* reset slice direction at end of frame */ /* reset slice direction at end of frame */

View File

@@ -93,6 +93,19 @@ typedef int (*SwsFunc)(SwsInternal *c, const uint8_t *const src[],
const int srcStride[], int srcSliceY, int srcSliceH, const int srcStride[], int srcSliceY, int srcSliceH,
uint8_t *const dst[], const int dstStride[]); uint8_t *const dst[], const int dstStride[]);
/**
 * Signature shared by the XYZ12 -> RGB48 and RGB48 -> XYZ12 converters.
 *
 * @param c          context supplying the gamma LUTs and colour matrix
 * @param dst        destination buffer
 * @param dst_stride stride of dst (callers pass the plane linesize)
 * @param src        source buffer
 * @param src_stride stride of src (callers pass the plane linesize)
 * @param w          width of the region to convert
 * @param h          number of rows to convert
 */
typedef void (*SwsColorFunc)(const SwsInternal *c, uint8_t *dst, int dst_stride,
const uint8_t *src, int src_stride, int w, int h);
/**
 * Pair of gamma lookup tables used around a colour matrix multiply.
 */
typedef struct SwsLuts {
/* applied to each input sample before the matrix; indexed by (sample >> 4) */
uint16_t *in;
/* applied to each matrix result after clipping to 16 bits; indexed by that value */
uint16_t *out;
} SwsLuts;
/**
 * State for one direction of the XYZ <-> RGB conversion: the input/output
 * gamma tables plus the 3x3 colour matrix applied between them.
 */
typedef struct SwsColorXform {
SwsLuts gamma;
/* fixed-point coefficients; the converters shift each row sum right by 12 */
int16_t mat[3][3];
} SwsColorXform;
/** /**
* Write one line of horizontally scaled data to planar output * Write one line of horizontally scaled data to planar output
* without any additional vertical scaling (or point-scaling). * without any additional vertical scaling (or point-scaling).
@@ -547,12 +560,10 @@ struct SwsInternal {
/* pre defined color-spaces gamma */ /* pre defined color-spaces gamma */
#define XYZ_GAMMA (2.6) #define XYZ_GAMMA (2.6)
#define RGB_GAMMA (2.2) #define RGB_GAMMA (2.2)
uint16_t *xyzgamma; SwsColorFunc xyz12Torgb48;
uint16_t *rgbgamma; SwsColorFunc rgb48Toxyz12;
uint16_t *xyzgammainv; SwsColorXform xyz2rgb;
uint16_t *rgbgammainv; SwsColorXform rgb2xyz;
int16_t xyz2rgb_matrix[3][4];
int16_t rgb2xyz_matrix[3][4];
/* function pointers for swscale() */ /* function pointers for swscale() */
yuv2planar1_fn yuv2plane1; yuv2planar1_fn yuv2plane1;
@@ -720,6 +731,9 @@ av_cold void ff_sws_init_range_convert_loongarch(SwsInternal *c);
av_cold void ff_sws_init_range_convert_riscv(SwsInternal *c); av_cold void ff_sws_init_range_convert_riscv(SwsInternal *c);
av_cold void ff_sws_init_range_convert_x86(SwsInternal *c); av_cold void ff_sws_init_range_convert_x86(SwsInternal *c);
av_cold void ff_sws_init_xyzdsp(SwsInternal *c);
av_cold int ff_sws_fill_xyztables(SwsInternal *c);
SwsFunc ff_yuv2rgb_init_x86(SwsInternal *c); SwsFunc ff_yuv2rgb_init_x86(SwsInternal *c);
SwsFunc ff_yuv2rgb_init_ppc(SwsInternal *c); SwsFunc ff_yuv2rgb_init_ppc(SwsInternal *c);
SwsFunc ff_yuv2rgb_init_loongarch(SwsInternal *c); SwsFunc ff_yuv2rgb_init_loongarch(SwsInternal *c);
@@ -1043,12 +1057,6 @@ void ff_copyPlane(const uint8_t *src, int srcStride,
int srcSliceY, int srcSliceH, int width, int srcSliceY, int srcSliceH, int width,
uint8_t *dst, int dstStride); uint8_t *dst, int dstStride);
void ff_xyz12Torgb48(const SwsInternal *c, uint8_t *dst, int dst_stride,
const uint8_t *src, int src_stride, int w, int h);
void ff_rgb48Toxyz12(const SwsInternal *c, uint8_t *dst, int dst_stride,
const uint8_t *src, int src_stride, int w, int h);
static inline void fillPlane16(uint8_t *plane, int stride, int width, int height, int y, static inline void fillPlane16(uint8_t *plane, int stride, int width, int height, int y,
int alpha, int bits, const int big_endian) int alpha, int bits, const int big_endian)
{ {

View File

@@ -2685,6 +2685,8 @@ void ff_get_unscaled_swscale(SwsInternal *c)
} }
} }
ff_sws_init_xyzdsp(c);
#if ARCH_PPC #if ARCH_PPC
ff_get_unscaled_swscale_ppc(c); ff_get_unscaled_swscale_ppc(c);
#elif ARCH_ARM #elif ARCH_ARM

View File

@@ -719,36 +719,37 @@ static av_cold void init_xyz_tables(void)
} }
} }
static int fill_xyztables(SwsInternal *c) av_cold int ff_sws_fill_xyztables(SwsInternal *c)
{ {
static const int16_t xyz2rgb_matrix[3][4] = { static const int16_t xyz2rgb_matrix[3][3] = {
{13270, -6295, -2041}, {13270, -6295, -2041},
{-3969, 7682, 170}, {-3969, 7682, 170},
{ 228, -835, 4329} }; { 228, -835, 4329} };
static const int16_t rgb2xyz_matrix[3][4] = { static const int16_t rgb2xyz_matrix[3][3] = {
{1689, 1464, 739}, {1689, 1464, 739},
{ 871, 2929, 296}, { 871, 2929, 296},
{ 79, 488, 3891} }; { 79, 488, 3891} };
if (c->xyzgamma) if (c->xyz2rgb.gamma.in)
return 0; return 0;
memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix)); memcpy(c->xyz2rgb.mat, xyz2rgb_matrix, sizeof(c->xyz2rgb.mat));
memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix)); memcpy(c->rgb2xyz.mat, rgb2xyz_matrix, sizeof(c->rgb2xyz.mat));
#if CONFIG_SMALL #if CONFIG_SMALL
c->xyzgamma = av_malloc(sizeof(uint16_t) * 2 * (4096 + 65536)); c->xyz2rgb.gamma.in = av_malloc(sizeof(uint16_t) * 2 * (4096 + 65536));
if (!c->xyzgamma) if (!c->xyz2rgb.gamma.in)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
c->rgbgammainv = c->xyzgamma + 4096; c->rgb2xyz.gamma.in = c->xyz2rgb.gamma.in + 4096;
c->rgbgamma = c->rgbgammainv + 4096; c->xyz2rgb.gamma.out = c->rgb2xyz.gamma.in + 4096;
c->xyzgammainv = c->rgbgamma + 65536; c->rgb2xyz.gamma.out = c->xyz2rgb.gamma.out + 65536;
init_xyz_tables(c->xyzgamma, c->xyzgammainv, c->rgbgamma, c->rgbgammainv); init_xyz_tables(c->xyz2rgb.gamma.in, c->rgb2xyz.gamma.out,
c->xyz2rgb.gamma.out, c->rgb2xyz.gamma.in);
#else #else
c->xyzgamma = xyzgamma_tab; c->xyz2rgb.gamma.in = xyzgamma_tab;
c->rgbgamma = rgbgamma_tab; c->xyz2rgb.gamma.out = rgbgamma_tab;
c->xyzgammainv = xyzgammainv_tab; c->rgb2xyz.gamma.in = rgbgammainv_tab;
c->rgbgammainv = rgbgammainv_tab; c->rgb2xyz.gamma.out = xyzgammainv_tab;
static AVOnce xyz_init_static_once = AV_ONCE_INIT; static AVOnce xyz_init_static_once = AV_ONCE_INIT;
ff_thread_once(&xyz_init_static_once, init_xyz_tables); ff_thread_once(&xyz_init_static_once, init_xyz_tables);
@@ -822,7 +823,7 @@ static int handle_formats(SwsContext *sws)
c->srcXYZ |= handle_xyz(&sws->src_format); c->srcXYZ |= handle_xyz(&sws->src_format);
c->dstXYZ |= handle_xyz(&sws->dst_format); c->dstXYZ |= handle_xyz(&sws->dst_format);
if (c->srcXYZ || c->dstXYZ) if (c->srcXYZ || c->dstXYZ)
return fill_xyztables(c); return ff_sws_fill_xyztables(c);
else else
return 0; return 0;
} }
@@ -2312,7 +2313,7 @@ void sws_freeContext(SwsContext *sws)
av_freep(&c->gamma); av_freep(&c->gamma);
av_freep(&c->inv_gamma); av_freep(&c->inv_gamma);
#if CONFIG_SMALL #if CONFIG_SMALL
av_freep(&c->xyzgamma); av_freep(&c->xyz2rgb.gamma.in);
#endif #endif
av_freep(&c->rgb0_scratch); av_freep(&c->rgb0_scratch);