Commit c6e6f60a84 for aom
commit c6e6f60a84c99e5fcc10523e3867e6a20d0140df
Author: Wan-Teh Chang <wtc@google.com>
Date: Fri Apr 10 15:54:33 2026 -0700
Make av1/decoder/grain_synthesis.c thread-safe
Move all mutable static variables into local variables in
add_film_grain_run() or other functions that use them.
Remove the unneeded grain_center variable. See the AV2 spec pull request
https://github.com/AOMediaCodec/av2-spec-internal/pull/1093.
Adapted from the patches by Chun-Min Chang of Mozilla.
Bug: 501044174
Change-Id: I27835e84d849d3a2146b4ba998dfa81ee084ab81
diff --git a/av1/decoder/grain_synthesis.c b/av1/decoder/grain_synthesis.c
index bbf5895ff9..a1ab1bf7ab 100644
--- a/av1/decoder/grain_synthesis.c
+++ b/av1/decoder/grain_synthesis.c
@@ -218,11 +218,8 @@ static const int gaussian_sequence[2048] = {
static const int gauss_bits = 11;
-static int luma_subblock_size_y = 32;
-static int luma_subblock_size_x = 32;
-
-static int chroma_subblock_size_y = 16;
-static int chroma_subblock_size_x = 16;
+static const int luma_subblock_size_y = 32;
+static const int luma_subblock_size_x = 32;
static const int min_luma_legal_range = 16;
static const int max_luma_legal_range = 235;
@@ -230,14 +227,15 @@ static const int max_luma_legal_range = 235;
static const int min_chroma_legal_range = 16;
static const int max_chroma_legal_range = 240;
-static int scaling_lut_y[256];
-static int scaling_lut_cb[256];
-static int scaling_lut_cr[256];
-
-static int grain_min;
-static int grain_max;
+typedef struct {
+ int y[256];
+ int cb[256];
+ int cr[256];
+} aom_grain_scaling_lut_t;
-static uint16_t random_register = 0; // random number generator register
+typedef struct {
+ uint16_t random_register; // random number generator register
+} aom_grain_rng_t;
static void dealloc_arrays(const aom_film_grain_t *params, int ***pred_pos_luma,
int ***pred_pos_chroma, int **luma_grain_block,
@@ -313,9 +311,7 @@ static bool init_arrays(const aom_film_grain_t *params, int luma_stride,
*cb_col_buf = NULL;
*cr_col_buf = NULL;
- memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256);
- memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256);
- memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256);
+ const int chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
int num_pos_chroma = num_pos_luma;
@@ -430,34 +426,36 @@ static bool init_arrays(const aom_film_grain_t *params, int luma_stride,
}
// get a number between 0 and 2^bits - 1
-static inline int get_random_number(int bits) {
+static inline int get_random_number(aom_grain_rng_t *rng, int bits) {
uint16_t bit;
- bit = ((random_register >> 0) ^ (random_register >> 1) ^
- (random_register >> 3) ^ (random_register >> 12)) &
+ bit = ((rng->random_register >> 0) ^ (rng->random_register >> 1) ^
+ (rng->random_register >> 3) ^ (rng->random_register >> 12)) &
1;
- random_register = (random_register >> 1) | (bit << 15);
- return (random_register >> (16 - bits)) & ((1 << bits) - 1);
+ rng->random_register = (rng->random_register >> 1) | (bit << 15);
+ return (rng->random_register >> (16 - bits)) & ((1 << bits) - 1);
}
-static void init_random_generator(int luma_line, uint16_t seed) {
+static void init_random_generator(aom_grain_rng_t *rng, int luma_line,
+ uint16_t seed) {
// same for the picture
uint16_t msb = (seed >> 8) & 255;
uint16_t lsb = seed & 255;
- random_register = (msb << 8) + lsb;
+ rng->random_register = (msb << 8) + lsb;
// changes for each row
int luma_num = luma_line >> 5;
- random_register ^= ((luma_num * 37 + 178) & 255) << 8;
- random_register ^= ((luma_num * 173 + 105) & 255);
+ rng->random_register ^= ((luma_num * 37 + 178) & 255) << 8;
+ rng->random_register ^= ((luma_num * 173 + 105) & 255);
}
static void generate_luma_grain_block(
- const aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
- int luma_block_size_y, int luma_block_size_x, int luma_grain_stride,
- int left_pad, int top_pad, int right_pad, int bottom_pad) {
+ const aom_film_grain_t *params, aom_grain_rng_t *rng, int **pred_pos_luma,
+ int *luma_grain_block, int luma_block_size_y, int luma_block_size_x,
+ int luma_grain_stride, int left_pad, int top_pad, int right_pad,
+ int bottom_pad) {
if (params->num_y_points == 0) {
memset(luma_grain_block, 0,
sizeof(*luma_grain_block) * luma_block_size_y * luma_grain_stride);
@@ -470,10 +468,13 @@ static void generate_luma_grain_block(
int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
int rounding_offset = (1 << (params->ar_coeff_shift - 1));
+ const int grain_min = -(1 << (bit_depth - 1));
+ const int grain_max = (1 << (bit_depth - 1)) - 1;
+
for (int i = 0; i < luma_block_size_y; i++)
for (int j = 0; j < luma_block_size_x; j++)
luma_grain_block[i * luma_grain_stride + j] =
- (gaussian_sequence[get_random_number(gauss_bits)] +
+ (gaussian_sequence[get_random_number(rng, gauss_bits)] +
((1 << gauss_sec_shift) >> 1)) >>
gauss_sec_shift;
@@ -494,7 +495,7 @@ static void generate_luma_grain_block(
}
static bool generate_chroma_grain_blocks(
- const aom_film_grain_t *params, int **pred_pos_chroma,
+ const aom_film_grain_t *params, aom_grain_rng_t *rng, int **pred_pos_chroma,
int *luma_grain_block, int *cb_grain_block, int *cr_grain_block,
int luma_grain_stride, int chroma_block_size_y, int chroma_block_size_x,
int chroma_grain_stride, int left_pad, int top_pad, int right_pad,
@@ -507,13 +508,16 @@ static bool generate_chroma_grain_blocks(
int rounding_offset = (1 << (params->ar_coeff_shift - 1));
int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride;
+ const int grain_min = -(1 << (bit_depth - 1));
+ const int grain_max = (1 << (bit_depth - 1)) - 1;
+
if (params->num_cb_points || params->chroma_scaling_from_luma) {
- init_random_generator(7 << 5, params->random_seed);
+ init_random_generator(rng, 7 << 5, params->random_seed);
for (int i = 0; i < chroma_block_size_y; i++)
for (int j = 0; j < chroma_block_size_x; j++)
cb_grain_block[i * chroma_grain_stride + j] =
- (gaussian_sequence[get_random_number(gauss_bits)] +
+ (gaussian_sequence[get_random_number(rng, gauss_bits)] +
((1 << gauss_sec_shift) >> 1)) >>
gauss_sec_shift;
} else {
@@ -522,12 +526,12 @@ static bool generate_chroma_grain_blocks(
}
if (params->num_cr_points || params->chroma_scaling_from_luma) {
- init_random_generator(11 << 5, params->random_seed);
+ init_random_generator(rng, 11 << 5, params->random_seed);
for (int i = 0; i < chroma_block_size_y; i++)
for (int j = 0; j < chroma_block_size_x; j++)
cr_grain_block[i * chroma_grain_stride + j] =
- (gaussian_sequence[get_random_number(gauss_bits)] +
+ (gaussian_sequence[get_random_number(rng, gauss_bits)] +
((1 << gauss_sec_shift) >> 1)) >>
gauss_sec_shift;
} else {
@@ -613,7 +617,7 @@ static void init_scaling_function(const int scaling_points[][2], int num_points,
// function that extracts samples from a LUT (and interpolates intemediate
// frames for 10- and 12-bit video)
-static int scale_LUT(int *scaling_lut, int index, int bit_depth) {
+static int scale_LUT(const int *scaling_lut, int index, int bit_depth) {
int x = index >> (bit_depth - 8);
if (!(bit_depth - 8) || x == 255)
@@ -625,10 +629,11 @@ static int scale_LUT(int *scaling_lut, int index, int bit_depth) {
(bit_depth - 8));
}
-static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
- uint8_t *cb, uint8_t *cr, int luma_stride,
- int chroma_stride, int *luma_grain,
- int *cb_grain, int *cr_grain,
+static void add_noise_to_block(const aom_film_grain_t *params,
+ const aom_grain_scaling_lut_t *scaling_lut,
+ uint8_t *luma, uint8_t *cb, uint8_t *cr,
+ int luma_stride, int chroma_stride,
+ int *luma_grain, int *cb_grain, int *cr_grain,
int luma_grain_stride, int chroma_grain_stride,
int half_luma_height, int half_luma_width,
int bit_depth, int chroma_subsamp_y,
@@ -694,7 +699,7 @@ static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
if (apply_cb) {
cb[i * chroma_stride + j] = clamp(
cb[i * chroma_stride + j] +
- ((scale_LUT(scaling_lut_cb,
+ ((scale_LUT(scaling_lut->cb,
clamp(((average_luma * cb_luma_mult +
cb_mult * cb[i * chroma_stride + j]) >>
6) +
@@ -710,7 +715,7 @@ static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
if (apply_cr) {
cr[i * chroma_stride + j] = clamp(
cr[i * chroma_stride + j] +
- ((scale_LUT(scaling_lut_cr,
+ ((scale_LUT(scaling_lut->cr,
clamp(((average_luma * cr_luma_mult +
cr_mult * cr[i * chroma_stride + j]) >>
6) +
@@ -728,24 +733,25 @@ static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
if (apply_y) {
for (int i = 0; i < (half_luma_height << 1); i++) {
for (int j = 0; j < (half_luma_width << 1); j++) {
- luma[i * luma_stride + j] =
- clamp(luma[i * luma_stride + j] +
- ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j], 8) *
- luma_grain[i * luma_grain_stride + j] +
- rounding_offset) >>
- params->scaling_shift),
- min_luma, max_luma);
+ luma[i * luma_stride + j] = clamp(
+ luma[i * luma_stride + j] +
+ ((scale_LUT(scaling_lut->y, luma[i * luma_stride + j], 8) *
+ luma_grain[i * luma_grain_stride + j] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_luma, max_luma);
}
}
}
}
static void add_noise_to_block_hbd(
- const aom_film_grain_t *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
- int luma_stride, int chroma_stride, int *luma_grain, int *cb_grain,
- int *cr_grain, int luma_grain_stride, int chroma_grain_stride,
- int half_luma_height, int half_luma_width, int bit_depth,
- int chroma_subsamp_y, int chroma_subsamp_x, int mc_identity) {
+ const aom_film_grain_t *params, const aom_grain_scaling_lut_t *scaling_lut,
+ uint16_t *luma, uint16_t *cb, uint16_t *cr, int luma_stride,
+ int chroma_stride, int *luma_grain, int *cb_grain, int *cr_grain,
+ int luma_grain_stride, int chroma_grain_stride, int half_luma_height,
+ int half_luma_width, int bit_depth, int chroma_subsamp_y,
+ int chroma_subsamp_x, int mc_identity) {
int cb_mult = params->cb_mult - 128; // fixed scale
int cb_luma_mult = params->cb_luma_mult - 128; // fixed scale
// offset value depends on the bit depth
@@ -811,7 +817,7 @@ static void add_noise_to_block_hbd(
if (apply_cb) {
cb[i * chroma_stride + j] = clamp(
cb[i * chroma_stride + j] +
- ((scale_LUT(scaling_lut_cb,
+ ((scale_LUT(scaling_lut->cb,
clamp(((average_luma * cb_luma_mult +
cb_mult * cb[i * chroma_stride + j]) >>
6) +
@@ -826,7 +832,7 @@ static void add_noise_to_block_hbd(
if (apply_cr) {
cr[i * chroma_stride + j] = clamp(
cr[i * chroma_stride + j] +
- ((scale_LUT(scaling_lut_cr,
+ ((scale_LUT(scaling_lut->cr,
clamp(((average_luma * cr_luma_mult +
cr_mult * cr[i * chroma_stride + j]) >>
6) +
@@ -846,7 +852,7 @@ static void add_noise_to_block_hbd(
for (int j = 0; j < (half_luma_width << 1); j++) {
luma[i * luma_stride + j] =
clamp(luma[i * luma_stride + j] +
- ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j],
+ ((scale_LUT(scaling_lut->y, luma[i * luma_stride + j],
bit_depth) *
luma_grain[i * luma_grain_stride + j] +
rounding_offset) >>
@@ -912,7 +918,7 @@ static void extend_even(uint8_t *dst, int dst_stride, int width, int height,
static void ver_boundary_overlap(int *left_block, int left_stride,
int *right_block, int right_stride,
int *dst_block, int dst_stride, int width,
- int height) {
+ int height, int grain_min, int grain_max) {
if (width == 1) {
while (height) {
*dst_block = clamp((*left_block * 23 + *right_block * 22 + 16) >> 5,
@@ -941,7 +947,7 @@ static void ver_boundary_overlap(int *left_block, int left_stride,
static void hor_boundary_overlap(int *top_block, int top_stride,
int *bottom_block, int bottom_stride,
int *dst_block, int dst_stride, int width,
- int height) {
+ int height, int grain_min, int grain_max) {
if (height == 1) {
while (width) {
*dst_block = clamp((*top_block * 23 + *bottom_block * 22 + 16) >> 5,
@@ -1003,7 +1009,11 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
int *cb_col_buf;
int *cr_col_buf;
- random_register = params->random_seed;
+ aom_grain_scaling_lut_t scaling_lut;
+ memset(&scaling_lut, 0, sizeof(scaling_lut));
+
+ aom_grain_rng_t rng;
+ rng.random_register = params->random_seed;
int left_pad = 3;
int right_pad = 3; // padding to offset for AR coefficients
@@ -1012,11 +1022,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
int ar_padding = 3; // maximum lag used for stabilization of AR coefficients
- luma_subblock_size_y = 32;
- luma_subblock_size_x = 32;
-
- chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
- chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;
+ const int chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
+ const int chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;
// Initial padding is only needed for generation of
// film grain templates (to stabilize the AR process)
@@ -1040,9 +1047,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
int overlap = params->overlap_flag;
int bit_depth = params->bit_depth;
- const int grain_center = 128 << (bit_depth - 8);
- grain_min = 0 - grain_center;
- grain_max = grain_center - 1;
+ int grain_min = -(1 << (bit_depth - 1));
+ int grain_max = (1 << (bit_depth - 1)) - 1;
if (!init_arrays(params, luma_stride, chroma_stride, &pred_pos_luma,
&pred_pos_chroma, &luma_grain_block, &cb_grain_block,
@@ -1053,35 +1059,37 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
chroma_subsamp_x))
return -1;
- generate_luma_grain_block(params, pred_pos_luma, luma_grain_block,
+ generate_luma_grain_block(params, &rng, pred_pos_luma, luma_grain_block,
luma_block_size_y, luma_block_size_x,
luma_grain_stride, left_pad, top_pad, right_pad,
bottom_pad);
if (!generate_chroma_grain_blocks(
- params, pred_pos_chroma, luma_grain_block, cb_grain_block,
+ params, &rng, pred_pos_chroma, luma_grain_block, cb_grain_block,
cr_grain_block, luma_grain_stride, chroma_block_size_y,
chroma_block_size_x, chroma_grain_stride, left_pad, top_pad,
right_pad, bottom_pad, chroma_subsamp_y, chroma_subsamp_x))
return -1;
init_scaling_function(params->scaling_points_y, params->num_y_points,
- scaling_lut_y);
+ scaling_lut.y);
if (params->chroma_scaling_from_luma) {
- memcpy(scaling_lut_cb, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
- memcpy(scaling_lut_cr, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
+ static_assert(sizeof(scaling_lut.cb) == sizeof(scaling_lut.y), "");
+ static_assert(sizeof(scaling_lut.cr) == sizeof(scaling_lut.y), "");
+ memcpy(scaling_lut.cb, scaling_lut.y, sizeof(scaling_lut.y));
+ memcpy(scaling_lut.cr, scaling_lut.y, sizeof(scaling_lut.y));
} else {
init_scaling_function(params->scaling_points_cb, params->num_cb_points,
- scaling_lut_cb);
+ scaling_lut.cb);
init_scaling_function(params->scaling_points_cr, params->num_cr_points,
- scaling_lut_cr);
+ scaling_lut.cr);
}
for (int y = 0; y < height / 2; y += (luma_subblock_size_y >> 1)) {
- init_random_generator(y * 2, params->random_seed);
+ init_random_generator(&rng, y * 2, params->random_seed);
for (int x = 0; x < width / 2; x += (luma_subblock_size_x >> 1)) {
- int offset_y = get_random_number(8);
+ int offset_y = get_random_number(&rng, 8);
int offset_x = (offset_y >> 4) & 15;
offset_y &= 15;
@@ -1099,7 +1107,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
luma_grain_block + luma_offset_y * luma_grain_stride +
luma_offset_x,
luma_grain_stride, y_col_buf, 2, 2,
- AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
+ AOMMIN(luma_subblock_size_y + 2, height - (y << 1)), grain_min,
+ grain_max);
ver_boundary_overlap(
cb_col_buf, 2 >> chroma_subsamp_x,
@@ -1108,7 +1117,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
2 >> chroma_subsamp_x,
AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
- (height - (y << 1)) >> chroma_subsamp_y));
+ (height - (y << 1)) >> chroma_subsamp_y),
+ grain_min, grain_max);
ver_boundary_overlap(
cr_col_buf, 2 >> chroma_subsamp_x,
@@ -1117,13 +1127,14 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
2 >> chroma_subsamp_x,
AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
- (height - (y << 1)) >> chroma_subsamp_y));
+ (height - (y << 1)) >> chroma_subsamp_y),
+ grain_min, grain_max);
int i = y ? 1 : 0;
if (use_high_bit_depth) {
add_noise_to_block_hbd(
- params,
+ params, &scaling_lut,
(uint16_t *)luma + ((y + i) << 1) * luma_stride + (x << 1),
(uint16_t *)cb +
((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
@@ -1139,7 +1150,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
} else {
add_noise_to_block(
- params, luma + ((y + i) << 1) * luma_stride + (x << 1),
+ params, &scaling_lut,
+ luma + ((y + i) << 1) * luma_stride + (x << 1),
cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
(x << (1 - chroma_subsamp_x)),
cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
@@ -1156,19 +1168,20 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
if (overlap && y) {
if (x) {
hor_boundary_overlap(y_line_buf + (x << 1), luma_stride, y_col_buf, 2,
- y_line_buf + (x << 1), luma_stride, 2, 2);
+ y_line_buf + (x << 1), luma_stride, 2, 2,
+ grain_min, grain_max);
hor_boundary_overlap(cb_line_buf + x * (2 >> chroma_subsamp_x),
chroma_stride, cb_col_buf, 2 >> chroma_subsamp_x,
cb_line_buf + x * (2 >> chroma_subsamp_x),
chroma_stride, 2 >> chroma_subsamp_x,
- 2 >> chroma_subsamp_y);
+ 2 >> chroma_subsamp_y, grain_min, grain_max);
hor_boundary_overlap(cr_line_buf + x * (2 >> chroma_subsamp_x),
chroma_stride, cr_col_buf, 2 >> chroma_subsamp_x,
cr_line_buf + x * (2 >> chroma_subsamp_x),
chroma_stride, 2 >> chroma_subsamp_x,
- 2 >> chroma_subsamp_y);
+ 2 >> chroma_subsamp_y, grain_min, grain_max);
}
hor_boundary_overlap(
@@ -1178,7 +1191,7 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
AOMMIN(luma_subblock_size_x - ((x ? 1 : 0) << 1),
width - ((x ? x + 1 : 0) << 1)),
- 2);
+ 2, grain_min, grain_max);
hor_boundary_overlap(
cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
@@ -1191,7 +1204,7 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
AOMMIN(chroma_subblock_size_x -
((x ? 1 : 0) << (1 - chroma_subsamp_x)),
(width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
- 2 >> chroma_subsamp_y);
+ 2 >> chroma_subsamp_y, grain_min, grain_max);
hor_boundary_overlap(
cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
@@ -1204,11 +1217,12 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
AOMMIN(chroma_subblock_size_x -
((x ? 1 : 0) << (1 - chroma_subsamp_x)),
(width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
- 2 >> chroma_subsamp_y);
+ 2 >> chroma_subsamp_y, grain_min, grain_max);
if (use_high_bit_depth) {
add_noise_to_block_hbd(
- params, (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
+ params, &scaling_lut,
+ (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
(uint16_t *)cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
(x << ((1 - chroma_subsamp_x))),
(uint16_t *)cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
@@ -1221,7 +1235,7 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
chroma_subsamp_y, chroma_subsamp_x, mc_identity);
} else {
add_noise_to_block(
- params, luma + (y << 1) * luma_stride + (x << 1),
+ params, &scaling_lut, luma + (y << 1) * luma_stride + (x << 1),
cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
(x << ((1 - chroma_subsamp_x))),
cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
@@ -1240,7 +1254,7 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
if (use_high_bit_depth) {
add_noise_to_block_hbd(
- params,
+ params, &scaling_lut,
(uint16_t *)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
(uint16_t *)cb +
((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
@@ -1265,7 +1279,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
chroma_subsamp_y, chroma_subsamp_x, mc_identity);
} else {
add_noise_to_block(
- params, luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
+ params, &scaling_lut,
+ luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
((x + j) << (1 - chroma_subsamp_x)),
cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
diff --git a/test/grain_synthesis_race_test.cc b/test/grain_synthesis_race_test.cc
new file mode 100644
index 0000000000..8f9beda41e
--- /dev/null
+++ b/test/grain_synthesis_race_test.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2026, Alliance for Open Media. All rights reserved.
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+// Test that av1_add_film_grain is thread-safe when called concurrently
+// with images of different chroma subsampling (4:2:0 vs 4:4:4).
+// Reproduces the data race on static globals in grain_synthesis.c.
+
+#include <cstring>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "aom/aom_image.h"
+#include "aom_dsp/grain_params.h"
+#include "aom_util/aom_pthread.h"
+#include "av1/decoder/grain_synthesis.h"
+
+namespace {
+
+// Minimal film grain params that trigger the overlap path.
+aom_film_grain_t MakeGrainParams() {
+ aom_film_grain_t params = {};
+ params.apply_grain = 1;
+ params.update_parameters = 1;
+ params.num_y_points = 2;
+ params.scaling_points_y[0][0] = 0;
+ params.scaling_points_y[0][1] = 96;
+ params.scaling_points_y[1][0] = 255;
+ params.scaling_points_y[1][1] = 96;
+ params.num_cb_points = 2;
+ params.scaling_points_cb[0][0] = 0;
+ params.scaling_points_cb[0][1] = 64;
+ params.scaling_points_cb[1][0] = 255;
+ params.scaling_points_cb[1][1] = 64;
+ params.num_cr_points = 2;
+ params.scaling_points_cr[0][0] = 0;
+ params.scaling_points_cr[0][1] = 64;
+ params.scaling_points_cr[1][0] = 255;
+ params.scaling_points_cr[1][1] = 64;
+ params.scaling_shift = 11;
+ params.ar_coeff_lag = 1;
+ params.ar_coeff_shift = 7;
+ params.overlap_flag = 1;
+ params.bit_depth = 8;
+ params.random_seed = 7391;
+ params.cb_mult = 128;
+ params.cb_luma_mult = 192;
+ params.cb_offset = 256;
+ params.cr_mult = 128;
+ params.cr_luma_mult = 192;
+ params.cr_offset = 256;
+ return params;
+}
+
+void RunGrain(aom_img_fmt_t fmt, int iterations) {
+ constexpr int kWidth = 128;
+ constexpr int kHeight = 128;
+
+ aom_image_t src;
+ ASSERT_NE(aom_img_alloc(&src, fmt, kWidth, kHeight, 32), nullptr);
+ memset(src.planes[AOM_PLANE_Y], 128,
+ (size_t)src.stride[AOM_PLANE_Y] * kHeight);
+ const int chroma_h = (fmt == AOM_IMG_FMT_I420) ? kHeight / 2 : kHeight;
+ memset(src.planes[AOM_PLANE_U], 128,
+ (size_t)src.stride[AOM_PLANE_U] * chroma_h);
+ memset(src.planes[AOM_PLANE_V], 128,
+ (size_t)src.stride[AOM_PLANE_V] * chroma_h);
+ src.bit_depth = 8;
+ src.mc = AOM_CICP_MC_BT_709;
+
+ aom_image_t dst;
+ ASSERT_NE(aom_img_alloc(&dst, fmt, kWidth, kHeight, 32), nullptr);
+ dst.bit_depth = 8;
+ dst.mc = AOM_CICP_MC_BT_709;
+
+ aom_film_grain_t params = MakeGrainParams();
+
+ for (int i = 0; i < iterations; ++i) {
+ params.random_seed = (uint16_t)(7391 + i);
+ int ret = av1_add_film_grain(&params, &src, &dst);
+ ASSERT_EQ(ret, 0) << "av1_add_film_grain failed on iteration " << i;
+ }
+
+ aom_img_free(&src);
+ aom_img_free(&dst);
+}
+
+constexpr int kIterations = 200;
+
+THREADFN RunGrain420(void * /*arg*/) {
+ RunGrain(AOM_IMG_FMT_I420, kIterations);
+ return THREAD_EXIT_SUCCESS;
+}
+
+THREADFN RunGrain444(void * /*arg*/) {
+ RunGrain(AOM_IMG_FMT_I444, kIterations);
+ return THREAD_EXIT_SUCCESS;
+}
+
+TEST(GrainSynthesisRaceTest, ConcurrentDifferentSubsampling) {
+ constexpr int kThreads = 4;
+
+ std::vector<pthread_t> threads;
+ threads.reserve(kThreads * 2);
+
+ for (int i = 0; i < kThreads; ++i) {
+ pthread_t thread;
+ ASSERT_EQ(pthread_create(&thread, nullptr, RunGrain420, nullptr), 0);
+ threads.push_back(thread);
+ ASSERT_EQ(pthread_create(&thread, nullptr, RunGrain444, nullptr), 0);
+ threads.push_back(thread);
+ }
+
+ for (auto thread : threads) {
+ ASSERT_EQ(pthread_join(thread, nullptr), 0);
+ }
+}
+
+} // namespace
diff --git a/test/test.cmake b/test/test.cmake
index f15db992d4..6224c54e6e 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -191,6 +191,11 @@ if(NOT BUILD_SHARED_LIBS)
add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_INTRIN_AVX2)
endif()
+ if(CONFIG_MULTITHREAD)
+ list(APPEND AOM_UNIT_TEST_DECODER_SOURCES
+ "${AOM_ROOT}/test/grain_synthesis_race_test.cc")
+ endif()
+
list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
"${AOM_ROOT}/test/arf_freq_test.cc"
"${AOM_ROOT}/test/av1_convolve_test.cc"