Dev news

Commit c6e6f60a84 for aom

commit c6e6f60a84c99e5fcc10523e3867e6a20d0140df
Author: Wan-Teh Chang <wtc@google.com>
Date:   Fri Apr 10 15:54:33 2026 -0700

    Make av1/decoder/grain_synthesis.c thread-safe

    Move all mutable static variables into local variables in
    add_film_grain_run() or other functions that use them.

    Remove the unneeded grain_center variable. See the AV2 spec pull request
    https://github.com/AOMediaCodec/av2-spec-internal/pull/1093.

    Adapted from the patches by Chun-Min Chang of Mozilla.

    Bug: 501044174
    Change-Id: I27835e84d849d3a2146b4ba998dfa81ee084ab81

diff --git a/av1/decoder/grain_synthesis.c b/av1/decoder/grain_synthesis.c
index bbf5895ff9..a1ab1bf7ab 100644
--- a/av1/decoder/grain_synthesis.c
+++ b/av1/decoder/grain_synthesis.c
@@ -218,11 +218,8 @@ static const int gaussian_sequence[2048] = {

 static const int gauss_bits = 11;

-static int luma_subblock_size_y = 32;
-static int luma_subblock_size_x = 32;
-
-static int chroma_subblock_size_y = 16;
-static int chroma_subblock_size_x = 16;
+static const int luma_subblock_size_y = 32;
+static const int luma_subblock_size_x = 32;

 static const int min_luma_legal_range = 16;
 static const int max_luma_legal_range = 235;
@@ -230,14 +227,15 @@ static const int max_luma_legal_range = 235;
 static const int min_chroma_legal_range = 16;
 static const int max_chroma_legal_range = 240;

-static int scaling_lut_y[256];
-static int scaling_lut_cb[256];
-static int scaling_lut_cr[256];
-
-static int grain_min;
-static int grain_max;
+typedef struct {
+  int y[256];
+  int cb[256];
+  int cr[256];
+} aom_grain_scaling_lut_t;

-static uint16_t random_register = 0;  // random number generator register
+typedef struct {
+  uint16_t random_register;  // random number generator register
+} aom_grain_rng_t;

 static void dealloc_arrays(const aom_film_grain_t *params, int ***pred_pos_luma,
                            int ***pred_pos_chroma, int **luma_grain_block,
@@ -313,9 +311,7 @@ static bool init_arrays(const aom_film_grain_t *params, int luma_stride,
   *cb_col_buf = NULL;
   *cr_col_buf = NULL;

-  memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256);
-  memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256);
-  memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256);
+  const int chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;

   int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
   int num_pos_chroma = num_pos_luma;
@@ -430,34 +426,36 @@ static bool init_arrays(const aom_film_grain_t *params, int luma_stride,
 }

 // get a number between 0 and 2^bits - 1
-static inline int get_random_number(int bits) {
+static inline int get_random_number(aom_grain_rng_t *rng, int bits) {
   uint16_t bit;
-  bit = ((random_register >> 0) ^ (random_register >> 1) ^
-         (random_register >> 3) ^ (random_register >> 12)) &
+  bit = ((rng->random_register >> 0) ^ (rng->random_register >> 1) ^
+         (rng->random_register >> 3) ^ (rng->random_register >> 12)) &
         1;
-  random_register = (random_register >> 1) | (bit << 15);
-  return (random_register >> (16 - bits)) & ((1 << bits) - 1);
+  rng->random_register = (rng->random_register >> 1) | (bit << 15);
+  return (rng->random_register >> (16 - bits)) & ((1 << bits) - 1);
 }

-static void init_random_generator(int luma_line, uint16_t seed) {
+static void init_random_generator(aom_grain_rng_t *rng, int luma_line,
+                                  uint16_t seed) {
   // same for the picture

   uint16_t msb = (seed >> 8) & 255;
   uint16_t lsb = seed & 255;

-  random_register = (msb << 8) + lsb;
+  rng->random_register = (msb << 8) + lsb;

   //  changes for each row
   int luma_num = luma_line >> 5;

-  random_register ^= ((luma_num * 37 + 178) & 255) << 8;
-  random_register ^= ((luma_num * 173 + 105) & 255);
+  rng->random_register ^= ((luma_num * 37 + 178) & 255) << 8;
+  rng->random_register ^= ((luma_num * 173 + 105) & 255);
 }

 static void generate_luma_grain_block(
-    const aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
-    int luma_block_size_y, int luma_block_size_x, int luma_grain_stride,
-    int left_pad, int top_pad, int right_pad, int bottom_pad) {
+    const aom_film_grain_t *params, aom_grain_rng_t *rng, int **pred_pos_luma,
+    int *luma_grain_block, int luma_block_size_y, int luma_block_size_x,
+    int luma_grain_stride, int left_pad, int top_pad, int right_pad,
+    int bottom_pad) {
   if (params->num_y_points == 0) {
     memset(luma_grain_block, 0,
            sizeof(*luma_grain_block) * luma_block_size_y * luma_grain_stride);
@@ -470,10 +468,13 @@ static void generate_luma_grain_block(
   int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
   int rounding_offset = (1 << (params->ar_coeff_shift - 1));

+  const int grain_min = -(1 << (bit_depth - 1));
+  const int grain_max = (1 << (bit_depth - 1)) - 1;
+
   for (int i = 0; i < luma_block_size_y; i++)
     for (int j = 0; j < luma_block_size_x; j++)
       luma_grain_block[i * luma_grain_stride + j] =
-          (gaussian_sequence[get_random_number(gauss_bits)] +
+          (gaussian_sequence[get_random_number(rng, gauss_bits)] +
            ((1 << gauss_sec_shift) >> 1)) >>
           gauss_sec_shift;

@@ -494,7 +495,7 @@ static void generate_luma_grain_block(
 }

 static bool generate_chroma_grain_blocks(
-    const aom_film_grain_t *params, int **pred_pos_chroma,
+    const aom_film_grain_t *params, aom_grain_rng_t *rng, int **pred_pos_chroma,
     int *luma_grain_block, int *cb_grain_block, int *cr_grain_block,
     int luma_grain_stride, int chroma_block_size_y, int chroma_block_size_x,
     int chroma_grain_stride, int left_pad, int top_pad, int right_pad,
@@ -507,13 +508,16 @@ static bool generate_chroma_grain_blocks(
   int rounding_offset = (1 << (params->ar_coeff_shift - 1));
   int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride;

+  const int grain_min = -(1 << (bit_depth - 1));
+  const int grain_max = (1 << (bit_depth - 1)) - 1;
+
   if (params->num_cb_points || params->chroma_scaling_from_luma) {
-    init_random_generator(7 << 5, params->random_seed);
+    init_random_generator(rng, 7 << 5, params->random_seed);

     for (int i = 0; i < chroma_block_size_y; i++)
       for (int j = 0; j < chroma_block_size_x; j++)
         cb_grain_block[i * chroma_grain_stride + j] =
-            (gaussian_sequence[get_random_number(gauss_bits)] +
+            (gaussian_sequence[get_random_number(rng, gauss_bits)] +
              ((1 << gauss_sec_shift) >> 1)) >>
             gauss_sec_shift;
   } else {
@@ -522,12 +526,12 @@ static bool generate_chroma_grain_blocks(
   }

   if (params->num_cr_points || params->chroma_scaling_from_luma) {
-    init_random_generator(11 << 5, params->random_seed);
+    init_random_generator(rng, 11 << 5, params->random_seed);

     for (int i = 0; i < chroma_block_size_y; i++)
       for (int j = 0; j < chroma_block_size_x; j++)
         cr_grain_block[i * chroma_grain_stride + j] =
-            (gaussian_sequence[get_random_number(gauss_bits)] +
+            (gaussian_sequence[get_random_number(rng, gauss_bits)] +
              ((1 << gauss_sec_shift) >> 1)) >>
             gauss_sec_shift;
   } else {
@@ -613,7 +617,7 @@ static void init_scaling_function(const int scaling_points[][2], int num_points,

 // function that extracts samples from a LUT (and interpolates intemediate
 // frames for 10- and 12-bit video)
-static int scale_LUT(int *scaling_lut, int index, int bit_depth) {
+static int scale_LUT(const int *scaling_lut, int index, int bit_depth) {
   int x = index >> (bit_depth - 8);

   if (!(bit_depth - 8) || x == 255)
@@ -625,10 +629,11 @@ static int scale_LUT(int *scaling_lut, int index, int bit_depth) {
                              (bit_depth - 8));
 }

-static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
-                               uint8_t *cb, uint8_t *cr, int luma_stride,
-                               int chroma_stride, int *luma_grain,
-                               int *cb_grain, int *cr_grain,
+static void add_noise_to_block(const aom_film_grain_t *params,
+                               const aom_grain_scaling_lut_t *scaling_lut,
+                               uint8_t *luma, uint8_t *cb, uint8_t *cr,
+                               int luma_stride, int chroma_stride,
+                               int *luma_grain, int *cb_grain, int *cr_grain,
                                int luma_grain_stride, int chroma_grain_stride,
                                int half_luma_height, int half_luma_width,
                                int bit_depth, int chroma_subsamp_y,
@@ -694,7 +699,7 @@ static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
       if (apply_cb) {
         cb[i * chroma_stride + j] = clamp(
             cb[i * chroma_stride + j] +
-                ((scale_LUT(scaling_lut_cb,
+                ((scale_LUT(scaling_lut->cb,
                             clamp(((average_luma * cb_luma_mult +
                                     cb_mult * cb[i * chroma_stride + j]) >>
                                    6) +
@@ -710,7 +715,7 @@ static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
       if (apply_cr) {
         cr[i * chroma_stride + j] = clamp(
             cr[i * chroma_stride + j] +
-                ((scale_LUT(scaling_lut_cr,
+                ((scale_LUT(scaling_lut->cr,
                             clamp(((average_luma * cr_luma_mult +
                                     cr_mult * cr[i * chroma_stride + j]) >>
                                    6) +
@@ -728,24 +733,25 @@ static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
   if (apply_y) {
     for (int i = 0; i < (half_luma_height << 1); i++) {
       for (int j = 0; j < (half_luma_width << 1); j++) {
-        luma[i * luma_stride + j] =
-            clamp(luma[i * luma_stride + j] +
-                      ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j], 8) *
-                            luma_grain[i * luma_grain_stride + j] +
-                        rounding_offset) >>
-                       params->scaling_shift),
-                  min_luma, max_luma);
+        luma[i * luma_stride + j] = clamp(
+            luma[i * luma_stride + j] +
+                ((scale_LUT(scaling_lut->y, luma[i * luma_stride + j], 8) *
+                      luma_grain[i * luma_grain_stride + j] +
+                  rounding_offset) >>
+                 params->scaling_shift),
+            min_luma, max_luma);
       }
     }
   }
 }

 static void add_noise_to_block_hbd(
-    const aom_film_grain_t *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
-    int luma_stride, int chroma_stride, int *luma_grain, int *cb_grain,
-    int *cr_grain, int luma_grain_stride, int chroma_grain_stride,
-    int half_luma_height, int half_luma_width, int bit_depth,
-    int chroma_subsamp_y, int chroma_subsamp_x, int mc_identity) {
+    const aom_film_grain_t *params, const aom_grain_scaling_lut_t *scaling_lut,
+    uint16_t *luma, uint16_t *cb, uint16_t *cr, int luma_stride,
+    int chroma_stride, int *luma_grain, int *cb_grain, int *cr_grain,
+    int luma_grain_stride, int chroma_grain_stride, int half_luma_height,
+    int half_luma_width, int bit_depth, int chroma_subsamp_y,
+    int chroma_subsamp_x, int mc_identity) {
   int cb_mult = params->cb_mult - 128;            // fixed scale
   int cb_luma_mult = params->cb_luma_mult - 128;  // fixed scale
   // offset value depends on the bit depth
@@ -811,7 +817,7 @@ static void add_noise_to_block_hbd(
       if (apply_cb) {
         cb[i * chroma_stride + j] = clamp(
             cb[i * chroma_stride + j] +
-                ((scale_LUT(scaling_lut_cb,
+                ((scale_LUT(scaling_lut->cb,
                             clamp(((average_luma * cb_luma_mult +
                                     cb_mult * cb[i * chroma_stride + j]) >>
                                    6) +
@@ -826,7 +832,7 @@ static void add_noise_to_block_hbd(
       if (apply_cr) {
         cr[i * chroma_stride + j] = clamp(
             cr[i * chroma_stride + j] +
-                ((scale_LUT(scaling_lut_cr,
+                ((scale_LUT(scaling_lut->cr,
                             clamp(((average_luma * cr_luma_mult +
                                     cr_mult * cr[i * chroma_stride + j]) >>
                                    6) +
@@ -846,7 +852,7 @@ static void add_noise_to_block_hbd(
       for (int j = 0; j < (half_luma_width << 1); j++) {
         luma[i * luma_stride + j] =
             clamp(luma[i * luma_stride + j] +
-                      ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j],
+                      ((scale_LUT(scaling_lut->y, luma[i * luma_stride + j],
                                   bit_depth) *
                             luma_grain[i * luma_grain_stride + j] +
                         rounding_offset) >>
@@ -912,7 +918,7 @@ static void extend_even(uint8_t *dst, int dst_stride, int width, int height,
 static void ver_boundary_overlap(int *left_block, int left_stride,
                                  int *right_block, int right_stride,
                                  int *dst_block, int dst_stride, int width,
-                                 int height) {
+                                 int height, int grain_min, int grain_max) {
   if (width == 1) {
     while (height) {
       *dst_block = clamp((*left_block * 23 + *right_block * 22 + 16) >> 5,
@@ -941,7 +947,7 @@ static void ver_boundary_overlap(int *left_block, int left_stride,
 static void hor_boundary_overlap(int *top_block, int top_stride,
                                  int *bottom_block, int bottom_stride,
                                  int *dst_block, int dst_stride, int width,
-                                 int height) {
+                                 int height, int grain_min, int grain_max) {
   if (height == 1) {
     while (width) {
       *dst_block = clamp((*top_block * 23 + *bottom_block * 22 + 16) >> 5,
@@ -1003,7 +1009,11 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
   int *cb_col_buf;
   int *cr_col_buf;

-  random_register = params->random_seed;
+  aom_grain_scaling_lut_t scaling_lut;
+  memset(&scaling_lut, 0, sizeof(scaling_lut));
+
+  aom_grain_rng_t rng;
+  rng.random_register = params->random_seed;

   int left_pad = 3;
   int right_pad = 3;  // padding to offset for AR coefficients
@@ -1012,11 +1022,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,

   int ar_padding = 3;  // maximum lag used for stabilization of AR coefficients

-  luma_subblock_size_y = 32;
-  luma_subblock_size_x = 32;
-
-  chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
-  chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;
+  const int chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
+  const int chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;

   // Initial padding is only needed for generation of
   // film grain templates (to stabilize the AR process)
@@ -1040,9 +1047,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
   int overlap = params->overlap_flag;
   int bit_depth = params->bit_depth;

-  const int grain_center = 128 << (bit_depth - 8);
-  grain_min = 0 - grain_center;
-  grain_max = grain_center - 1;
+  int grain_min = -(1 << (bit_depth - 1));
+  int grain_max = (1 << (bit_depth - 1)) - 1;

   if (!init_arrays(params, luma_stride, chroma_stride, &pred_pos_luma,
                    &pred_pos_chroma, &luma_grain_block, &cb_grain_block,
@@ -1053,35 +1059,37 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
                    chroma_subsamp_x))
     return -1;

-  generate_luma_grain_block(params, pred_pos_luma, luma_grain_block,
+  generate_luma_grain_block(params, &rng, pred_pos_luma, luma_grain_block,
                             luma_block_size_y, luma_block_size_x,
                             luma_grain_stride, left_pad, top_pad, right_pad,
                             bottom_pad);

   if (!generate_chroma_grain_blocks(
-          params, pred_pos_chroma, luma_grain_block, cb_grain_block,
+          params, &rng, pred_pos_chroma, luma_grain_block, cb_grain_block,
           cr_grain_block, luma_grain_stride, chroma_block_size_y,
           chroma_block_size_x, chroma_grain_stride, left_pad, top_pad,
           right_pad, bottom_pad, chroma_subsamp_y, chroma_subsamp_x))
     return -1;

   init_scaling_function(params->scaling_points_y, params->num_y_points,
-                        scaling_lut_y);
+                        scaling_lut.y);

   if (params->chroma_scaling_from_luma) {
-    memcpy(scaling_lut_cb, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
-    memcpy(scaling_lut_cr, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
+    static_assert(sizeof(scaling_lut.cb) == sizeof(scaling_lut.y), "");
+    static_assert(sizeof(scaling_lut.cr) == sizeof(scaling_lut.y), "");
+    memcpy(scaling_lut.cb, scaling_lut.y, sizeof(scaling_lut.y));
+    memcpy(scaling_lut.cr, scaling_lut.y, sizeof(scaling_lut.y));
   } else {
     init_scaling_function(params->scaling_points_cb, params->num_cb_points,
-                          scaling_lut_cb);
+                          scaling_lut.cb);
     init_scaling_function(params->scaling_points_cr, params->num_cr_points,
-                          scaling_lut_cr);
+                          scaling_lut.cr);
   }
   for (int y = 0; y < height / 2; y += (luma_subblock_size_y >> 1)) {
-    init_random_generator(y * 2, params->random_seed);
+    init_random_generator(&rng, y * 2, params->random_seed);

     for (int x = 0; x < width / 2; x += (luma_subblock_size_x >> 1)) {
-      int offset_y = get_random_number(8);
+      int offset_y = get_random_number(&rng, 8);
       int offset_x = (offset_y >> 4) & 15;
       offset_y &= 15;

@@ -1099,7 +1107,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
             luma_grain_block + luma_offset_y * luma_grain_stride +
                 luma_offset_x,
             luma_grain_stride, y_col_buf, 2, 2,
-            AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
+            AOMMIN(luma_subblock_size_y + 2, height - (y << 1)), grain_min,
+            grain_max);

         ver_boundary_overlap(
             cb_col_buf, 2 >> chroma_subsamp_x,
@@ -1108,7 +1117,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
             chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
             2 >> chroma_subsamp_x,
             AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
-                   (height - (y << 1)) >> chroma_subsamp_y));
+                   (height - (y << 1)) >> chroma_subsamp_y),
+            grain_min, grain_max);

         ver_boundary_overlap(
             cr_col_buf, 2 >> chroma_subsamp_x,
@@ -1117,13 +1127,14 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
             chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
             2 >> chroma_subsamp_x,
             AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
-                   (height - (y << 1)) >> chroma_subsamp_y));
+                   (height - (y << 1)) >> chroma_subsamp_y),
+            grain_min, grain_max);

         int i = y ? 1 : 0;

         if (use_high_bit_depth) {
           add_noise_to_block_hbd(
-              params,
+              params, &scaling_lut,
               (uint16_t *)luma + ((y + i) << 1) * luma_stride + (x << 1),
               (uint16_t *)cb +
                   ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
@@ -1139,7 +1150,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
               bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
         } else {
           add_noise_to_block(
-              params, luma + ((y + i) << 1) * luma_stride + (x << 1),
+              params, &scaling_lut,
+              luma + ((y + i) << 1) * luma_stride + (x << 1),
               cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
                   (x << (1 - chroma_subsamp_x)),
               cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
@@ -1156,19 +1168,20 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
       if (overlap && y) {
         if (x) {
           hor_boundary_overlap(y_line_buf + (x << 1), luma_stride, y_col_buf, 2,
-                               y_line_buf + (x << 1), luma_stride, 2, 2);
+                               y_line_buf + (x << 1), luma_stride, 2, 2,
+                               grain_min, grain_max);

           hor_boundary_overlap(cb_line_buf + x * (2 >> chroma_subsamp_x),
                                chroma_stride, cb_col_buf, 2 >> chroma_subsamp_x,
                                cb_line_buf + x * (2 >> chroma_subsamp_x),
                                chroma_stride, 2 >> chroma_subsamp_x,
-                               2 >> chroma_subsamp_y);
+                               2 >> chroma_subsamp_y, grain_min, grain_max);

           hor_boundary_overlap(cr_line_buf + x * (2 >> chroma_subsamp_x),
                                chroma_stride, cr_col_buf, 2 >> chroma_subsamp_x,
                                cr_line_buf + x * (2 >> chroma_subsamp_x),
                                chroma_stride, 2 >> chroma_subsamp_x,
-                               2 >> chroma_subsamp_y);
+                               2 >> chroma_subsamp_y, grain_min, grain_max);
         }

         hor_boundary_overlap(
@@ -1178,7 +1191,7 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
             luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
             AOMMIN(luma_subblock_size_x - ((x ? 1 : 0) << 1),
                    width - ((x ? x + 1 : 0) << 1)),
-            2);
+            2, grain_min, grain_max);

         hor_boundary_overlap(
             cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
@@ -1191,7 +1204,7 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
             AOMMIN(chroma_subblock_size_x -
                        ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
                    (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
-            2 >> chroma_subsamp_y);
+            2 >> chroma_subsamp_y, grain_min, grain_max);

         hor_boundary_overlap(
             cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
@@ -1204,11 +1217,12 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
             AOMMIN(chroma_subblock_size_x -
                        ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
                    (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
-            2 >> chroma_subsamp_y);
+            2 >> chroma_subsamp_y, grain_min, grain_max);

         if (use_high_bit_depth) {
           add_noise_to_block_hbd(
-              params, (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
+              params, &scaling_lut,
+              (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
               (uint16_t *)cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
                   (x << ((1 - chroma_subsamp_x))),
               (uint16_t *)cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
@@ -1221,7 +1235,7 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
               chroma_subsamp_y, chroma_subsamp_x, mc_identity);
         } else {
           add_noise_to_block(
-              params, luma + (y << 1) * luma_stride + (x << 1),
+              params, &scaling_lut, luma + (y << 1) * luma_stride + (x << 1),
               cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
                   (x << ((1 - chroma_subsamp_x))),
               cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
@@ -1240,7 +1254,7 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,

       if (use_high_bit_depth) {
         add_noise_to_block_hbd(
-            params,
+            params, &scaling_lut,
             (uint16_t *)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
             (uint16_t *)cb +
                 ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
@@ -1265,7 +1279,8 @@ static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
             chroma_subsamp_y, chroma_subsamp_x, mc_identity);
       } else {
         add_noise_to_block(
-            params, luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
+            params, &scaling_lut,
+            luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
             cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
                 ((x + j) << (1 - chroma_subsamp_x)),
             cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
diff --git a/test/grain_synthesis_race_test.cc b/test/grain_synthesis_race_test.cc
new file mode 100644
index 0000000000..8f9beda41e
--- /dev/null
+++ b/test/grain_synthesis_race_test.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2026, Alliance for Open Media. All rights reserved.
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+// Test that av1_add_film_grain is thread-safe when called concurrently
+// with images of different chroma subsampling (4:2:0 vs 4:4:4).
+// Reproduces the data race on static globals in grain_synthesis.c.
+
+#include <cstring>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "aom/aom_image.h"
+#include "aom_dsp/grain_params.h"
+#include "aom_util/aom_pthread.h"
+#include "av1/decoder/grain_synthesis.h"
+
+namespace {
+
+// Minimal film grain params that trigger the overlap path.
+aom_film_grain_t MakeGrainParams() {
+  aom_film_grain_t params = {};
+  params.apply_grain = 1;
+  params.update_parameters = 1;
+  params.num_y_points = 2;
+  params.scaling_points_y[0][0] = 0;
+  params.scaling_points_y[0][1] = 96;
+  params.scaling_points_y[1][0] = 255;
+  params.scaling_points_y[1][1] = 96;
+  params.num_cb_points = 2;
+  params.scaling_points_cb[0][0] = 0;
+  params.scaling_points_cb[0][1] = 64;
+  params.scaling_points_cb[1][0] = 255;
+  params.scaling_points_cb[1][1] = 64;
+  params.num_cr_points = 2;
+  params.scaling_points_cr[0][0] = 0;
+  params.scaling_points_cr[0][1] = 64;
+  params.scaling_points_cr[1][0] = 255;
+  params.scaling_points_cr[1][1] = 64;
+  params.scaling_shift = 11;
+  params.ar_coeff_lag = 1;
+  params.ar_coeff_shift = 7;
+  params.overlap_flag = 1;
+  params.bit_depth = 8;
+  params.random_seed = 7391;
+  params.cb_mult = 128;
+  params.cb_luma_mult = 192;
+  params.cb_offset = 256;
+  params.cr_mult = 128;
+  params.cr_luma_mult = 192;
+  params.cr_offset = 256;
+  return params;
+}
+
+void RunGrain(aom_img_fmt_t fmt, int iterations) {
+  constexpr int kWidth = 128;
+  constexpr int kHeight = 128;
+
+  aom_image_t src;
+  ASSERT_NE(aom_img_alloc(&src, fmt, kWidth, kHeight, 32), nullptr);
+  memset(src.planes[AOM_PLANE_Y], 128,
+         (size_t)src.stride[AOM_PLANE_Y] * kHeight);
+  const int chroma_h = (fmt == AOM_IMG_FMT_I420) ? kHeight / 2 : kHeight;
+  memset(src.planes[AOM_PLANE_U], 128,
+         (size_t)src.stride[AOM_PLANE_U] * chroma_h);
+  memset(src.planes[AOM_PLANE_V], 128,
+         (size_t)src.stride[AOM_PLANE_V] * chroma_h);
+  src.bit_depth = 8;
+  src.mc = AOM_CICP_MC_BT_709;
+
+  aom_image_t dst;
+  ASSERT_NE(aom_img_alloc(&dst, fmt, kWidth, kHeight, 32), nullptr);
+  dst.bit_depth = 8;
+  dst.mc = AOM_CICP_MC_BT_709;
+
+  aom_film_grain_t params = MakeGrainParams();
+
+  for (int i = 0; i < iterations; ++i) {
+    params.random_seed = (uint16_t)(7391 + i);
+    int ret = av1_add_film_grain(&params, &src, &dst);
+    ASSERT_EQ(ret, 0) << "av1_add_film_grain failed on iteration " << i;
+  }
+
+  aom_img_free(&src);
+  aom_img_free(&dst);
+}
+
+constexpr int kIterations = 200;
+
+THREADFN RunGrain420(void * /*arg*/) {
+  RunGrain(AOM_IMG_FMT_I420, kIterations);
+  return THREAD_EXIT_SUCCESS;
+}
+
+THREADFN RunGrain444(void * /*arg*/) {
+  RunGrain(AOM_IMG_FMT_I444, kIterations);
+  return THREAD_EXIT_SUCCESS;
+}
+
+TEST(GrainSynthesisRaceTest, ConcurrentDifferentSubsampling) {
+  constexpr int kThreads = 4;
+
+  std::vector<pthread_t> threads;
+  threads.reserve(kThreads * 2);
+
+  for (int i = 0; i < kThreads; ++i) {
+    pthread_t thread;
+    ASSERT_EQ(pthread_create(&thread, nullptr, RunGrain420, nullptr), 0);
+    threads.push_back(thread);
+    ASSERT_EQ(pthread_create(&thread, nullptr, RunGrain444, nullptr), 0);
+    threads.push_back(thread);
+  }
+
+  for (auto thread : threads) {
+    ASSERT_EQ(pthread_join(thread, nullptr), 0);
+  }
+}
+
+}  // namespace
diff --git a/test/test.cmake b/test/test.cmake
index f15db992d4..6224c54e6e 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -191,6 +191,11 @@ if(NOT BUILD_SHARED_LIBS)
     add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_INTRIN_AVX2)
   endif()

+  if(CONFIG_MULTITHREAD)
+    list(APPEND AOM_UNIT_TEST_DECODER_SOURCES
+                "${AOM_ROOT}/test/grain_synthesis_race_test.cc")
+  endif()
+
   list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
               "${AOM_ROOT}/test/arf_freq_test.cc"
               "${AOM_ROOT}/test/av1_convolve_test.cc"