Commit 673071a141 for aom

commit 673071a1411e240e3cf13982ff2a1eb3e3438dd1
Author: Yunqing Wang <yunqingwang@google.com>
Date:   Mon May 18 11:34:43 2026 -0700

    Weighted chroma distortion

    This optimization is enabled for low-complexity mode at
    speed 3. This gives over 2% AVG decoder time reduction
    with a reasonable complexity-to-efficiency tradeoff.

    Bug: 505128814

    STATS_CHANGED for speed 3 LC mode

    Change-Id: Ib1a25a22d9d96ba5d24df7c7f67c3318e15043de

diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index c85cc9a8a5..55b17fce52 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -179,6 +179,32 @@ static inline void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
 #endif
 }

+static inline void av1_merge_rd_stats_weighted(RD_STATS *rd_stats_dst,
+                                               const RD_STATS *rd_stats_src) {
+  if (rd_stats_dst->rate == INT_MAX || rd_stats_src->rate == INT_MAX) {
+    // Either side has an invalid rate (INT_MAX): invalidate rd_stats_dst and
+    // return before any weighted accumulation is attempted.
+    av1_invalid_rd_stats(rd_stats_dst);
+    return;
+  }
+  rd_stats_dst->rate = (int)AOMMIN(
+      ((int64_t)rd_stats_dst->rate + (int64_t)rd_stats_src->rate), INT_MAX);
+  if (!rd_stats_dst->zero_rate)  // only filled in when dst's is still 0
+    rd_stats_dst->zero_rate = rd_stats_src->zero_rate;
+  rd_stats_dst->dist += rd_stats_src->dist * 15 / 16;  // src weighted 15/16
+  if (rd_stats_dst->sse < INT64_MAX && rd_stats_src->sse < INT64_MAX) {
+    rd_stats_dst->sse += rd_stats_src->sse * 15 / 16;  // same 15/16 weight
+  }
+  rd_stats_dst->skip_txfm &= rd_stats_src->skip_txfm;
+#if CONFIG_RD_DEBUG
+  // NOTE: all MAX_MB_PLANE entries are summed; this may run into problems for
+  // monochrome video, where only 1 plane exists.
+  for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
+  }
+#endif
+}
+
 static inline void av1_accumulate_rd_stats(RD_STATS *rd_stats, int64_t dist,
                                            int rate, int skip_txfm, int64_t sse,
                                            int zero_rate) {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 706e08e316..62ef4815af 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3974,34 +3974,41 @@ static inline void refine_winner_mode_tx(
       const int comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;

       const ModeCosts *mode_costs = &x->mode_costs;
+      int64_t this_dist = rd_stats_y.dist + rd_stats_uv.dist;
+      int64_t this_sse = rd_stats_y.sse + rd_stats_uv.sse;
+      if (cpi->sf.hl_sf.weighted_chroma_distortion) {
+        this_dist = rd_stats_y.dist + rd_stats_uv.dist * 15 / 16;
+        this_sse = rd_stats_y.sse + rd_stats_uv.sse * 15 / 16;
+      }
+
       if (is_inter_mode(mbmi->mode) &&
           (!cpi->oxcf.algo_cfg.sharpness || !comp_pred) &&
           RDCOST(x->rdmult,
                  mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                      rd_stats_uv.rate,
-                 (rd_stats_y.dist + rd_stats_uv.dist)) >
-              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
-                     (rd_stats_y.sse + rd_stats_uv.sse))) {
+                 this_dist) > RDCOST(x->rdmult,
+                                     mode_costs->skip_txfm_cost[skip_ctx][1],
+                                     this_sse)) {
         skip_blk = 1;
         rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
         rd_stats_uv.rate = 0;
         rd_stats_y.dist = rd_stats_y.sse;
         rd_stats_uv.dist = rd_stats_uv.sse;
+        this_dist = this_sse;
       } else {
         skip_blk = 0;
         rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
       }
       int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                       winner_rate_y - winner_rate_uv;
-      int64_t this_rd =
-          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
+      int64_t this_rd = RDCOST(x->rdmult, this_rate, this_dist);
       if (best_rd > this_rd) {
         *best_mbmode = *mbmi;
         *best_mode_index = winner_mode_index;
         av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
         rd_cost->rate = this_rate;
-        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
-        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
+        rd_cost->dist = this_dist;
+        rd_cost->sse = this_sse;
         rd_cost->rdcost = this_rd;
         best_rd = this_rd;
         *best_skip2 = skip_blk;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 65189e2df6..ebed82e788 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -704,6 +704,8 @@ static void set_good_speed_features_lc_dec_framesize_independent(
       (update_type != OVERLAY_UPDATE && update_type != INTNL_OVERLAY_UPDATE)
           ? 1
           : 0;
+
+  if (speed == 3) sf->hl_sf.weighted_chroma_distortion = 1;
 }

 static void set_good_speed_feature_framesize_dependent(
@@ -2253,6 +2255,7 @@ static inline void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {
   hl_sf->allow_sub_blk_me_in_tf = 0;
   hl_sf->ref_frame_mvs_lvl = 0;
   hl_sf->screen_detection_mode2_fast_detection = 0;
+  hl_sf->weighted_chroma_distortion = 0;
 }

 static inline void init_fp_sf(FIRST_PASS_SPEED_FEATURES *fp_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 47ef9be7fc..3b35fc21e7 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -504,6 +504,13 @@ typedef struct HIGH_LEVEL_SPEED_FEATURES {
    *  1: Fast detection
    */
   int screen_detection_mode2_fast_detection;
+
+  /*!
+   *  Decide whether to enable weighted chroma distortion.
+   *  0: Disable
+   *  1: Enable
+   */
+  int weighted_chroma_distortion;
 } HIGH_LEVEL_SPEED_FEATURES;

 /*!
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 48ced2426d..df78888ea0 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -3855,7 +3855,14 @@ int av1_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
     const int is_cost_valid_uv =
         av1_txfm_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_chroma_rd);
     if (!is_cost_valid_uv) return 0;
-    av1_merge_rd_stats(rd_stats, rd_stats_uv);
+
+    if (cpi->sf.hl_sf.weighted_chroma_distortion) {
+      // Apply weighted distortion/SSE accumulation while merging uv rd stats to
+      // y rd stats.
+      av1_merge_rd_stats_weighted(rd_stats, rd_stats_uv);
+    } else {
+      av1_merge_rd_stats(rd_stats, rd_stats_uv);
+    }
   }

   int choose_skip_txfm = rd_stats->skip_txfm;