Commit e7837adfe8 for aom

commit e7837adfe8d62f58dab48f991f93805d9d220f6c
Author: Julio Barba <juliobbv@gmail.com>
Date:   Sun Mar 22 22:16:58 2026 -0400

    Optimize tune IQ/SSIMULACRA2 for realtime mode

    Up to 30% file size savings for the same SSIMULACRA2 score on
    two-layer progressive image encoding in 4:4:4 chroma subsampling
    mode (Daala's subset1).

    Change-Id: Iad3721ec3eed592758f678dab9c8c2ac9274d237

diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index d3aabdb5e0..99f90fe641 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -647,7 +647,8 @@ void av1_cyclic_refresh_setup(AV1_COMP *const cpi) {
         qindex2, cm->seq_params->bit_depth,
         cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
         boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
-        is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+        is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+        cpi->oxcf.mode);

     av1_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta);

diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index e7367c806b..aeda245241 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -826,7 +826,8 @@ void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x,
       qindex_rdmult, cm->seq_params->bit_depth,
       cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
       boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
-      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+      cpi->oxcf.mode);

   const int qindex_change = x->qindex != qindex;
   if (qindex_change || do_update) {
diff --git a/av1/encoder/enc_enums.h b/av1/encoder/enc_enums.h
index 98072b3179..142fce61e8 100644
--- a/av1/encoder/enc_enums.h
+++ b/av1/encoder/enc_enums.h
@@ -265,6 +265,17 @@ enum {
   USE_LARGESTALL,
 } UENUM1BYTE(TX_SIZE_SEARCH_METHOD);

+enum {
+  // Good Quality Fast Encoding. The encoder balances quality with the amount of
+  // time it takes to encode the output. Speed setting controls how fast.
+  GOOD,
+  // Realtime Fast Encoding. Will force some restrictions on bitrate
+  // constraints.
+  REALTIME,
+  // All intra mode. All the frames are coded as intra frames.
+  ALLINTRA
+} UENUM1BYTE(MODE);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h
index 46d36c85fd..8cd163ad18 100644
--- a/av1/encoder/encodeframe_utils.h
+++ b/av1/encoder/encodeframe_utils.h
@@ -318,7 +318,7 @@ static inline int set_rdmult(const AV1_COMP *const cpi,
   return av1_compute_rd_mult(
       qindex, bit_depth, update_type, layer_depth, boost_index, frame_type,
       cpi->oxcf.q_cfg.use_fixed_qp_offsets, is_stat_consumption_stage(cpi),
-      cpi->oxcf.tune_cfg.tuning);
+      cpi->oxcf.tune_cfg.tuning, cpi->oxcf.mode);
 }

 static inline int do_split_check(BLOCK_SIZE bsize) {
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 85895ada3d..5093bfd243 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3910,7 +3910,7 @@ static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
     // Note: Both use common rdmult based on base qindex of fullres.
     const int64_t rdmult = av1_compute_rd_mult_based_on_qindex(
         bit_depth, update_type, cm->quant_params.base_qindex,
-        cpi->oxcf.tune_cfg.tuning);
+        cpi->oxcf.tune_cfg.tuning, cpi->oxcf.mode);

     // Find the best rdcost among all superres denoms.
     int best_denom = -1;
@@ -3975,7 +3975,7 @@ static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
     // Note: Both use common rdmult based on base qindex of fullres.
     const int64_t rdmult = av1_compute_rd_mult_based_on_qindex(
         bit_depth, update_type, cm->quant_params.base_qindex,
-        cpi->oxcf.tune_cfg.tuning);
+        cpi->oxcf.tune_cfg.tuning, cpi->oxcf.mode);
     proj_rdcost1 =
         RDCOST_DBL_WITH_NATIVE_BD_DIST(rdmult, rate1, sse1, bit_depth);
     const double proj_rdcost2 =
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 77df0a9fc5..0bac37011e 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -107,17 +107,6 @@ typedef struct aom_rational64 {
   int den;           // fraction denominator
 } aom_rational64_t;  // alias for struct aom_rational

-enum {
-  // Good Quality Fast Encoding. The encoder balances quality with the amount of
-  // time it takes to encode the output. Speed setting controls how fast.
-  GOOD,
-  // Realtime Fast Encoding. Will force some restrictions on bitrate
-  // constraints.
-  REALTIME,
-  // All intra mode. All the frames are coded as intra frames.
-  ALLINTRA
-} UENUM1BYTE(MODE);
-
 enum {
   FRAMEFLAGS_KEY = 1 << 0,
   FRAMEFLAGS_GOLDEN = 1 << 1,
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 2bc36d3937..9655c0395d 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -526,7 +526,8 @@ void av1_apply_roi_map(AV1_COMP *cpi) {
           qindex, cm->seq_params->bit_depth,
           cpi->ppi->gf_group.update_type[cpi->gf_frame_index], 0, 15,
           INTER_FRAME, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
-          is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+          is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+          cpi->oxcf.mode);
     }
   } else {
     av1_disable_segmentation(seg);
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index fcca31242e..7005da4f60 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -386,7 +386,8 @@ static double def_kf_rd_multiplier(int qindex) {

 int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
                                         FRAME_UPDATE_TYPE update_type,
-                                        int qindex, aom_tune_metric tuning) {
+                                        int qindex, aom_tune_metric tuning,
+                                        MODE mode) {
   const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
   int64_t rdmult = q * q;
   if (update_type == KF_UPDATE) {
@@ -401,21 +402,32 @@ int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
   }

   if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
-    // Further multiply rdmult (by up to 200/128 = 1.5625) to improve image
-    // quality. The most noticeable effect is a mild bias towards choosing
-    // larger transform sizes (e.g. one 16x16 transform instead of 4 8x8
-    // transforms).
-    // For very high qindexes, start progressively reducing the weight towards
-    // unity (128/128), as transforms are large enough and making them even
-    // larger actually harms subjective quality and SSIMULACRA 2 scores.
-    // This weight part of the equation was determined by iteratively increasing
-    // weight on CID22 and Daala's subset1, and observing its effects on visual
+    int weight;
+
+    // Weight terms were determined by iteratively testing various weights
+    // on CID22 and Daala's subset1, and observing their effects on visual
     // quality and SSIMULACRA 2 scores along the usable (0-100) range.
-    // The ramp-down part of the equation was determined by choosing a fixed
-    // initial qindex point [qindex 159 = (255 - 159) * 3 / 4] where SSIMULACRA
-    // 2 scores for encodes with qindexes greater than 159 scored at or above
-    // their equivalents with no rdmult adjustment.
-    const int weight = clamp(((255 - qindex) * 3) / 4, 0, 72) + 128;
+    if (mode == REALTIME) {
+      // Realtime mode: scale rdmult down to a quarter (32/128 = 0.25) to
+      // improve image quality.
+      // The most noticeable effect is that for inter frames, there's a
+      // stronger bias towards choosing inter prediction modes with encoded
+      // coefficient residuals (i.e. no skip mode).
+      weight = 32;
+    } else {
+      // All-intra and good-quality modes: Further multiply rdmult (by up to
+      // 200/128 = 1.5625) to improve image quality.
+      // The most noticeable effect is a mild bias towards choosing larger
+      // transform sizes (e.g. one 16x16 transform instead of 4 8x8 transforms).
+      // For very high qindexes, start progressively reducing the weight towards
+      // unity (128/128), as transforms are large enough and making them even
+      // larger actually harms subjective quality and SSIMULACRA 2 scores.
+      // The ramp-down part of the equation was determined by choosing a fixed
+      // initial qindex point [qindex 159, i.e. (255 - 159) * 3 / 4 = 72] where
+      // SSIMULACRA 2 scores for encodes with qindexes greater than 159 scored
+      // at or above their equivalents with no rdmult adjustment.
+      weight = clamp(((255 - qindex) * 3) / 4, 0, 72) + 128;
+    }
     rdmult = (int64_t)((double)rdmult * weight / 128.0);
   }

@@ -436,9 +448,9 @@ int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
                         const FRAME_TYPE frame_type,
                         const int use_fixed_qp_offsets,
                         const int is_stat_consumption_stage,
-                        const aom_tune_metric tuning) {
+                        const aom_tune_metric tuning, const MODE mode) {
   int64_t rdmult = av1_compute_rd_mult_based_on_qindex(bit_depth, update_type,
-                                                       qindex, tuning);
+                                                       qindex, tuning, mode);
   if (is_stat_consumption_stage && (use_fixed_qp_offsets == 0) &&
       (frame_type != KEY_FRAME)) {
     // Layer depth adjustment
@@ -506,7 +518,8 @@ int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
                    cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
                    layer_depth, boost_index, frame_type,
                    cpi->oxcf.q_cfg.use_fixed_qp_offsets,
-                   is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning) /
+                   is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+                   cpi->oxcf.mode) /
                beta);
 }
 #endif  // !CONFIG_REALTIME_ONLY
@@ -790,7 +803,8 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
       qindex_rdmult, cm->seq_params->bit_depth,
       cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
       boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
-      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+      cpi->oxcf.mode);
 #if CONFIG_RD_COMMAND
   if (cpi->oxcf.pass == 2) {
     const RD_COMMAND *rd_command = &cpi->rd_command;
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 0b79fd9d82..b97149c96c 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -238,7 +238,8 @@ struct macroblock;
  */
 int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
                                         FRAME_UPDATE_TYPE update_type,
-                                        int qindex, aom_tune_metric tuning);
+                                        int qindex, aom_tune_metric tuning,
+                                        MODE mode);

 int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
                         const FRAME_UPDATE_TYPE update_type,
@@ -246,7 +247,7 @@ int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
                         const FRAME_TYPE frame_type,
                         const int use_fixed_qp_offsets,
                         const int is_stat_consumption_stage,
-                        const aom_tune_metric tuning);
+                        const aom_tune_metric tuning, const MODE mode);

 void av1_initialize_rd_consts(struct AV1_COMP *cpi);

diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 546d100bd2..748cb1e99c 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1416,7 +1416,8 @@ static inline void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx,
       base_qindex, cm->seq_params->bit_depth,
       cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
       boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
-      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+      cpi->oxcf.mode);

   if (rdmult < 1) rdmult = 1;
   av1_set_error_per_bit(&x->errorperbit, rdmult);
@@ -1432,7 +1433,7 @@ static inline void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx,
       gf_group->update_type[cpi->gf_frame_index];
   tpl_frame->base_rdmult = av1_compute_rd_mult_based_on_qindex(
                                bd_info.bit_depth, update_type, base_qindex,
-                               cpi->oxcf.tune_cfg.tuning) /
+                               cpi->oxcf.tune_cfg.tuning, cpi->oxcf.mode) /
                            6;

   if (cpi->use_ducky_encode)
@@ -2297,7 +2298,8 @@ void av1_tpl_rdmult_setup_sb(AV1_COMP *cpi, MACROBLOCK *const x,
       orig_qindex_rdmult, cm->seq_params->bit_depth,
       cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
       boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
-      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+      cpi->oxcf.mode);

   const int new_qindex_rdmult = quant_params->base_qindex +
                                 x->rdmult_delta_qindex +
@@ -2306,7 +2308,8 @@ void av1_tpl_rdmult_setup_sb(AV1_COMP *cpi, MACROBLOCK *const x,
       new_qindex_rdmult, cm->seq_params->bit_depth,
       cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
       boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
-      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+      cpi->oxcf.mode);

   const double scaling_factor = (double)new_rdmult / (double)orig_rdmult;