Dev news

Commit 04d4ac92f0 for aom

commit 04d4ac92f009a2ed451836793d7aacd6ae0e1861
Author: Julio Barba <juliobbv@gmail.com>
Date:   Sat Feb 28 00:26:15 2026 -0500

    Extend tune IQ/SSIMULACRA2 to inter-frame encoding modes

    This is a "naive" extension of tune IQ and SSIMULACRA2 to work with
    libaom's inter-frame encoding (good-quality and realtime) modes.
    This is useful for layered image encoding.

    As part of this change, deltaq-mode=6 (Variance Boost) is also
    extended to good-quality and realtime modes.

    Even without any inter-frame-specific adjustments, tune IQ improves
    significantly over tune SSIM (both in good-quality mode,
    cpu-used=6) on two-layered image encoding (avifenc's
    `--progressive`) in 4:4:4 chroma subsampling mode (Daala's subset1):
    - SSIMULACRA 2 60: -13.7%
    - SSIMULACRA 2 70: -12.9%
    - SSIMULACRA 2 80: -11.6%
    - SSIMULACRA 2 90: -12.1%

    Change-Id: If3e6509d1ff5db8cbb2f9893ffcce2f4455eab84

diff --git a/aom/aomcx.h b/aom/aomcx.h
index af713bb72f..70c2dad23b 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -1732,10 +1732,10 @@ typedef enum {
  * Changes the encoder to tune for certain types of input material.
  *
  * \note
- * AOM_TUNE_IQ and AOM_TUNE_SSIMULACRA2 are restricted to all intra mode
- * (AOM_USAGE_ALL_INTRA). Setting the tuning option to either AOM_TUNE_IQ or
- * AOM_TUNE_SSIMULACRA2 causes the following options to be set (expressed as
- * command-line options):
+ * AOM_TUNE_IQ and AOM_TUNE_SSIMULACRA2 are meant for image encoding. Using
+ * these tuning modes for videos isn't recommended.
+ * Setting the tuning option to either AOM_TUNE_IQ or AOM_TUNE_SSIMULACRA2
+ * causes the following options to be set (expressed as command-line options):
  *   * --enable-qm=1
  *   * --qm-min=2
  *   * --qm-max=10
@@ -1744,6 +1744,7 @@ typedef enum {
  *   * --enable-cdef=3
  *   * --enable-chroma-deltaq=1
  *   * --deltaq-mode=6
+ *   * --screen-detection-mode=2
  * AOM_TUNE_IQ additionally sets the following options:
  *   * --enable-adaptive-sharpness=1
  */
@@ -1759,9 +1760,11 @@ typedef enum {
   AOM_TUNE_VMAF_SALIENCY_MAP = 9,
 /*!\brief Allows detection of the presence of AOM_TUNE_IQ at compile time. */
 #define AOM_HAVE_TUNE_IQ 1
-  /* Image quality (or intra quality). Increases image quality and consistency,
+  /* "Image Quality" tuning mode. Increases image quality and consistency,
    * guided by the SSIMULACRA 2 metric and subjective quality checks. Shares
    * the rdmult code with AOM_TUNE_SSIM.
+   * Note: AOM_TUNE_IQ is only meant to be used to encode a still image or a
+   * layered AVIF image.
    */
   AOM_TUNE_IQ = 10,
 /*!\brief Allows detection of the presence of AOM_TUNE_SSIMULACRA2 at compile
@@ -1778,6 +1781,8 @@ typedef enum {
    * AOM_TUNE_IQ. However, AOM_TUNE_SSIMULACRA2 fine-tunes the encoder in ways
    * that have been shown to not come with a corresponding positive impact on
    * subjective quality in human evaluations.
+   * Note: AOM_TUNE_SSIMULACRA2 is only meant to be used to encode a still
+   * image or a layered AVIF image.
    */
   AOM_TUNE_SSIMULACRA2 = 11,
 } aom_tune_metric;
diff --git a/av1/arg_defs.c b/av1/arg_defs.c
index e8809cbe4f..f69ee704ad 100644
--- a/av1/arg_defs.c
+++ b/av1/arg_defs.c
@@ -326,7 +326,7 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
   .enable_tpl_model = ARG_DEF(NULL, "enable-tpl-model", 1,
                               "RDO based on frame temporal dependency "
                               "(0: off, 1: backward source based); "
-                              "required for deltaq mode"),
+                              "required for --deltaq-mode=1"),
   .enable_keyframe_filtering = ARG_DEF(
       NULL, "enable-keyframe-filtering", 1,
       "Apply temporal filtering on key frame "
@@ -557,7 +557,7 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
               "Delta qindex mode (0: off, 1: deltaq objective (default), "
               "2: deltaq placeholder, 3: key frame visual quality, 4: user "
               "rating based visual quality optimization, 5: HDR video, 6: "
-              "Variance Boost all intra); requires --enable-tpl-model=1"),
+              "Variance Boost); --deltaq-mode=1 requires --enable-tpl-model=1"),
   .deltaq_strength = ARG_DEF(NULL, "deltaq-strength", 1,
                              "Deltaq strength for"
                              " --deltaq-mode=4 and --deltaq-mode=6 (%)"),
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 6d80fc76c6..5abd959d91 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -713,12 +713,6 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
   RANGE_CHECK_BOOL(extra_cfg, lossless);
   RANGE_CHECK_HI(extra_cfg, aq_mode, AQ_MODE_COUNT - 1);
   RANGE_CHECK_HI(extra_cfg, deltaq_mode, DELTA_Q_MODE_COUNT - 1);
-
-  if (cfg->g_usage != ALLINTRA &&
-      extra_cfg->deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
-    ERROR("Variance Boost (deltaq_mode = 6) can only be set in all intra mode");
-  }
-
   RANGE_CHECK_HI(extra_cfg, deltalf_mode, 1);
   RANGE_CHECK_HI(extra_cfg, frame_periodic_boost, 1);
 #if CONFIG_REALTIME_ONLY
@@ -1900,11 +1894,9 @@ static aom_codec_err_t ctrl_set_arnr_strength(aom_codec_alg_priv_t *ctx,
   return update_extra_cfg(ctx, &extra_cfg);
 }

-static aom_codec_err_t handle_tuning(aom_codec_alg_priv_t *ctx,
-                                     struct av1_extracfg *extra_cfg) {
+static aom_codec_err_t handle_tuning(struct av1_extracfg *extra_cfg) {
   if (extra_cfg->tuning == AOM_TUNE_IQ ||
       extra_cfg->tuning == AOM_TUNE_SSIMULACRA2) {
-    if (ctx->cfg.g_usage != AOM_USAGE_ALL_INTRA) return AOM_CODEC_INCAPABLE;
     // Enable QMs as they've been found to be beneficial for images, when used
     // with alternative QM formulas:
     // - aom_get_qmlevel_allintra()
@@ -1913,8 +1905,8 @@ static aom_codec_err_t handle_tuning(aom_codec_alg_priv_t *ctx,
     extra_cfg->enable_qm = 1;
     extra_cfg->qm_min = QM_FIRST_IQ_SSIMULACRA2;
     extra_cfg->qm_max = QM_LAST_IQ_SSIMULACRA2;
-    // We can turn on sharpness, as frames do not have to serve as references to
-    // others.
+    // Sharpness has been found to be beneficial for images (better perceptual
+    // quality).
     extra_cfg->sharpness = 7;
     // Using the QM-PSNR metric was found to be beneficial for images (over the
     // default PSNR metric), as it correlates better with subjective image
@@ -1932,6 +1924,8 @@ static aom_codec_err_t handle_tuning(aom_codec_alg_priv_t *ctx,
     extra_cfg->enable_chroma_deltaq = 1;
     // Enable "Variance Boost" deltaq mode, optimized for images.
     extra_cfg->deltaq_mode = DELTA_Q_VARIANCE_BOOST;
+    // Enable "anti-aliased text and graphics aware" screen detection mode.
+    extra_cfg->screen_detection_mode = AOM_SCREEN_DETECTION_ANTIALIASING_AWARE;
   }
   if (extra_cfg->tuning == AOM_TUNE_IQ) {
     // Enable adaptive sharpness to adjust loop filter levels according to QP.
@@ -1946,7 +1940,7 @@ static aom_codec_err_t ctrl_set_tuning(aom_codec_alg_priv_t *ctx,
                                        va_list args) {
   struct av1_extracfg extra_cfg = ctx->extra_cfg;
   extra_cfg.tuning = CAST(AOME_SET_TUNING, args);
-  aom_codec_err_t err = handle_tuning(ctx, &extra_cfg);
+  aom_codec_err_t err = handle_tuning(&extra_cfg);
   if (err != AOM_CODEC_OK) return err;
   return update_extra_cfg(ctx, &extra_cfg);
 }
@@ -4428,7 +4422,7 @@ static aom_codec_err_t encoder_set_option(aom_codec_alg_priv_t *ctx,
   } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.tune_metric, argv,
                               err_string)) {
     extra_cfg.tuning = arg_parse_enum_helper(&arg, err_string);
-    err = handle_tuning(ctx, &extra_cfg);
+    err = handle_tuning(&extra_cfg);
   }
 #if CONFIG_TUNE_VMAF
   else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.vmaf_model_path, argv,
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index c0a1227a6c..e7367c806b 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -885,8 +885,7 @@ void av1_set_quantizer(AV1_COMMON *const cm, int min_qmlevel, int max_qmlevel,

   // Disable deltaq in lossless mode.
   if (enable_chroma_deltaq && q) {
-    if (is_allintra &&
-        (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2)) {
+    if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
       int chroma_dc_delta_q = 0;
       int chroma_ac_delta_q = 0;

@@ -986,30 +985,28 @@ void av1_set_quantizer(AV1_COMMON *const cm, int min_qmlevel, int max_qmlevel,
   int (*get_luma_qmlevel)(int, int, int);
   int (*get_chroma_qmlevel)(int, int, int);

-  if (is_allintra) {
-    if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
-      if (tuning == AOM_TUNE_SSIMULACRA2) {
-        // Use luma QM formula specifically tailored for tune SSIMULACRA2
-        get_luma_qmlevel = aom_get_qmlevel_luma_ssimulacra2;
-      } else {
-        get_luma_qmlevel = aom_get_qmlevel_allintra;
-      }
-
-      if (cm->seq_params->subsampling_x == 0 &&
-          cm->seq_params->subsampling_y == 0) {
-        // 4:4:4 subsampling mode has 4x the number of chroma coefficients
-        // compared to 4:2:0 (2x on each dimension). This means the encoder
-        // should use lower chroma QM levels that more closely match the scaling
-        // of an equivalent 4:2:0 chroma QM.
-        get_chroma_qmlevel = aom_get_qmlevel_444_chroma;
-      } else {
-        // For all other chroma subsampling modes, use the all intra QM formula
-        get_chroma_qmlevel = aom_get_qmlevel_allintra;
-      }
+  if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
+    if (tuning == AOM_TUNE_SSIMULACRA2) {
+      // Use luma QM formula specifically tailored for tune SSIMULACRA2
+      get_luma_qmlevel = aom_get_qmlevel_luma_ssimulacra2;
     } else {
       get_luma_qmlevel = aom_get_qmlevel_allintra;
+    }
+
+    if (cm->seq_params->subsampling_x == 0 &&
+        cm->seq_params->subsampling_y == 0) {
+      // 4:4:4 subsampling mode has 4x the number of chroma coefficients
+      // compared to 4:2:0 (2x on each dimension). This means the encoder
+      // should use lower chroma QM levels that more closely match the scaling
+      // of an equivalent 4:2:0 chroma QM.
+      get_chroma_qmlevel = aom_get_qmlevel_444_chroma;
+    } else {
+      // For all other chroma subsampling modes, use the all intra QM formula
       get_chroma_qmlevel = aom_get_qmlevel_allintra;
     }
+  } else if (is_allintra) {
+    get_luma_qmlevel = aom_get_qmlevel_allintra;
+    get_chroma_qmlevel = aom_get_qmlevel_allintra;
   } else {
     get_luma_qmlevel = aom_get_qmlevel;
     get_chroma_qmlevel = aom_get_qmlevel;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index f708c7e33a..77df0a9fc5 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -167,9 +167,8 @@ enum {
   DELTA_Q_PERCEPTUAL = 2,     // Modulation to improve video perceptual quality
   DELTA_Q_PERCEPTUAL_AI = 3,  // Perceptual quality opt for all intra mode
   DELTA_Q_USER_RATING_BASED = 4,  // User rating based delta q mode
-  DELTA_Q_HDR = 5,  // QP adjustment based on HDR block pixel average
-  DELTA_Q_VARIANCE_BOOST =
-      6,              // Variance Boost style modulation for all intra mode
+  DELTA_Q_HDR = 5,             // QP adjustment based on HDR block pixel average
+  DELTA_Q_VARIANCE_BOOST = 6,  // Variance Boost style modulation
   DELTA_Q_MODE_COUNT  // This should always be the last member of the enum
 } UENUM1BYTE(DELTAQ_MODE);

@@ -829,9 +828,10 @@ typedef struct {
    * For values 1-7, eob and skip block optimization are
    * avoided and rdmult is adjusted in favor of block sharpness.
    *
-   * In all-intra mode: it also sets the `loop_filter_sharpness` syntax element
-   * in the bitstream. Larger values increasingly reduce how much the filtering
-   * can change the sample values on block edges to favor perceived sharpness.
+   * In all-intra mode or tune IQ or SSIMULACRA2: it also sets the
+   * `loop_filter_sharpness` syntax element in the bitstream. Larger values
+   * increasingly reduce how much the filtering can change the sample values on
+   * block edges to favor perceived sharpness.
    */
   int sharpness;

diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index 0b14bf870f..d498c0671d 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -824,7 +824,7 @@ void av1_cdef_search(AV1_COMP *cpi) {
   AV1_COMMON *cm = &cpi->common;
   CDEF_CONTROL cdef_control = cpi->oxcf.tool_cfg.cdef_control;
   const bool apply_adaptive_cdef =
-      cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA &&
+      cdef_control == CDEF_ADAPTIVE &&
       (cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ);

   assert(cdef_control != CDEF_NONE);
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index ecf4244b63..d7333aaedf 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -217,10 +217,17 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
   int disable_filter_rt_screen = 0;
   (void)sd;

-  // Enable loop filter sharpness only for allintra encoding mode,
-  // as frames do not have to serve as references to others
-  lf->sharpness_level =
-      cpi->oxcf.mode == ALLINTRA ? cpi->oxcf.algo_cfg.sharpness : 0;
+  // Enable loop filter sharpness only for all-intra encoding mode,
+  // or tune IQ or SSIMULACRA2. This is because:
+  // - All-intra: frames do not have to serve as references to others
+  // - Tune IQ/SSIMULACRA2: enabling loop filter sharpness has been found to
+  //   be beneficial for sharpness perception
+  if (cpi->oxcf.mode == ALLINTRA || cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) {
+    lf->sharpness_level = cpi->oxcf.algo_cfg.sharpness;
+  } else {
+    lf->sharpness_level = 0;
+  }

   if (cpi->oxcf.algo_cfg.enable_adaptive_sharpness) {
     // Loop filter sharpness levels are highly nonlinear. Visually, lf sharpness
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 7c14eab643..7939f7c217 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -2845,6 +2845,13 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
   const int is_arf2_bwd_type =
       cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;

+  if (cpi->oxcf.mode == ALLINTRA || cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) {
+    if (cm->quant_params.base_qindex <= 140) {
+      sf->lpf_sf.zero_low_cdef_strengths = 1;
+    }
+  }
+
   if (cpi->oxcf.mode == REALTIME) {
     if (speed >= 6) {
       const int qindex_thresh = boosted ? 190 : (is_720p_or_larger ? 120 : 150);
@@ -2856,12 +2863,6 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
     return;
   }

-  if (cpi->oxcf.mode == ALLINTRA) {
-    if (cm->quant_params.base_qindex <= 140) {
-      sf->lpf_sf.zero_low_cdef_strengths = 1;
-    }
-  }
-
   if (speed == 0) {
     // qindex_thresh for resolution < 720p
     const int qindex_thresh = boosted ? 70 : (is_arf2_bwd_type ? 110 : 140);
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 0269f4351e..546d100bd2 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1411,12 +1411,6 @@ static inline void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx,

   const int base_qindex =
       cpi->use_ducky_encode ? gf_group->q_val[frame_idx] : pframe_qindex;
-  // The TPL model is only meant to be run in inter mode, so ensure that we are
-  // not running in all intra mode, which implies we are not tuning for image
-  // quality (IQ) or SSIMULACRA2.
-  assert(cpi->oxcf.tune_cfg.tuning != AOM_TUNE_IQ &&
-         cpi->oxcf.tune_cfg.tuning != AOM_TUNE_SSIMULACRA2 &&
-         cpi->oxcf.mode != ALLINTRA);
   // Get rd multiplier set up.
   rdmult = av1_compute_rd_mult(
       base_qindex, cm->seq_params->bit_depth,
diff --git a/test/encode_api_test.cc b/test/encode_api_test.cc
index cc53d9291f..af86bb8315 100644
--- a/test/encode_api_test.cc
+++ b/test/encode_api_test.cc
@@ -190,21 +190,6 @@ TEST(EncodeAPI, InvalidControlId) {
   EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
 }

-TEST(EncodeAPI, TuneIqNotAllIntra) {
-  aom_codec_iface_t *iface = aom_codec_av1_cx();
-  aom_codec_enc_cfg_t cfg;
-  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME),
-            AOM_CODEC_OK);
-
-  aom_codec_ctx_t enc;
-  ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK);
-
-  ASSERT_EQ(aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIMULACRA2),
-            AOM_CODEC_INCAPABLE);
-
-  ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
-}
-
 void EncodeSetSFrameOnFirstFrame(aom_img_fmt fmt, aom_codec_flags_t flag) {
   constexpr int kWidth = 2;
   constexpr int kHeight = 128;