Commit 04d4ac92f0 for aom
commit 04d4ac92f009a2ed451836793d7aacd6ae0e1861
Author: Julio Barba <juliobbv@gmail.com>
Date: Sat Feb 28 00:26:15 2026 -0500
Extend tune IQ/SSIMULACRA2 to inter-frame encoding modes
This is a "naive" extension of tune IQ and SSIMULACRA2 to work with
libaom's inter-frame encoding (good-quality and realtime) modes.
This is useful for layered image encoding.
As part of this change, deltaq-mode=6 (Variance Boost) is also
extended to good-quality and realtime modes.
Even without any inter-frame-specific adjustments, tune IQ
significantly improves over tune SSIM (both in good-quality mode,
cpu-used=6) on two-layered image encoding (avifenc's
`--progressive`) in 4:4:4 chroma subsampling mode (Daala's subset1):
- SSIMULACRA 2 60: -13.7%
- SSIMULACRA 2 70: -12.9%
- SSIMULACRA 2 80: -11.6%
- SSIMULACRA 2 90: -12.1%
Change-Id: If3e6509d1ff5db8cbb2f9893ffcce2f4455eab84
diff --git a/aom/aomcx.h b/aom/aomcx.h
index af713bb72f..70c2dad23b 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -1732,10 +1732,10 @@ typedef enum {
* Changes the encoder to tune for certain types of input material.
*
* \note
- * AOM_TUNE_IQ and AOM_TUNE_SSIMULACRA2 are restricted to all intra mode
- * (AOM_USAGE_ALL_INTRA). Setting the tuning option to either AOM_TUNE_IQ or
- * AOM_TUNE_SSIMULACRA2 causes the following options to be set (expressed as
- * command-line options):
+ * AOM_TUNE_IQ and AOM_TUNE_SSIMULACRA2 are meant for image encoding. Using
+ * these tuning modes for videos isn't recommended.
+ * Setting the tuning option to either AOM_TUNE_IQ or AOM_TUNE_SSIMULACRA2
+ * causes the following options to be set (expressed as command-line options):
* * --enable-qm=1
* * --qm-min=2
* * --qm-max=10
@@ -1744,6 +1744,7 @@ typedef enum {
* * --enable-cdef=3
* * --enable-chroma-deltaq=1
* * --deltaq-mode=6
+ * * --screen-detection-mode=2
* AOM_TUNE_IQ additionally sets the following options:
* * --enable-adaptive-sharpness=1
*/
@@ -1759,9 +1760,11 @@ typedef enum {
AOM_TUNE_VMAF_SALIENCY_MAP = 9,
/*!\brief Allows detection of the presence of AOM_TUNE_IQ at compile time. */
#define AOM_HAVE_TUNE_IQ 1
- /* Image quality (or intra quality). Increases image quality and consistency,
+ /* "Image Quality" tuning mode. Increases image quality and consistency,
* guided by the SSIMULACRA 2 metric and subjective quality checks. Shares
* the rdmult code with AOM_TUNE_SSIM.
+ * Note: AOM_TUNE_IQ is only meant to be used to encode a still image or a
+ * layered AVIF image.
*/
AOM_TUNE_IQ = 10,
/*!\brief Allows detection of the presence of AOM_TUNE_SSIMULACRA2 at compile
@@ -1778,6 +1781,8 @@ typedef enum {
* AOM_TUNE_IQ. However, AOM_TUNE_SSIMULACRA2 fine-tunes the encoder in ways
* that have been shown to not come with a corresponding positive impact on
* subjective quality in human evaluations.
+ * Note: AOM_TUNE_SSIMULACRA2 is only meant to be used to encode a still
+ * image or a layered AVIF image.
*/
AOM_TUNE_SSIMULACRA2 = 11,
} aom_tune_metric;
diff --git a/av1/arg_defs.c b/av1/arg_defs.c
index e8809cbe4f..f69ee704ad 100644
--- a/av1/arg_defs.c
+++ b/av1/arg_defs.c
@@ -326,7 +326,7 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
.enable_tpl_model = ARG_DEF(NULL, "enable-tpl-model", 1,
"RDO based on frame temporal dependency "
"(0: off, 1: backward source based); "
- "required for deltaq mode"),
+ "required for --deltaq-mode=1"),
.enable_keyframe_filtering = ARG_DEF(
NULL, "enable-keyframe-filtering", 1,
"Apply temporal filtering on key frame "
@@ -557,7 +557,7 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
"Delta qindex mode (0: off, 1: deltaq objective (default), "
"2: deltaq placeholder, 3: key frame visual quality, 4: user "
"rating based visual quality optimization, 5: HDR video, 6: "
- "Variance Boost all intra); requires --enable-tpl-model=1"),
+ "Variance Boost); --deltaq-mode=1 requires --enable-tpl-model=1"),
.deltaq_strength = ARG_DEF(NULL, "deltaq-strength", 1,
"Deltaq strength for"
" --deltaq-mode=4 and --deltaq-mode=6 (%)"),
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 6d80fc76c6..5abd959d91 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -713,12 +713,6 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
RANGE_CHECK_BOOL(extra_cfg, lossless);
RANGE_CHECK_HI(extra_cfg, aq_mode, AQ_MODE_COUNT - 1);
RANGE_CHECK_HI(extra_cfg, deltaq_mode, DELTA_Q_MODE_COUNT - 1);
-
- if (cfg->g_usage != ALLINTRA &&
- extra_cfg->deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
- ERROR("Variance Boost (deltaq_mode = 6) can only be set in all intra mode");
- }
-
RANGE_CHECK_HI(extra_cfg, deltalf_mode, 1);
RANGE_CHECK_HI(extra_cfg, frame_periodic_boost, 1);
#if CONFIG_REALTIME_ONLY
@@ -1900,11 +1894,9 @@ static aom_codec_err_t ctrl_set_arnr_strength(aom_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
-static aom_codec_err_t handle_tuning(aom_codec_alg_priv_t *ctx,
- struct av1_extracfg *extra_cfg) {
+static aom_codec_err_t handle_tuning(struct av1_extracfg *extra_cfg) {
if (extra_cfg->tuning == AOM_TUNE_IQ ||
extra_cfg->tuning == AOM_TUNE_SSIMULACRA2) {
- if (ctx->cfg.g_usage != AOM_USAGE_ALL_INTRA) return AOM_CODEC_INCAPABLE;
// Enable QMs as they've been found to be beneficial for images, when used
// with alternative QM formulas:
// - aom_get_qmlevel_allintra()
@@ -1913,8 +1905,8 @@ static aom_codec_err_t handle_tuning(aom_codec_alg_priv_t *ctx,
extra_cfg->enable_qm = 1;
extra_cfg->qm_min = QM_FIRST_IQ_SSIMULACRA2;
extra_cfg->qm_max = QM_LAST_IQ_SSIMULACRA2;
- // We can turn on sharpness, as frames do not have to serve as references to
- // others.
+ // Sharpness has been found to be beneficial for images (better perceptual
+ // quality).
extra_cfg->sharpness = 7;
// Using the QM-PSNR metric was found to be beneficial for images (over the
// default PSNR metric), as it correlates better with subjective image
@@ -1932,6 +1924,8 @@ static aom_codec_err_t handle_tuning(aom_codec_alg_priv_t *ctx,
extra_cfg->enable_chroma_deltaq = 1;
// Enable "Variance Boost" deltaq mode, optimized for images.
extra_cfg->deltaq_mode = DELTA_Q_VARIANCE_BOOST;
+ // Enable "anti-aliased text and graphics aware" screen detection mode.
+ extra_cfg->screen_detection_mode = AOM_SCREEN_DETECTION_ANTIALIASING_AWARE;
}
if (extra_cfg->tuning == AOM_TUNE_IQ) {
// Enable adaptive sharpness to adjust loop filter levels according to QP.
@@ -1946,7 +1940,7 @@ static aom_codec_err_t ctrl_set_tuning(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.tuning = CAST(AOME_SET_TUNING, args);
- aom_codec_err_t err = handle_tuning(ctx, &extra_cfg);
+ aom_codec_err_t err = handle_tuning(&extra_cfg);
if (err != AOM_CODEC_OK) return err;
return update_extra_cfg(ctx, &extra_cfg);
}
@@ -4428,7 +4422,7 @@ static aom_codec_err_t encoder_set_option(aom_codec_alg_priv_t *ctx,
} else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.tune_metric, argv,
err_string)) {
extra_cfg.tuning = arg_parse_enum_helper(&arg, err_string);
- err = handle_tuning(ctx, &extra_cfg);
+ err = handle_tuning(&extra_cfg);
}
#if CONFIG_TUNE_VMAF
else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.vmaf_model_path, argv,
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index c0a1227a6c..e7367c806b 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -885,8 +885,7 @@ void av1_set_quantizer(AV1_COMMON *const cm, int min_qmlevel, int max_qmlevel,
// Disable deltaq in lossless mode.
if (enable_chroma_deltaq && q) {
- if (is_allintra &&
- (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2)) {
+ if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
int chroma_dc_delta_q = 0;
int chroma_ac_delta_q = 0;
@@ -986,30 +985,28 @@ void av1_set_quantizer(AV1_COMMON *const cm, int min_qmlevel, int max_qmlevel,
int (*get_luma_qmlevel)(int, int, int);
int (*get_chroma_qmlevel)(int, int, int);
- if (is_allintra) {
- if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
- if (tuning == AOM_TUNE_SSIMULACRA2) {
- // Use luma QM formula specifically tailored for tune SSIMULACRA2
- get_luma_qmlevel = aom_get_qmlevel_luma_ssimulacra2;
- } else {
- get_luma_qmlevel = aom_get_qmlevel_allintra;
- }
-
- if (cm->seq_params->subsampling_x == 0 &&
- cm->seq_params->subsampling_y == 0) {
- // 4:4:4 subsampling mode has 4x the number of chroma coefficients
- // compared to 4:2:0 (2x on each dimension). This means the encoder
- // should use lower chroma QM levels that more closely match the scaling
- // of an equivalent 4:2:0 chroma QM.
- get_chroma_qmlevel = aom_get_qmlevel_444_chroma;
- } else {
- // For all other chroma subsampling modes, use the all intra QM formula
- get_chroma_qmlevel = aom_get_qmlevel_allintra;
- }
+ if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
+ if (tuning == AOM_TUNE_SSIMULACRA2) {
+ // Use luma QM formula specifically tailored for tune SSIMULACRA2
+ get_luma_qmlevel = aom_get_qmlevel_luma_ssimulacra2;
} else {
get_luma_qmlevel = aom_get_qmlevel_allintra;
+ }
+
+ if (cm->seq_params->subsampling_x == 0 &&
+ cm->seq_params->subsampling_y == 0) {
+ // 4:4:4 subsampling mode has 4x the number of chroma coefficients
+ // compared to 4:2:0 (2x on each dimension). This means the encoder
+ // should use lower chroma QM levels that more closely match the scaling
+ // of an equivalent 4:2:0 chroma QM.
+ get_chroma_qmlevel = aom_get_qmlevel_444_chroma;
+ } else {
+ // For all other chroma subsampling modes, use the all intra QM formula
get_chroma_qmlevel = aom_get_qmlevel_allintra;
}
+ } else if (is_allintra) {
+ get_luma_qmlevel = aom_get_qmlevel_allintra;
+ get_chroma_qmlevel = aom_get_qmlevel_allintra;
} else {
get_luma_qmlevel = aom_get_qmlevel;
get_chroma_qmlevel = aom_get_qmlevel;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index f708c7e33a..77df0a9fc5 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -167,9 +167,8 @@ enum {
DELTA_Q_PERCEPTUAL = 2, // Modulation to improve video perceptual quality
DELTA_Q_PERCEPTUAL_AI = 3, // Perceptual quality opt for all intra mode
DELTA_Q_USER_RATING_BASED = 4, // User rating based delta q mode
- DELTA_Q_HDR = 5, // QP adjustment based on HDR block pixel average
- DELTA_Q_VARIANCE_BOOST =
- 6, // Variance Boost style modulation for all intra mode
+ DELTA_Q_HDR = 5, // QP adjustment based on HDR block pixel average
+ DELTA_Q_VARIANCE_BOOST = 6, // Variance Boost style modulation
DELTA_Q_MODE_COUNT // This should always be the last member of the enum
} UENUM1BYTE(DELTAQ_MODE);
@@ -829,9 +828,10 @@ typedef struct {
* For values 1-7, eob and skip block optimization are
* avoided and rdmult is adjusted in favor of block sharpness.
*
- * In all-intra mode: it also sets the `loop_filter_sharpness` syntax element
- * in the bitstream. Larger values increasingly reduce how much the filtering
- * can change the sample values on block edges to favor perceived sharpness.
+ * In all-intra mode, or with tune IQ or SSIMULACRA2: it also sets the
+ * `loop_filter_sharpness` syntax element in the bitstream. Larger values
+ * increasingly reduce how much the filtering can change the sample values on
+ * block edges to favor perceived sharpness.
*/
int sharpness;
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index 0b14bf870f..d498c0671d 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -824,7 +824,7 @@ void av1_cdef_search(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
CDEF_CONTROL cdef_control = cpi->oxcf.tool_cfg.cdef_control;
const bool apply_adaptive_cdef =
- cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA &&
+ cdef_control == CDEF_ADAPTIVE &&
(cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ);
assert(cdef_control != CDEF_NONE);
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index ecf4244b63..d7333aaedf 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -217,10 +217,17 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
int disable_filter_rt_screen = 0;
(void)sd;
- // Enable loop filter sharpness only for allintra encoding mode,
- // as frames do not have to serve as references to others
- lf->sharpness_level =
- cpi->oxcf.mode == ALLINTRA ? cpi->oxcf.algo_cfg.sharpness : 0;
+ // Enable loop filter sharpness only for all-intra encoding mode,
+ // or tune IQ or SSIMULACRA2. This is because:
+ // - All-intra: frames do not have to serve as references to others
+ // - Tune IQ/SSIMULACRA2: enabling loop filter sharpness has been found to
+ // be beneficial for sharpness perception
+ if (cpi->oxcf.mode == ALLINTRA || cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) {
+ lf->sharpness_level = cpi->oxcf.algo_cfg.sharpness;
+ } else {
+ lf->sharpness_level = 0;
+ }
if (cpi->oxcf.algo_cfg.enable_adaptive_sharpness) {
// Loop filter sharpness levels are highly nonlinear. Visually, lf sharpness
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 7c14eab643..7939f7c217 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -2845,6 +2845,13 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
const int is_arf2_bwd_type =
cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
+ if (cpi->oxcf.mode == ALLINTRA || cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) {
+ if (cm->quant_params.base_qindex <= 140) {
+ sf->lpf_sf.zero_low_cdef_strengths = 1;
+ }
+ }
+
if (cpi->oxcf.mode == REALTIME) {
if (speed >= 6) {
const int qindex_thresh = boosted ? 190 : (is_720p_or_larger ? 120 : 150);
@@ -2856,12 +2863,6 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
return;
}
- if (cpi->oxcf.mode == ALLINTRA) {
- if (cm->quant_params.base_qindex <= 140) {
- sf->lpf_sf.zero_low_cdef_strengths = 1;
- }
- }
-
if (speed == 0) {
// qindex_thresh for resolution < 720p
const int qindex_thresh = boosted ? 70 : (is_arf2_bwd_type ? 110 : 140);
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 0269f4351e..546d100bd2 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1411,12 +1411,6 @@ static inline void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx,
const int base_qindex =
cpi->use_ducky_encode ? gf_group->q_val[frame_idx] : pframe_qindex;
- // The TPL model is only meant to be run in inter mode, so ensure that we are
- // not running in all intra mode, which implies we are not tuning for image
- // quality (IQ) or SSIMULACRA2.
- assert(cpi->oxcf.tune_cfg.tuning != AOM_TUNE_IQ &&
- cpi->oxcf.tune_cfg.tuning != AOM_TUNE_SSIMULACRA2 &&
- cpi->oxcf.mode != ALLINTRA);
// Get rd multiplier set up.
rdmult = av1_compute_rd_mult(
base_qindex, cm->seq_params->bit_depth,
diff --git a/test/encode_api_test.cc b/test/encode_api_test.cc
index cc53d9291f..af86bb8315 100644
--- a/test/encode_api_test.cc
+++ b/test/encode_api_test.cc
@@ -190,21 +190,6 @@ TEST(EncodeAPI, InvalidControlId) {
EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
}
-TEST(EncodeAPI, TuneIqNotAllIntra) {
- aom_codec_iface_t *iface = aom_codec_av1_cx();
- aom_codec_enc_cfg_t cfg;
- ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME),
- AOM_CODEC_OK);
-
- aom_codec_ctx_t enc;
- ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK);
-
- ASSERT_EQ(aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIMULACRA2),
- AOM_CODEC_INCAPABLE);
-
- ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
-}
-
void EncodeSetSFrameOnFirstFrame(aom_img_fmt fmt, aom_codec_flags_t flag) {
constexpr int kWidth = 2;
constexpr int kHeight = 128;