Commit 4265e4aa4b for aom
commit 4265e4aa4b4ee9de380f3f8a82c74438cd5f2acf
Author: Julio Barba <juliobbv@gmail.com>
Date: Tue Mar 10 13:05:10 2026 -0400
Optimize tune IQ and SSIMULACRA2 for inter-frame encoding
- Adjust speed features for better image coding performance
- Bias encoder toward picking intra-coded block candidates
Approximate gains in good-quality mode, cpu-used=6, on two-layer
progressive image encoding in 4:4:4 chroma subsampling mode
(Daala's subset1):
- SSIMULACRA 2 60: -2.1%
- SSIMULACRA 2 70: -3.1%
- SSIMULACRA 2 80: -3.2%
- SSIMULACRA 2 90: -3.2%
Change-Id: I7d88835ac993a11f74d7644be501ed0b56d7ba9a
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 01e4bf7227..3e49001a04 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -784,7 +784,25 @@ static void get_variance_stats(const MACROBLOCK *x, int64_t *src_var,
}
static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x,
- RD_STATS *rd_cost) {
+ RD_STATS *rd_cost, bool is_inter_pred) {
+ if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) &&
+ is_inter_pred) {
+ // Tune IQ and SSIMULACRA2 are often used to encode layered AVIFs, where
+ // keyframes can be encoded at a lower quality (i.e. higher QP) than
+ // inter-coded frames.
+ // In this case, libaom tends to underestimate the true RD cost of inter
+ // prediction candidates, causing encoded file size to increase without a
+ // corresponding increase in quality.
+ // To compensate for this effect, make inter block candidates appear more
+ // expensive to the encoder to slightly bias toward intra prediction.
+ // Doing this increases overall compression efficiency, while still allowing
+ // the encoder to pick inter prediction when it's beneficial.
+ rd_cost->dist += rd_cost->dist >> 3;
+ rd_cost->rdcost += rd_cost->rdcost >> 3;
+ return;
+ }
+
if (cpi->oxcf.algo_cfg.sharpness != 3) return;
if (frame_is_kf_gf_arf(cpi)) return;
@@ -807,7 +825,14 @@ static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x,
}
static void adjust_cost(const AV1_COMP *cpi, const MACROBLOCK *x,
- int64_t *rd_cost) {
+ int64_t *rd_cost, bool is_inter_pred) {
+ if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) &&
+ is_inter_pred) {
+ *rd_cost += *rd_cost >> 3;
+ return;
+ }
+
if (cpi->oxcf.algo_cfg.sharpness != 3) return;
if (frame_is_kf_gf_arf(cpi)) return;
@@ -1851,9 +1876,13 @@ static int64_t motion_mode_rd(
}
}
- adjust_cost(cpi, x, &this_yrd);
- adjust_rdcost(cpi, x, rd_stats);
- adjust_rdcost(cpi, x, rd_stats_y);
+ if (this_yrd < INT64_MAX) {
+ adjust_cost(cpi, x, &this_yrd, /*is_inter_pred=*/true);
+ }
+ adjust_rdcost(cpi, x, rd_stats, /*is_inter_pred=*/true);
+ if (rd_stats_y->rdcost < INT64_MAX) {
+ adjust_rdcost(cpi, x, rd_stats_y, /*is_inter_pred=*/true);
+ }
const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
if (mode_index == 0) {
@@ -5787,7 +5816,7 @@ static inline void search_intra_modes_in_interframe(
&best_model_rd, top_intra_model_rd);
if (intra_rd_y < INT64_MAX) {
- adjust_cost(cpi, x, &intra_rd_y);
+ adjust_cost(cpi, x, &intra_rd_y, /*is_inter_pred=*/false);
}
if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
@@ -5871,7 +5900,7 @@ static inline void search_intra_modes_in_interframe(
intra_rd_stats.rdcost = this_rd;
- adjust_rdcost(cpi, x, &intra_rd_stats);
+ adjust_rdcost(cpi, x, &intra_rd_stats, /*is_inter_pred=*/false);
// Collect mode stats for multiwinner mode processing
const int txfm_search_done = 1;
@@ -6343,8 +6372,8 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
ref_frame_rd[ref_frame] = this_rd;
}
- adjust_cost(cpi, x, &this_rd);
- adjust_rdcost(cpi, x, &rd_stats);
+ adjust_cost(cpi, x, &this_rd, /*is_inter_pred=*/true);
+ adjust_rdcost(cpi, x, &rd_stats, /*is_inter_pred=*/true);
// Did this mode help, i.e., is it the new best mode
if (this_rd < search_state.best_rd) {
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 876ed33e29..7c14eab643 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1062,6 +1062,11 @@ static void set_good_speed_feature_framesize_dependent(
if (cpi->oxcf.enable_low_complexity_decode)
set_good_speed_features_lc_dec_framesize_dependent(cpi, sf, speed);
+
+ if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) {
+ sf->intra_sf.skip_intra_in_interframe = 0;
+ }
}
static void set_good_speed_features_framesize_independent(
@@ -1475,6 +1480,32 @@ static void set_good_speed_features_framesize_independent(
sf->tx_sf.adaptive_txb_search_level = 0;
sf->tx_sf.tx_type_search.use_skip_flag_prediction = 0;
}
+
+ // Set speed features for the IQ and SSIMULACRA2 tuning modes.
+ // Layered image encoding has different requirements than regular video
+ // coding.
+ // Mainly, most of these speed features undo an implicit assumption that
+ // keyframes are encoded at a better quality than inter-coded frames.
+ // This means the encoder needs to be more thorough at considering and
+ // performing RDO on intra block candidates vs. inter block candidates for
+ // the best compression efficiency.
+ // Finally, enabling certain coding tools is beneficial for layered image
+ // encoding in general.
+ if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) {
+ sf->intra_sf.skip_intra_in_interframe = 0;
+ sf->inter_sf.inter_mode_rd_model_estimation = 0;
+ sf->mv_sf.use_intrabc = 1;
+
+ // Don't prune intra candidates too aggressively, as it can cause more
+ // expensive inter candidates to be chosen instead
+ if (sf->intra_sf.intra_pruning_with_hog > 3) {
+ sf->intra_sf.intra_pruning_with_hog = 3;
+ }
+ if (sf->intra_sf.chroma_intra_pruning_with_hog > 3) {
+ sf->intra_sf.chroma_intra_pruning_with_hog = 3;
+ }
+ }
}
static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
@@ -2163,6 +2194,11 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
sf->winner_mode_sf.dc_blk_pred_level = 3;
}
+
+ if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) {
+ sf->intra_sf.skip_intra_in_interframe = 0;
+ }
}
static inline void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {