Commit e7837adfe8 for aom
commit e7837adfe8d62f58dab48f991f93805d9d220f6c
Author: Julio Barba <juliobbv@gmail.com>
Date: Sun Mar 22 22:16:58 2026 -0400
Optimize tune IQ/SSIMULACRA2 for realtime mode
Up to 30% file size savings for the same SSIMULACRA2 score on
two-layer progressive image encoding in 4:4:4 chroma subsampling
mode (Daala's subset1).
Change-Id: Iad3721ec3eed592758f678dab9c8c2ac9274d237
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index d3aabdb5e0..99f90fe641 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -647,7 +647,8 @@ void av1_cyclic_refresh_setup(AV1_COMP *const cpi) {
qindex2, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+ cpi->oxcf.mode);
av1_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta);
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index e7367c806b..aeda245241 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -826,7 +826,8 @@ void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x,
qindex_rdmult, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+ cpi->oxcf.mode);
const int qindex_change = x->qindex != qindex;
if (qindex_change || do_update) {
diff --git a/av1/encoder/enc_enums.h b/av1/encoder/enc_enums.h
index 98072b3179..142fce61e8 100644
--- a/av1/encoder/enc_enums.h
+++ b/av1/encoder/enc_enums.h
@@ -265,6 +265,17 @@ enum {
USE_LARGESTALL,
} UENUM1BYTE(TX_SIZE_SEARCH_METHOD);
+enum {
+ // Good Quality Fast Encoding. The encoder balances quality with the amount of
+ // time it takes to encode the output. Speed setting controls how fast.
+ GOOD,
+ // Realtime Fast Encoding. Will force some restrictions on bitrate
+ // constraints.
+ REALTIME,
+ // All intra mode. All the frames are coded as intra frames.
+ ALLINTRA
+} UENUM1BYTE(MODE);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h
index 46d36c85fd..8cd163ad18 100644
--- a/av1/encoder/encodeframe_utils.h
+++ b/av1/encoder/encodeframe_utils.h
@@ -318,7 +318,7 @@ static inline int set_rdmult(const AV1_COMP *const cpi,
return av1_compute_rd_mult(
qindex, bit_depth, update_type, layer_depth, boost_index, frame_type,
cpi->oxcf.q_cfg.use_fixed_qp_offsets, is_stat_consumption_stage(cpi),
- cpi->oxcf.tune_cfg.tuning);
+ cpi->oxcf.tune_cfg.tuning, cpi->oxcf.mode);
}
static inline int do_split_check(BLOCK_SIZE bsize) {
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 85895ada3d..5093bfd243 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3910,7 +3910,7 @@ static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
// Note: Both use common rdmult based on base qindex of fullres.
const int64_t rdmult = av1_compute_rd_mult_based_on_qindex(
bit_depth, update_type, cm->quant_params.base_qindex,
- cpi->oxcf.tune_cfg.tuning);
+ cpi->oxcf.tune_cfg.tuning, cpi->oxcf.mode);
// Find the best rdcost among all superres denoms.
int best_denom = -1;
@@ -3975,7 +3975,7 @@ static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
// Note: Both use common rdmult based on base qindex of fullres.
const int64_t rdmult = av1_compute_rd_mult_based_on_qindex(
bit_depth, update_type, cm->quant_params.base_qindex,
- cpi->oxcf.tune_cfg.tuning);
+ cpi->oxcf.tune_cfg.tuning, cpi->oxcf.mode);
proj_rdcost1 =
RDCOST_DBL_WITH_NATIVE_BD_DIST(rdmult, rate1, sse1, bit_depth);
const double proj_rdcost2 =
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 77df0a9fc5..0bac37011e 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -107,17 +107,6 @@ typedef struct aom_rational64 {
int den; // fraction denominator
} aom_rational64_t; // alias for struct aom_rational
-enum {
- // Good Quality Fast Encoding. The encoder balances quality with the amount of
- // time it takes to encode the output. Speed setting controls how fast.
- GOOD,
- // Realtime Fast Encoding. Will force some restrictions on bitrate
- // constraints.
- REALTIME,
- // All intra mode. All the frames are coded as intra frames.
- ALLINTRA
-} UENUM1BYTE(MODE);
-
enum {
FRAMEFLAGS_KEY = 1 << 0,
FRAMEFLAGS_GOLDEN = 1 << 1,
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 2bc36d3937..9655c0395d 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -526,7 +526,8 @@ void av1_apply_roi_map(AV1_COMP *cpi) {
qindex, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], 0, 15,
INTER_FRAME, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+ cpi->oxcf.mode);
}
} else {
av1_disable_segmentation(seg);
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index fcca31242e..7005da4f60 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -386,7 +386,8 @@ static double def_kf_rd_multiplier(int qindex) {
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
FRAME_UPDATE_TYPE update_type,
- int qindex, aom_tune_metric tuning) {
+ int qindex, aom_tune_metric tuning,
+ MODE mode) {
const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
int64_t rdmult = q * q;
if (update_type == KF_UPDATE) {
@@ -401,21 +402,32 @@ int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
}
if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
- // Further multiply rdmult (by up to 200/128 = 1.5625) to improve image
- // quality. The most noticeable effect is a mild bias towards choosing
- // larger transform sizes (e.g. one 16x16 transform instead of 4 8x8
- // transforms).
- // For very high qindexes, start progressively reducing the weight towards
- // unity (128/128), as transforms are large enough and making them even
- // larger actually harms subjective quality and SSIMULACRA 2 scores.
- // This weight part of the equation was determined by iteratively increasing
- // weight on CID22 and Daala's subset1, and observing its effects on visual
+ int weight;
+
+ // Weight terms were determined by iteratively testing various weights
+ // on CID22 and Daala's subset1, and observing their effects on visual
+ // quality and SSIMULACRA 2 scores along the usable (0-100) range.
- // The ramp-down part of the equation was determined by choosing a fixed
- // initial qindex point [qindex 159 = (255 - 159) * 3 / 4] where SSIMULACRA
- // 2 scores for encodes with qindexes greater than 159 scored at or above
- // their equivalents with no rdmult adjustment.
- const int weight = clamp(((255 - qindex) * 3) / 4, 0, 72) + 128;
+ if (mode == REALTIME) {
+ // Realtime mode: further scale rdmult down to a quarter of its value
+ // (32/128 = 0.25) to improve image quality.
+ // The most noticeable effect is that for inter frames, there's a
+ // stronger bias towards choosing inter prediction modes with encoded
+ // coefficient residuals (i.e. no skip mode).
+ weight = 32;
+ } else {
+ // All-intra and good-quality modes: Further multiply rdmult (by up to
+ // 200/128 = 1.5625) to improve image quality.
+ // The most noticeable effect is a mild bias towards choosing larger
+ // transform sizes (e.g. one 16x16 transform instead of 4 8x8 transforms).
+ // For very high qindexes, start progressively reducing the weight towards
+ // unity (128/128), as transforms are large enough and making them even
+ // larger actually harms subjective quality and SSIMULACRA 2 scores.
+ // The ramp-down part of the equation was determined by choosing a fixed
+ // initial qindex point [qindex 159, where (255 - 159) * 3 / 4 = 72] such
+ // that encodes with qindexes greater than 159 had SSIMULACRA 2 scores at
+ // or above their equivalents with no rdmult adjustment.
+ weight = clamp(((255 - qindex) * 3) / 4, 0, 72) + 128;
+ }
rdmult = (int64_t)((double)rdmult * weight / 128.0);
}
@@ -436,9 +448,9 @@ int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
const FRAME_TYPE frame_type,
const int use_fixed_qp_offsets,
const int is_stat_consumption_stage,
- const aom_tune_metric tuning) {
+ const aom_tune_metric tuning, const MODE mode) {
int64_t rdmult = av1_compute_rd_mult_based_on_qindex(bit_depth, update_type,
- qindex, tuning);
+ qindex, tuning, mode);
if (is_stat_consumption_stage && (use_fixed_qp_offsets == 0) &&
(frame_type != KEY_FRAME)) {
// Layer depth adjustment
@@ -506,7 +518,8 @@ int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
layer_depth, boost_index, frame_type,
cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning) /
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+ cpi->oxcf.mode) /
beta);
}
#endif // !CONFIG_REALTIME_ONLY
@@ -790,7 +803,8 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
qindex_rdmult, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+ cpi->oxcf.mode);
#if CONFIG_RD_COMMAND
if (cpi->oxcf.pass == 2) {
const RD_COMMAND *rd_command = &cpi->rd_command;
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 0b79fd9d82..b97149c96c 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -238,7 +238,8 @@ struct macroblock;
*/
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
FRAME_UPDATE_TYPE update_type,
- int qindex, aom_tune_metric tuning);
+ int qindex, aom_tune_metric tuning,
+ MODE mode);
int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
const FRAME_UPDATE_TYPE update_type,
@@ -246,7 +247,7 @@ int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
const FRAME_TYPE frame_type,
const int use_fixed_qp_offsets,
const int is_stat_consumption_stage,
- const aom_tune_metric tuning);
+ const aom_tune_metric tuning, const MODE mode);
void av1_initialize_rd_consts(struct AV1_COMP *cpi);
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 546d100bd2..748cb1e99c 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1416,7 +1416,8 @@ static inline void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx,
base_qindex, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+ cpi->oxcf.mode);
if (rdmult < 1) rdmult = 1;
av1_set_error_per_bit(&x->errorperbit, rdmult);
@@ -1432,7 +1433,7 @@ static inline void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx,
gf_group->update_type[cpi->gf_frame_index];
tpl_frame->base_rdmult = av1_compute_rd_mult_based_on_qindex(
bd_info.bit_depth, update_type, base_qindex,
- cpi->oxcf.tune_cfg.tuning) /
+ cpi->oxcf.tune_cfg.tuning, cpi->oxcf.mode) /
6;
if (cpi->use_ducky_encode)
@@ -2297,7 +2298,8 @@ void av1_tpl_rdmult_setup_sb(AV1_COMP *cpi, MACROBLOCK *const x,
orig_qindex_rdmult, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+ cpi->oxcf.mode);
const int new_qindex_rdmult = quant_params->base_qindex +
x->rdmult_delta_qindex +
@@ -2306,7 +2308,8 @@ void av1_tpl_rdmult_setup_sb(AV1_COMP *cpi, MACROBLOCK *const x,
new_qindex_rdmult, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning,
+ cpi->oxcf.mode);
const double scaling_factor = (double)new_rdmult / (double)orig_rdmult;