Commit 673071a141 for aom
commit 673071a1411e240e3cf13982ff2a1eb3e3438dd1
Author: Yunqing Wang <yunqingwang@google.com>
Date: Mon May 18 11:34:43 2026 -0700
Weighted chroma distortion
This optimization is enabled for low-complexity mode at
speed 3. This gives over 2% AVG decoder time reduction
with a reasonable complexity-to-efficiency tradeoff.
Bug: 505128814
STATS_CHANGED for speed 3 LC mode
Change-Id: Ib1a25a22d9d96ba5d24df7c7f67c3318e15043de
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index c85cc9a8a5..55b17fce52 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -179,6 +179,32 @@ static inline void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
#endif
}
+static inline void av1_merge_rd_stats_weighted(RD_STATS *rd_stats_dst,
+ const RD_STATS *rd_stats_src) {  // Merge src into dst; src dist/sse x 15/16.
+ if (rd_stats_dst->rate == INT_MAX || rd_stats_src->rate == INT_MAX) {
+ // A rate of INT_MAX on either side marks the stats as invalid, so the
+ // merged result is invalidated as well and nothing is accumulated.
+ av1_invalid_rd_stats(rd_stats_dst);
+ return;
+ }
+ rd_stats_dst->rate = (int)AOMMIN(  // sum in 64 bits, then cap at INT_MAX
+ ((int64_t)rd_stats_dst->rate + (int64_t)rd_stats_src->rate), INT_MAX);
+ if (!rd_stats_dst->zero_rate)  // only filled in while dst's is still 0
+ rd_stats_dst->zero_rate = rd_stats_src->zero_rate;
+ rd_stats_dst->dist += rd_stats_src->dist * 15 / 16;  // only src is scaled
+ if (rd_stats_dst->sse < INT64_MAX && rd_stats_src->sse < INT64_MAX) {
+ rd_stats_dst->sse += rd_stats_src->sse * 15 / 16;  // same 15/16 weight
+ }
+ rd_stats_dst->skip_txfm &= rd_stats_src->skip_txfm;  // AND of both flags
+#if CONFIG_RD_DEBUG
+ // NOTE: this may run into problems when monochrome video is
+ // encoded, as there will only be 1 plane.
+ for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
+ }
+#endif
+}
+
static inline void av1_accumulate_rd_stats(RD_STATS *rd_stats, int64_t dist,
int rate, int skip_txfm, int64_t sse,
int zero_rate) {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 706e08e316..62ef4815af 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3974,34 +3974,41 @@ static inline void refine_winner_mode_tx(
const int comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
const ModeCosts *mode_costs = &x->mode_costs;
+ int64_t this_dist = rd_stats_y.dist + rd_stats_uv.dist;
+ int64_t this_sse = rd_stats_y.sse + rd_stats_uv.sse;
+ if (cpi->sf.hl_sf.weighted_chroma_distortion) {
+ this_dist = rd_stats_y.dist + rd_stats_uv.dist * 15 / 16;
+ this_sse = rd_stats_y.sse + rd_stats_uv.sse * 15 / 16;
+ }
+
if (is_inter_mode(mbmi->mode) &&
(!cpi->oxcf.algo_cfg.sharpness || !comp_pred) &&
RDCOST(x->rdmult,
mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
rd_stats_uv.rate,
- (rd_stats_y.dist + rd_stats_uv.dist)) >
- RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
- (rd_stats_y.sse + rd_stats_uv.sse))) {
+ this_dist) > RDCOST(x->rdmult,
+ mode_costs->skip_txfm_cost[skip_ctx][1],
+ this_sse)) {
skip_blk = 1;
rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
rd_stats_uv.rate = 0;
rd_stats_y.dist = rd_stats_y.sse;
rd_stats_uv.dist = rd_stats_uv.sse;
+ this_dist = this_sse;
} else {
skip_blk = 0;
rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
}
int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
winner_rate_y - winner_rate_uv;
- int64_t this_rd =
- RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
+ int64_t this_rd = RDCOST(x->rdmult, this_rate, this_dist);
if (best_rd > this_rd) {
*best_mbmode = *mbmi;
*best_mode_index = winner_mode_index;
av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
rd_cost->rate = this_rate;
- rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
- rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
+ rd_cost->dist = this_dist;
+ rd_cost->sse = this_sse;
rd_cost->rdcost = this_rd;
best_rd = this_rd;
*best_skip2 = skip_blk;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 65189e2df6..ebed82e788 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -704,6 +704,8 @@ static void set_good_speed_features_lc_dec_framesize_independent(
(update_type != OVERLAY_UPDATE && update_type != INTNL_OVERLAY_UPDATE)
? 1
: 0;
+
+ if (speed == 3) sf->hl_sf.weighted_chroma_distortion = 1;
}
static void set_good_speed_feature_framesize_dependent(
@@ -2253,6 +2255,7 @@ static inline void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {
hl_sf->allow_sub_blk_me_in_tf = 0;
hl_sf->ref_frame_mvs_lvl = 0;
hl_sf->screen_detection_mode2_fast_detection = 0;
+ hl_sf->weighted_chroma_distortion = 0;
}
static inline void init_fp_sf(FIRST_PASS_SPEED_FEATURES *fp_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 47ef9be7fc..3b35fc21e7 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -504,6 +504,13 @@ typedef struct HIGH_LEVEL_SPEED_FEATURES {
* 1: Fast detection
*/
int screen_detection_mode2_fast_detection;
+
+ /*!
+ * Decide whether to enable weighted chroma distortion.
+ * 0: Disable
+ * 1: Enable
+ */
+ int weighted_chroma_distortion;
} HIGH_LEVEL_SPEED_FEATURES;
/*!
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 48ced2426d..df78888ea0 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -3855,7 +3855,14 @@ int av1_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
const int is_cost_valid_uv =
av1_txfm_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_chroma_rd);
if (!is_cost_valid_uv) return 0;
- av1_merge_rd_stats(rd_stats, rd_stats_uv);
+
+ if (cpi->sf.hl_sf.weighted_chroma_distortion) {
+ // Apply weighted distortion/SSE accumulation while merging uv rd stats to
+ // y rd stats.
+ av1_merge_rd_stats_weighted(rd_stats, rd_stats_uv);
+ } else {
+ av1_merge_rd_stats(rd_stats, rd_stats_uv);
+ }
}
int choose_skip_txfm = rd_stats->skip_txfm;