Commit 4a527cabbb for aom
commit 4a527cabbbeffeb8ddd122f15534c9f6bbc5f49c
Author: Marco Paniconi <marpan@google.com>
Date: Wed Jun 17 14:42:24 2026 -0700
rtc: Implement intrabc for nonrd pickmode
Allows intrabc to be used in nonrd_pickmode (speed >= 7).
For rtc_screen, with enable-intrabc=1 and kf-max/min-dist=1,
the stats change across speeds 7-11:
~20% bdrate gain, ~25-40% slowdown, depending on content and speed.
Small/neutral bdrate change, with a slowdown, on clips with mixed content
(text + video playing) (i.e., screenshare_yt_soccer_scroll).
For REALTIME_ONLY intrabc is off by default.
Change-Id: I0e7738a1aac6b80ac8630a2956618493d676861b
diff --git a/av1/encoder/nonrd_opt.h b/av1/encoder/nonrd_opt.h
index 9f81c0a588..ce625e4a77 100644
--- a/av1/encoder/nonrd_opt.h
+++ b/av1/encoder/nonrd_opt.h
@@ -526,6 +526,7 @@ static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
mbmi->ref_frame[1] = ref_frame1;
pmi->palette_size[PLANE_TYPE_Y] = 0;
pmi->palette_size[PLANE_TYPE_UV] = 0;
+ mbmi->use_intrabc = 0;
mbmi->filter_intra_mode_info.use_filter_intra = 0;
mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
mbmi->motion_mode = SIMPLE_TRANSLATION;
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index aa0d2e2a6e..657d372837 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1579,6 +1579,135 @@ static bool should_prune_intra_modes_using_neighbors(
this_mode != left_mode;
}
+static void av1_search_intrabc_nonrd(AV1_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, RD_STATS *this_rdc,
+ int_mv *best_dv) {
+ AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mi = xd->mi[0];
+ const int mi_row = xd->mi_row;
+ const int mi_col = xd->mi_col;
+ const int num_planes = av1_num_planes(cm);
+ // Intrabc trial: search a DV in the current frame and cost it into this_rdc.
+ this_rdc->rdcost = INT64_MAX;  // stays INT64_MAX if no valid DV is found
+ // NOTE: *best_dv is written only when a valid DV is found and costed.
+ // Save pre[0] (shared across blocks) so it can be restored before returning.
+ struct buf_2d ori_pre[MAX_MB_PLANE];
+ for (int i = 0; i < num_planes; ++i) ori_pre[i] = xd->plane[i].pre[0];
+ // Point pre[0] at buffers set up from the current frame (xd->cur_buf).
+ struct buf_2d yv12_mb[MAX_MB_PLANE];
+ av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
+ for (int i = 0; i < num_planes; ++i) {
+ xd->plane[i].pre[0] = yv12_mb[i];
+ }
+ // Build the INTRA_FRAME ref MV stack and read nearest/near from it.
+ MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
+ const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
+ av1_find_mv_refs(cm, xd, mi, ref_frame, mbmi_ext->ref_mv_count,
+ xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+ mbmi_ext->mode_context);
+ av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
+ int_mv nearestmv, nearmv;
+ av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
+ 0);
+ // Treat missing candidates (INVALID_MV) as zero.
+ if (nearestmv.as_int == INVALID_MV) {
+ nearestmv.as_int = 0;
+ }
+ if (nearmv.as_int == INVALID_MV) {
+ nearmv.as_int = 0;
+ }
+ // Prefer nearest, then near; if both are zero fall back to av1_find_ref_dv().
+ int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
+ if (dv_ref.as_int == 0) {
+ av1_find_ref_dv(&dv_ref, &xd->tile, cm->seq_params->mib_size, mi_row);
+ }
+ // Ref DV should not have sub-pel.
+ assert((dv_ref.as_mv.col & 7) == 0);
+ assert((dv_ref.as_mv.row & 7) == 0);
+ mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;  // store dv_ref as entry 0 of the INTRA_FRAME stack
+ // Set up a full-pel search that starts at dv_ref and is switched to intra mode.
+ FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
+ const SEARCH_METHODS search_method =
+ av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
+ const search_site_config *lookahead_search_sites =
+ cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
+ const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
+ av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
+ &dv_ref.as_mv, start_mv,
+ lookahead_search_sites, search_method,
+ /*fine_search_interval=*/0);
+ av1_set_ms_to_intra_mode(&fullms_params, x->dv_costs);  // the limits below are tile bounds relative to this block
+ fullms_params.mv_limits.col_min = (xd->tile.mi_col_start - mi_col) * MI_SIZE;
+ fullms_params.mv_limits.col_max =
+ (xd->tile.mi_col_end - mi_col) * MI_SIZE - block_size_wide[bsize];
+ fullms_params.mv_limits.row_min = (xd->tile.mi_row_start - mi_row) * MI_SIZE;
+ fullms_params.mv_limits.row_max =
+ (xd->tile.mi_row_end - mi_row) * MI_SIZE - block_size_high[bsize];  // max leaves room for the block's own size
+ int_mv best_mv;
+ int bestsme = INT_MAX;  // INT_MAX is used as the "no match yet" marker
+ if (!cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks || bsize <= BLOCK_8X8) {  // hash search; skipped when the flag is set and bsize > BLOCK_8X8
+ bestsme = av1_intrabc_hash_search(
+ cpi, xd, &fullms_params, &x->intrabc_hash_info, &best_mv.as_fullmv);
+ }
+ if (bestsme == INT_MAX) {  // hash search skipped or left bestsme at INT_MAX: run the full-pixel search
+ FULLPEL_MV_STATS best_mv_stats;
+ bestsme = av1_full_pixel_search(start_mv, &fullms_params,
+ cpi->mv_search_params.mv_step_param, NULL,
+ &best_mv.as_fullmv, &best_mv_stats, NULL);
+ }
+ if (bestsme != INT_MAX) {
+ MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
+ if (av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
+ cm->seq_params->mib_size_log2)) {  // below: snapshot the mi fields the trial modifies
+ const int8_t ori_mi_use_intrabc = mi->use_intrabc;
+ const int_mv ori_mi_mv0 = mi->mv[0];
+ const PREDICTION_MODE ori_mi_mode = mi->mode;
+ const MV_REFERENCE_FRAME ori_ref0 = mi->ref_frame[0];
+ // Temporarily configure mi as an intrabc block for the trial below.
+ mi->use_intrabc = 1;
+ mi->mv[0].as_mv = dv;
+ mi->mode = DC_PRED;
+ mi->ref_frame[0] = INTRA_FRAME;
+ // Build the predictor; the 1/1 and 2/2 calls (presumably the chroma planes) run only when num_planes > 1.
+ av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 0);
+ if (num_planes > 1) {
+ av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 1,
+ 1);
+ av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 2,
+ 2);
+ }
+ // Rate starts as the DV bit cost relative to dv_ref plus intrabc_cost[1] (presumably the flag-set cost).
+ this_rdc->rate =
+ av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->dv_costs->joint_mv,
+ x->dv_costs->dv_costs, MV_COST_WEIGHT);
+ this_rdc->rate += x->mode_costs.intrabc_cost[1];
+ this_rdc->dist = 0;
+ int skippable;  // NOTE(review): passed to av1_block_yrd() but never read in this function
+ av1_block_yrd(x, this_rdc, &skippable, bsize,
+ AOMMIN(mi->tx_size, TX_16X16));  // tx size capped at TX_16X16
+ if (num_planes > 1) {
+ RD_STATS rdc_uv;
+ av1_invalid_rd_stats(&rdc_uv);
+ av1_model_rd_for_sb_uv(cpi, bsize, x, xd, &rdc_uv, 1, 2);  // UV rate/dist from this helper are added to the totals
+ this_rdc->rate += rdc_uv.rate;
+ this_rdc->dist += rdc_uv.dist;
+ }
+ this_rdc->rdcost = RDCOST(x->rdmult, this_rdc->rate, this_rdc->dist);
+ *best_dv = mi->mv[0];  // report the tested DV before mi is restored
+
+ // Restore mi state after trial
+ mi->use_intrabc = ori_mi_use_intrabc;
+ mi->mv[0] = ori_mi_mv0;
+ mi->mode = ori_mi_mode;
+ mi->ref_frame[0] = ori_ref0;
+ }
+ }
+
+ // Restore pre[0] state
+ for (int i = 0; i < num_planes; ++i) xd->plane[i].pre[0] = ori_pre[i];
+}
+
void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
AV1_COMMON *const cm = &cpi->common;
@@ -1737,6 +1866,27 @@ void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
}
}
+ bool try_intrabc = cpi->sf.rt_sf.rt_use_intrabc && av1_allow_intrabc(cm) &&
+ bsize <= BLOCK_16X16;
+
+ if (try_intrabc) {
+ int_mv best_dv;
+ av1_search_intrabc_nonrd(cpi, x, bsize, &this_rdc, &best_dv);
+ if (this_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = this_rdc;
+ best_mode = DC_PRED;
+ mi->use_intrabc = 1;
+ mi->mv[0] = best_dv;
+ mi->uv_mode = UV_DC_PRED;
+ mi->motion_mode = SIMPLE_TRANSLATION;
+ mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
+ memset(&mi->palette_mode_info, 0, sizeof(mi->palette_mode_info));
+ memset(mi->inter_tx_size, mi->tx_size, sizeof(mi->inter_tx_size));
+ } else {
+ mi->use_intrabc = 0;
+ }
+ }
+
mi->mode = best_mode;
// Keep DC for UV since mode test is based on Y channel only.
mi->uv_mode = UV_DC_PRED;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index ebed82e788..7773464867 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1765,12 +1765,10 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
}
// Screen settings.
if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
- if (speed < 7) {
- sf->rt_sf.rt_use_intrabc = 1;
- sf->mv_sf.intrabc_search_level = 1;
- sf->mv_sf.hash_max_8x8_intrabc_blocks = 1;
- sf->mv_sf.prune_intrabc_candidate_block_hash_search = 1;
- }
+ sf->rt_sf.rt_use_intrabc = 1;
+ sf->mv_sf.intrabc_search_level = 1;
+ sf->mv_sf.hash_max_8x8_intrabc_blocks = 1;
+ sf->mv_sf.prune_intrabc_candidate_block_hash_search = 1;
if (speed >= 7) {
sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
sf->mv_sf.use_bsize_dependent_search_method = 0;
diff --git a/examples/svc_encoder_rtc.cc b/examples/svc_encoder_rtc.cc
index c7956bcf7f..b5c196e14c 100644
--- a/examples/svc_encoder_rtc.cc
+++ b/examples/svc_encoder_rtc.cc
@@ -2069,7 +2069,7 @@ int main(int argc, const char **argv) {
aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
if (app_input.tune_content == AOM_CONTENT_SCREEN) {
aom_codec_control(&codec, AV1E_SET_ENABLE_PALETTE, 1);
- // INTRABC is currently disabled for rt mode, as it's too slow.
+ // Set intrabc to off by default.
aom_codec_control(&codec, AV1E_SET_ENABLE_INTRABC, 0);
}
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index 15852e461c..b46c248de2 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -1357,7 +1357,7 @@ class DatarateTestSVC
screen_mode_ = 1;
SetTargetBitratesFor1SL3TL();
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- CheckDatarate(0.40, 2.0);
+ CheckDatarate(0.30, 2.0);
#if CONFIG_AV1_DECODER
// Top temporal layers are non_reference, so exlcude them from
// mismatch count, since loopfilter/cdef is not applied for these on
@@ -1389,7 +1389,7 @@ class DatarateTestSVC
screen_mode_ = 1;
SetTargetBitratesFor1SL2TL();
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- CheckDatarate(0.75, 1.8);
+ CheckDatarate(0.75, 2.0);
#if CONFIG_AV1_DECODER
// Top temporal layers are non_reference, so exlcude them from
// mismatch count, since loopfilter/cdef is not applied for these on