Commit 4a527cabbb for aom

commit 4a527cabbbeffeb8ddd122f15534c9f6bbc5f49c
Author: Marco Paniconi <marpan@google.com>
Date:   Wed Jun 17 14:42:24 2026 -0700

    rtc: Implement intrabc for nonrd pickmode

    Allows for intrabc for nonrd_pickmode (speed >= 7).

    For rtc_screen, with enable-intrabc=1 and kf-max/min-dist=1,
    the stats change across speeds 7-11:
    ~20% bdrate gain, ~25-40% slowdown, depending on content and speed.
    Small/neutral bdrate change, with slowdown, on clips with mixed content
    (text + video playing), e.g., screenshare_yt_soccer_scroll.

    For REALTIME_ONLY intrabc is off by default.

    Change-Id: I0e7738a1aac6b80ac8630a2956618493d676861b

diff --git a/av1/encoder/nonrd_opt.h b/av1/encoder/nonrd_opt.h
index 9f81c0a588..ce625e4a77 100644
--- a/av1/encoder/nonrd_opt.h
+++ b/av1/encoder/nonrd_opt.h
@@ -526,6 +526,7 @@ static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
   mbmi->ref_frame[1] = ref_frame1;
   pmi->palette_size[PLANE_TYPE_Y] = 0;
   pmi->palette_size[PLANE_TYPE_UV] = 0;
+  mbmi->use_intrabc = 0;
   mbmi->filter_intra_mode_info.use_filter_intra = 0;
   mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
   mbmi->motion_mode = SIMPLE_TRANSLATION;
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index aa0d2e2a6e..657d372837 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1579,6 +1579,135 @@ static bool should_prune_intra_modes_using_neighbors(
          this_mode != left_mode;
 }

+static void av1_search_intrabc_nonrd(AV1_COMP *cpi, MACROBLOCK *x,
+                                     BLOCK_SIZE bsize, RD_STATS *this_rdc,
+                                     int_mv *best_dv) {
+  AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mi = xd->mi[0];
+  const int mi_row = xd->mi_row;
+  const int mi_col = xd->mi_col;
+  const int num_planes = av1_num_planes(cm);
+  // On failure rdcost stays INT64_MAX and *best_dv is left unwritten.
+  this_rdc->rdcost = INT64_MAX;
+
+  // Save pre[0] of every plane: it is overwritten below, restored on exit.
+  struct buf_2d ori_pre[MAX_MB_PLANE];
+  for (int i = 0; i < num_planes; ++i) ori_pre[i] = xd->plane[i].pre[0];
+  // Repoint pre[0] at the plane buffers set up from xd->cur_buf.
+  struct buf_2d yv12_mb[MAX_MB_PLANE];
+  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
+  for (int i = 0; i < num_planes; ++i) {
+    xd->plane[i].pre[0] = yv12_mb[i];
+  }
+  // Gather DV candidates from the INTRA_FRAME reference MV stack.
+  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
+  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
+  av1_find_mv_refs(cm, xd, mi, ref_frame, mbmi_ext->ref_mv_count,
+                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+                   mbmi_ext->mode_context);
+  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
+  int_mv nearestmv, nearmv;
+  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
+                                   0);
+  // Map INVALID_MV candidates to zero so the fallbacks below apply.
+  if (nearestmv.as_int == INVALID_MV) {
+    nearestmv.as_int = 0;
+  }
+  if (nearmv.as_int == INVALID_MV) {
+    nearmv.as_int = 0;
+  }
+  // Prefer nearest, then near; if both are zero use av1_find_ref_dv().
+  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
+  if (dv_ref.as_int == 0) {
+    av1_find_ref_dv(&dv_ref, &xd->tile, cm->seq_params->mib_size, mi_row);
+  }
+  // Ref DV should not have sub-pel: low three bits of each component are 0.
+  assert((dv_ref.as_mv.col & 7) == 0);
+  assert((dv_ref.as_mv.row & 7) == 0);
+  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
+  // Set up a full-pel search starting at dv_ref, bounded by the tile edges.
+  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
+  const SEARCH_METHODS search_method =
+      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
+  const search_site_config *lookahead_search_sites =
+      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
+  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
+  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
+                                     &dv_ref.as_mv, start_mv,
+                                     lookahead_search_sites, search_method,
+                                     /*fine_search_interval=*/0);
+  av1_set_ms_to_intra_mode(&fullms_params, x->dv_costs);
+  fullms_params.mv_limits.col_min = (xd->tile.mi_col_start - mi_col) * MI_SIZE;
+  fullms_params.mv_limits.col_max =
+      (xd->tile.mi_col_end - mi_col) * MI_SIZE - block_size_wide[bsize];
+  fullms_params.mv_limits.row_min = (xd->tile.mi_row_start - mi_row) * MI_SIZE;
+  fullms_params.mv_limits.row_max =
+      (xd->tile.mi_row_end - mi_row) * MI_SIZE - block_size_high[bsize];
+  int_mv best_mv;
+  int bestsme = INT_MAX;  // INT_MAX marks "no search result yet".
+  if (!cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks || bsize <= BLOCK_8X8) {
+    bestsme = av1_intrabc_hash_search(
+        cpi, xd, &fullms_params, &x->intrabc_hash_info, &best_mv.as_fullmv);
+  }
+  if (bestsme == INT_MAX) {  // Hash search skipped or returned INT_MAX.
+    FULLPEL_MV_STATS best_mv_stats;
+    bestsme = av1_full_pixel_search(start_mv, &fullms_params,
+                                    cpi->mv_search_params.mv_step_param, NULL,
+                                    &best_mv.as_fullmv, &best_mv_stats, NULL);
+  }
+  if (bestsme != INT_MAX) {
+    MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
+    if (av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
+                        cm->seq_params->mib_size_log2)) {
+      const int8_t ori_mi_use_intrabc = mi->use_intrabc;
+      const int_mv ori_mi_mv0 = mi->mv[0];
+      const PREDICTION_MODE ori_mi_mode = mi->mode;
+      const MV_REFERENCE_FRAME ori_ref0 = mi->ref_frame[0];
+      // Temporarily configure mi as an IntraBC block for the trial.
+      mi->use_intrabc = 1;
+      mi->mv[0].as_mv = dv;
+      mi->mode = DC_PRED;
+      mi->ref_frame[0] = INTRA_FRAME;
+      // Build the predictor for plane 0, then planes 1 and 2 if present.
+      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 0);
+      if (num_planes > 1) {
+        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 1,
+                                      1);
+        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 2,
+                                      2);
+      }
+      // NOTE(review): confirm av1_block_yrd() keeps the rate assigned here.
+      this_rdc->rate =
+          av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->dv_costs->joint_mv,
+                          x->dv_costs->dv_costs, MV_COST_WEIGHT);
+      this_rdc->rate += x->mode_costs.intrabc_cost[1];
+      this_rdc->dist = 0;
+      int skippable;
+      av1_block_yrd(x, this_rdc, &skippable, bsize,
+                    AOMMIN(mi->tx_size, TX_16X16));
+      if (num_planes > 1) {
+        RD_STATS rdc_uv;
+        av1_invalid_rd_stats(&rdc_uv);
+        av1_model_rd_for_sb_uv(cpi, bsize, x, xd, &rdc_uv, 1, 2);
+        this_rdc->rate += rdc_uv.rate;
+        this_rdc->dist += rdc_uv.dist;
+      }
+      this_rdc->rdcost = RDCOST(x->rdmult, this_rdc->rate, this_rdc->dist);
+      *best_dv = mi->mv[0];
+
+      // Restore the mi fields overwritten for the trial; the caller commits
+      mi->use_intrabc = ori_mi_use_intrabc;
+      mi->mv[0] = ori_mi_mv0;
+      mi->mode = ori_mi_mode;
+      mi->ref_frame[0] = ori_ref0;
+    }
+  }
+
+  // Restore the pre[0] buffers saved at function entry.
+  for (int i = 0; i < num_planes; ++i) xd->plane[i].pre[0] = ori_pre[i];
+}
+
 void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
                                BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
   AV1_COMMON *const cm = &cpi->common;
@@ -1737,6 +1866,27 @@ void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
     }
   }

+  bool try_intrabc = cpi->sf.rt_sf.rt_use_intrabc && av1_allow_intrabc(cm) &&
+                     bsize <= BLOCK_16X16;
+
+  if (try_intrabc) {
+    int_mv best_dv;
+    av1_search_intrabc_nonrd(cpi, x, bsize, &this_rdc, &best_dv);
+    if (this_rdc.rdcost < best_rdc.rdcost) {
+      best_rdc = this_rdc;
+      best_mode = DC_PRED;
+      mi->use_intrabc = 1;
+      mi->mv[0] = best_dv;
+      mi->uv_mode = UV_DC_PRED;
+      mi->motion_mode = SIMPLE_TRANSLATION;
+      mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
+      memset(&mi->palette_mode_info, 0, sizeof(mi->palette_mode_info));
+      memset(mi->inter_tx_size, mi->tx_size, sizeof(mi->inter_tx_size));
+    } else {
+      mi->use_intrabc = 0;
+    }
+  }
+
   mi->mode = best_mode;
   // Keep DC for UV since mode test is based on Y channel only.
   mi->uv_mode = UV_DC_PRED;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index ebed82e788..7773464867 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1765,12 +1765,10 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
   }
   // Screen settings.
   if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
-    if (speed < 7) {
-      sf->rt_sf.rt_use_intrabc = 1;
-      sf->mv_sf.intrabc_search_level = 1;
-      sf->mv_sf.hash_max_8x8_intrabc_blocks = 1;
-      sf->mv_sf.prune_intrabc_candidate_block_hash_search = 1;
-    }
+    sf->rt_sf.rt_use_intrabc = 1;
+    sf->mv_sf.intrabc_search_level = 1;
+    sf->mv_sf.hash_max_8x8_intrabc_blocks = 1;
+    sf->mv_sf.prune_intrabc_candidate_block_hash_search = 1;
     if (speed >= 7) {
       sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
       sf->mv_sf.use_bsize_dependent_search_method = 0;
diff --git a/examples/svc_encoder_rtc.cc b/examples/svc_encoder_rtc.cc
index c7956bcf7f..b5c196e14c 100644
--- a/examples/svc_encoder_rtc.cc
+++ b/examples/svc_encoder_rtc.cc
@@ -2069,7 +2069,7 @@ int main(int argc, const char **argv) {
   aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
   if (app_input.tune_content == AOM_CONTENT_SCREEN) {
     aom_codec_control(&codec, AV1E_SET_ENABLE_PALETTE, 1);
-    // INTRABC is currently disabled for rt mode, as it's too slow.
+    // Intrabc is off by default in this example encoder.
     aom_codec_control(&codec, AV1E_SET_ENABLE_INTRABC, 0);
   }

diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index 15852e461c..b46c248de2 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -1357,7 +1357,7 @@ class DatarateTestSVC
     screen_mode_ = 1;
     SetTargetBitratesFor1SL3TL();
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    CheckDatarate(0.40, 2.0);
+    CheckDatarate(0.30, 2.0);
 #if CONFIG_AV1_DECODER
     // Top temporal layers are non_reference, so exlcude them from
     // mismatch count, since loopfilter/cdef is not applied for these on
@@ -1389,7 +1389,7 @@ class DatarateTestSVC
     screen_mode_ = 1;
     SetTargetBitratesFor1SL2TL();
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    CheckDatarate(0.75, 1.8);
+    CheckDatarate(0.75, 2.0);
 #if CONFIG_AV1_DECODER
     // Top temporal layers are non_reference, so exlcude them from
     // mismatch count, since loopfilter/cdef is not applied for these on