Commit e5e50d6c3e for aom

commit e5e50d6c3eb4c15687f95559f895e36ab42d7bc9
Author: Marco Paniconi <marpan@google.com>
Date:   Thu Mar 19 20:30:13 2026 -0700

    rtc-screen: Rework sb search window and palette testing

    For superblock motion in variance partitioning: allow a
    larger search window for scroll motion detection.

    Also adjust some thresholds for palette testing in
    nonrd_pickmode, needed in particular for cases where
    scroll motion is not detected.

    This helps to reduce encode_time spikes observed in a
    4K WebRTC screen content test with scrolling.

    Change-Id: I4a452a586ae9017808153af9540b7ce6682c793d

diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 17390668d3..85df081bb1 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -2055,12 +2055,13 @@ int av1_intrabc_hash_search(const AV1_COMP *cpi, const MACROBLOCKD *xd,
 }

 int av1_vector_match(const int16_t *ref, const int16_t *src, int bwl,
-                     int search_size, int full_search, int *sad) {
+                     int search_size_top, int search_size_bottom,
+                     int full_search, int *sad) {
   int best_sad = INT_MAX;
   int this_sad;
   int d;
   int center, offset = 0;
-  int bw = search_size << 1;
+  int bw = search_size_top + search_size_bottom;

   if (full_search) {
     for (d = 0; d <= bw; d++) {
@@ -2072,7 +2073,7 @@ int av1_vector_match(const int16_t *ref, const int16_t *src, int bwl,
     }
     center = offset;
     *sad = best_sad;
-    return (center - (bw >> 1));
+    return (center - search_size_top);
   }

   for (d = 0; d <= bw; d += 16) {
@@ -2131,18 +2132,16 @@ int av1_vector_match(const int16_t *ref, const int16_t *src, int bwl,
     }
   }
   *sad = best_sad;
-  return (center - (bw >> 1));
+  return (center - search_size_top);
 }

 // A special fast version of motion search used in rt mode.
 // The search window along columns and row is given by:
 //  +/- me_search_size_col/row.
-unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
-                                           BLOCK_SIZE bsize, int mi_row,
-                                           int mi_col, const MV *ref_mv,
-                                           unsigned int *y_sad_zero,
-                                           int me_search_size_col,
-                                           int me_search_size_row) {
+unsigned int av1_int_pro_motion_estimation(
+    const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
+    int mi_col, const MV *ref_mv, unsigned int *y_sad_zero,
+    int me_search_size_col, int me_search_size_row, int is_var_part) {
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mi = xd->mi[0];
@@ -2156,15 +2155,44 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
       is_screen && bsize == cm->seq_params->sb_size;
   // Keep border a multiple of 16.
   const int border = (cpi->oxcf.border_in_pixels >> 4) << 4;
-  int search_size_width = me_search_size_col;
-  int search_size_height = me_search_size_row;
-  // Adjust based on boundary.
-  if (((mi_col << 2) - search_size_width < -border) ||
-      ((mi_col << 2) + search_size_width > cm->width + border))
-    search_size_width = border;
-  if (((mi_row << 2) - search_size_height < -border) ||
-      ((mi_row << 2) + search_size_height > cm->height + border))
-    search_size_height = border;
+  int search_size_width_left = me_search_size_col;
+  int search_size_width_right = me_search_size_col;
+  int search_size_height_top = me_search_size_row;
+  int search_size_height_bottom = me_search_size_row;
+  // Allow for larger search size for column/horizontal screen motion.
+  if (screen_scroll_superblock && is_var_part) {
+    if (((mi_col << 2) - search_size_width_left) < -border)
+      search_size_width_left = (mi_col << 2) + border;
+    if (((mi_col << 2) + search_size_width_right + bw) > cm->width + border)
+      search_size_width_right = cm->width + border - (mi_col << 2) - bw;
+  } else {
+    if (((mi_col << 2) - search_size_width_left < -border) ||
+        ((mi_col << 2) + search_size_width_right + bw > cm->width + border)) {
+      search_size_width_left = AOMMIN(border, (mi_col << 2) + border);
+      search_size_width_right =
+          AOMMIN(border, cm->width + border - (mi_col << 2) - bw);
+    }
+  }
+  // Allow for larger search size for row/vertical screen motion.
+  if (screen_scroll_superblock && is_var_part) {
+    if (((mi_row << 2) - search_size_height_top) < -border)
+      search_size_height_top = (mi_row << 2) + border;
+    if (((mi_row << 2) + search_size_height_bottom + bh) > cm->height + border)
+      search_size_height_bottom = cm->height + border - (mi_row << 2) - bh;
+  } else {
+    if (((mi_row << 2) - search_size_height_top < -border) ||
+        ((mi_row << 2) + search_size_height_bottom + bh >
+         cm->height + border)) {
+      search_size_height_top = AOMMIN(border, (mi_row << 2) + border);
+      search_size_height_bottom =
+          AOMMIN(border, cm->height + border - (mi_row << 2) - bh);
+    }
+  }
+  // Make search_size_width/height_left/right/top/bottom multiple of 16.
+  search_size_width_left &= ~15;
+  search_size_width_right &= ~15;
+  search_size_height_top &= ~15;
+  search_size_height_bottom &= ~15;
   const int src_stride = x->plane[0].src.stride;
   const int ref_stride = xd->plane[0].pre[0].stride;
   uint8_t const *ref_buf, *src_buf;
@@ -2203,8 +2231,10 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
     }
     return best_sad;
   }
-  const int width_ref_buf = (search_size_width << 1) + bw;
-  const int height_ref_buf = (search_size_height << 1) + bh;
+  const int width_ref_buf =
+      search_size_width_left + search_size_width_right + bw;
+  const int height_ref_buf =
+      search_size_height_top + search_size_height_bottom + bh;
   int16_t *hbuf = (int16_t *)aom_malloc(width_ref_buf * sizeof(*hbuf));
   int16_t *vbuf = (int16_t *)aom_malloc(height_ref_buf * sizeof(*vbuf));
   int16_t *src_hbuf = (int16_t *)aom_malloc(bw * sizeof(*src_hbuf));
@@ -2219,12 +2249,12 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
   }

   // Set up prediction 1-D reference set for rows.
-  ref_buf = xd->plane[0].pre[0].buf - search_size_width;
+  ref_buf = xd->plane[0].pre[0].buf - search_size_width_left;
   aom_int_pro_row(hbuf, ref_buf, ref_stride, width_ref_buf, bh,
                   row_norm_factor);

   // Set up prediction 1-D reference set for cols
-  ref_buf = xd->plane[0].pre[0].buf - search_size_height * ref_stride;
+  ref_buf = xd->plane[0].pre[0].buf - search_size_height_top * ref_stride;
   aom_int_pro_col(vbuf, ref_buf, ref_stride, bw, height_ref_buf,
                   col_norm_factor);

@@ -2234,12 +2264,12 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
   aom_int_pro_col(src_vbuf, src_buf, src_stride, bw, bh, col_norm_factor);

   // Find the best match per 1-D search
-  best_int_mv->as_fullmv.col =
-      av1_vector_match(hbuf, src_hbuf, mi_size_wide_log2[bsize],
-                       search_size_width, full_search, &best_sad_col);
-  best_int_mv->as_fullmv.row =
-      av1_vector_match(vbuf, src_vbuf, mi_size_high_log2[bsize],
-                       search_size_height, full_search, &best_sad_row);
+  best_int_mv->as_fullmv.col = av1_vector_match(
+      hbuf, src_hbuf, mi_size_wide_log2[bsize], search_size_width_left,
+      search_size_width_right, full_search, &best_sad_col);
+  best_int_mv->as_fullmv.row = av1_vector_match(
+      vbuf, src_vbuf, mi_size_high_log2[bsize], search_size_height_top,
+      search_size_height_bottom, full_search, &best_sad_row);

   // For screen: select between horiz or vert motion.
   if (is_screen) {
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index 05cbf870da..d268481167 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -240,12 +240,13 @@ void av1_set_mv_search_range(FullMvLimits *mv_limits, const MV *mv);
 int av1_init_search_range(int size);

 int av1_vector_match(const int16_t *ref, const int16_t *src, int bwl,
-                     int search_size, int full_search, int *sad);
+                     int search_size_top, int search_size_bottom,
+                     int full_search, int *sad);

 unsigned int av1_int_pro_motion_estimation(
     const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
     int mi_col, const MV *ref_mv, unsigned int *y_sad_zero,
-    int me_search_size_col, int me_search_size_row);
+    int me_search_size_col, int me_search_size_row, int is_var_part);

 int av1_refining_search_8p_c(const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
                              const FULLPEL_MV start_mv, FULLPEL_MV *best_mv);
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 14581d7819..49953d0c3e 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -331,7 +331,7 @@ static int search_new_mv(AV1_COMP *cpi, MACROBLOCK *x,
     MV ref_mv = av1_get_ref_mv(x, 0).as_mv;
     tmp_sad = av1_int_pro_motion_estimation(
         cpi, x, bsize, mi_row, mi_col, &ref_mv, &y_sad_zero, me_search_size_col,
-        me_search_size_row);
+        me_search_size_row, 0);

     if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) return -1;

@@ -3591,7 +3591,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
   if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
       x->content_state_sb.source_sad_nonrd != kZeroSad &&
       bsize <= BLOCK_16X16) {
-    unsigned int thresh_sse = cpi->rc.high_source_sad ? 15000 : 200000;
+    unsigned int thresh_sse = cpi->rc.high_source_sad ? 15000 : 100000;
     unsigned int thresh_source_var = cpi->rc.high_source_sad ? 50 : 200;
     unsigned int best_sse_inter_motion =
         (unsigned int)(search_state.best_rdc.sse >>
@@ -3622,7 +3622,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
                           x->content_state_sb.source_sad_nonrd != kZeroSad &&
                           !cpi->rc.high_source_sad &&
                           (cpi->rc.high_motion_content_screen_rtc ||
-                           cpi->rc.frame_source_sad < 10000);
+                           cpi->rc.frame_source_sad < 1000);

   bool try_palette = enable_palette(
       cpi, is_mode_intra(best_pickmode->best_mode), bsize, x->source_variance,
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index 491e7a5222..646219e6fb 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -3246,12 +3246,12 @@ static unsigned int estimate_scroll_motion(
   unsigned int best_sad;
   int best_sad_col, best_sad_row;
   // Find the best match per 1-D search
-  *best_intmv_col =
-      av1_vector_match(hbuf, src_hbuf, mi_size_wide_log2[bsize],
-                       search_size_width, full_search, &best_sad_col);
-  *best_intmv_row =
-      av1_vector_match(vbuf, src_vbuf, mi_size_high_log2[bsize],
-                       search_size_height, full_search, &best_sad_row);
+  *best_intmv_col = av1_vector_match(hbuf, src_hbuf, mi_size_wide_log2[bsize],
+                                     search_size_width, search_size_width,
+                                     full_search, &best_sad_col);
+  *best_intmv_row = av1_vector_match(vbuf, src_vbuf, mi_size_high_log2[bsize],
+                                     search_size_height, search_size_height,
+                                     full_search, &best_sad_row);
   if (best_sad_col < best_sad_row) {
     *best_intmv_row = 0;
     best_sad = best_sad_col;
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index d09b0ca00e..b0c1a5bd89 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -1340,10 +1340,8 @@ static void do_int_pro_motion_estimation(AV1_COMP *cpi, MACROBLOCK *x,
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mi = xd->mi[0];
   const int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
-  const int increase_col_sw = source_sad_nonrd > kMedSad &&
-                              !cpi->rc.high_motion_content_screen_rtc &&
-                              (cpi->svc.temporal_layer_id == 0 ||
-                               cpi->rc.num_col_blscroll_last_tl0 > 2);
+  const int increase_col_sw =
+      source_sad_nonrd > kMedSad && !cpi->rc.high_motion_content_screen_rtc;
   int me_search_size_col = is_screen
                                ? increase_col_sw ? 512 : 96
                                : block_size_wide[cm->seq_params->sb_size] >> 1;
@@ -1352,15 +1350,14 @@ static void do_int_pro_motion_estimation(AV1_COMP *cpi, MACROBLOCK *x,
   int me_search_size_row = is_screen
                                ? source_sad_nonrd > kMedSad ? 512 : 192
                                : block_size_high[cm->seq_params->sb_size] >> 1;
-  if (cm->width * cm->height >= 3840 * 2160 &&
-      cpi->svc.temporal_layer_id == 0 && cpi->svc.number_temporal_layers > 1) {
+  if (cm->width * cm->height >= 3840 * 2160) {
     me_search_size_row = me_search_size_row << 1;
     me_search_size_col = me_search_size_col << 1;
   }
   unsigned int y_sad_zero;
   *y_sad = av1_int_pro_motion_estimation(
       cpi, x, cm->seq_params->sb_size, mi_row, mi_col, &kZeroMv, &y_sad_zero,
-      me_search_size_col, me_search_size_row);
+      me_search_size_col, me_search_size_row, 1);
   // The logic below selects whether the motion estimated in the
   // int_pro_motion() will be used in nonrd_pickmode. Only do this
   // for screen for now.