Commit da5bd13dcc for aom

commit da5bd13dcc0aff7b59c3c00bcfd94e6d57b432f7
Author: Deepa K G <deepa.kg@ittiam.com>
Date:   Fri Jan 16 14:39:07 2026 +0530

    Disable sf 'full_pixel_search_level' for speed>=4

    When the sf 'full_pixel_search_level' is disabled, an extra start
    candidate, different from the ref MV, is added to the motion search.
    Thus, the sf 'skip_fullpel_search_using_startmv' is renamed to
    'skip_fullpel_search_using_startmv_refmv' and modified to skip the
    full pixel search based on closeness of both the start MV and the
    ref MV.

        Encoder Instruction                BD-Rate Loss(%)
    cpu  Count Reduction(%)  avg.psnr  ovr.psnr  ssim     vmaf   vmaf_neg
     4      -1.38            -0.3935  -0.4150  -0.5076  -0.5059  -0.5095
     5      -1.13            -0.4691  -0.5095  -0.6407  -0.6350  -0.6209
     6      -0.98            -0.6367  -0.6740  -0.8182  -0.8046  -0.8001

    STATS_CHANGED for speed>=4

    Change-Id: Ib7ee4d5f525d038308dd594db8fa35c559fb467b

diff --git a/av1/encoder/interp_search.h b/av1/encoder/interp_search.h
index 28b036de76..428d1e446f 100644
--- a/av1/encoder/interp_search.h
+++ b/av1/encoder/interp_search.h
@@ -128,12 +128,12 @@ typedef struct HandleInterModeArgs {
   /*!
    * Stack to store full pixel search start mv of NEWMV mode.
    */
-  FULLPEL_MV start_mv_stack[(MAX_REF_MV_SEARCH - 1) * 2];
+  FULLPEL_MV start_mv_stack[MAX_REF_MV_SEARCH * 2];

   /*!
    * Stack to store ref_mv_idx of NEWMV mode.
    */
-  uint8_t ref_mv_idx_stack[(MAX_REF_MV_SEARCH - 1) * 2];
+  uint8_t ref_mv_idx_stack[MAX_REF_MV_SEARCH * 2];

   /*!
    * Count of mvs in start mv stack.
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index 5169989297..0bd7d9db73 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -169,6 +169,8 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
   else
     start_mv = get_fullmv_from_mv(&ref_mv);

+  const FULLPEL_MV fullpel_ref_mv = start_mv;
+
   // cand stores start_mv and all possible MVs in a SB.
   cand_mv_t cand[MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB + 1];
   av1_zero(cand);
@@ -183,54 +185,74 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,

   const int cand_cnt = AOMMIN(2, cnt);
   // TODO(any): Test the speed feature for OBMC_CAUSAL mode.
-  if (cpi->sf.mv_sf.skip_fullpel_search_using_startmv &&
+  if (cpi->sf.mv_sf.skip_fullpel_search_using_startmv_refmv &&
       mbmi->motion_mode == SIMPLE_TRANSLATION) {
-    const int stack_size = args->start_mv_cnt;
     for (int cand_idx = 0; cand_idx < cand_cnt; cand_idx++) {
       int_mv *fmv_cand = &cand[cand_idx].fmv;
       int skip_cand_mv = 0;

       // Check difference between mvs in the stack and candidate mv.
-      for (int stack_idx = 0; stack_idx < stack_size; stack_idx++) {
-        const uint8_t this_ref_mv_idx = args->ref_mv_idx_stack[stack_idx];
-        const FULLPEL_MV *fmv_stack = &args->start_mv_stack[stack_idx];
+      for (int stack_idx = 0; stack_idx < args->start_mv_cnt; stack_idx++) {
+        uint8_t this_ref_mv_idx = args->ref_mv_idx_stack[stack_idx];
         const int this_newmv_valid =
             args->single_newmv_valid[this_ref_mv_idx][ref];
-        const int row_diff = abs(fmv_stack->row - fmv_cand->as_fullmv.row);
-        const int col_diff = abs(fmv_stack->col - fmv_cand->as_fullmv.col);

-        if (!this_newmv_valid) continue;
+        if (!this_newmv_valid && this_ref_mv_idx != mbmi->ref_mv_idx) continue;

-        if (cpi->sf.mv_sf.skip_fullpel_search_using_startmv >= 2) {
+        const FULLPEL_MV *fmv_stack = &args->start_mv_stack[stack_idx];
+        const int start_mv_row_diff =
+            abs(fmv_stack->row - fmv_cand->as_fullmv.row);
+        const int start_mv_col_diff =
+            abs(fmv_stack->col - fmv_cand->as_fullmv.col);
+
+        if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV) {
+          assert(has_second_ref(mbmi));
+          this_ref_mv_idx += 1;
+        }
+        const MV this_ref_mv =
+            av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, this_ref_mv_idx,
+                                      &x->mbmi_ext)
+                .as_mv;
+
+        assert(IMPLIES(args->ref_mv_idx_stack[stack_idx] == mbmi->ref_mv_idx,
+                       this_ref_mv.row == ref_mv.row));
+        assert(IMPLIES(args->ref_mv_idx_stack[stack_idx] == mbmi->ref_mv_idx,
+                       this_ref_mv.col == ref_mv.col));
+
+        const FULLPEL_MV this_fullpel_ref_mv = get_fullmv_from_mv(&this_ref_mv);
+        const int ref_mv_row_diff =
+            abs(this_fullpel_ref_mv.row - fullpel_ref_mv.row);
+        const int ref_mv_col_diff =
+            abs(this_fullpel_ref_mv.col - fullpel_ref_mv.col);
+
+        if (cpi->sf.mv_sf.skip_fullpel_search_using_startmv_refmv >= 2) {
           // Prunes the current start_mv candidate, if the absolute mv
           // difference of both row and column are <= 1.
-          if (row_diff <= 1 && col_diff <= 1) {
+          if (start_mv_row_diff <= 1 && start_mv_col_diff <= 1 &&
+              ref_mv_row_diff <= 1 && ref_mv_col_diff <= 1) {
             skip_cand_mv = 1;
             break;
           }
-        } else if (cpi->sf.mv_sf.skip_fullpel_search_using_startmv >= 1) {
+        } else if (cpi->sf.mv_sf.skip_fullpel_search_using_startmv_refmv >= 1) {
           // Prunes the current start_mv candidate, if the sum of the absolute
           // mv difference of row and column is <= 1.
-          if (row_diff + col_diff <= 1) {
+          if ((start_mv_row_diff + start_mv_col_diff <= 1) &&
+              (ref_mv_row_diff + ref_mv_col_diff <= 1)) {
             skip_cand_mv = 1;
             break;
           }
         }
       }
       if (skip_cand_mv) {
-        // Ensure atleast one full-pel motion search is not pruned.
-        assert(mbmi->ref_mv_idx != 0);
         // Mark the candidate mv as invalid so that motion search gets skipped.
         cand[cand_idx].fmv.as_int = INVALID_MV;
       } else {
         // Store start_mv candidate and corresponding ref_mv_idx of full-pel
-        // search in the mv stack (except last ref_mv_idx).
-        if (mbmi->ref_mv_idx != MAX_REF_MV_SEARCH - 1) {
-          assert(args->start_mv_cnt < (MAX_REF_MV_SEARCH - 1) * 2);
-          args->start_mv_stack[args->start_mv_cnt] = fmv_cand->as_fullmv;
-          args->ref_mv_idx_stack[args->start_mv_cnt] = mbmi->ref_mv_idx;
-          args->start_mv_cnt++;
-        }
+        // search in the mv stack.
+        assert(args->start_mv_cnt < MAX_REF_MV_SEARCH * 2);
+        args->start_mv_stack[args->start_mv_cnt] = fmv_cand->as_fullmv;
+        args->ref_mv_idx_stack[args->start_mv_cnt] = mbmi->ref_mv_idx;
+        args->start_mv_cnt++;
       }
     }
   }
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index cf08657c65..4617a3e359 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -943,7 +943,7 @@ static void set_good_speed_feature_framesize_dependent(
     if (is_480p_or_larger) {
       sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 2;
     } else {
-      sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 1;
+      sf->mv_sf.skip_fullpel_search_using_startmv_refmv = boosted ? 0 : 1;
     }

     sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;
@@ -977,7 +977,7 @@ static void set_good_speed_feature_framesize_dependent(

     sf->inter_sf.skip_newmv_in_drl = 4;
     sf->inter_sf.prune_comp_ref_frames = 1;
-    sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 1;
+    sf->mv_sf.skip_fullpel_search_using_startmv_refmv = boosted ? 0 : 1;

     if (!is_720p_or_larger) {
       sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW_SET;
@@ -1011,7 +1011,7 @@ static void set_good_speed_feature_framesize_dependent(
     sf->inter_sf.prune_comp_ref_frames = 2;
     sf->inter_sf.prune_nearest_near_mv_using_refmv_weight =
         (boosted || allow_screen_content_tools) ? 0 : 1;
-    sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 2;
+    sf->mv_sf.skip_fullpel_search_using_startmv_refmv = boosted ? 0 : 2;

     if (is_720p_or_larger) {
       sf->part_sf.auto_max_partition_based_on_simple_motion = NOT_IN_USE;
@@ -1331,7 +1331,6 @@ static void set_good_speed_features_framesize_independent(

   if (speed >= 4) {
     sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
-    sf->mv_sf.full_pixel_search_level = 1;

     sf->gm_sf.prune_zero_mv_with_sse = 2;
     sf->gm_sf.downsample_level = 1;
@@ -2280,7 +2279,7 @@ static inline void init_mv_sf(MV_SPEED_FEATURES *mv_sf) {
   mv_sf->use_downsampled_sad = 0;
   mv_sf->disable_extensive_joint_motion_search = 0;
   mv_sf->disable_second_mv = 0;
-  mv_sf->skip_fullpel_search_using_startmv = 0;
+  mv_sf->skip_fullpel_search_using_startmv_refmv = 0;
   mv_sf->warp_search_method = WARP_SEARCH_SQUARE;
   mv_sf->warp_search_iters = 8;
   mv_sf->use_intrabc = 1;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index b92cd19a9f..c73cea1f0b 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -956,11 +956,14 @@ typedef struct MV_SPEED_FEATURES {
   // 2: disable second MV
   int disable_second_mv;

-  // Skips full pixel search based on start mv of prior ref_mv_idx.
+  // Skips full pixel search based on closeness of start mv and ref mv
+  // of previous search.
   // 0: Disabled
-  // 1: Skips the full pixel search upto 4 neighbor full-pel MV positions.
-  // 2: Skips the full pixel search upto 8 neighbor full-pel MV positions.
-  int skip_fullpel_search_using_startmv;
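+  // 1: Skips the full pixel search when both the start MV and the ref MV are
+  // within 1 full-pel position (4-neighborhood) of those of a previous search.
+  // 2: Skips the full pixel search when both the start MV and the ref MV are
+  // within 1 full-pel position (8-neighborhood) of those of a previous search.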
+  int skip_fullpel_search_using_startmv_refmv;

   // Method to use for refining WARPED_CAUSAL motion vectors
   // TODO(rachelbarker): Can this be unified with OBMC in some way?