Commit ec4303d271 for aom

commit ec4303d27125aab9b49fc52227f802bb6a53bc9e
Author: Mudassir Galaganath <mudassir.galaganath@ittiam.com>
Date:   Mon Apr 27 22:33:18 2026 +0530

    lc-dec: Add sf to disable CDEF adaptively

    This patch adds speed feature 'adaptive_cdef_mode' that always
    disables chroma CDEF, and disables luma CDEF when the MSE
    improvement of a frame over zero filter strength falls below
    a threshold.

    Performance results of encoder and decoder for streams generated
    with low complexity decode enabled, measured w.r.t a baseline with
    CDEF disabled (--enable-cdef=0).

          Decode Time                   BD-Rate Loss(%)
    cpu  Reduction(%)  avg.psnr   ssim    vmaf    vmaf_neg  uvq_1p5
     1     -4.048     -0.3578   -0.4008  -0.5414  -0.3696   -8.6267
     2     -4.164     -0.4334   -0.5844  -0.5302  -0.3774  -10.3880
     3     -4.716     -0.6492   -0.8754  -0.7096  -0.5201  -10.9057

    STATS_CHANGED for low complexity decode

    Change-Id: I5e21281a5bb8f97c748cb37a9169690a02c45885

diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 9fad50c9ca..7585162a8d 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -3352,7 +3352,8 @@ static int cdef_filter_block_worker_hook(void *arg1, void *arg2) {
   while (cdef_get_next_job(cdef_sync, cdef_search_ctx, &cur_fbr, &cur_fbc,
                            &sb_count)) {
     av1_cdef_mse_calc_block(cdef_search_ctx, error_info, cur_fbr, cur_fbc,
-                            sb_count);
+                            sb_count,
+                            thread_data->cpi->sf.lpf_sf.adaptive_cdef_mode);
   }
   error_info->setjmp = 0;
   return 1;
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index a315f6a440..8ece93312b 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -511,11 +511,13 @@ static inline uint64_t get_filt_error(
 //   CDEF search context.
 //   fbr: Row index in units of 64x64 block
 //   fbc: Column index in units of 64x64 block
+//   adaptive_cdef_mode: Speed feature to control CDEF adaptively.
 // Returns:
 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
 void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
                              struct aom_internal_error_info *error_info,
-                             int fbr, int fbc, int sb_count) {
+                             int fbr, int fbc, int sb_count,
+                             int adaptive_cdef_mode) {
   // TODO(aomedia:3276): Pass error_info to the low-level functions as required
   // in future to handle error propagation.
   (void)error_info;
@@ -573,6 +575,17 @@ void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
   const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary);
   int dirinit = 0;
   for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
+    // To disable CDEF filter of chroma, set MSE of chroma strength index 0 to
+    // zero and all non-zero strength indices to 1, such that the joint
+    // luma-chroma strength search always chooses filter strength 0 for chroma
+    // due to least cost.
+    if (adaptive_cdef_mode > 0 && pli > 0) {
+      cdef_search_ctx->mse[pli][sb_count][0] = 0;
+      for (int gi = 1; gi < cdef_search_ctx->total_strengths; gi++)
+        cdef_search_ctx->mse[pli][sb_count][gi] = 1;
+      break;
+    }
+
     /* We avoid filtering the pixels for which some of the pixels to
     average are outside the frame. We could change the filter instead,
     but it would add special cases for any future vectorization. */
@@ -614,10 +627,12 @@ void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
 // Inputs:
 //   cdef_search_ctx: Pointer to the structure containing parameters related to
 //   CDEF search context.
+//   adaptive_cdef_mode: Speed feature to control CDEF adaptively.
 // Returns:
 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
 static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx,
-                                struct aom_internal_error_info *error_info) {
+                                struct aom_internal_error_info *error_info,
+                                int adaptive_cdef_mode) {
   // Loop over each sb.
   for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
     for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
@@ -625,7 +640,7 @@ static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx,
       if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
       // Calculate mse for each sb and store the relevant sb index.
       av1_cdef_mse_calc_block(cdef_search_ctx, error_info, fbr, fbc,
-                              cdef_search_ctx->sb_count);
+                              cdef_search_ctx->sb_count, adaptive_cdef_mode);
       cdef_search_ctx->sb_count++;
     }
   }
@@ -869,6 +884,7 @@ void av1_cdef_search(AV1_COMP *cpi) {
   const int damping = 3 + (cm->quant_params.base_qindex >> 6);
   const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                     pick_method <= CDEF_FAST_SEARCH_LVL5);
+  const int adaptive_cdef_mode = cpi->sf.lpf_sf.adaptive_cdef_mode;
   const int num_planes = av1_num_planes(cm);
   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;

@@ -886,7 +902,7 @@ void av1_cdef_search(AV1_COMP *cpi) {
   if (cpi->mt_info.num_workers > 1) {
     av1_cdef_mse_calc_frame_mt(cpi);
   } else {
-    cdef_mse_calc_frame(cdef_search_ctx, cm->error);
+    cdef_mse_calc_frame(cdef_search_ctx, cm->error, adaptive_cdef_mode);
   }

   /* Search for different number of signaling bits. */
@@ -956,9 +972,11 @@ void av1_cdef_search(AV1_COMP *cpi) {

   cdef_info->cdef_bits = nb_strength_bits;
   cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;
+  uint64_t tot_best_mse_y = 0, tot_zero_mse_y = 0;
   for (int i = 0; i < sb_count; i++) {
     uint64_t best_mse = UINT64_MAX;
     int best_gi = 0;
+    tot_zero_mse_y += mse[0][i][0];
     for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
       uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
       if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
@@ -967,9 +985,36 @@ void av1_cdef_search(AV1_COMP *cpi) {
         best_mse = curr;
       }
     }
+    // When adaptive_cdef_mode is enabled, CDEF filter of chroma is disabled by
+    // setting the MSE at chroma strength index 0 to zero, ensuring zero filter
+    // strength always wins for chroma. Hence, best_mse reflects luma MSE only.
+    tot_best_mse_y += best_mse;
     mi_params->mi_grid_base[cdef_search_ctx->sb_index[i]]->cdef_strength =
         best_gi;
   }
+
+  // Disable CDEF filter of luma if the MSE improvement over zero filter
+  // strength is below the threshold.
+  if (cm->current_frame.pyramid_level > 1 &&
+      cpi->sf.lpf_sf.adaptive_cdef_mode > 0 && sb_count > 0) {
+    const double cdef_mse_pct_imp_thresh = 4.0;
+    double luma_mse_gain_pct = 0.0;
+    // Percentage reduction in luma MSE with best CDEF strength vs zero
+    // strength.
+    luma_mse_gain_pct =
+        (tot_zero_mse_y - tot_best_mse_y) * 100.0 / tot_zero_mse_y;
+    if (luma_mse_gain_pct < cdef_mse_pct_imp_thresh) {
+      cdef_info->nb_cdef_strengths = 1;
+      cdef_info->cdef_bits = 0;
+      cdef_info->cdef_strengths[0] = 0;
+      cdef_info->cdef_uv_strengths[0] = 0;
+      for (int i = 0; i < sb_count; i++) {
+        mi_params->mi_grid_base[cdef_search_ctx->sb_index[i]]->cdef_strength =
+            0;
+      }
+    }
+  }
+
   if (fast) {
     for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
       const int luma_strength = cdef_info->cdef_strengths[j];
diff --git a/av1/encoder/pickcdef.h b/av1/encoder/pickcdef.h
index 70fc0e2f78..006b1d0bb7 100644
--- a/av1/encoder/pickcdef.h
+++ b/av1/encoder/pickcdef.h
@@ -218,7 +218,8 @@ void av1_cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx);

 void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
                              struct aom_internal_error_info *error_info,
-                             int fbr, int fbc, int sb_count);
+                             int fbr, int fbc, int sb_count,
+                             int adaptive_cdef_mode);
 /*!\endcond */

 /*!\brief AV1 CDEF parameter search
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 94642c4176..1f2d910aac 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -644,6 +644,7 @@ static void set_good_speed_features_lc_dec_framesize_dependent(

     sf->hl_sf.ref_frame_mvs_lvl = 2;

+    sf->lpf_sf.adaptive_cdef_mode = 1;
     sf->lpf_sf.dual_sgr_penalty_level = boosted ? 1 : 3;
     sf->lpf_sf.switchable_lr_with_bias_level = 1;
     sf->lpf_sf.skip_loop_filter_using_filt_error =
@@ -668,6 +669,7 @@ static void set_good_speed_features_lc_dec_framesize_dependent(

     sf->hl_sf.ref_frame_mvs_lvl = 1;

+    sf->lpf_sf.adaptive_cdef_mode = 1;
     sf->lpf_sf.skip_loop_filter_using_filt_error =
         (update_type != OVERLAY_UPDATE && update_type != INTNL_OVERLAY_UPDATE &&
          cm->current_frame.pyramid_level > 1)
@@ -2536,6 +2538,7 @@ static inline void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) {
   lpf_sf->disable_wiener_coeff_refine_search = false;
   lpf_sf->use_downsampled_wiener_stats = 0;
   lpf_sf->switchable_lr_with_bias_level = 0;
+  lpf_sf->adaptive_cdef_mode = 0;
 }

 static inline void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 7c756295a1..91b73bb047 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1615,6 +1615,13 @@ typedef struct LOOP_FILTER_SPEED_FEATURES {
   // This speed feature is only enabled in all intra mode.
   bool zero_low_cdef_strengths;

+  // Decoder side speed feature for adaptive CDEF control based on MSE
+  // 0 : Enable CDEF for all planes
+  // 1 : Disable CDEF for chroma planes and disable for luma adaptively based on
+  //     current frame's pyramid level and improvement in MSE after CDEF
+  //     filtering.
+  int adaptive_cdef_mode;
+
   // Decoder side speed feature to add penalty for use of dual-sgr filters.
   // Takes values 0 - 10, 0 indicating no penalty and each additional level
   // adding a penalty of 1%