Commit f8f91ecd56 for aom

commit f8f91ecd56f5bc38f401853a48e1b233966085e4
Author: Diksha Singh <diksha.singh@ittiam.com>
Date:   Mon Jun 8 11:08:24 2026 +0530

    Speed up av1_optimize_txb()

    Added specialized code when the absolute value of quantized
    coefficient equals one in update_coeff_simple(). Also removed
    redundant calls to get_dqv() in update_coeff_general().

    Encoder performance results averaged over all resolutions:

           Encoder Instruction
      CPU   Count Reduction (%)
       1        0.46
       2        0.36
       3        0.36
       4        0.31
       5        0.22
       6        0.20

    This change is bit-exact for all presets.

    Change-Id: If227d56a80b81beffa3456eff7f49037a51aa261

diff --git a/av1/encoder/txb_rdopt.c b/av1/encoder/txb_rdopt.c
index bd16ddc323..fb0ef12860 100644
--- a/av1/encoder/txb_rdopt.c
+++ b/av1/encoder/txb_rdopt.c
@@ -22,7 +22,6 @@ static inline void update_coeff_general(
     const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
     tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels,
     const qm_val_t *iqmatrix, const qm_val_t *qmatrix) {
-  const int dqv = get_dqv(dequant, scan[si], iqmatrix);
   const int ci = scan[si];
   const tran_low_t qc = qcoeff[ci];
   const int is_last = si == (eob - 1);
@@ -51,6 +50,7 @@ static inline void update_coeff_general(
       dist_low = dist0;
       rate_low = txb_costs->base_cost[coeff_ctx][0];
     } else {
+      const int dqv = get_dqv(dequant, scan[si], iqmatrix);
       get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
       abs_qc_low = abs_qc - 1;
       dist_low = get_coeff_dist(tqc, dqc_low, shift, qmatrix, ci);
@@ -95,34 +95,62 @@ static AOM_FORCE_INLINE void update_coeff_simple(
     const tran_low_t abs_qc = abs(qc);
     const tran_low_t abs_tqc = abs(tcoeff[ci]);
     const tran_low_t abs_dqc = abs(dqcoeff[ci]);
-    int rate_low = 0;
-    const int rate = get_two_coeff_cost_simple(
-        ci, abs_qc, coeff_ctx, txb_costs, bhl, tx_class, levels, &rate_low);
-    if (abs_dqc < abs_tqc) {
-      *accu_rate += rate;
-      return;
-    }
+    if (abs_qc == 1) {
+      const int *base_cost = txb_costs->base_cost[coeff_ctx];
+      const int rate = base_cost[1] + av1_cost_literal(1);
+      if (abs_dqc < abs_tqc) {
+        *accu_rate += rate;
+        return;
+      }

-    const int dqv = get_dqv(dequant, scan[si], iqmatrix);
-    const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift, qmatrix, ci);
-    const int64_t rd = RDCOST(rdmult, rate, dist);
+      const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift, qmatrix, ci);
+      const int64_t rd = RDCOST(rdmult, rate, dist);

-    const tran_low_t abs_qc_low = abs_qc - 1;
-    const tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
-    const int64_t dist_low =
-        get_coeff_dist(abs_tqc, abs_dqc_low, shift, qmatrix, ci);
-    const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
+      const int64_t dist_low =
+          get_coeff_dist(abs_tqc, /*abs_dqc_low*/ 0, shift, qmatrix, ci);
+      const int rate_low = rate - base_cost[5];
+      const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);

-    int allow_lower_qc = sharpness ? (abs_qc > 1) : 1;
+      const int allow_lower_qc = sharpness ? 0 : 1;

-    if (rd_low < rd && allow_lower_qc) {
-      const int sign = (qc < 0) ? 1 : 0;
-      qcoeff[ci] = (-sign ^ abs_qc_low) + sign;
-      dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign;
-      levels[get_padded_idx(ci, bhl)] = AOMMIN(abs_qc_low, INT8_MAX);
-      *accu_rate += rate_low;
+      if (rd_low < rd && allow_lower_qc) {
+        qcoeff[ci] = 0;
+        dqcoeff[ci] = 0;
+        levels[get_padded_idx(ci, bhl)] = 0;
+        *accu_rate += rate_low;
+      } else {
+        *accu_rate += rate;
+      }
     } else {
-      *accu_rate += rate;
+      int rate_low = 0;
+      const int rate = get_two_coeff_cost_simple(
+          ci, abs_qc, coeff_ctx, txb_costs, bhl, tx_class, levels, &rate_low);
+      if (abs_dqc < abs_tqc) {
+        *accu_rate += rate;
+        return;
+      }
+
+      const int dqv = get_dqv(dequant, scan[si], iqmatrix);
+      const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift, qmatrix, ci);
+      const int64_t rd = RDCOST(rdmult, rate, dist);
+
+      const tran_low_t abs_qc_low = abs_qc - 1;
+      const tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
+      const int64_t dist_low =
+          get_coeff_dist(abs_tqc, abs_dqc_low, shift, qmatrix, ci);
+      const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
+
+      const int allow_lower_qc = sharpness ? (abs_qc > 1) : 1;
+
+      if (rd_low < rd && allow_lower_qc) {
+        const int sign = (qc < 0) ? 1 : 0;
+        qcoeff[ci] = (-sign ^ abs_qc_low) + sign;
+        dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign;
+        levels[get_padded_idx(ci, bhl)] = AOMMIN(abs_qc_low, INT8_MAX);
+        *accu_rate += rate_low;
+      } else {
+        *accu_rate += rate;
+      }
     }
   }
 }