Commit c9b95efd26 for aom
commit c9b95efd26ee2b9937c9d8569dd0e6dcd8b4ae66
Author: Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com>
Date: Thu Apr 23 13:42:02 2026 +0530
Prune either h or v 1:4 partitions
In this CL, a simple motion search is run to calculate the SSE for
each of the 4 sub-blocks of PARTITION_HORZ_4 and PARTITION_VERT_4.
A skip RD cost is computed from these SSEs for each partition type
and used to prune whichever of the two has the higher cost.
     Instruction Count            BD-Rate Loss(%)
cpu    Reduction(%)    avg.psnr  ovr.psnr   ssim     vmaf   vmaf_neg
 3        1.684         0.0581    0.0611   0.0549   0.0890   0.0525
 4        1.073         0.0438    0.0513   0.0519   0.0633   0.0598
 5        0.464         0.0232    0.0242   0.0285   0.0463   0.0434
This CL is bit-exact for speed=6.
STATS_CHANGED
Change-Id: Ib03a7bf9dc5ffe274836683b657734731494b942
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index 19d42b9e1c..9455b4bc51 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -4048,12 +4048,80 @@ static void prune_4_partition_using_split_info(
}
}
+// Prunes one of PARTITION_HORZ_4 / PARTITION_VERT_4 by comparing costs built
+// from simple motion search SSE.
+//
+// Runs av1_simple_motion_search_sse_var() on each of the
+// SUB_PARTITIONS_PART4 sub-blocks of both layouts, sums the returned SSEs per
+// layout, and combines each sum with that partition type's entry in
+// part_search_state->partition_cost through RDCOST(). The layout with the
+// strictly larger result has its part4_search_allowed[] entry cleared; on a
+// tie both stay enabled.
+//
+// part4_search_allowed is read and written in place, indexed by HORZ4/VERT4.
+// Nothing is changed when either entry is already 0, or when the chosen
+// reference frame's flag is not set in cpi->ref_frame_flags.
+// mi_row/mi_col are offset by fractions of mi_size_high/mi_size_wide, so they
+// are presumably the block's top-left position in mi units (confirm at the
+// caller).
+static void prune_part4_using_sms(AV1_COMP *const cpi, MACROBLOCK *x,
+ const PartitionSearchState *part_search_state,
+ SIMPLE_MOTION_DATA_TREE *sms_tree, int mi_row,
+ int mi_col, BLOCK_SIZE bsize,
+ int *part4_search_allowed) {
+ // Only choose between the two layouts while both are still candidates.
+ if (!part4_search_allowed[HORZ4] || !part4_search_allowed[VERT4]) return;
+
+ // Per-sub-block SSE outputs; only their sums are used below.
+ unsigned int sms_h_part4_sse[4];
+ unsigned int sms_v_part4_sse[4];
+
+ const BLOCK_SIZE subsize_h4 = get_partition_subsize(bsize, PARTITION_HORZ_4);
+ const BLOCK_SIZE subsize_v4 = get_partition_subsize(bsize, PARTITION_VERT_4);
+
+ // Block height and width, taken from the mi_size_high/mi_size_wide tables.
+ const int h_mi = mi_size_high[bsize];
+ const int w_mi = mi_size_wide[bsize];
+
+ // Search against ALTREF_FRAME when cpi->rc.is_src_frame_alt_ref is set,
+ // otherwise LAST_FRAME; bail out if that reference's flag is not set.
+ const int ref = cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME;
+ if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref])) return;
+
+ // 64-bit accumulators for the per-layout SSE totals.
+ int64_t part4_h_sse_sum = 0;
+ int64_t part4_v_sse_sum = 0;
+
+ // ---- HORZ_4 ----
+ // Step the row by a quarter of the block height per sub-block; the column
+ // stays at mi_col.
+ for (int r_idx = 0; r_idx < SUB_PARTITIONS_PART4; r_idx++) {
+ unsigned int part_var;
+ const int sub_mi_row = mi_row + r_idx * h_mi / 4;
+ const int sub_mi_col = mi_col;
+
+ // One plane, sub-pixel search enabled, passing sms_tree->start_mvs[ref].
+ av1_simple_motion_search_sse_var(cpi, x, sub_mi_row, sub_mi_col, subsize_h4,
+ ref, sms_tree->start_mvs[ref],
+ /*num_planes=*/1, /*use_subpixel=*/1,
+ &sms_h_part4_sse[r_idx], &part_var);
+
+ // The variance output is not used; only the SSE is accumulated.
+ (void)part_var;
+
+ part4_h_sse_sum += sms_h_part4_sse[r_idx];
+ }
+
+ // ---- VERT_4 ----
+ // Step the column by a quarter of the block width per sub-block; the row
+ // stays at mi_row.
+ for (int r_idx = 0; r_idx < SUB_PARTITIONS_PART4; r_idx++) {
+ unsigned int part_var;
+ const int sub_mi_row = mi_row;
+ const int sub_mi_col = mi_col + r_idx * w_mi / 4;
+
+ // Same search settings as the HORZ_4 loop, with the VERT_4 sub-block size.
+ av1_simple_motion_search_sse_var(cpi, x, sub_mi_row, sub_mi_col, subsize_v4,
+ ref, sms_tree->start_mvs[ref],
+ /*num_planes=*/1, /*use_subpixel=*/1,
+ &sms_v_part4_sse[r_idx], &part_var);
+ // The variance output is not used; only the SSE is accumulated.
+ (void)part_var;
+
+ part4_v_sse_sum += sms_v_part4_sse[r_idx];
+ }
+
+ // ---- Skip RD calculation ----
+ // NOTE(review): assumes RDCOST(rdmult, rate, dist) argument order, i.e. the
+ // partition cost acts as the rate and the SSE sum as the distortion -
+ // confirm against the macro definition.
+ const int64_t part4_h_rd =
+ RDCOST(x->rdmult, part_search_state->partition_cost[PARTITION_HORZ_4],
+ part4_h_sse_sum);
+
+ const int64_t part4_v_rd =
+ RDCOST(x->rdmult, part_search_state->partition_cost[PARTITION_VERT_4],
+ part4_v_sse_sum);
+
+ // ---- pruning ----
+ // Strict comparisons: at most one layout is disabled, and equal costs leave
+ // both enabled.
+ if (part4_h_rd > part4_v_rd) part4_search_allowed[HORZ4] = 0;
+
+ if (part4_v_rd > part4_h_rd) part4_search_allowed[VERT4] = 0;
+}
+
// Prune 4-way partition search.
static void prune_4_way_partition_search(
AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree,
- PartitionSearchState *part_search_state, RD_STATS *best_rdc,
- int pb_source_variance, int prune_ext_part_state,
- int part4_search_allowed[NUM_PART4_TYPES]) {
+ PartitionSearchState *part_search_state, SIMPLE_MOTION_DATA_TREE *sms_tree,
+ RD_STATS *best_rdc, int pb_source_variance, int prune_ext_part_state,
+ int mi_row, int mi_col, int part4_search_allowed[NUM_PART4_TYPES]) {
const PartitionBlkParams blk_params = part_search_state->part_blk_params;
const BLOCK_SIZE bsize = blk_params.bsize;
@@ -4132,6 +4200,15 @@ static void prune_4_way_partition_search(
// in the current block and sub-blocks in PARTITION_SPLIT.
prune_4_partition_using_split_info(cpi, x, part_search_state,
part4_search_allowed);
+
+ if (cpi->sf.part_sf.prune_h_or_v_4part_using_sms_info && partition4_allowed &&
+ best_rdc->rdcost != INT64_MAX &&
+ av1_is_whole_blk_in_frame(&part_search_state->part_blk_params,
+ &cpi->common.mi_params) &&
+ !frame_is_intra_only(&cpi->common)) {
+ prune_part4_using_sms(cpi, x, part_search_state, sms_tree, mi_row, mi_col,
+ bsize, part4_search_allowed);
+ }
}
// Set params needed for PARTITION_NONE search.
@@ -5834,8 +5911,9 @@ BEGIN_PARTITION_SEARCH:
// 4-way partitions search stage.
int part4_search_allowed[NUM_PART4_TYPES] = { 1, 1 };
// Prune 4-way partition search.
- prune_4_way_partition_search(cpi, x, pc_tree, &part_search_state, &best_rdc,
- pb_source_variance, prune_ext_part_state,
+ prune_4_way_partition_search(cpi, x, pc_tree, &part_search_state, sms_tree,
+ &best_rdc, pb_source_variance,
+ prune_ext_part_state, mi_row, mi_col,
part4_search_allowed);
#if CONFIG_COLLECT_COMPONENT_TIMING
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index abfa1a53f3..8888da6571 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1280,6 +1280,7 @@ static void set_good_speed_features_framesize_independent(
: (boosted ? SIMPLE_AGG_LVL3 : QIDX_BASED_AGG_LVL1);
sf->part_sf.prune_ext_part_using_split_info = 1;
sf->part_sf.simple_motion_search_rect_split = 1;
+ sf->part_sf.prune_h_or_v_4part_using_sms_info = true;
sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
sf->mv_sf.search_method = DIAMOND;
@@ -2311,6 +2312,7 @@ static inline void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
part_sf->skip_non_sq_part_based_on_none = 0;
part_sf->disable_8x8_part_based_on_qidx = 0;
part_sf->split_partition_penalty_level = 0;
+ part_sf->prune_h_or_v_4part_using_sms_info = false;
}
static inline void init_mv_sf(MV_SPEED_FEATURES *mv_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 5b1a2e8e1e..7c756295a1 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -855,6 +855,10 @@ typedef struct PARTITION_SPEED_FEATURES {
// Disables 8x8 and below partitions for low quantizers.
int disable_8x8_part_based_on_qidx;
+ // Disables either of PARTITION_HORZ_4 or PARTITION_VERT_4 using SSE from
+ // simple motion search.
+ bool prune_h_or_v_4part_using_sms_info;
+
// Decoder side speed feature to add penalty for use of smaller partitions.
// Takes values 0 - 2, 0 indicating no penalty and higher level indicating
// increased penalty.