Commit f1e7f4e023 for aom
commit f1e7f4e023d8dd0a60ee8bb26ebdb73f91d0a20e
Author: Deepa K G <deepa.kg@ittiam.com>
Date: Fri Jun 26 17:55:36 2026 +0530
Avoid SSE calculation for blocks outside frame boundary
When a frame dimension is not a multiple of 8, the
function `aom_sum_squares_2d_i16()` can be called with
width = 0 or height = 0. However, the SSE2 and ARM
optimizations of `aom_sum_squares_2d_i16()` require
that the width and height be non-zero. Thus, in this
patch `aom_sum_squares_2d_i16()` is called only for
non-zero values of width and height.
Also, the suggestions from
https://aomedia-review.googlesource.com/c/aom/+/213801
are addressed:
- Setting of 'visible_cols' and 'visible_rows' in
get_visible_dimensions() is corrected for odd frame
dimensions.
- Code documentation is improved.
The flag 'do_border_pad' is not yet enabled.
Bug: 527078408, 527242002
Change-Id: Ie5b79b5c7ce0ac94c5609b2f2b28fc82920a3c35
diff --git a/aom_ports/mem.h b/aom_ports/mem.h
index f37a8259cc..a7e0585a2c 100644
--- a/aom_ports/mem.h
+++ b/aom_ports/mem.h
@@ -72,8 +72,9 @@
#define DIVIDE_AND_ROUND(x, y) (((x) + ((y) >> 1)) / (y))
+/* This macro requires d > 0. */
#define DIVIDE_AND_ROUND_SIGNED(n, d) \
- ((((n) < 0) ^ ((d) < 0)) ? (((n) - (d) / 2) / (d)) : (((n) + (d) / 2) / (d)))
+ (((n) < 0) ? (((n) - (d) / 2) / (d)) : (((n) + (d) / 2) / (d)))
#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index ffda806a08..9e49fc8fa6 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -1439,10 +1439,20 @@ typedef struct macroblock {
RD_STATS *rdcost;
#endif // CONFIG_PARTITION_SEARCH_ORDER
- /*! \brief Distance from bottom edge of the frame in pixels. */
+ /*! \brief Signed vertical distance, in pixels, from the bottom edge of the
+ * current prediction block to the bottom edge of the frame.
+ *
+ * The value may be negative when the prediction block extends beyond the
+ * bottom edge of the frame.
+ */
int pix_to_bottom_edge;
- /*! \brief Distance from right edge of the frame in pixels. */
+ /*! \brief Signed horizontal distance, in pixels, from the right edge of the
+ * current prediction block to the right edge of the frame.
+ *
+ * The value may be negative when the prediction block extends beyond the
+ * right edge of the frame.
+ */
int pix_to_right_edge;
} MACROBLOCK;
#undef SINGLE_REF_MODES
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 7cd3a0273c..a6d255dcd4 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -38,46 +38,42 @@
// Compute the average value of the wxh block.
static inline int16_t avg_wxh_block_c(int16_t *diff, ptrdiff_t diff_stride,
int w, int h) {
+ assert(w > 0 && h > 0);
int32_t sum = 0;
for (int row = 0; row < h; ++row) {
for (int col = 0; col < w; ++col) {
- sum += *(diff + row * diff_stride + col);
+ sum += diff[row * diff_stride + col];
}
}
- return (w * h > 0) ? (int16_t)(DIVIDE_AND_ROUND_SIGNED(sum, w * h)) : 0;
+ return (int16_t)DIVIDE_AND_ROUND_SIGNED(sum, w * h);
}
// Compute the row average value of the wxh block.
static inline void avg_wxh_block_horiz_c(int16_t *diff, ptrdiff_t diff_stride,
int w, int h, int16_t *out) {
+ assert(w > 0 && h > 0);
for (int row = 0; row < h; ++row) {
int32_t sum = 0;
for (int col = 0; col < w; ++col) {
- sum += *(diff + row * diff_stride + col);
+ sum += diff[row * diff_stride + col];
}
- out[row] = (w > 0) ? (int16_t)DIVIDE_AND_ROUND_SIGNED(sum, w) : 0;
+ out[row] = (int16_t)DIVIDE_AND_ROUND_SIGNED(sum, w);
}
}
// Compute the column average value of the wxh block.
static inline void avg_wxh_block_vert_c(int16_t *diff, ptrdiff_t diff_stride,
int w, int h, int16_t *out) {
+ assert(w > 0 && h > 0);
for (int col = 0; col < w; ++col) {
int32_t sum = 0;
for (int row = 0; row < h; ++row) {
- sum += *(diff + row * diff_stride + col);
+ sum += diff[row * diff_stride + col];
}
- out[col] = (h > 0) ? (int16_t)DIVIDE_AND_ROUND_SIGNED(sum, h) : 0;
+ out[col] = (int16_t)DIVIDE_AND_ROUND_SIGNED(sum, h);
}
}
-static inline void *aom_memset_int16(void *dest, int16_t val, size_t length) {
- size_t i;
- int16_t *dest16 = (int16_t *)dest;
- for (i = 0; i < length; i++) *dest16++ = val;
- return dest;
-}
-
// Fill the outside-frame part's residues with values derived from the in-frame
// part's residues.
static inline void fill_residue_outside_frame(
@@ -95,12 +91,11 @@ static inline void fill_residue_outside_frame(
// Fill the remaining parts of the block with the average value
const int right_pixels = tx_cols - visible_tx_cols;
for (int i = 0; i < tx_rows; ++i) {
- aom_memset_int16(diff + i * diff_stride + visible_tx_cols, avg,
- right_pixels);
+ aom_memset16(diff + i * diff_stride + visible_tx_cols, avg, right_pixels);
}
for (int i = visible_tx_rows; i < tx_rows; ++i) {
- aom_memset_int16(diff + i * diff_stride, avg, visible_tx_cols);
+ aom_memset16(diff + i * diff_stride, avg, visible_tx_cols);
}
} else if (htx_tab[tx_type] == IDTX_1D) {
if (visible_tx_rows < tx_rows) {
@@ -134,8 +129,8 @@ static inline void fill_residue_outside_frame(
visible_tx_rows, out);
for (int i = 0; i < visible_tx_rows; ++i) {
- aom_memset_int16(diff + i * diff_stride + visible_tx_cols, out[i],
- right_pixels);
+ aom_memset16(diff + i * diff_stride + visible_tx_cols, out[i],
+ right_pixels);
}
}
@@ -170,9 +165,9 @@ void av1_subtract_block(const MACROBLOCK *x, int rows, int cols, int16_t *diff,
if (!do_border_pad) return;
int visible_cols, visible_rows;
- const int is_border_block =
- get_visible_dimensions(x, plane, plane_bsize, blk_col, blk_row, cols,
- rows, &visible_cols, &visible_rows, true);
+ const int is_border_block = get_visible_dimensions(
+ x, plane, plane_bsize, blk_col, blk_row, cols, rows,
+ /*clip_dims=*/true, &visible_cols, &visible_rows);
if (is_border_block)
fill_residue_outside_frame(diff, diff_stride, cols, rows, visible_cols,
visible_rows, tx_type);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 9505d0af53..9aa8ca0f4a 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -4287,16 +4287,26 @@ static inline int get_mi_ext_idx(const int mi_row, const int mi_col,
return mi_ext_row * mbmi_ext_stride + mi_ext_col;
}
+// Computes the signed distances from the bottom and right edges of the current
+// prediction block to the corresponding edges of the frame.
static inline void set_pixels_to_frame_edge(MACROBLOCK *x, int bw, int bh,
int mi_col, int mi_row, int mi_cols,
int mi_rows, int frame_width,
int frame_height,
bool do_border_pad) {
- int total_frame_width = do_border_pad ? frame_width : (mi_cols * 4);
- int total_frame_height = do_border_pad ? frame_height : (mi_rows * 4);
-
- x->pix_to_bottom_edge = total_frame_height - ((mi_row + bh) << MI_SIZE_LOG2);
- x->pix_to_right_edge = total_frame_width - ((mi_col + bw) << MI_SIZE_LOG2);
+ // For do_border_pad = 1, compute distances using the actual frame
+ // dimensions.
+ // For do_border_pad = 0, compute distances using the frame dimensions
+ // aligned to a multiple of 8 pixels to match the dimensions represented
+ // by mi_cols and mi_rows, which are rounded up to multiples of 8 pixels.
+ int boundary_frame_width =
+ do_border_pad ? frame_width : (mi_cols << MI_SIZE_LOG2);
+ int boundary_frame_height =
+ do_border_pad ? frame_height : (mi_rows << MI_SIZE_LOG2);
+
+ x->pix_to_bottom_edge =
+ boundary_frame_height - ((mi_row + bh) << MI_SIZE_LOG2);
+ x->pix_to_right_edge = boundary_frame_width - ((mi_col + bw) << MI_SIZE_LOG2);
}
// Lighter version of set_offsets that only sets the mode info
diff --git a/av1/encoder/model_rd.h b/av1/encoder/model_rd.h
index 90d8429347..d792b8ea0d 100644
--- a/av1/encoder/model_rd.h
+++ b/av1/encoder/model_rd.h
@@ -118,7 +118,7 @@ static inline int64_t compute_sse_plane(const AV1_COMP *cpi, MACROBLOCK *x,
const int block_height = block_size_high[plane_bsize];
get_visible_dimensions(x, plane, plane_bsize, 0, 0, block_width, block_height,
- &bw, &bh, true);
+ /*clip_dims=*/true, &bw, &bh);
int64_t sse = pixel_dist_visible_only(
cpi, x, p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
@@ -280,7 +280,7 @@ static inline void model_rd_for_sb_with_curvfit(
const int block_height = block_size_high[plane_bsize];
get_visible_dimensions(x, plane, plane_bsize, 0, 0, block_width,
- block_height, &bw, &bh, true);
+ block_height, /*clip_dims=*/true, &bw, &bh);
sse = pixel_dist_visible_only(cpi, x, p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, plane_bsize,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 97174d75da..a11d58e4d0 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -883,8 +883,8 @@ static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
const int block_width = block_size_wide[bs];
const int block_height = block_size_high[bs];
- get_visible_dimensions(x, plane, bs, 0, 0, block_width, block_height, &bw,
- &bh, cpi->do_border_pad);
+ get_visible_dimensions(x, plane, bs, 0, 0, block_width, block_height,
+ cpi->do_border_pad, &bw, &bh);
sse = pixel_dist_visible_only(cpi, x, p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, bs, block_height,
diff --git a/av1/encoder/rdopt_utils.h b/av1/encoder/rdopt_utils.h
index 9e93fa246b..1c50cb4b3d 100644
--- a/av1/encoder/rdopt_utils.h
+++ b/av1/encoder/rdopt_utils.h
@@ -339,13 +339,31 @@ static inline void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
if (width) *width = txb_width;
}
+/*!
+ * \brief Computes the effective dimensions of a block.
+ *
+ * \param[in] x Pointer to structure holding the data for the
+ current encoding macroblock
+ * \param[in] plane The index of the current plane
+ * \param[in] plane_bsize Block size for the current plane
+ * \param[in] blk_col Column offset of the transform block, in MI units
+ * \param[in] blk_row Row offset of the transform block, in MI units
+ * \param[in] cols Transform block width, in pixels
+ * \param[in] rows Transform block height, in pixels
+ * \param[in] clip_dims If false, returns the original block dimensions
+ * If true, clips the block dimensions so they lie
+ * within the valid frame extent
+ * \param[out] visible_cols Pointer to the effective block width, in pixels
+ * \param[out] visible_rows Pointer to the effective block height, in pixels
+ *
+ * \return 1 if the block dimensions were clipped; otherwise 0.
+ */
static inline int get_visible_dimensions(const MACROBLOCK *x, int plane,
BLOCK_SIZE plane_bsize, int blk_col,
int blk_row, int cols, int rows,
- int *visible_cols, int *visible_rows,
- bool use_crop_dim) {
- if ((x->pix_to_bottom_edge >= 0 && x->pix_to_right_edge >= 0) ||
- !use_crop_dim) {
+ bool clip_dims, int *visible_cols,
+ int *visible_rows) {
+ if ((x->pix_to_bottom_edge >= 0 && x->pix_to_right_edge >= 0) || !clip_dims) {
if (visible_cols != NULL && visible_rows != NULL) {
*visible_rows = rows;
*visible_cols = cols;
@@ -361,7 +379,8 @@ static inline int get_visible_dimensions(const MACROBLOCK *x, int plane,
} else {
const int block_height = block_size_high[plane_bsize];
const int block_rows =
- (x->pix_to_bottom_edge >> pd->subsampling_y) + block_height;
+ ROUND_POWER_OF_TWO_SIGNED(x->pix_to_bottom_edge, pd->subsampling_y) +
+ block_height;
valid_rows = clamp(block_rows - (blk_row << MI_SIZE_LOG2), 0, rows);
}
@@ -370,7 +389,8 @@ static inline int get_visible_dimensions(const MACROBLOCK *x, int plane,
} else {
const int block_width = block_size_wide[plane_bsize];
const int block_cols =
- (x->pix_to_right_edge >> pd->subsampling_x) + block_width;
+ ROUND_POWER_OF_TWO_SIGNED(x->pix_to_right_edge, pd->subsampling_x) +
+ block_width;
valid_cols = clamp(block_cols - (blk_col << MI_SIZE_LOG2), 0, cols);
}
if (visible_cols != NULL && visible_rows != NULL) {
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index df78888ea0..2fe835b891 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -130,19 +130,22 @@ int64_t av1_pixel_diff_dist(const MACROBLOCK *x, int plane, int blk_row,
const int txb_cols = block_size_wide[tx_bsize];
const int txb_rows = block_size_high[tx_bsize];
get_visible_dimensions(x, plane, plane_bsize, blk_col, blk_row, txb_cols,
- txb_rows, &visible_cols, &visible_rows, true);
- const int diff_stride = block_size_wide[plane_bsize];
- const int16_t *diff = x->plane[plane].src_diff;
+ txb_rows, /*clip_dims=*/true, &visible_cols,
+ &visible_rows);
- diff += ((blk_row * diff_stride + blk_col) << MI_SIZE_LOG2);
- uint64_t sse =
- aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
- if (block_mse_q8 != NULL) {
- if (visible_cols > 0 && visible_rows > 0)
+ uint64_t sse = 0;
+ if (visible_cols > 0 && visible_rows > 0) {
+ const int diff_stride = block_size_wide[plane_bsize];
+ const int16_t *diff = x->plane[plane].src_diff;
+
+ diff += ((blk_row * diff_stride + blk_col) << MI_SIZE_LOG2);
+ sse = aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
+ if (block_mse_q8 != NULL) {
*block_mse_q8 =
(unsigned int)((256 * sse) / (visible_cols * visible_rows));
- else
- *block_mse_q8 = 0;
+ }
+ } else {
+ if (block_mse_q8 != NULL) *block_mse_q8 = 0;
}
return sse;
}
@@ -157,15 +160,18 @@ static inline int64_t pixel_diff_stats(
const int txb_cols = block_size_wide[tx_bsize];
const int txb_rows = block_size_high[tx_bsize];
get_visible_dimensions(x, plane, plane_bsize, blk_col, blk_row, txb_cols,
- txb_rows, &visible_cols, &visible_rows, true);
- const int diff_stride = block_size_wide[plane_bsize];
- const int16_t *diff = x->plane[plane].src_diff;
+ txb_rows, /*clip_dims=*/true, &visible_cols,
+ &visible_rows);
- diff += ((blk_row * diff_stride + blk_col) << MI_SIZE_LOG2);
uint64_t sse = 0;
- int sum = 0;
- sse = aom_sum_sse_2d_i16(diff, diff_stride, visible_cols, visible_rows, &sum);
if (visible_cols > 0 && visible_rows > 0) {
+ const int diff_stride = block_size_wide[plane_bsize];
+ const int16_t *diff = x->plane[plane].src_diff;
+
+ diff += ((blk_row * diff_stride + blk_col) << MI_SIZE_LOG2);
+ int sum = 0;
+ sse =
+ aom_sum_sse_2d_i16(diff, diff_stride, visible_cols, visible_rows, &sum);
double norm_factor = 1.0 / (visible_cols * visible_rows);
int sign_sum = sum > 0 ? 1 : -1;
// Conversion to transform domain
@@ -984,7 +990,8 @@ static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x,
txb_rows = block_size_high[tx_bsize];
get_visible_dimensions(x, plane, plane_bsize, blk_col, blk_row, txb_cols,
- txb_rows, &visible_cols, &visible_rows, true);
+ txb_rows, /*clip_dims=*/true, &visible_cols,
+ &visible_rows);
assert(visible_rows > 0);
assert(visible_cols > 0);
@@ -2099,8 +2106,9 @@ static void search_tx_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
int is_border_block = 0;
if (cpi->do_border_pad) {
- is_border_block = get_visible_dimensions(
- x, plane, plane_bsize, blk_col, blk_row, txw, txh, NULL, NULL, true);
+ is_border_block =
+ get_visible_dimensions(x, plane, plane_bsize, blk_col, blk_row, txw,
+ txh, /*clip_dims=*/true, NULL, NULL);
if (is_border_block)
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size,
best_tx_type, cpi->do_border_pad);