Commit ea448bbc12 for aom
commit ea448bbc12c338ef92bfd7d9d18b357151be0bee
Author: James Zern <jzern@google.com>
Date: Fri Apr 24 09:36:42 2026 -0700
Revert "Add SSE4.1 optimization for interp_cubic()"
This reverts commit 145a5f09dcb5e26bc9f40a00f7ef3559bd9c5159.
Reason for revert: test fails in x86 build
Bug: 506138571
Change-Id: I965ecfb7394b4601315abe20c4935f0b3b310739
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 4b92eb4add..5e730f9be5 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -363,7 +363,6 @@ list(APPEND AOM_AV1_ENCODER_INTRIN_SSE4_1
"${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm2d_sse4.c"
"${AOM_ROOT}/av1/encoder/x86/encodetxb_sse4.c"
"${AOM_ROOT}/av1/encoder/x86/highbd_fwd_txfm_sse4.c"
- "${AOM_ROOT}/av1/encoder/x86/model_rd_sse4.c"
"${AOM_ROOT}/av1/encoder/x86/rdopt_sse4.c"
"${AOM_ROOT}/av1/encoder/x86/pickrst_sse4.c")
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 2e7acf36de..78e64294d1 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -460,9 +460,6 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/void av1_get_horver_correlation_full/, "const int16_t *diff, int stride, int w, int h, float *hcorr, float *vcorr";
specialize qw/av1_get_horver_correlation_full sse4_1 avx2 neon/;
- add_proto qw/void av1_interp_cubic_rate_dist/, "const double *p1, const double *p2, double x, double * const rate_f, double * const distbysse_f";
- specialize qw/av1_interp_cubic_rate_dist sse4_1/;
-
add_proto qw/void av1_nn_predict/, "const float *input_nodes, const NN_CONFIG *const nn_config, int reduce_prec, float *const output";
add_proto qw/void av1_nn_fast_softmax_16/, "const float *input_nodes, float *output";
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index b5f6b3a725..e9f6e54f03 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -949,13 +949,6 @@ static double interp_cubic(const double *p, double x) {
x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
}
-void av1_interp_cubic_rate_dist_c(const double *p1, const double *p2, double x,
- double *const rate_f,
- double *const distbysse_f) {
- *rate_f = interp_cubic(p1, x);
- *distbysse_f = interp_cubic(p2, x);
-}
-
/*
static double interp_bicubic(const double *p, int p_stride, double x,
double y) {
@@ -1087,8 +1080,9 @@ void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
assert(xi > 0);
const double *prate = &interp_rgrid_curv[rcat][(xi - 1)];
+ *rate_f = interp_cubic(prate, xo);
const double *pdist = &interp_dgrid_curv[dcat][(xi - 1)];
- av1_interp_cubic_rate_dist(prate, pdist, xo, rate_f, distbysse_f);
+ *distbysse_f = interp_cubic(pdist, xo);
}
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
diff --git a/av1/encoder/x86/model_rd_sse4.c b/av1/encoder/x86/model_rd_sse4.c
deleted file mode 100644
index 48bc580624..0000000000
--- a/av1/encoder/x86/model_rd_sse4.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2026, Alliance for Open Media. All rights reserved.
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-void av1_interp_cubic_rate_dist_sse4_1(const double *p1, const double *p2,
- double x, double *const rate_f,
- double *const distbysse_f) {
- const __m128d half = _mm_set1_pd(0.5);
- const __m128d two = _mm_set1_pd(2.0);
- const __m128d three = _mm_set1_pd(3.0);
- const __m128d four = _mm_set1_pd(4.0);
- const __m128d five = _mm_set1_pd(5.0);
-
- const __m128d reg_x = _mm_set1_pd(x);
- const __m128d reg_p0 = _mm_set_pd(p2[0], p1[0]);
- const __m128d reg_p1 = _mm_set_pd(p2[1], p1[1]);
- const __m128d reg_p2 = _mm_set_pd(p2[2], p1[2]);
- const __m128d reg_p3 = _mm_set_pd(p2[3], p1[3]);
-
- // To ensure that results are bit-identical to the C code, we need to perform
- // exactly the same sequence of operations here as in the C code.
- // reg_res_0 = x * (3.0 * (p[1] - p[2]) + p[3] - p[0])
- __m128d reg_res_0 = _mm_sub_pd(reg_p1, reg_p2);
- reg_res_0 = _mm_mul_pd(three, reg_res_0);
- reg_res_0 = _mm_add_pd(reg_res_0, reg_p3);
- reg_res_0 = _mm_sub_pd(reg_res_0, reg_p0);
- reg_res_0 = _mm_mul_pd(reg_x, reg_res_0);
-
- // reg_res_1 = 2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2]- p[3]
- const __m128d regp0_x_2 = _mm_mul_pd(two, reg_p0);
- const __m128d regp1_x_5 = _mm_mul_pd(five, reg_p1);
- const __m128d regp2_x_4 = _mm_mul_pd(four, reg_p2);
- __m128d reg_res_1 = _mm_sub_pd(regp0_x_2, regp1_x_5);
- reg_res_1 = _mm_add_pd(reg_res_1, regp2_x_4);
- reg_res_1 = _mm_sub_pd(reg_res_1, reg_p3);
-
- // reg_res_2 = x * (reg_res_1 + reg_res_0)
- __m128d reg_res_2 = _mm_add_pd(reg_res_1, reg_res_0);
- reg_res_2 = _mm_mul_pd(reg_x, reg_res_2);
-
- // reg_res_3 = p[2] - p[0] + reg_res_2
- __m128d reg_res_3 = _mm_sub_pd(reg_p2, reg_p0);
- reg_res_3 = _mm_add_pd(reg_res_3, reg_res_2);
-
- // reg_res_4 = p[1] + 0.5 * x * reg_res_3
- __m128d reg_res_4 = _mm_mul_pd(_mm_mul_pd(half, reg_x), reg_res_3);
- reg_res_4 = _mm_add_pd(reg_p1, reg_res_4);
-
- double result[2];
- _mm_storeu_pd(result, reg_res_4);
- *rate_f = result[0];
- *distbysse_f = result[1];
-}
diff --git a/test/model_rd_test.cc b/test/model_rd_test.cc
deleted file mode 100644
index 910f20abda..0000000000
--- a/test/model_rd_test.cc
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2026, Alliance for Open Media. All rights reserved.
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cstdlib>
-
-#include "gtest/gtest.h"
-#include "config/av1_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-using InterpCubicRateDistFunc = void (*)(const double *p1, const double *p2,
- double x, double *const rate_f,
- double *const distbysse_f);
-
-using InterpCubicTestParam = std::tuple<const InterpCubicRateDistFunc>;
-
-class InterpCubicTest : public ::testing::TestWithParam<InterpCubicTestParam> {
- public:
- double generate_random_double(double min, double max) {
- return min + (static_cast<double>(rnd_.Rand31()) / ((1U << 31) - 1)) *
- (max - min);
- }
- void SetUp() override { target_func_ = GET_PARAM(0); }
- void TearDown() override {}
- void CheckOutput();
- void SpeedTest();
-
- protected:
- InterpCubicRateDistFunc target_func_;
-
- private:
- libaom_test::ACMRandom rnd_;
-};
-GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InterpCubicTest);
-
-void InterpCubicTest::CheckOutput() {
- double p1[4], p2[4];
- double rate_f_ref, rate_f_mod, distbysse_f_ref, distbysse_f_mod;
- const int knum_iter = 10000;
- for (int iter = 0; iter < knum_iter; iter++) {
- for (int i = 0; i < 4; i++) {
- p1[i] = generate_random_double(0.0000, 4096.000000);
- p2[i] = generate_random_double(0.0000, 16.0000);
- }
- double x = generate_random_double(0.0000, 1.0000);
-
- av1_interp_cubic_rate_dist_c(p1, p2, x, &rate_f_ref, &distbysse_f_ref);
- target_func_(p1, p2, x, &rate_f_mod, &distbysse_f_mod);
- EXPECT_EQ(rate_f_ref, rate_f_mod) << "Error: rate_f value mismatch";
- EXPECT_EQ(distbysse_f_ref, distbysse_f_mod)
- << "Error: distbysse_f value mismatch";
- }
-}
-
-void InterpCubicTest::SpeedTest() {
- double p1[4], p2[4];
- double rate_f_ref, rate_f_mod, distbysse_f_ref, distbysse_f_mod;
-
- for (int i = 0; i < 4; i++) {
- p1[i] = generate_random_double(0.0000, 4096.0000);
- p2[i] = generate_random_double(0.0000, 16.0000);
- }
- double x = generate_random_double(0.0000, 1.0000);
-
- const int num_iter = 100000000;
-
- aom_usec_timer ref_timer, test_timer;
- aom_usec_timer_start(&ref_timer);
- for (int iter = 0; iter < num_iter; iter++) {
- av1_interp_cubic_rate_dist_c(p1, p2, x, &rate_f_ref, &distbysse_f_ref);
- }
- aom_usec_timer_mark(&ref_timer);
- const int elapsed_time_c =
- static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
-
- aom_usec_timer_start(&test_timer);
- for (int iter = 0; iter < num_iter; iter++) {
- target_func_(p1, p2, x, &rate_f_mod, &distbysse_f_mod);
- }
- aom_usec_timer_mark(&test_timer);
- const int elapsed_time_simd =
- static_cast<int>(aom_usec_timer_elapsed(&test_timer));
-
- printf(
- " c_time=%d \t simd_time=%d \t "
- "Scaling=%lf \n",
- elapsed_time_c, elapsed_time_simd,
- (static_cast<double>(elapsed_time_c) / elapsed_time_simd));
-}
-
-TEST_P(InterpCubicTest, CheckOutput) { CheckOutput(); }
-
-TEST_P(InterpCubicTest, DISABLED_Speed) { SpeedTest(); }
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_SUITE_P(SSE4_1, InterpCubicTest,
- ::testing::Values(av1_interp_cubic_rate_dist_sse4_1));
-#endif // HAVE_SSE4_1
-
-} // namespace
diff --git a/test/test.cmake b/test/test.cmake
index 84ba9182f5..6224c54e6e 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -232,7 +232,6 @@ if(NOT BUILD_SHARED_LIBS)
"${AOM_ROOT}/test/masked_variance_test.cc"
"${AOM_ROOT}/test/metadata_test.cc"
"${AOM_ROOT}/test/minmax_test.cc"
- "${AOM_ROOT}/test/model_rd_test.cc"
"${AOM_ROOT}/test/motion_vector_test.cc"
"${AOM_ROOT}/test/mv_cost_test.cc"
"${AOM_ROOT}/test/obmc_sad_test.cc"