Commit 6319130bae for aom

commit 6319130bae5075f25a821f59df6cdb4e2c6a6b2c
Author: Li Zhang <li.zhang2@arm.com>
Date:   Mon Feb 23 11:46:33 2026 +0100

    Make direct calls to Neon variance paths in Neon subpel variance

    Now that we have Neon DotProd paths for subpel variance as well as
    variance, we no longer need to have an indirect call to the variance
    function in Armv8.0 Neon subpel variance paths. Make a direct call to
    the Neon variance path in these Neon subpel functions.

    This is a port from SVT-AV1:
    https://gitlab.com/AOMediaCodec/SVT-AV1/-/merge_requests/2608

    Originally authored by: Jonathan Wright <Jonathan.Wright@arm.com>

    Change-Id: I16e2541bc293708a342c84efe95b47b086725b11

diff --git a/aom_dsp/arm/subpel_variance_neon.c b/aom_dsp/arm/subpel_variance_neon.c
index 2ba6d386b9..4e74737156 100644
--- a/aom_dsp/arm/subpel_variance_neon.c
+++ b/aom_dsp/arm/subpel_variance_neon.c
@@ -30,61 +30,62 @@
     var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, (h + padding), \
                                 xoffset);                                \
     var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset);           \
-    return aom_variance##w##x##h(tmp1, w, ref, ref_stride, sse);         \
+    return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse);  \
   }

-#define SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(w, h, padding)                  \
-  unsigned int aom_sub_pixel_variance##w##x##h##_neon(                       \
-      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
-      const uint8_t *ref, int ref_stride, unsigned int *sse) {               \
-    if (xoffset == 0) {                                                      \
-      if (yoffset == 0) {                                                    \
-        return aom_variance##w##x##h(src, src_stride, ref, ref_stride, sse); \
-      } else if (yoffset == 4) {                                             \
-        uint8_t tmp[w * h];                                                  \
-        var_filter_block2d_avg(src, tmp, src_stride, src_stride, w, h);      \
-        return aom_variance##w##x##h(tmp, w, ref, ref_stride, sse);          \
-      } else {                                                               \
-        uint8_t tmp[w * h];                                                  \
-        var_filter_block2d_bil_w##w(src, tmp, src_stride, src_stride, h,     \
-                                    yoffset);                                \
-        return aom_variance##w##x##h(tmp, w, ref, ref_stride, sse);          \
-      }                                                                      \
-    } else if (xoffset == 4) {                                               \
-      uint8_t tmp0[w * (h + padding)];                                       \
-      if (yoffset == 0) {                                                    \
-        var_filter_block2d_avg(src, tmp0, src_stride, 1, w, h);              \
-        return aom_variance##w##x##h(tmp0, w, ref, ref_stride, sse);         \
-      } else if (yoffset == 4) {                                             \
-        uint8_t tmp1[w * (h + padding)];                                     \
-        var_filter_block2d_avg(src, tmp0, src_stride, 1, w, (h + padding));  \
-        var_filter_block2d_avg(tmp0, tmp1, w, w, w, h);                      \
-        return aom_variance##w##x##h(tmp1, w, ref, ref_stride, sse);         \
-      } else {                                                               \
-        uint8_t tmp1[w * (h + padding)];                                     \
-        var_filter_block2d_avg(src, tmp0, src_stride, 1, w, (h + padding));  \
-        var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset);           \
-        return aom_variance##w##x##h(tmp1, w, ref, ref_stride, sse);         \
-      }                                                                      \
-    } else {                                                                 \
-      uint8_t tmp0[w * (h + padding)];                                       \
-      if (yoffset == 0) {                                                    \
-        var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, h, xoffset);   \
-        return aom_variance##w##x##h(tmp0, w, ref, ref_stride, sse);         \
-      } else if (yoffset == 4) {                                             \
-        uint8_t tmp1[w * h];                                                 \
-        var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, (h + padding), \
-                                    xoffset);                                \
-        var_filter_block2d_avg(tmp0, tmp1, w, w, w, h);                      \
-        return aom_variance##w##x##h(tmp1, w, ref, ref_stride, sse);         \
-      } else {                                                               \
-        uint8_t tmp1[w * h];                                                 \
-        var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, (h + padding), \
-                                    xoffset);                                \
-        var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset);           \
-        return aom_variance##w##x##h(tmp1, w, ref, ref_stride, sse);         \
-      }                                                                      \
-    }                                                                        \
+#define SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(w, h, padding)                   \
+  unsigned int aom_sub_pixel_variance##w##x##h##_neon( /* offset dispatch */  \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
+      const uint8_t *ref, int ref_stride, unsigned int *sse) {                \
+    if (xoffset == 0) { /* filters below step by src_stride only */           \
+      if (yoffset == 0) { /* no filtering: compare src directly */            \
+        return aom_variance##w##x##h##_neon(src, src_stride, ref, ref_stride, \
+                                            sse);                             \
+      } else if (yoffset == 4) { /* offset 4 uses the _avg helper */          \
+        uint8_t tmp[w * h];                                                   \
+        var_filter_block2d_avg(src, tmp, src_stride, src_stride, w, h);       \
+        return aom_variance##w##x##h##_neon(tmp, w, ref, ref_stride, sse);    \
+      } else { /* any other yoffset: bil_w helper */                          \
+        uint8_t tmp[w * h];                                                   \
+        var_filter_block2d_bil_w##w(src, tmp, src_stride, src_stride, h,      \
+                                    yoffset);                                 \
+        return aom_variance##w##x##h##_neon(tmp, w, ref, ref_stride, sse);    \
+      }                                                                       \
+    } else if (xoffset == 4) { /* first pass steps by 1 via _avg */           \
+      uint8_t tmp0[w * (h + padding)]; /* extra rows for 2nd pass */          \
+      if (yoffset == 0) {                                                     \
+        var_filter_block2d_avg(src, tmp0, src_stride, 1, w, h);               \
+        return aom_variance##w##x##h##_neon(tmp0, w, ref, ref_stride, sse);   \
+      } else if (yoffset == 4) {                                              \
+        uint8_t tmp1[w * (h + padding)];                                      \
+        var_filter_block2d_avg(src, tmp0, src_stride, 1, w, (h + padding));   \
+        var_filter_block2d_avg(tmp0, tmp1, w, w, w, h);                       \
+        return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse);   \
+      } else {                                                                \
+        uint8_t tmp1[w * (h + padding)];                                      \
+        var_filter_block2d_avg(src, tmp0, src_stride, 1, w, (h + padding));   \
+        var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset);            \
+        return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse);   \
+      }                                                                       \
+    } else { /* any other xoffset: first pass via bil_w helper */             \
+      uint8_t tmp0[w * (h + padding)];                                        \
+      if (yoffset == 0) {                                                     \
+        var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, h, xoffset);    \
+        return aom_variance##w##x##h##_neon(tmp0, w, ref, ref_stride, sse);   \
+      } else if (yoffset == 4) {                                              \
+        uint8_t tmp1[w * h];                                                  \
+        var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, (h + padding),  \
+                                    xoffset);                                 \
+        var_filter_block2d_avg(tmp0, tmp1, w, w, w, h);                       \
+        return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse);   \
+      } else {                                                                \
+        uint8_t tmp1[w * h];                                                  \
+        var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, (h + padding),  \
+                                    xoffset);                                 \
+        var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset);            \
+        return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse);   \
+      }                                                                       \
+    }                                                                         \
   }

 SUBPEL_VARIANCE_WXH_NEON(4, 4, 2)