Commit f0c69f3aa8 for aom
commit f0c69f3aa8d110615777123704c6da6a3d56f2c3
Author: James Zern <jzern@google.com>
Date: Thu Jun 18 14:11:12 2026 -0700
highbd_inv_txfm_*: fix bit shift static analysis warnings
Add asserts, via the new get_log_range() and get_log_range_out() helpers in
av1_txfm.h, before using `log_range` and `log_range_out` in
highbd_inv_txfm_neon.c, highbd_inv_txfm_avx2.c and
highbd_inv_txfm_sse4.c. With clang-19, this fixes warnings of the form:
```
The result of left shift is undefined because the right operand is not
smaller than 32, the capacity of 'int'
```
Bug: 474642915
Change-Id: I1c0e1f5f2bcd02f5718f6ce7bc8981a3cb1a036d
diff --git a/av1/common/arm/highbd_inv_txfm_neon.c b/av1/common/arm/highbd_inv_txfm_neon.c
index 70f65101c5..f3914da364 100644
--- a/av1/common/arm/highbd_inv_txfm_neon.c
+++ b/av1/common/arm/highbd_inv_txfm_neon.c
@@ -507,7 +507,7 @@ static inline void idct32_stage9_neon(int32x4_t *bf1, int32x4_t *out,
addsub_neon(bf1[15], bf1[16], out + 15, out + 16, clamp_lo, clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
for (int i = 0; i < 32; i += 8) {
@@ -540,7 +540,7 @@ static void neg_shift_neon(const int32x4_t *in0, const int32x4_t *in1,
static void idct4x4_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ int log_range = get_log_range(bd, do_cols);
int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t rnding = vdupq_n_s32(1 << (bit - 1));
@@ -577,7 +577,7 @@ static void idct4x4_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
addsub_neon(v1, v2, out + 1, out + 2, &clamp_lo, &clamp_hi);
if (!do_cols) {
- log_range = AOMMAX(16, bd + 6);
+ log_range = get_log_range_out(bd);
clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
const int32x4_t v_shift = vdupq_n_s32(-out_shift);
@@ -734,7 +734,7 @@ static void iadst4x4_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
out[3] = u3;
if (!do_cols) {
- const int log_range = AOMMAX(16, bd + 6);
+ const int log_range = get_log_range_out(bd);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
round_shift_4x4(out, out_shift);
@@ -817,7 +817,7 @@ static void iidentity4_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
#endif
}
if (!do_cols) {
- const int log_range = AOMMAX(16, bd + 6);
+ const int log_range = get_log_range_out(bd);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
round_shift_4x4(out, out_shift);
@@ -971,7 +971,7 @@ static void load_buffer_8x8(const int32_t *coeff, int32x4_t *in) {
static void idct8x8_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t u0, u1, u2, u3, u4, u5, u6, u7;
@@ -1066,7 +1066,7 @@ static void idct8x8_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
}
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
round_shift_8x8(out, out_shift);
@@ -1078,7 +1078,7 @@ static void iadst8x8_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
const int32x4_t kZero = vdupq_n_s32(0);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t u[8], v[8], x;
@@ -1188,7 +1188,7 @@ static void iadst8x8_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
out[12] = u[5];
out[14] = vsubq_s32(kZero, u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
const int32x4_t v_shift = vdupq_n_s32(-out_shift);
@@ -1310,7 +1310,7 @@ static void iadst8x8_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
out[13] = u[5];
out[15] = vsubq_s32(kZero, u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
const int32x4_t v_shift = vdupq_n_s32(-out_shift);
@@ -1339,7 +1339,7 @@ static void iidentity8_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
out[7] = vaddq_s32(in[7], in[7]);
if (!do_cols) {
- const int log_range = AOMMAX(16, bd + 6);
+ const int log_range = get_log_range_out(bd);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
round_shift_4x4(out, out_shift);
@@ -1497,7 +1497,7 @@ void av1_inv_txfm2d_add_8x8_neon(const int32_t *input, uint16_t *output,
static void idct8x8_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t x;
@@ -1509,7 +1509,7 @@ static void idct8x8_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
// stage 4-5
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
clamp_lo = vdupq_n_s32(-(1 << (log_range_out - 1)));
clamp_hi = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
@@ -1533,7 +1533,7 @@ static void idct8x8_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
static void idct8x8_new_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t u0, u1, u2, u3, u4, u5, u6, u7;
@@ -1607,7 +1607,7 @@ static void idct8x8_new_neon(int32x4_t *in, int32x4_t *out, int bit,
addsub_neon(u3, u4, out + 3, out + 4, &clamp_lo, &clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
round_shift_4x4(out, out_shift);
@@ -1669,7 +1669,7 @@ static void iadst8x8_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
out[6] = u[5];
out[7] = vnegq_s32(u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
const int32x4_t v_shift = vdupq_n_s32(-out_shift);
@@ -1689,7 +1689,7 @@ static void iadst8x8_new_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
// const int32x4_t rnding = vdupq_n_s32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t u[8], v[8], x;
@@ -1801,7 +1801,7 @@ static void iadst8x8_new_neon(int32x4_t *in, int32x4_t *out, int bit,
out[6] = u[5];
out[7] = vnegq_s32(u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
const int32x4_t v_shift = vdupq_n_s32(-out_shift);
@@ -1820,7 +1820,7 @@ static void iadst8x8_new_neon(int32x4_t *in, int32x4_t *out, int bit,
static void idct16x16_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ int log_range = get_log_range(bd, do_cols);
int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
const int32x4_t v_bit = vdupq_n_s32(-bit);
@@ -1831,7 +1831,7 @@ static void idct16x16_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
// stage 5-7
if (!do_cols) {
- log_range = AOMMAX(16, bd + 6);
+ log_range = get_log_range_out(bd);
clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
if (out_shift != 0) {
@@ -1864,7 +1864,7 @@ static void idct16x16_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
static void idct16x16_low8_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
const int32x4_t v_bit = vdupq_n_s32(-bit);
@@ -1975,7 +1975,7 @@ static void idct16x16_low8_neon(int32x4_t *in, int32x4_t *out, int bit,
addsub_neon(u[7], u[8], out + 7, out + 8, &clamp_lo, &clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
round_shift_8x8(out, out_shift);
@@ -2102,7 +2102,7 @@ static void iadst16x16_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
out[14] = v[9];
out[15] = vnegq_s32(v[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
const int32x4_t v_shift = vdupq_n_s32(-out_shift);
@@ -2129,7 +2129,7 @@ static void iadst16x16_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
static void iadst16x16_low8_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t zero = vdupq_n_s32(0);
@@ -2332,7 +2332,7 @@ static void iadst16x16_low8_neon(int32x4_t *in, int32x4_t *out, int bit,
out[14] = u[9];
out[15] = vsubq_s32(zero, u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
const int32x4_t v_shift = vdupq_n_s32(-out_shift);
@@ -2359,7 +2359,7 @@ static void iadst16x16_low8_neon(int32x4_t *in, int32x4_t *out, int bit,
static void idct16x16_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t u[16], v[16], x, y;
@@ -2515,7 +2515,7 @@ static void idct16x16_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
addsub_neon(v[7], v[8], out + 7, out + 8, &clamp_lo, &clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out =
vdupq_n_s32((1 << (log_range_out - 1)) - 1);
@@ -2528,7 +2528,7 @@ static void idct16x16_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
static void iadst16x16_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
const int32x4_t zero = vdupq_n_s32(0);
@@ -2782,7 +2782,7 @@ static void iadst16x16_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
out[14] = v[9];
out[15] = vsubq_s32(zero, v[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
const int32x4_t v_shift = vdupq_n_s32(-out_shift);
@@ -2833,7 +2833,7 @@ static void iidentity16_neon(int32x4_t *in, int32x4_t *out, int bit,
}
if (!do_cols) {
- const int log_range = AOMMAX(16, bd + 6);
+ const int log_range = get_log_range_out(bd);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
round_shift_8x8(out, out_shift);
@@ -3007,7 +3007,7 @@ static inline void idct64_stage11_neon(int32x4_t *u, int32x4_t *out,
}
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
for (int i = 0; i < 64; i += 4) {
@@ -3020,7 +3020,7 @@ static inline void idct64_stage11_neon(int32x4_t *u, int32x4_t *out,
static void idct64x64_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
@@ -3042,7 +3042,7 @@ static void idct64x64_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
// stage 10
// stage 11
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
clamp_lo = vdupq_n_s32(-(1 << (log_range_out - 1)));
clamp_hi = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
if (out_shift != 0) {
@@ -3124,7 +3124,7 @@ static void idct64x64_low8_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
int i, j;
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
const int32x4_t v_bit = vdupq_n_s32(-bit);
@@ -3352,7 +3352,7 @@ static void idct64x64_low16_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
int i, j;
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
const int32x4_t v_bit = vdupq_n_s32(-bit);
@@ -3661,7 +3661,7 @@ static void idct64x64_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
const int32x4_t v_bit = vdupq_n_s32(-bit);
const int32x4_t rnding = vdupq_n_s32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
@@ -4139,7 +4139,7 @@ static void idct64x64_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
}
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out =
vdupq_n_s32((1 << (log_range_out - 1)) - 1);
@@ -4155,7 +4155,7 @@ static void idct64x64_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
static void idct32x32_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t bf1;
@@ -4172,7 +4172,7 @@ static void idct32x32_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
bf1 = vmaxq_s32(bf1, clamp_lo);
bf1 = vminq_s32(bf1, clamp_hi);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
clamp_lo = vdupq_n_s32(-(1 << (log_range_out - 1)));
clamp_hi = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
if (out_shift != 0) {
@@ -4189,7 +4189,7 @@ static void idct32x32_low1_neon(int32x4_t *in, int32x4_t *out, int bit,
static void idct32x32_low8_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t bf1[32];
@@ -4269,7 +4269,7 @@ static void idct32x32_low8_neon(int32x4_t *in, int32x4_t *out, int bit,
static void idct32x32_low16_neon(int32x4_t *in, int32x4_t *out, int bit,
int do_cols, int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t bf1[32];
@@ -4373,7 +4373,7 @@ static void idct32x32_low16_neon(int32x4_t *in, int32x4_t *out, int bit,
static void idct32x32_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1)));
const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1);
int32x4_t bf1[32], bf0[32];
@@ -4672,7 +4672,7 @@ static void idct32x32_neon(int32x4_t *in, int32x4_t *out, int bit, int do_cols,
addsub_neon(bf0[15], bf0[16], out + 15, out + 16, &clamp_lo, &clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
round_shift_8x8(out, out_shift);
@@ -4704,7 +4704,7 @@ static void iidentity32_neon(int32x4_t *in, int32x4_t *out, int bit,
}
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1)));
const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1);
round_shift_8x8(out, out_shift);
diff --git a/av1/common/av1_txfm.h b/av1/common/av1_txfm.h
index 8603c3dd0b..bfe6a76a32 100644
--- a/av1/common/av1_txfm.h
+++ b/av1/common/av1_txfm.h
@@ -149,6 +149,24 @@ static inline uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
return clip_pixel_highbd(dest + (int)trans, bd);
}
+#if HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON
+// Returns AOMMAX(16, bd + 6), the value the inverse-transform kernels in this
+// change use as `log_range_out` on their `!do_cols` path. Callers build clamp
+// bounds from it with `1 << (log_range_out - 1)`, so the result must stay
+// below the width of `int`.
+static inline int get_log_range_out(int bd) {
+ const int log_range_out = AOMMAX(16, bd + 6);
+ // bd is limited by the bitstream to 12. This assert is to satisfy static
+ // analyzers that may assume `log_range_out - 1` is greater than 31.
+ // With bd <= 12 the largest possible value is 12 + 6 = 18.
+ assert(log_range_out <= 18);
+ return log_range_out;
+}
+
+// Returns AOMMAX(16, bd + 6) when `do_cols` is non-zero and
+// AOMMAX(16, bd + 8) otherwise. Callers build clamp bounds from it with
+// `1 << (log_range - 1)`, so the result must stay below the width of `int`.
+static inline int get_log_range(int bd, int do_cols) {
+ const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ // bd is limited by the bitstream to 12. This assert is to satisfy static
+ // analyzers that may assume `log_range - 1` is greater than 31.
+ // With bd <= 12 the largest possible value is 12 + 8 = 20.
+ assert(log_range <= 20);
+ return log_range;
+}
+#endif // HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON
+
typedef void (*TxfmFunc)(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range);
diff --git a/av1/common/x86/highbd_inv_txfm_avx2.c b/av1/common/x86/highbd_inv_txfm_avx2.c
index e8595b73c3..ea9d1c3031 100644
--- a/av1/common/x86/highbd_inv_txfm_avx2.c
+++ b/av1/common/x86/highbd_inv_txfm_avx2.c
@@ -429,7 +429,7 @@ static inline void idct32_stage9_avx2(__m256i *bf1, __m256i *out,
addsub_avx2(bf1[14], bf1[17], out + 14, out + 17, clamp_lo, clamp_hi);
addsub_avx2(bf1[15], bf1[16], out + 15, out + 16, clamp_lo, clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out = _mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
_mm256_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -444,7 +444,7 @@ static void idct32_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
const int32_t *cospi = cospi_arr(bit);
const __m256i cospi32 = _mm256_set1_epi32(cospi[32]);
const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
__m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
__m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
__m256i x;
@@ -463,7 +463,7 @@ static void idct32_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
// stage 8
// stage 9
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
__m256i offset = _mm256_set1_epi32((1 << out_shift) >> 1);
clamp_lo = _mm256_set1_epi32(-(1 << (log_range_out - 1)));
clamp_hi = _mm256_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -536,7 +536,7 @@ static void idct32_low8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
const __m256i cospi16 = _mm256_set1_epi32(cospi[16]);
const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]);
const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
__m256i bf1[32];
@@ -661,7 +661,7 @@ static void idct32_low16_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
const __m256i cospi16 = _mm256_set1_epi32(cospi[16]);
const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]);
const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
__m256i bf1[32];
@@ -823,7 +823,7 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd,
const __m256i cospi16 = _mm256_set1_epi32(cospi[16]);
const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]);
const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
__m256i bf1[32], bf0[32];
@@ -1140,7 +1140,7 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd,
addsub_avx2(bf0[14], bf0[17], out + 14, out + 17, &clamp_lo, &clamp_hi);
addsub_avx2(bf0[15], bf0[16], out + 15, out + 16, &clamp_lo, &clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out =
_mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
@@ -1156,7 +1156,7 @@ static void idct16_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
const int32_t *cospi = cospi_arr(bit);
const __m256i cospi32 = _mm256_set1_epi32(cospi[32]);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
__m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
__m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
@@ -1174,7 +1174,7 @@ static void idct16_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
// stage 6
// stage 7
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
clamp_lo = _mm256_set1_epi32(-(1 << (log_range_out - 1)));
clamp_hi = _mm256_set1_epi32((1 << (log_range_out - 1)) - 1);
__m256i offset = _mm256_set1_epi32((1 << out_shift) >> 1);
@@ -1223,7 +1223,7 @@ static void idct16_low8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
const __m256i cospim36 = _mm256_set1_epi32(-cospi[36]);
const __m256i cospim52 = _mm256_set1_epi32(-cospi[52]);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
__m256i u[16], x, y;
@@ -1338,7 +1338,7 @@ static void idct16_low8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
addsub_avx2(u[7], u[8], out + 7, out + 8, &clamp_lo, &clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out =
_mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
@@ -1376,7 +1376,7 @@ static void idct16_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd,
const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]);
const __m256i cospim48 = _mm256_set1_epi32(-cospi[48]);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
__m256i u[16], v[16], x, y;
@@ -1521,7 +1521,7 @@ static void idct16_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd,
addsub_avx2(v[7], v[8], out + 7, out + 8, &clamp_lo, &clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out =
_mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
@@ -1684,7 +1684,7 @@ static void iadst16_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
out[14] = v[9];
out[15] = _mm256_sub_epi32(_mm256_setzero_si256(), v[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out =
_mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
@@ -1740,7 +1740,7 @@ static void iadst16_low8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
const __m256i cospim48 = _mm256_set1_epi32(-cospi[48]);
const __m256i cospi32 = _mm256_set1_epi32(cospi[32]);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
__m256i u[16], x, y;
@@ -2005,7 +2005,7 @@ static void iadst16_low8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
out[14] = u[9];
out[15] = _mm256_sub_epi32(_mm256_setzero_si256(), u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out =
_mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
@@ -2061,7 +2061,7 @@ static void iadst16_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
const __m256i cospim48 = _mm256_set1_epi32(-cospi[48]);
const __m256i cospi32 = _mm256_set1_epi32(cospi[32]);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
__m256i u[16], v[16], x, y;
@@ -2385,7 +2385,7 @@ static void iadst16_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
out[14] = v[9];
out[15] = _mm256_sub_epi32(_mm256_setzero_si256(), v[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out =
_mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
@@ -2415,7 +2415,7 @@ static void idct8x8_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
const int32_t *cospi = cospi_arr(bit);
const __m256i cospi32 = _mm256_set1_epi32(cospi[32]);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
__m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
__m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
__m256i x;
@@ -2431,7 +2431,7 @@ static void idct8x8_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
// stage 4
// stage 5
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
__m256i offset = _mm256_set1_epi32((1 << out_shift) >> 1);
clamp_lo = _mm256_set1_epi32(-(1 << (log_range_out - 1)));
clamp_hi = _mm256_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -2463,7 +2463,7 @@ static void idct8x8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]);
const __m256i cospi16 = _mm256_set1_epi32(cospi[16]);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
__m256i u0, u1, u2, u3, u4, u5, u6, u7;
@@ -2550,7 +2550,7 @@ static void idct8x8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
addsub_avx2(u3, u4, out + 3, out + 4, &clamp_lo, &clamp_hi);
// stage 5
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out = _mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
_mm256_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -2634,7 +2634,7 @@ static void iadst8x8_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
out[6] = u[5];
out[7] = _mm256_sub_epi32(kZero, u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out = _mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
_mm256_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -2667,7 +2667,7 @@ static void iadst8x8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
const __m256i cospi32 = _mm256_set1_epi32(cospi[32]);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
const __m256i kZero = _mm256_setzero_si256();
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
__m256i u[8], v[8], x;
@@ -2803,7 +2803,7 @@ static void iadst8x8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
out[6] = u[5];
out[7] = _mm256_sub_epi32(kZero, u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out = _mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
_mm256_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -2942,7 +2942,7 @@ static inline void idct64_stage11_avx2(__m256i *u, __m256i *out, int do_cols,
}
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out = _mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
_mm256_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -2959,7 +2959,7 @@ static void idct64_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
__m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
__m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
@@ -2981,7 +2981,7 @@ static void idct64_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
// stage 10
// stage 11
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
clamp_lo = _mm256_set1_epi32(-(1 << (log_range_out - 1)));
clamp_hi = _mm256_set1_epi32((1 << (log_range_out - 1)) - 1);
if (out_shift != 0) {
@@ -3063,7 +3063,7 @@ static void idct64_low8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int i, j;
const int32_t *cospi = cospi_arr(bit);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
@@ -3293,7 +3293,7 @@ static void idct64_low16_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int i, j;
const int32_t *cospi = cospi_arr(bit);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
@@ -3602,7 +3602,7 @@ static void idct64_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd,
int i, j;
const int32_t *cospi = cospi_arr(bit);
const __m256i rnding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
@@ -4061,7 +4061,7 @@ static void idct64_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd,
&clamp_hi);
}
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m256i clamp_lo_out =
_mm256_set1_epi32(-(1 << (log_range_out - 1)));
const __m256i clamp_hi_out =
diff --git a/av1/common/x86/highbd_inv_txfm_sse4.c b/av1/common/x86/highbd_inv_txfm_sse4.c
index dc269c7bcd..7b5101887d 100644
--- a/av1/common/x86/highbd_inv_txfm_sse4.c
+++ b/av1/common/x86/highbd_inv_txfm_sse4.c
@@ -421,7 +421,7 @@ static inline void idct32_stage9_sse4_1(__m128i *bf1, __m128i *out,
addsub_sse4_1(bf1[15], bf1[16], out + 15, out + 16, clamp_lo, clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
for (int i = 0; i < 32; i += 8) {
@@ -460,7 +460,7 @@ static void idct4x4_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ int log_range = get_log_range(bd, do_cols);
__m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
__m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i u0, u1, u2, u3;
@@ -501,7 +501,7 @@ static void idct4x4_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
addsub_sse4_1(v1, v2, out + 1, out + 2, &clamp_lo, &clamp_hi);
if (!do_cols) {
- log_range = AOMMAX(16, bd + 6);
+ log_range = get_log_range_out(bd);
clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
@@ -622,7 +622,7 @@ static void iadst4x4_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
out[3] = u3;
if (!do_cols) {
- const int log_range = AOMMAX(16, bd + 6);
+ const int log_range = get_log_range_out(bd);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
round_shift_4x4(out, out_shift);
@@ -711,7 +711,7 @@ static void iidentity4_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
}
if (!do_cols) {
- const int log_range = AOMMAX(16, bd + 6);
+ const int log_range = get_log_range_out(bd);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
round_shift_4x4(out, out_shift);
@@ -874,7 +874,7 @@ static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i u0, u1, u2, u3, u4, u5, u6, u7;
@@ -974,7 +974,7 @@ static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
}
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
round_shift_8x8(out, out_shift);
@@ -999,7 +999,7 @@ static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
const __m128i kZero = _mm_setzero_si128();
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i u[8], v[8], x;
@@ -1139,7 +1139,7 @@ static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
out[12] = u[5];
out[14] = _mm_sub_epi32(kZero, u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -1288,7 +1288,7 @@ static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
out[13] = u[5];
out[15] = _mm_sub_epi32(kZero, u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -1316,7 +1316,7 @@ static void iidentity8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
out[7] = _mm_add_epi32(in[7], in[7]);
if (!do_cols) {
- const int log_range = AOMMAX(16, bd + 6);
+ const int log_range = get_log_range_out(bd);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
round_shift_4x4(out, out_shift);
@@ -1472,7 +1472,7 @@ static void idct8x8_low1_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
const int32_t *cospi = cospi_arr(bit);
const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
__m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
__m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i x;
@@ -1488,7 +1488,7 @@ static void idct8x8_low1_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
// stage 4
// stage 5
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
clamp_lo = _mm_set1_epi32(-(1 << (log_range_out - 1)));
clamp_hi = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -1523,7 +1523,7 @@ static void idct8x8_new_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i u0, u1, u2, u3, u4, u5, u6, u7;
@@ -1611,7 +1611,7 @@ static void idct8x8_new_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
addsub_sse4_1(u3, u4, out + 3, out + 4, &clamp_lo, &clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -1695,7 +1695,7 @@ static void iadst8x8_low1_sse4_1(__m128i *in, __m128i *out, int bit,
out[6] = u[5];
out[7] = _mm_sub_epi32(kZero, u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -1727,7 +1727,7 @@ static void iadst8x8_new_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
const __m128i kZero = _mm_setzero_si128();
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i u[8], v[8], x;
@@ -1866,7 +1866,7 @@ static void iadst8x8_new_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
out[6] = u[5];
out[7] = _mm_sub_epi32(kZero, u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -1886,7 +1886,7 @@ static void idct16x16_low1_sse4_1(__m128i *in, __m128i *out, int bit,
const int32_t *cospi = cospi_arr(bit);
const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ int log_range = get_log_range(bd, do_cols);
__m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
__m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
// stage 0
@@ -1902,7 +1902,7 @@ static void idct16x16_low1_sse4_1(__m128i *in, __m128i *out, int bit,
// stage 6
// stage 7
if (!do_cols) {
- log_range = AOMMAX(16, bd + 6);
+ log_range = get_log_range_out(bd);
clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
if (out_shift != 0) {
@@ -1953,7 +1953,7 @@ static void idct16x16_low8_sse4_1(__m128i *in, __m128i *out, int bit,
const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i u[16], x, y;
@@ -2066,7 +2066,7 @@ static void idct16x16_low8_sse4_1(__m128i *in, __m128i *out, int bit,
addsub_sse4_1(u[7], u[8], out + 7, out + 8, &clamp_lo, &clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
round_shift_8x8(out, out_shift);
@@ -2223,7 +2223,7 @@ static void iadst16x16_low1_sse4_1(__m128i *in, __m128i *out, int bit,
out[14] = v[9];
out[15] = _mm_sub_epi32(zero, v[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -2276,7 +2276,7 @@ static void iadst16x16_low8_sse4_1(__m128i *in, __m128i *out, int bit,
const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i zero = _mm_setzero_si128();
@@ -2540,7 +2540,7 @@ static void iadst16x16_low8_sse4_1(__m128i *in, __m128i *out, int bit,
out[14] = u[9];
out[15] = _mm_sub_epi32(zero, u[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -2590,7 +2590,7 @@ static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i u[16], v[16], x, y;
@@ -2735,7 +2735,7 @@ static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
addsub_sse4_1(v[7], v[8], out + 7, out + 8, &clamp_lo, &clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out =
_mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -2775,7 +2775,7 @@ static void iadst16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
const __m128i zero = _mm_setzero_si128();
@@ -3099,7 +3099,7 @@ static void iadst16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
out[14] = v[9];
out[15] = _mm_sub_epi32(zero, v[1]);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -3146,7 +3146,7 @@ static void iidentity16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
}
if (!do_cols) {
- const int log_range = AOMMAX(16, bd + 6);
+ const int log_range = get_log_range_out(bd);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
round_shift_8x8(out, out_shift);
@@ -3278,7 +3278,7 @@ static inline void idct64_stage11_sse4_1(__m128i *u, __m128i *out, int do_cols,
}
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -3294,7 +3294,7 @@ static void idct64x64_low1_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
__m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
__m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
@@ -3316,7 +3316,7 @@ static void idct64x64_low1_sse4_1(__m128i *in, __m128i *out, int bit,
// stage 10
// stage 11
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
clamp_lo = _mm_set1_epi32(-(1 << (log_range_out - 1)));
clamp_hi = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
if (out_shift != 0) {
@@ -3399,7 +3399,7 @@ static void idct64x64_low8_sse4_1(__m128i *in, __m128i *out, int bit,
int i, j;
const int32_t *cospi = cospi_arr(bit);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
@@ -3630,7 +3630,7 @@ static void idct64x64_low16_sse4_1(__m128i *in, __m128i *out, int bit,
int i, j;
const int32_t *cospi = cospi_arr(bit);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
@@ -3940,7 +3940,7 @@ static void idct64x64_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int i, j;
const int32_t *cospi = cospi_arr(bit);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
@@ -4400,7 +4400,7 @@ static void idct64x64_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
}
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out =
_mm_set1_epi32((1 << (log_range_out - 1)) - 1);
@@ -4418,7 +4418,7 @@ static void idct32x32_low1_sse4_1(__m128i *in, __m128i *out, int bit,
const int32_t *cospi = cospi_arr(bit);
const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i rounding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
__m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
__m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i bf1;
@@ -4441,7 +4441,7 @@ static void idct32x32_low1_sse4_1(__m128i *in, __m128i *out, int bit,
bf1 = _mm_max_epi32(bf1, clamp_lo);
bf1 = _mm_min_epi32(bf1, clamp_hi);
} else {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
clamp_lo = _mm_set1_epi32(-(1 << (log_range_out - 1)));
clamp_hi = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
if (out_shift != 0) {
@@ -4517,7 +4517,7 @@ static void idct32x32_low8_sse4_1(__m128i *in, __m128i *out, int bit,
const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
const __m128i rounding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i bf1[32];
@@ -4640,7 +4640,7 @@ static void idct32x32_low16_sse4_1(__m128i *in, __m128i *out, int bit,
const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
const __m128i rounding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i bf1[32];
@@ -4799,7 +4799,7 @@ static void idct32x32_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
const __m128i rounding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const int log_range = get_log_range(bd, do_cols);
const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i bf1[32], bf0[32];
@@ -5116,7 +5116,7 @@ static void idct32x32_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
addsub_sse4_1(bf0[15], bf0[16], out + 15, out + 16, &clamp_lo, &clamp_hi);
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
round_shift_8x8(out, out_shift);
@@ -5189,7 +5189,7 @@ static void iidentity32_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
}
if (!do_cols) {
- const int log_range_out = AOMMAX(16, bd + 6);
+ const int log_range_out = get_log_range_out(bd);
const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
round_shift_8x8(out, out_shift);