Commit 9e599b5cd7 for qemu.org
commit 9e599b5cd74b765dc18ed8094c47f36441c0d14c
Author: Richard Henderson <richard.henderson@linaro.org>
Date: Tue Jun 9 12:20:58 2026 -0700
target/arm: Implement FDOT (FP8 to FP32) for AdvSIMD
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20260609192110.752384-35-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index 9af4ab239c..cea1c56cd3 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -1625,6 +1625,11 @@ static inline bool isar_feature_aa64_f8fma(const ARMISARegisters *id)
return FIELD_EX64_IDREG(id, ID_AA64FPFR0, F8FMA);
}
+/*
+ * Feature test used to gate the AdvSIMD FDOT (FP8 to FP32) translators:
+ * true when the F8DP4 field of ID_AA64FPFR0 reads as non-zero.
+ */
+static inline bool isar_feature_aa64_f8dp4(const ARMISARegisters *id)
+{
+    const bool has_f8dp4 = FIELD_EX64_IDREG(id, ID_AA64FPFR0, F8DP4);
+
+    return has_f8dp4;
+}
+
/*
* Combinations of feature tests, for ease of use with TRANS_FEAT.
*/
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index ee7391a13c..08c493c11e 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -1224,6 +1224,8 @@ FMLAL_hb_v 0 idxn:1 00 1110 110 rm:5 11111 1 rn:5 rd:5 \
FMLALL_sb_v 0.00 1110 0.0 rm:5 110001 rn:5 rd:5 \
&rxx idxm=0 idxn=%fmlall_idxn
+FDOT_sb_v 0.00 1110 000 ..... 11111 1 ..... ..... @qrrr_s  # FDOT, FP8 to FP32 (no element index)
+
### Advanced SIMD scalar x indexed element
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
@@ -1348,6 +1350,8 @@ FMLAL_hb_vi 0 idxn:1 00 1111 11 ... rm:3 0000 . 0 rn:5 rd:5 \
FMLALL_sb_vi 0 . 10 1111 0 . ... rm:3 1000 . 0 rn:5 rd:5 \
&rxx idxm=%hlm4 idxn=%fmlall_idxn
+FDOT_sb_vi 0.00 1111 00 . ..... 0000 . 0 ..... ..... @qrrx_s  # FDOT, FP8 to FP32, indexed Vm element
+
# Floating-point conditional select
FCSEL 0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd
diff --git a/target/arm/tcg/fp8_helper.c b/target/arm/tcg/fp8_helper.c
index ceeb96b9cc..f54acb03f3 100644
--- a/target/arm/tcg/fp8_helper.c
+++ b/target/arm/tcg/fp8_helper.c
@@ -729,3 +729,42 @@ void HELPER(gvec_fmla_idx_sb)(void *vd, void *vn, void *vm,
clear_tail(vd, oprsz, simd_maxsz(desc));
}
+
+/*
+ * FDOT (FP8 to FP32), AdvSIMD form without an element index.
+ *
+ * Walks the first simd_oprsz(desc) bytes of the operands as 32-bit lanes.
+ * Each lane of vn and vm is handed to f8dotadd_s() together with the
+ * matching float32 lane of vd, and the result replaces that lane of vd.
+ * The constant 4 is presumably the number of FP8 values packed in each
+ * 32-bit lane -- TODO confirm against f8dotadd_s().  Bytes from oprsz up
+ * to simd_maxsz(desc) are then passed to clear_tail().
+ */
+void HELPER(gvec_fdot_sb)(void *vd, void *vn, void *vm,
+                          CPUARMState *env, uint32_t desc)
+{
+    FP8MulContext ctx = fp8_mul_start(env, -1);
+    const size_t oprsz = simd_oprsz(desc);
+    const uint32_t *src_n = vn;
+    const uint32_t *src_m = vm;
+    float32 *acc = vd;
+    size_t lane = 0;
+
+    /* Each lane is read before it is overwritten, so vd may alias vn/vm. */
+    while (lane < oprsz / 4) {
+        acc[lane] = f8dotadd_s(src_n[lane], src_m[lane], 4, acc[lane], &ctx);
+        lane++;
+    }
+
+    clear_tail(vd, oprsz, simd_maxsz(desc));
+}
+
+/*
+ * FDOT (FP8 to FP32), AdvSIMD indexed-element form.
+ *
+ * The operands are processed in groups of up to four 32-bit lanes (one
+ * 128-bit segment).  For each group, the 32-bit element of vm selected by
+ * simd_data(desc) is read once and then combined, via f8dotadd_s(), with
+ * every lane of vn in that group, accumulating into the matching float32
+ * lane of vd.  Bytes from simd_oprsz(desc) up to simd_maxsz(desc) are
+ * finally passed to clear_tail().
+ */
+void HELPER(gvec_fdot_idx_sb)(void *vd, void *vn, void *vm,
+                              CPUARMState *env, uint32_t desc)
+{
+    FP8MulContext ctx = fp8_mul_start(env, -1);
+    size_t idx = simd_data(desc);
+    size_t oprsz = simd_oprsz(desc);
+    size_t nelem = oprsz / 4;
+    uint32_t *n = vn;
+    uint32_t *m = vm;
+    float32 *d = vd;
+
+    for (size_t seg = 0; seg < nelem; seg += 4) {
+        /*
+         * A 64-bit operation (oprsz == 8) has only two lanes.  Stop at
+         * nelem rather than always finishing the group of four, so that
+         * f8dotadd_s() is never run on lanes outside the operation.
+         */
+        size_t seg_end = nelem - seg < 4 ? nelem : seg + 4;
+        /* Read before any store: vd and vm may be the same register. */
+        uint32_t e1 = m[seg + H4(idx)];
+
+        for (size_t i = seg; i < seg_end; i++) {
+            d[i] = f8dotadd_s(n[i], e1, 4, d[i], &ctx);
+        }
+    }
+
+    clear_tail(vd, oprsz, simd_maxsz(desc));
+}
diff --git a/target/arm/tcg/helper-fp8-defs.h b/target/arm/tcg/helper-fp8-defs.h
index 802a3b430e..ee6f2e9236 100644
--- a/target/arm/tcg/helper-fp8-defs.h
+++ b/target/arm/tcg/helper-fp8-defs.h
@@ -29,3 +29,6 @@ DEF_HELPER_FLAGS_5(gvec_fmla_idx_hb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env,
DEF_HELPER_FLAGS_5(gvec_fmla_sb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_fmla_idx_sb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fdot_sb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_fdot_idx_sb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index dcc6b5ae6d..ce29176cb7 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -7500,6 +7500,36 @@ TRANS_FEAT(FMLAL_hb_vi, aa64_f8fma, do_fmla_fp8, a, gen_helper_gvec_fmla_idx_hb)
TRANS_FEAT(FMLALL_sb_v, aa64_f8fma, do_fmla_fp8, a, gen_helper_gvec_fmla_sb)
TRANS_FEAT(FMLALL_sb_vi, aa64_f8fma, do_fmla_fp8, a, gen_helper_gvec_fmla_idx_sb)
+/*
+ * Translate the three-register FDOT (FP8) form into a tcg_gen_gvec_3_ptr()
+ * call on the full vector registers rd, rn and rm.
+ *
+ * Nothing is emitted when fpmr_access_check() or fp_access_check() returns
+ * false; fp_access_check() is only consulted once fpmr_access_check() has
+ * succeeded.  The operation covers 16 bytes when a->q is set and 8 bytes
+ * otherwise, and the helper's data argument is always 0.
+ * Always returns true.
+ */
+static bool do_f8dot(DisasContext *s, arg_qrrr_e *a,
+                     gen_helper_gvec_3_ptr *fn)
+{
+    if (!fpmr_access_check(s) || !fp_access_check(s)) {
+        return true;
+    }
+
+    const int oprsz = a->q ? 16 : 8;
+
+    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+                       vec_full_reg_offset(s, a->rn),
+                       vec_full_reg_offset(s, a->rm),
+                       tcg_env, oprsz, vec_full_reg_size(s),
+                       0, fn);
+    return true;
+}
+
+TRANS_FEAT(FDOT_sb_v, aa64_f8dp4, do_f8dot, a, gen_helper_gvec_fdot_sb)
+
+/*
+ * Translate the indexed-element FDOT (FP8) form into a tcg_gen_gvec_3_ptr()
+ * call on the full vector registers rd, rn and rm.
+ *
+ * Nothing is emitted when fpmr_access_check() or fp_access_check() returns
+ * false; fp_access_check() is only consulted once fpmr_access_check() has
+ * succeeded.  The operation covers 16 bytes when a->q is set and 8 bytes
+ * otherwise, and a->idx is forwarded as the helper's data argument.
+ * Always returns true.
+ */
+static bool do_f8dot_idx(DisasContext *s, arg_qrrx_e *a,
+                         gen_helper_gvec_3_ptr *fn)
+{
+    if (!fpmr_access_check(s) || !fp_access_check(s)) {
+        return true;
+    }
+
+    const int oprsz = a->q ? 16 : 8;
+
+    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+                       vec_full_reg_offset(s, a->rn),
+                       vec_full_reg_offset(s, a->rm),
+                       tcg_env, oprsz, vec_full_reg_size(s),
+                       a->idx, fn);
+    return true;
+}
+
+TRANS_FEAT(FDOT_sb_vi, aa64_f8dp4, do_f8dot_idx, a, gen_helper_gvec_fdot_idx_sb)
+
static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
gen_helper_gvec_3 * const fns[2])
{