Commit 886729454f1 for php.net
commit 886729454f1a551f47a7316d33f36093e71cb43d
Author: Dmitry Stogov <dmitry@php.net>
Date: Mon Dec 15 20:13:03 2025 +0300
Update IR (#20710)
IR commit: 3d72a7295c77743da22b36bab808ebb5f564488d
diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c
index 81621ce11bd..745a66b2163 100644
--- a/ext/opcache/jit/ir/ir.c
+++ b/ext/opcache/jit/ir/ir.c
@@ -118,7 +118,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted
{
char buf[128];
- if (insn->op == IR_FUNC || insn->op == IR_SYM) {
+ if (insn->op == IR_FUNC || insn->op == IR_SYM || insn->op == IR_LABEL) {
fprintf(f, "%s", ir_get_str(ctx, insn->val.name));
return;
} else if (insn->op == IR_STR) {
@@ -290,6 +290,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted
#define ir_op_kind_prb IR_OPND_PROB
#define ir_op_kind_opt IR_OPND_PROB
#define ir_op_kind_pro IR_OPND_PROTO
+#define ir_op_kind_lbl IR_OPND_LABEL_REF
#define _IR_OP_FLAGS(name, flags, op1, op2, op3) \
IR_OP_FLAGS(ir_op_flag_ ## flags, ir_op_kind_ ## op1, ir_op_kind_ ## op2, ir_op_kind_ ## op3),
@@ -689,6 +690,13 @@ ir_ref ir_const_str(ir_ctx *ctx, ir_ref str)
return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_STR, IR_ADDR, 0));
}
+ir_ref ir_const_label(ir_ctx *ctx, ir_ref str)
+{
+ ir_val val;
+ val.u64 = str;
+ return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_LABEL, IR_ADDR, 0));
+}
+
ir_ref ir_str(ir_ctx *ctx, const char *s)
{
size_t len;
@@ -879,6 +887,17 @@ static ir_ref _ir_fold_cse(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir
return IR_UNUSED;
}
+IR_ALWAYS_INLINE ir_ref _ir_fold_cast(ir_ctx *ctx, ir_ref ref, ir_type type)
+{
+ if (ctx->ir_base[ref].type == type) {
+ return ref;
+ } else if (IR_IS_CONST_REF(ref) && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)) {
+ return ir_const(ctx, ctx->ir_base[ref].val, type);
+ } else {
+ return ir_emit1(ctx, IR_OPT(IR_BITCAST, type), ref);
+ }
+}
+
#define IR_FOLD(X) IR_FOLD1(X, __LINE__)
#define IR_FOLD1(X, Y) IR_FOLD2(X, Y)
#define IR_FOLD2(X, Y) case IR_RULE_ ## Y:
@@ -1158,7 +1177,7 @@ ir_ref ir_bind(ir_ctx *ctx, ir_ref var, ir_ref def)
IR_ASSERT(var < 0);
if (!ir_hashtab_add(ctx->binding, def, var)) {
/* Add a copy with different binding */
- def = ir_emit2(ctx, IR_OPT(IR_COPY, ctx->ir_base[def].type), def, 1);
+ def = ir_emit2(ctx, IR_OPT(IR_COPY, ctx->ir_base[def].type), def, IR_COPY_HARD);
ir_hashtab_add(ctx->binding, def, var);
}
return def;
@@ -1836,8 +1855,49 @@ int ir_mem_flush(void *ptr, size_t size)
return 1;
}
#else
+
+#if defined(__linux__) && defined(__x86_64__) && defined(PKEY_DISABLE_WRITE)
+# define HAVE_PKEY_MPROTECT 1
+#endif
+
+#ifdef HAVE_PKEY_MPROTECT
+
+#ifndef PKEY_DISABLE_EXECUTE
+# define PKEY_DISABLE_EXECUTE 0
+#endif
+
+int pkey_mprotect(void* addr, size_t len, int prot, int pkey) __attribute__((weak));
+int pkey_alloc(unsigned int, unsigned int) __attribute__((weak));
+int pkey_free(int) __attribute__((weak));
+int pkey_set(int, unsigned) __attribute__((weak));
+
+static int ir_pkey = 0;
+#endif
+
void *ir_mem_mmap(size_t size)
{
+#ifdef HAVE_PKEY_MPROTECT
+ if (!ir_pkey && pkey_mprotect) {
+ int key = pkey_alloc(0, PKEY_DISABLE_WRITE);
+ if (key > 0) {
+ ir_pkey = key;
+ }
+ }
+ if (ir_pkey > 0) {
+ void *ret = mmap(NULL, size, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (ret == MAP_FAILED) {
+ return NULL;
+ }
+ if (pkey_mprotect(ret, size, PROT_EXEC|PROT_READ|PROT_WRITE, ir_pkey) != 0) {
+#ifdef IR_DEBUG
+ fprintf(stderr, "pkey_mprotect() failed\n");
+#endif
+ munmap(ret, size);
+ return NULL;
+ }
+ return ret;
+ }
+#endif
int prot_flags = PROT_EXEC;
#if defined(__NetBSD__)
prot_flags |= PROT_MPROTECT(PROT_READ|PROT_WRITE);
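Background: instead of flipping page permissions with mprotect() around every code patch, the new path keeps the JIT buffer mapped read/write/execute and tags it with a Linux memory protection key, so write access is toggled per-thread by writing the PKRU register (pkey_set()) instead of paying a syscall plus TLB shootdown. A minimal standalone sketch of the same pattern, assuming glibc 2.27+ on x86-64 Linux (error handling trimmed):

    #define _GNU_SOURCE
    #include <sys/mman.h>

    static void *jit_alloc(size_t size, int *pkey_out)
    {
        /* New key whose default access rights forbid writes. */
        int pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);
        void *p = mmap(NULL, size, PROT_READ|PROT_WRITE|PROT_EXEC,
                       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        if (pkey <= 0 || p == MAP_FAILED) return NULL;
        /* Tag the mapping with the key; access is now gated by PKRU. */
        if (pkey_mprotect(p, size, PROT_READ|PROT_WRITE|PROT_EXEC, pkey) != 0) return NULL;
        *pkey_out = pkey;
        return p;
    }

    /* Patching code in the calling thread:
     *   pkey_set(pkey, 0);                  -- enable writes
     *   ...emit/patch machine code...
     *   pkey_set(pkey, PKEY_DISABLE_WRITE); -- W^X again
     */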
@@ -1852,11 +1912,28 @@ void *ir_mem_mmap(size_t size)
int ir_mem_unmap(void *ptr, size_t size)
{
munmap(ptr, size);
+#ifdef HAVE_PKEY_MPROTECT
+// if (ir_pkey > 0) {
+// pkey_free(ir_pkey);
+// ir_pkey = 0;
+// }
+#endif
return 1;
}
int ir_mem_protect(void *ptr, size_t size)
{
+#ifdef HAVE_PKEY_MPROTECT
+ if (ir_pkey > 0) {
+ if (pkey_set(ir_pkey, PKEY_DISABLE_WRITE)) {
+#ifdef IR_DEBUG
+ fprintf(stderr, "mprotect() failed\n");
+#endif
+ return 0;
+ }
+ return 1;
+ }
+#endif
if (mprotect(ptr, size, PROT_READ | PROT_EXEC) != 0) {
#ifdef IR_DEBUG
fprintf(stderr, "mprotect() failed\n");
@@ -1868,6 +1945,17 @@ int ir_mem_protect(void *ptr, size_t size)
int ir_mem_unprotect(void *ptr, size_t size)
{
+#ifdef HAVE_PKEY_MPROTECT
+ if (ir_pkey > 0) {
+ if (pkey_set(ir_pkey, PKEY_DISABLE_EXECUTE)) {
+#ifdef IR_DEBUG
+ fprintf(stderr, "mprotect() failed\n");
+#endif
+ return 0;
+ }
+ return 1;
+ }
+#endif
if (mprotect(ptr, size, PROT_READ | PROT_WRITE) != 0) {
#ifdef IR_DEBUG
fprintf(stderr, "mprotect() failed\n");
@@ -2070,7 +2158,26 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(ir_ctx *ctx, ir_ref ref, ir_type
}
} else if (insn->op == IR_RSTORE) {
modified_regset |= (1 << insn->op3);
- } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_CALL || insn->op == IR_VSTORE) {
+ } else if (insn->op == IR_CALL) {
+ ir_insn *func = &ctx->ir_base[insn->op2];
+ ir_ref func_proto;
+ const ir_proto_t *proto;
+
+ if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
+ func_proto = func->proto;
+ } else if (func->op == IR_PROTO) {
+ func_proto = func->op2;
+ } else {
+ break;
+ }
+ if (!func_proto) {
+ break;
+ }
+ proto = (const ir_proto_t *)ir_get_str(ctx, func_proto);
+ if (!(proto->flags & (IR_CONST_FUNC|IR_PURE_FUNC))) {
+ break;
+ }
+ } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_VSTORE) {
return IR_UNUSED;
}
ref = insn->op1;
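The change above relaxes load forwarding across calls: a call normally has to be treated as clobbering all memory, but if the callee's prototype carries the new IR_CONST_FUNC or IR_PURE_FUNC flag (added to ir.h below) it is known not to write memory, so the backward scan for an aliasing store may continue past it. This mirrors the familiar GCC/Clang function attributes; an illustration of the optimization it enables:

    /* 'pure': may read memory but never writes it ('const' reads nothing but its args). */
    extern int lookup(int key) __attribute__((pure));

    int f(int *p, int k)
    {
        int a = *p;
        int b = lookup(k);  /* flagged pure: cannot store through p... */
        int c = *p;         /* ...so this LOAD can be forwarded from 'a' */
        return a + b + c;
    }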
@@ -2116,7 +2223,26 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ
break;
}
}
- } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_CALL || insn->op == IR_STORE) {
+ } else if (insn->op == IR_CALL) {
+ ir_insn *func = &ctx->ir_base[insn->op2];
+ ir_ref func_proto;
+ const ir_proto_t *proto;
+
+ if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
+ func_proto = func->proto;
+ } else if (func->op == IR_PROTO) {
+ func_proto = func->op2;
+ } else {
+ break;
+ }
+ if (!func_proto) {
+ break;
+ }
+ proto = (const ir_proto_t *)ir_get_str(ctx, func_proto);
+ if (!(proto->flags & (IR_CONST_FUNC|IR_PURE_FUNC))) {
+ break;
+ }
+ } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_STORE) {
break;
}
ref = insn->op1;
@@ -3013,6 +3139,16 @@ void _ir_IJMP(ir_ctx *ctx, ir_ref addr)
ctx->control = IR_UNUSED;
}
+ir_ref _ir_IGOTO(ir_ctx *ctx, ir_ref addr)
+{
+ ir_ref ref;
+
+ IR_ASSERT(ctx->control);
+ ctx->control = ref = ir_emit2(ctx, IR_IGOTO, ctx->control, addr);
+ ctx->control = IR_UNUSED;
+ return ref;
+}
+
ir_ref _ir_ADD_OFFSET(ir_ctx *ctx, ir_ref addr, uintptr_t offset)
{
if (offset) {
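IGOTO is a new, non-terminating counterpart of IJMP for GNU C's "labels as values": the target is an IR_LABEL constant attached to a BEGIN node in the same function, so the candidate successors stay inside the CFG instead of control flow ending at the jump (the opcode table below marks IGOTO "internal" and IJMP "terminating"). The C construct being modeled, roughly:

    static int dispatch(int op)
    {
        static void *tab[] = { &&op_inc, &&op_dec };  /* label addresses -> IR_LABEL consts */
        int n = 0;
        goto *tab[op & 1];                            /* computed goto -> IR_IGOTO */
    op_inc:
        return n + 1;
    op_dec:
        return n - 1;
    }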
@@ -3135,6 +3271,18 @@ void _ir_VSTORE(ir_ctx *ctx, ir_ref var, ir_ref val)
ctx->control = ir_emit3(ctx, IR_VSTORE, ctx->control, var, val);
}
+ir_ref _ir_VLOAD_v(ir_ctx *ctx, ir_type type, ir_ref var)
+{
+ IR_ASSERT(ctx->control);
+ return ctx->control = ir_emit2(ctx, IR_OPT(IR_VLOAD_v, type), ctx->control, var);
+}
+
+void _ir_VSTORE_v(ir_ctx *ctx, ir_ref var, ir_ref val)
+{
+ IR_ASSERT(ctx->control);
+ ctx->control = ir_emit3(ctx, IR_VSTORE_v, ctx->control, var, val);
+}
+
ir_ref _ir_TLS(ir_ctx *ctx, ir_ref index, ir_ref offset)
{
IR_ASSERT(ctx->control);
@@ -3193,6 +3341,18 @@ void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val)
ctx->control = ir_emit3(ctx, IR_STORE, ctx->control, addr, val);
}
+ir_ref _ir_LOAD_v(ir_ctx *ctx, ir_type type, ir_ref addr)
+{
+ IR_ASSERT(ctx->control);
+ return ctx->control = ir_emit2(ctx, IR_OPT(IR_LOAD_v, type), ctx->control, addr);
+}
+
+void _ir_STORE_v(ir_ctx *ctx, ir_ref addr, ir_ref val)
+{
+ IR_ASSERT(ctx->control);
+ ctx->control = ir_emit3(ctx, IR_STORE_v, ctx->control, addr, val);
+}
+
void _ir_VA_START(ir_ctx *ctx, ir_ref list)
{
IR_ASSERT(ctx->control);
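LOAD_v/STORE_v and the VLOAD_v/VSTORE_v builders above add volatile memory accesses: unlike plain LOAD/STORE they must not be CSE'd, forwarded, or dead-store-eliminated. At the C level this is ordinary volatile access, e.g.:

    #include <stdint.h>

    volatile uint32_t *status = (volatile uint32_t *)0x4000;  /* hypothetical MMIO register */

    void wait_ready(void)
    {
        while ((*status & 1) == 0)  /* every iteration must re-load (LOAD_v) */
            ;
        *status = 0;                /* the store must be emitted too (STORE_v) */
    }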
@@ -3217,11 +3377,13 @@ ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list)
return ctx->control = ir_emit2(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list);
}
-ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size)
+ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size, size_t align)
{
IR_ASSERT(ctx->control);
- IR_ASSERT(size <= 0x7fffffff);
- return ctx->control = ir_emit3(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list, (ir_ref)size);
+ IR_ASSERT(size <= 0x0fffffff);
+ IR_ASSERT(align != 0 && ((align & (align - 1)) == 0) && align <= 128);
+ return ctx->control = ir_emit3(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list,
+ (ir_ref)IR_VA_ARG_OP3(size, align));
}
ir_ref _ir_BLOCK_BEGIN(ir_ctx *ctx)
diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h
index 8fcfbffa7d6..a9665059705 100644
--- a/ext/opcache/jit/ir/ir.h
+++ b/ext/opcache/jit/ir/ir.h
@@ -216,6 +216,7 @@ typedef enum _ir_type {
* prb - branch probability 1-99 (0 - unspecified): (IF_TRUE, IF_FALSE, CASE_VAL, CASE_DEFAULT)
* opt - optional number
* pro - function prototype
+ * lbl - label used as value (a reference to constant): (BEGIN)
*
* The order of IR opcodes is carefully selected for efficient folding.
* - foldable instruction go first
@@ -322,6 +323,7 @@ typedef enum _ir_type {
_(FUNC_ADDR, r0, ___, ___, ___) /* constant func ref */ \
_(FUNC, r0, ___, ___, ___) /* constant func ref */ \
_(SYM, r0, ___, ___, ___) /* constant symbol ref */ \
+ _(LABEL, r0, ___, ___, ___) /* label address ref */ \
_(STR, r0, ___, ___, ___) /* constant str ref */ \
\
/* call ops */ \
@@ -334,11 +336,15 @@ typedef enum _ir_type {
_(BLOCK_BEGIN, a1, src, ___, ___) /* stacksave */ \
_(BLOCK_END, a2, src, def, ___) /* stackrestore */ \
_(VLOAD, l2, src, var, ___) /* load value of local var */ \
+ _(VLOAD_v, l2, src, var, ___) /* volatile variant of VLOAD */ \
_(VSTORE, s3, src, var, def) /* store value to local var */ \
+ _(VSTORE_v, s3, src, var, def) /* volatile variant of VSTORE */ \
_(RLOAD, l1X2, src, num, opt) /* load value from register */ \
_(RSTORE, s2X1, src, def, num) /* store value into register */ \
_(LOAD, l2, src, ref, ___) /* load from memory */ \
+ _(LOAD_v, l2, src, ref, ___) /* volatile variant of LOAD */ \
_(STORE, s3, src, ref, def) /* store to memory */ \
+ _(STORE_v, s3, src, ref, def) /* volatile variant of STORE */ \

_(TLS, l1X2, src, num, num) /* thread local variable */ \
_(TRAP, x1, src, ___, ___) /* DebugBreak */ \
/* memory reference ops (A, H, U, S, TMP, STR, NEW, X, V) ??? */ \
@@ -360,7 +366,7 @@ typedef enum _ir_type {
/* control-flow nodes */ \
_(START, S0X1, ret, ___, ___) /* function start */ \
_(ENTRY, S1X1, src, num, ___) /* entry with a fake src edge */ \
- _(BEGIN, S1, src, ___, ___) /* block start */ \
+ _(BEGIN, S1X1, src, lbl, ___) /* block start, optional &&lbl */ \
_(IF_TRUE, S1X1, src, prb, ___) /* IF TRUE proj. */ \
_(IF_FALSE, S1X1, src, prb, ___) /* IF FALSE proj. */ \
_(CASE_VAL, S2X1, src, def, prb) /* switch proj. */ \
@@ -372,8 +378,9 @@ typedef enum _ir_type {
_(LOOP_END, E1, src, ___, ___) /* loop end */ \
_(IF, E2, src, def, ___) /* conditional control split */ \
_(SWITCH, E2, src, def, ___) /* multi-way control split */ \
+ _(IGOTO, E2, src, def, ___) /* computed goto (internal) */ \
+ _(IJMP, T2X1, src, def, ret) /* computed goto (terminating) */ \
_(RETURN, T2X1, src, def, ret) /* function return */ \
- _(IJMP, T2X1, src, def, ret) /* computed goto */ \
_(UNREACHABLE, T1X2, src, ___, ret) /* unreachable (tailcall, etc) */ \
\
/* deoptimization helper */ \
@@ -400,6 +407,13 @@ typedef enum _ir_op {
#define IR_OPTX(op, type, n) ((uint32_t)(op) | ((uint32_t)(type) << IR_OPT_TYPE_SHIFT) | ((uint32_t)(n) << IR_OPT_INPUTS_SHIFT))
#define IR_OPT_TYPE(opt) (((opt) & IR_OPT_TYPE_MASK) >> IR_OPT_TYPE_SHIFT)
+/* "opt" modifiers */
+#define IR_COPY_HARD (1<<0)
+
+#define IR_VA_ARG_SIZE(op3) (((uint32_t)(op3) >> 3))
+#define IR_VA_ARG_ALIGN(op3) (1U << ((uint32_t)(op3) & 0x7))
+#define IR_VA_ARG_OP3(s, a) (((s) << 3) | ir_ntzl(a))
+
/* IR References */
typedef int32_t ir_ref;
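The VA_ARG op3 word now packs both the by-value aggregate's size and its alignment: size in the upper bits, log2(align) in the low three (hence the size limit dropping from 0x7fffffff to 0x0fffffff and the align <= 128 assertion in _ir_VA_ARG_EX above). Assuming ir_ntzl() counts trailing zero bits, a worked round trip:

    /* size = 24 bytes, align = 8 bytes */
    uint32_t op3   = (24u << 3) | 3;     /* IR_VA_ARG_OP3(24, 8): ir_ntzl(8) == 3 -> 195 */
    uint32_t size  = op3 >> 3;           /* IR_VA_ARG_SIZE(op3)  -> 24 */
    uint32_t align = 1u << (op3 & 0x7);  /* IR_VA_ARG_ALIGN(op3) -> 8  */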
@@ -533,6 +547,9 @@ void ir_strtab_free(ir_strtab *strtab);
#define IR_EXTERN (1<<5)
#define IR_CONST (1<<6)
+#define IR_CONST_FUNC (1<<6)
+#define IR_PURE_FUNC (1<<7)
+
#define IR_INITIALIZED (1<<7) /* sym data flag: constant or an initialized variable */
#define IR_CONST_STRING (1<<8) /* sym data flag: constant string */
@@ -648,7 +665,6 @@ struct _ir_ctx {
ir_ref vars; /* list of VARs (used by register allocator) */
};
ir_snapshot_create_t snapshot_create;
- int32_t stack_frame_alignment;
int32_t stack_frame_size; /* spill stack frame size (used by register allocator and code generator) */
int32_t call_stack_size; /* stack for parameter passing (used by register allocator and code generator) */
uint64_t used_preserved_regs;
@@ -698,6 +714,7 @@ ir_ref ir_const_func_addr(ir_ctx *ctx, uintptr_t c, ir_ref proto);
ir_ref ir_const_func(ir_ctx *ctx, ir_ref str, ir_ref proto);
ir_ref ir_const_sym(ir_ctx *ctx, ir_ref str);
ir_ref ir_const_str(ir_ctx *ctx, ir_ref str);
+ir_ref ir_const_label(ir_ctx *ctx, ir_ref str);
ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t c);
@@ -893,6 +910,7 @@ struct _ir_loader {
void*(*resolve_sym_name) (ir_loader *loader, const char *name, uint32_t flags);
bool (*has_sym) (ir_loader *loader, const char *name);
bool (*add_sym) (ir_loader *loader, const char *name, void *addr);
+ bool (*add_label) (ir_loader *loader, const char *name, void *addr);
};
void ir_loader_init(void);
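The new add_label hook reports the final machine address of each named block label to the loader once code is emitted (see the IR_HAS_BLOCK_ADDR pass near the end of ir_emit_code() in the backends). A minimal, hypothetical implementation matching the signature above:

    static bool my_add_label(ir_loader *loader, const char *name, void *addr)
    {
        /* 'addr' points into the freshly emitted code for the block labeled 'name' */
        fprintf(stderr, "label %s = %p\n", name, addr);
        return 1;
    }

    /* wired up during setup: my_loader.add_label = my_add_label; */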
diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
index 12c3694d469..b553243309f 100644
--- a/ext/opcache/jit/ir/ir_aarch64.dasc
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -218,6 +218,7 @@ typedef struct _ir_backend_data {
dasm_State *dasm_state;
ir_bitset emit_constants;
int rodata_label, jmp_table_label;
+ bool resolved_label_syms;
} ir_backend_data;
#define IR_GP_REG_NAME(code, name64, name32) \
@@ -315,6 +316,7 @@ const char *ir_reg_name(int8_t reg, ir_type type)
_(RETURN_VOID) \
_(RETURN_INT) \
_(RETURN_FP) \
+ _(IGOTO_DUP) \
#define IR_RULE_ENUM(name) IR_ ## name,
@@ -385,7 +387,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
n++;
break;
}
- } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
+ } else if (!IR_IS_CONST_REF(insn->op2) && ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n++;
}
@@ -478,10 +480,16 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
if (IR_IS_CONST_REF(insn->op1)) {
constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n++;
+ } else if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
+ constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
+ n++;
}
if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n++;
+ } else if (!IR_IS_CONST_REF(insn->op2) && ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
+ constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
+ n++;
}
break;
case IR_CMP_INT:
@@ -520,6 +528,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
}
break;
case IR_VSTORE:
+ case IR_VSTORE_v:
insn = &ctx->ir_base[ref];
if (IR_IS_CONST_REF(insn->op3)) {
insn = &ctx->ir_base[insn->op3];
@@ -596,6 +605,19 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
}
flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
break;
+ case IR_IGOTO:
+ insn = &ctx->ir_base[ref];
+ if (ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) {
+ ir_insn *merge = &ctx->ir_base[insn->op1];
+ ir_ref *p, n = merge->inputs_count;
+
+ for (p = merge->ops + 1; n > 0; p++, n--) {
+ ir_ref input = *p;
+ IR_ASSERT(ctx->ir_base[input].op == IR_END || ctx->ir_base[input].op == IR_LOOP_END);
+ ctx->rules[input] = IR_IGOTO_DUP;
+ }
+ }
+ return insn->op;
case IR_COND:
insn = &ctx->ir_base[ref];
n = 0;
@@ -665,7 +687,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
}
break;
case IR_VA_ARG:
- flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
+ flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_DEF_CONFLICTS_WITH_INPUT_REGS;
constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
n = 1;
insn = &ctx->ir_base[ref];
@@ -714,7 +736,8 @@ static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type)
do {
ir_insn *insn = &ctx->ir_base[*p];
- if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) {
+ if (insn->op != IR_LOAD && insn->op != IR_LOAD_v
+ && ((insn->op != IR_STORE && insn->op != IR_STORE_v) || insn->op3 == addr_ref)) {
return;
}
p++;
@@ -961,7 +984,7 @@ binop_fp:
ctx->flags2 |= IR_HAS_CALLS;
return IR_CALL;
case IR_VAR:
- return IR_SKIPPED | IR_VAR;
+ return IR_STATIC_ALLOCA;
case IR_PARAM:
return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM;
case IR_ALLOCA:
@@ -978,6 +1001,7 @@ binop_fp:
}
return IR_ALLOCA;
case IR_LOAD:
+ case IR_LOAD_v:
ir_match_fuse_addr(ctx, insn->op2, insn->type);
if (IR_IS_TYPE_INT(insn->type)) {
return IR_LOAD_INT;
@@ -986,6 +1010,7 @@ binop_fp:
}
break;
case IR_STORE:
+ case IR_STORE_v:
ir_match_fuse_addr(ctx, insn->op2, ctx->ir_base[insn->op3].type);
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
return IR_STORE_INT;
@@ -1364,7 +1389,7 @@ static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref sr
} else if (type == IR_DOUBLE && insn->val.u64 == 0) {
| fmov Rd(reg-IR_REG_FP_FIRST), xzr
} else {
- label = ir_const_label(ctx, src);
+ label = ir_get_const_label(ctx, src);
if (type == IR_DOUBLE) {
| ldr Rd(reg-IR_REG_FP_FIRST), =>label
} else {
@@ -1441,10 +1466,41 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src)
| add Rx(reg), Rx(base), #offset
} else {
ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
- | add Rx(reg), sp, Rx(IR_REG_INT_TMP)
+ | add Rx(reg), Rx(base), Rx(IR_REG_INT_TMP)
}
}
+static void ir_resolve_label_syms(ir_ctx *ctx)
+{
+ uint32_t b;
+ ir_block *bb;
+
+ for (b = 1, bb = &ctx->cfg_blocks[b]; b <= ctx->cfg_blocks_count; bb++, b++) {
+ ir_insn *insn = &ctx->ir_base[bb->start];
+
+ if (insn->op == IR_BEGIN && insn->op2) {
+ IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL);
+ ctx->ir_base[insn->op2].val.u32_hi = b;
+ }
+ }
+}
+
+static void ir_emit_load_label_addr(ir_ctx *ctx, ir_reg reg, ir_insn *label)
+{
+ ir_backend_data *data = ctx->data;
+ dasm_State **Dst = &data->dasm_state;
+
+ if (!data->resolved_label_syms) {
+ data->resolved_label_syms = 1;
+ ir_resolve_label_syms(ctx);
+ }
+
+ IR_ASSERT(label->op == IR_LABEL);
+ int b = label->val.u32_hi;
+
+ b = ir_skip_empty_target_blocks(ctx, b);
+ | adr Rx(reg), =>b
+}
static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
@@ -1459,9 +1515,11 @@ static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
} else if (insn->op == IR_STR) {
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
- int label = ir_const_label(ctx, src);
+ int label = ir_get_const_label(ctx, src);
| adr Rx(reg), =>label
+ } else if (insn->op == IR_LABEL) {
+ ir_emit_load_label_addr(ctx, reg, insn);
} else {
ir_emit_load_imm_int(ctx, type, reg, insn->val.i64);
}
@@ -1697,6 +1755,7 @@ static void ir_emit_prologue(ir_ctx *ctx)
| str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
} else {
ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ offset -= sizeof(void*);
| str Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)]
| sub Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #8
| str Rd(i-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)]
@@ -1795,7 +1854,12 @@ static void ir_emit_prologue(ir_ctx *ctx)
offset += 16 * ctx->fp_reg_params;
for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) {
// TODO: Rd->Rq stur->str ???
- | str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset]
+ if (aarch64_may_encode_addr_offset(offset, 8)) {
+ | str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset]
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ | str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ }
offset += 16;
}
}
@@ -1828,26 +1892,44 @@ static void ir_emit_epilogue(ir_ctx *ctx)
offset -= sizeof(void*) * 2;
if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
| ldp Rx(prev), Rx(i), [Rx(fp), #offset]
- } else {
- IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
+ } else if (aarch64_may_encode_addr_offset(offset + 8, 8)) {
| ldr Rx(prev), [Rx(fp), #offset]
| ldr Rx(i), [Rx(fp), #(offset+8)]
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ | ldr Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ | add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #8
+ | ldr Rx(i), [Rx(fp), Rx(IR_REG_INT_TMP)]
}
prev = IR_REG_NONE;
} else {
if (prev < IR_REG_FP_FIRST) {
offset -= sizeof(void*);
- | ldr Rx(prev), [Rx(fp), #offset]
+ if (aarch64_may_encode_addr_offset(offset, 8)) {
+ | ldr Rx(prev), [Rx(fp), #offset]
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ | ldr Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ }
offset -= sizeof(void*);
- | ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
+ if (aarch64_may_encode_addr_offset(offset, 8)) {
+ | ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ | ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ }
} else {
offset -= sizeof(void*) * 2;
if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
| ldp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
- } else {
- IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
+ } else if (aarch64_may_encode_addr_offset(offset + 8, 8)) {
| ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
| ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)]
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ | ldr Rx(prev-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ | add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #8
+ | ldr Rx(i-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)]
}
}
prev = IR_REG_NONE;
@@ -1857,10 +1939,20 @@ static void ir_emit_epilogue(ir_ctx *ctx)
if (prev != IR_REG_NONE) {
if (prev < IR_REG_FP_FIRST) {
offset -= sizeof(void*);
- | ldr Rx(prev), [Rx(fp), #offset]
+ if (aarch64_may_encode_addr_offset(offset, 8)) {
+ | ldr Rx(prev), [Rx(fp), #offset]
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ | ldr Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ }
} else {
offset -= sizeof(void*);
- | ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
+ if (aarch64_may_encode_addr_offset(offset, 8)) {
+ | ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ | ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ }
}
}
}
@@ -1909,6 +2001,9 @@ static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
op1_reg = IR_REG_NUM(op1_reg);
ir_emit_load(ctx, type, op1_reg, op1);
}
+ if (op2_reg == IR_REG_NONE && op1 == op2) {
+ op2_reg = op1_reg;
+ }
if (op2_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
@@ -3415,25 +3510,52 @@ static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
if (ir_type_size[src_type] == 1) {
- if (ir_type_size[dst_type] == 2) {
- | ldrsb Rw(def_reg), [Rx(fp), #offset]
- } else if (ir_type_size[dst_type] == 4) {
- | ldrsb Rw(def_reg), [Rx(fp), #offset]
+ if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) {
+ if (ir_type_size[dst_type] == 2) {
+ | ldrsb Rw(def_reg), [Rx(fp), #offset]
+ } else if (ir_type_size[dst_type] == 4) {
+ | ldrsb Rw(def_reg), [Rx(fp), #offset]
+ } else {
+ IR_ASSERT(ir_type_size[dst_type] == 8);
+ | ldrsb Rx(def_reg), [Rx(fp), #offset]
+ }
} else {
- IR_ASSERT(ir_type_size[dst_type] == 8);
- | ldrsb Rx(def_reg), [Rx(fp), #offset]
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ if (ir_type_size[dst_type] == 2) {
+ | ldrsb Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ } else if (ir_type_size[dst_type] == 4) {
+ | ldrsb Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ } else {
+ IR_ASSERT(ir_type_size[dst_type] == 8);
+ | ldrsb Rx(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ }
}
} else if (ir_type_size[src_type] == 2) {
- if (ir_type_size[dst_type] == 4) {
- | ldrsh Rw(def_reg), [Rx(fp), #offset]
+ if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) {
+ if (ir_type_size[dst_type] == 4) {
+ | ldrsh Rw(def_reg), [Rx(fp), #offset]
+ } else {
+ IR_ASSERT(ir_type_size[dst_type] == 8);
+ | ldrsh Rx(def_reg), [Rx(fp), #offset]
+ }
} else {
- IR_ASSERT(ir_type_size[dst_type] == 8);
- | ldrsh Rx(def_reg), [Rx(fp), #offset]
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ if (ir_type_size[dst_type] == 4) {
+ | ldrsh Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ } else {
+ IR_ASSERT(ir_type_size[dst_type] == 8);
+ | ldrsh Rx(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ }
}
} else {
IR_ASSERT(ir_type_size[src_type] == 4);
IR_ASSERT(ir_type_size[dst_type] == 8);
- | ldrsw Rx(def_reg), [Rx(fp), #offset]
+ if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) {
+ | ldrsw Rx(def_reg), [Rx(fp), #offset]
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ | ldrsw Rx(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ }
}
}
if (IR_REG_SPILLED(ctx->regs[def][0])) {
@@ -3473,14 +3595,27 @@ static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_reg fp;
int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
- if (ir_type_size[src_type] == 1) {
- | ldrb Rw(def_reg), [Rx(fp), #offset]
- } else if (ir_type_size[src_type] == 2) {
- | ldrh Rw(def_reg), [Rx(fp), #offset]
+ if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) {
+ if (ir_type_size[src_type] == 1) {
+ | ldrb Rw(def_reg), [Rx(fp), #offset]
+ } else if (ir_type_size[src_type] == 2) {
+ | ldrh Rw(def_reg), [Rx(fp), #offset]
+ } else {
+ IR_ASSERT(ir_type_size[src_type] == 4);
+ IR_ASSERT(ir_type_size[dst_type] == 8);
+ | ldr Rw(def_reg), [Rx(fp), #offset]
+ }
} else {
- IR_ASSERT(ir_type_size[src_type] == 4);
- IR_ASSERT(ir_type_size[dst_type] == 8);
- | ldr Rw(def_reg), [Rx(fp), #offset]
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ if (ir_type_size[src_type] == 1) {
+ | ldrb Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ } else if (ir_type_size[src_type] == 2) {
+ | ldrh Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ } else {
+ IR_ASSERT(ir_type_size[src_type] == 4);
+ IR_ASSERT(ir_type_size[dst_type] == 8);
+ | ldr Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ }
}
}
if (IR_REG_SPILLED(ctx->regs[def][0])) {
@@ -3579,11 +3714,21 @@ static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_reg fp;
int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
- if (src_type == IR_DOUBLE) {
- | ldr Rx(def_reg), [Rx(fp), #offset]
+ if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) {
+ if (src_type == IR_DOUBLE) {
+ | ldr Rx(def_reg), [Rx(fp), #offset]
+ } else {
+ IR_ASSERT(src_type == IR_FLOAT);
+ | ldr Rw(def_reg), [Rx(fp), #offset]
+ }
} else {
- IR_ASSERT(src_type == IR_FLOAT);
- | ldr Rw(def_reg), [Rx(fp), #offset]
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ if (src_type == IR_DOUBLE) {
+ | ldr Rx(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ } else {
+ IR_ASSERT(src_type == IR_FLOAT);
+ | ldr Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ }
}
}
} else if (IR_IS_TYPE_FP(dst_type)) {
@@ -3605,12 +3750,22 @@ static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_reg fp;
int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
- if (dst_type == IR_DOUBLE) {
- | ldr Rd(def_reg), [Rx(fp), #offset]
- } else {
- IR_ASSERT(src_type == IR_FLOAT);
- | ldr Rs(def_reg), [Rx(fp), #offset]
- }
+ if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) {
+ if (dst_type == IR_DOUBLE) {
+ | ldr Rd(def_reg), [Rx(fp), #offset]
+ } else {
+ IR_ASSERT(dst_type == IR_FLOAT);
+ | ldr Rs(def_reg), [Rx(fp), #offset]
+ }
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ if (dst_type == IR_DOUBLE) {
+ | ldr Rd(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ } else {
+ IR_ASSERT(dst_type == IR_FLOAT);
+ | ldr Rs(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)]
+ }
+ }
}
}
if (IR_REG_SPILLED(ctx->regs[def][0])) {
@@ -3833,7 +3988,12 @@ static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn)
IR_ASSERT(def_reg != IR_REG_NONE);
offset = ir_var_spill_slot(ctx, insn->op1, &fp);
- | add Rx(def_reg), Rx(fp), #offset
+ if (aarch64_may_encode_imm12(offset)) {
+ | add Rx(def_reg), Rx(fp), #offset
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+ | add Rx(def_reg), Rx(fp), Rx(IR_REG_INT_TMP)
+ }
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, type, def, def_reg);
}
@@ -4221,7 +4381,12 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn)
/* Stack must be 16 byte aligned */
size = IR_ALIGNED_SIZE(size, 16);
- | add sp, sp, #size
+ if (aarch64_may_encode_imm12(size)) {
+ | add sp, sp, #size
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, size);
+ | add sp, sp, Rx(IR_REG_INT_TMP)
+ }
if (!(ctx->flags & IR_USE_FRAME_POINTER)) {
ctx->call_stack_size -= size;
}
@@ -4283,8 +4448,11 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def)
if (ctx->flags & IR_USE_FRAME_POINTER) {
| mov Rx(def_reg), Rx(IR_REG_X29)
- } else {
+ } else if (aarch64_may_encode_imm12(ctx->stack_frame_size + ctx->call_stack_size)) {
| add Rx(def_reg), Rx(IR_REG_X31), #(ctx->stack_frame_size + ctx->call_stack_size)
+ } else {
+ ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, ctx->stack_frame_size + ctx->call_stack_size);
+ | add Rx(def_reg), Rx(IR_REG_X31), Rx(IR_REG_INT_TMP)
}
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, IR_ADDR, def, def_reg);
@@ -4377,7 +4545,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
reg_save_area_offset += 16 * IR_REG_FP_ARGS;
/* Set va_list.vr_top */
- if (overflow_arg_area_offset != reg_save_area_offset) {
+ if (overflow_arg_area_offset != reg_save_area_offset || ctx->gp_reg_params < IR_REG_INT_ARGS) {
| add Rx(tmp_reg), Rx(fp), #reg_save_area_offset
}
| str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))]
@@ -5246,6 +5414,19 @@ static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
| br Rx(op2_reg)
} else if (IR_IS_CONST_REF(insn->op2)) {
+ if (ctx->ir_base[insn->op2].op == IR_LABEL) {
+ if (!data->resolved_label_syms) {
+ data->resolved_label_syms = 1;
+ ir_resolve_label_syms(ctx);
+ }
+
+ uint32_t target = ctx->ir_base[insn->op2].val.u32_hi;
+ target = ir_skip_empty_target_blocks(ctx, target);
+
+ | b =>target
+ return;
+ }
+
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
if (aarch64_may_use_b(ctx->code_buffer, addr)) {
@@ -5636,6 +5817,7 @@ static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_re
{
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+ offset = IR_SPILL_POS_TO_OFFSET(offset);
IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE);
if (IR_IS_TYPE_INT(type)) {
@@ -5676,13 +5858,8 @@ static void ir_emit_load_params(ir_ctx *ctx)
const int8_t *int_reg_params = _ir_int_reg_params;
const int8_t *fp_reg_params = _ir_fp_reg_params;
int32_t stack_offset = 0;
+ int32_t stack_start = ctx->stack_frame_size;
- if (ctx->flags & IR_USE_FRAME_POINTER) {
- /* skip old frame pointer and return address */
- stack_offset = sizeof(void*) * 2 + ctx->stack_frame_size + ctx->call_stack_size;
- } else {
- stack_offset = ctx->stack_frame_size + ctx->call_stack_size;
- }
n = use_list->count;
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
use = *p;
@@ -5706,12 +5883,9 @@ static void ir_emit_load_params(ir_ctx *ctx)
if (ctx->vregs[use]) {
dst_reg = IR_REG_NUM(ctx->regs[use][0]);
IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE ||
- stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos +
- ((ctx->flags & IR_USE_FRAME_POINTER) ?
- -(ctx->stack_frame_size - ctx->stack_frame_alignment) :
- ctx->call_stack_size));
+ stack_start + stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos);
if (src_reg != dst_reg) {
- ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset);
+ ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_start + stack_offset);
}
if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) {
ir_emit_store(ctx, insn->type, use, dst_reg);
@@ -5785,14 +5959,8 @@ static void ir_fix_param_spills(ir_ctx *ctx)
const int8_t *int_reg_params = _ir_int_reg_params;
const int8_t *fp_reg_params = _ir_fp_reg_params;
int32_t stack_offset = 0;
- int32_t param_stack_size = 0;
+ int32_t stack_start = ctx->stack_frame_size;
- if (ctx->flags & IR_USE_FRAME_POINTER) {
- /* skip old frame pointer and return address */
- stack_offset = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment);
- } else {
- stack_offset = ctx->stack_frame_size;
- }
n = use_list->count;
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
use = *p;
@@ -5819,15 +5987,13 @@ static void ir_fix_param_spills(ir_ctx *ctx)
if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
&& ival->stack_spill_pos == -1
&& (ival->next || ival->reg == IR_REG_NONE)) {
- ival->stack_spill_pos = stack_offset;
+ ival->stack_spill_pos = stack_start + stack_offset;
}
}
if (sizeof(void*) == 8) {
stack_offset += sizeof(void*);
- param_stack_size += sizeof(void*);
} else {
stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
- param_stack_size += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
}
}
}
@@ -5835,7 +6001,7 @@ static void ir_fix_param_spills(ir_ctx *ctx)
ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count);
ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count);
- ctx->param_stack_size = param_stack_size;
+ ctx->param_stack_size = stack_offset;
}
static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
@@ -5876,6 +6042,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
case IR_MERGE:
case IR_LOOP_BEGIN:
case IR_LOOP_END:
+ case IR_IGOTO_DUP:
break;
default:
def_flags = ir_get_target_constraints(ctx, i, &constraints);
@@ -5892,7 +6059,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
IR_REGSET_EXCL(available, reg);
ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
} else if (def_flags & IR_USE_MUST_BE_IN_REG) {
- if (insn->op == IR_VLOAD
+ if ((insn->op == IR_VLOAD || insn->op == IR_VLOAD_v)
&& ctx->live_intervals[ctx->vregs[i]]
&& ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1
&& ir_is_same_mem_var(ctx, i, ctx->ir_base[insn->op2].op3)) {
@@ -5932,7 +6099,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
use = *p;
use_insn = &ctx->ir_base[use];
- if (use_insn->op == IR_VLOAD) {
+ if (use_insn->op == IR_VLOAD || use_insn->op == IR_VLOAD_v) {
if (ctx->vregs[use]
&& !ctx->live_intervals[ctx->vregs[use]]) {
ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
@@ -5943,7 +6110,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
ival->vreg = ctx->vregs[use];
ival->stack_spill_pos = stack_spill_pos;
}
- } else if (use_insn->op == IR_VSTORE) {
+ } else if (use_insn->op == IR_VSTORE || use_insn->op == IR_STORE_v) {
if (!IR_IS_CONST_REF(use_insn->op3)
&& ctx->vregs[use_insn->op3]
&& !ctx->live_intervals[ctx->vregs[use_insn->op3]]) {
@@ -6080,25 +6247,21 @@ void ir_fix_stack_frame(ir_ctx *ctx)
ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*));
ctx->stack_frame_size += additional_size;
- ctx->stack_frame_alignment = 0;
ctx->call_stack_size = 0;
if (!(ctx->flags & IR_FUNCTION)) {
while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) {
ctx->stack_frame_size += sizeof(void*);
- ctx->stack_frame_alignment += sizeof(void*);
}
} else {
/* Stack must be 16 byte aligned */
if (!(ctx->flags & IR_FUNCTION)) {
while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) {
ctx->stack_frame_size += sizeof(void*);
- ctx->stack_frame_alignment += sizeof(void*);
}
} else if (ctx->flags & IR_USE_FRAME_POINTER) {
while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) {
ctx->stack_frame_size += sizeof(void*);
- ctx->stack_frame_alignment += sizeof(void*);
}
} else {
if (!(ctx->flags & IR_NO_STACK_COMBINE)) {
@@ -6107,7 +6270,6 @@ void ir_fix_stack_frame(ir_ctx *ctx)
while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size, 16) !=
ctx->stack_frame_size + ctx->call_stack_size) {
ctx->stack_frame_size += sizeof(void*);
- ctx->stack_frame_alignment += sizeof(void*);
}
}
}
@@ -6143,6 +6305,8 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
int ret;
void *entry;
size_t size;
+ ir_ref igoto_dup_ref = IR_UNUSED;
+ uint32_t igoto_dup_block = 0;
data.ra_data.unused_slot_4 = 0;
data.ra_data.unused_slot_2 = 0;
@@ -6150,11 +6314,11 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
data.ra_data.handled = NULL;
data.rodata_label = 0;
data.jmp_table_label = 0;
+ data.resolved_label_syms = 0;
ctx->data = &data;
if (!ctx->live_intervals) {
ctx->stack_frame_size = 0;
- ctx->stack_frame_alignment = 0;
ctx->call_stack_size = 0;
ctx->used_preserved_regs = 0;
ir_allocate_unique_spill_slots(ctx);
@@ -6176,7 +6340,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
}
ctx->stack_frame_size = ctx->fixed_stack_frame_size;
ctx->call_stack_size = ctx->fixed_call_stack_size;
- ctx->stack_frame_alignment = 0;
}
Dst = &data.dasm_state;
@@ -6386,6 +6549,35 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
case IR_TAILCALL:
ir_emit_tailcall(ctx, i, insn);
break;
+ case IR_IGOTO_DUP:
+ if (bb->flags & IR_BB_DESSA_MOVES) {
+ ir_emit_dessa_moves(ctx, b, bb);
+ }
+ IR_ASSERT(!igoto_dup_ref && !igoto_dup_block);
+ igoto_dup_ref = i;
+ igoto_dup_block = b;
+ b = ctx->cfg_edges[bb->successors];
+ bb = &ctx->cfg_blocks[b];
+ i = bb->start;
+ insn = &ctx->ir_base[i];
+ rule = &ctx->rules[i];
+ break;
+ case IR_IGOTO:
+ if ((ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN)
+ && (ctx->rules[ctx->ir_base[insn->op1].op1] & IR_RULE_MASK) == IR_IGOTO_DUP
+ && igoto_dup_ref) {
+ ir_emit_ijmp(ctx, i, insn);
+ b = igoto_dup_block;
+ bb = &ctx->cfg_blocks[b];
+ i = igoto_dup_ref;
+ insn = &ctx->ir_base[i];
+ rule = &ctx->rules[i];
+ igoto_dup_block= 0;
+ igoto_dup_ref = 0;
+ break;
+ }
+ IR_ASSERT(!igoto_dup_ref && !igoto_dup_block);
+ IR_FALLTHROUGH;
case IR_IJMP:
ir_emit_ijmp(ctx, i, insn);
break;
@@ -6396,9 +6588,11 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
ir_emit_vaddr(ctx, i, insn);
break;
case IR_VLOAD:
+ case IR_VLOAD_v:
ir_emit_vload(ctx, i, insn);
break;
case IR_VSTORE:
+ case IR_VSTORE_v:
ir_emit_vstore(ctx, i, insn);
break;
case IR_RLOAD:
@@ -6645,6 +6839,28 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
} while (i != 0);
}
+ if ((ctx->flags2 & IR_HAS_BLOCK_ADDR) && ctx->loader && ctx->loader->add_label) {
+ for (b = 1, bb = &ctx->cfg_blocks[b]; b <= ctx->cfg_blocks_count; bb++, b++) {
+ ir_insn *insn = &ctx->ir_base[bb->start];
+
+ if (insn->op == IR_BEGIN && insn->op2) {
+ IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL);
+ ctx->ir_base[insn->op2].val.u32_hi = 0;
+ ctx->loader->add_label(ctx->loader, ir_get_str(ctx, ctx->ir_base[insn->op2].val.str),
+ (char*)entry + dasm_getpclabel(&data.dasm_state, ir_skip_empty_target_blocks(ctx, b)));
+ }
+ }
+ } else if (data.resolved_label_syms) {
+ for (b = 1, bb = &ctx->cfg_blocks[b]; b <= ctx->cfg_blocks_count; bb++, b++) {
+ ir_insn *insn = &ctx->ir_base[bb->start];
+
+ if (insn->op == IR_BEGIN && insn->op2) {
+ IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL);
+ ctx->ir_base[insn->op2].val.u32_hi = 0;
+ }
+ }
+ }
+
dasm_free(&data.dasm_state);
if (ctx->code_buffer) {
diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h
index c1dcffdbaa0..03add759065 100644
--- a/ext/opcache/jit/ir/ir_builder.h
+++ b/ext/opcache/jit/ir/ir_builder.h
@@ -490,7 +490,7 @@ extern "C" {
#define ir_ADD_OFFSET(_addr, _offset) _ir_ADD_OFFSET(_ir_CTX, (_addr), (_offset))
/* Unfoldable variant of COPY */
-#define ir_HARD_COPY(_type, _op1) ir_emit2(_ir_CTX, IR_OPT(IR_COPY, (_type)), (_op1), 1)
+#define ir_HARD_COPY(_type, _op1) ir_emit2(_ir_CTX, IR_OPT(IR_COPY, (_type)), (_op1), IR_COPY_HARD)
#define ir_HARD_COPY_B(_op1) ir_HARD_COPY(IR_BOOL, _op1)
#define ir_HARD_COPY_U8(_op1) ir_HARD_COPY(IR_U8, _op1)
#define ir_HARD_COPY_U16(_op1) ir_HARD_COPY(IR_U16, _op1)
@@ -544,6 +544,8 @@ extern "C" {
#define ir_VLOAD_D(_var) _ir_VLOAD(_ir_CTX, IR_DOUBLE, (_var))
#define ir_VLOAD_F(_var) _ir_VLOAD(_ir_CTX, IR_FLOAT, (_var))
#define ir_VSTORE(_var, _val) _ir_VSTORE(_ir_CTX, (_var), (_val))
+#define ir_VLOAD_v(_type, _var) _ir_VLOAD_v(_ir_CTX, (_type), (_var))
+#define ir_VSTORE_v(_var, _val) _ir_VSTORE_v(_ir_CTX, (_var), (_val))
#define ir_RLOAD(_type, _reg) _ir_RLOAD(_ir_CTX, (_type), (_reg))
#define ir_RLOAD_B(_reg) _ir_RLOAD(_ir_CTX, IR_BOOL, (_reg))
#define ir_RLOAD_U8(_reg) _ir_RLOAD(_ir_CTX, IR_U8, (_reg))
@@ -574,6 +576,8 @@ extern "C" {
#define ir_LOAD_D(_addr) _ir_LOAD(_ir_CTX, IR_DOUBLE, (_addr))
#define ir_LOAD_F(_addr) _ir_LOAD(_ir_CTX, IR_FLOAT, (_addr))
#define ir_STORE(_addr, _val) _ir_STORE(_ir_CTX, (_addr), (_val))
+#define ir_LOAD_v(_type, _addr) _ir_LOAD_v(_ir_CTX, (_type), (_addr))
+#define ir_STORE_v(_addr, _val) _ir_STORE_v(_ir_CTX, (_addr), (_val))
#define ir_TLS(_index, _offset) _ir_TLS(_ir_CTX, (_index), (_offset))
#define ir_TRAP() do {_ir_CTX->control = ir_emit1(_ir_CTX, IR_TRAP, _ir_CTX->control);} while (0)
@@ -586,7 +590,7 @@ extern "C" {
#define ir_VA_END(_list) _ir_VA_END(_ir_CTX, _list)
#define ir_VA_COPY(_dst, _src) _ir_VA_COPY(_ir_CTX, _dst, _src)
#define ir_VA_ARG(_list, _type) _ir_VA_ARG(_ir_CTX, _type, _list)
-#define ir_VA_ARG_EX(_list, _type, size) _ir_VA_ARG_EX(_ir_CTX, _type, _list, size)
+#define ir_VA_ARG_EX(_list, _type, s, a) _ir_VA_ARG_EX(_ir_CTX, _type, _list, s, a)
#define ir_START() _ir_START(_ir_CTX)
#define ir_ENTRY(_src, _num) _ir_ENTRY(_ir_CTX, (_src), (_num))
@@ -607,6 +611,7 @@ extern "C" {
#define ir_CASE_RANGE(_switch, _v1, _v2) _ir_CASE_RANGE(_ir_CTX, (_switch), (_v1), (_v2))
#define ir_CASE_DEFAULT(_switch) _ir_CASE_DEFAULT(_ir_CTX, (_switch))
#define ir_RETURN(_val) _ir_RETURN(_ir_CTX, (_val))
+#define ir_IGOTO(_addr) _ir_IGOTO(_ir_CTX, (_addr))
#define ir_IJMP(_addr) _ir_IJMP(_ir_CTX, (_addr))
#define ir_UNREACHABLE() _ir_UNREACHABLE(_ir_CTX)
@@ -654,15 +659,19 @@ ir_ref _ir_ALLOCA(ir_ctx *ctx, ir_ref size);
void _ir_AFREE(ir_ctx *ctx, ir_ref size);
ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var);
void _ir_VSTORE(ir_ctx *ctx, ir_ref var, ir_ref val);
+ir_ref _ir_VLOAD_v(ir_ctx *ctx, ir_type type, ir_ref var);
+void _ir_VSTORE_v(ir_ctx *ctx, ir_ref var, ir_ref val);
ir_ref _ir_RLOAD(ir_ctx *ctx, ir_type type, ir_ref reg);
void _ir_RSTORE(ir_ctx *ctx, ir_ref reg, ir_ref val);
ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr);
void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val);
+ir_ref _ir_LOAD_v(ir_ctx *ctx, ir_type type, ir_ref addr);
+void _ir_STORE_v(ir_ctx *ctx, ir_ref addr, ir_ref val);
void _ir_VA_START(ir_ctx *ctx, ir_ref list);
void _ir_VA_END(ir_ctx *ctx, ir_ref list);
void _ir_VA_COPY(ir_ctx *ctx, ir_ref dst, ir_ref src);
ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list);
-ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size);
+ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size, size_t align);
void _ir_START(ir_ctx *ctx);
void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num);
void _ir_BEGIN(ir_ctx *ctx, ir_ref src);
@@ -688,6 +697,7 @@ void _ir_CASE_VAL(ir_ctx *ctx, ir_ref switch_ref, ir_ref val);
void _ir_CASE_RANGE(ir_ctx *ctx, ir_ref switch_ref, ir_ref v1, ir_ref v2);
void _ir_CASE_DEFAULT(ir_ctx *ctx, ir_ref switch_ref);
void _ir_RETURN(ir_ctx *ctx, ir_ref val);
+ir_ref _ir_IGOTO(ir_ctx *ctx, ir_ref addr);
void _ir_IJMP(ir_ctx *ctx, ir_ref addr);
void _ir_GUARD(ir_ctx *ctx, ir_ref condition, ir_ref addr);
void _ir_GUARD_NOT(ir_ctx *ctx, ir_ref condition, ir_ref addr);
diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c
index 00923387bb2..46755067b24 100644
--- a/ext/opcache/jit/ir/ir_cfg.c
+++ b/ext/opcache/jit/ir/ir_cfg.c
@@ -820,11 +820,14 @@ int ir_build_dominators_tree(ir_ctx *ctx)
succ_b = ctx->cfg_edges[bb->successors];
if (bb->successors_count != 1) {
/* LOOP_END/END may be linked with the following ENTRY by a fake edge */
- IR_ASSERT(bb->successors_count == 2);
- if (blocks[succ_b].flags & IR_BB_ENTRY) {
+ if (bb->successors_count != 2) {
+ complete = 0;
+ break;
+ } else if (blocks[succ_b].flags & IR_BB_ENTRY) {
succ_b = ctx->cfg_edges[bb->successors + 1];
- } else {
- IR_ASSERT(blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY);
+ } else if (!(blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY)) {
+ complete = 0;
+ break;
}
}
dom_depth = blocks[succ_b].dom_depth;;
diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c
index c25a984aefc..ee951291b1b 100644
--- a/ext/opcache/jit/ir/ir_check.c
+++ b/ext/opcache/jit/ir/ir_check.c
@@ -328,7 +328,9 @@ bool ir_check(const ir_ctx *ctx)
}
break;
case IR_LOAD:
+ case IR_LOAD_v:
case IR_STORE:
+ case IR_STORE_v:
type = ctx->ir_base[insn->op2].type;
if (type != IR_ADDR
&& (!IR_IS_TYPE_INT(type) || ir_type_size[type] != ir_type_size[IR_ADDR])) {
@@ -338,7 +340,9 @@ bool ir_check(const ir_ctx *ctx)
}
break;
case IR_VLOAD:
+ case IR_VLOAD_v:
case IR_VSTORE:
+ case IR_VSTORE_v:
if (ctx->ir_base[insn->op2].op != IR_VAR) {
fprintf(stderr, "ir_base[%d].op2 must be 'VAR' (%s)\n",
i, ir_op_name[ctx->ir_base[insn->op2].op]);
@@ -408,6 +412,8 @@ bool ir_check(const ir_ctx *ctx)
ok = 0;
}
break;
+ case IR_IGOTO:
+ break;
default:
/* skip data references */
count = n = use_list->count;
diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c
index a501d261f30..5cc732927d4 100644
--- a/ext/opcache/jit/ir/ir_dump.c
+++ b/ext/opcache/jit/ir/ir_dump.c
@@ -129,6 +129,11 @@ void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f)
case IR_OPND_CONTROL_REF:
fprintf(f, "\tn%d -> n%d [style=dashed,dir=back,weight=%d];\n", ref, i, REF_WEIGHT);
break;
+ case IR_OPND_LABEL_REF:
+ if (ref) {
+ fprintf(f, "\tc%d -> n%d [color=blue,weight=%d];\n", -ref, i, REF_WEIGHT);
+ }
+ break;
}
}
}
@@ -491,6 +496,8 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f)
ir_print_proto(ctx, insn->proto, f);
} else if (insn->op == IR_SYM) {
fprintf(f, "sym(%s)", ir_get_str(ctx, insn->val.name));
+ } else if (insn->op == IR_LABEL) {
+ fprintf(f, "label(%s)", ir_get_str(ctx, insn->val.name));
} else if (insn->op == IR_FUNC_ADDR) {
fprintf(f, "func *");
ir_print_const(ctx, insn, f, true);
@@ -648,6 +655,12 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f)
fprintf(f, "%s%d", first ? "(" : ", ", ref);
first = 0;
break;
+ case IR_OPND_LABEL_REF:
+ if (ref) {
+ IR_ASSERT(IR_IS_CONST_REF(ref));
+ fprintf(f, "%sc_%d", first ? "(" : ", ", -ref);
+ }
+ break;
}
} else if (opnd_kind == IR_OPND_NUM) {
fprintf(f, "%s%d", first ? "(" : ", ", ref);
diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c
index 7a10da1322a..847ca375b5b 100644
--- a/ext/opcache/jit/ir/ir_emit.c
+++ b/ext/opcache/jit/ir/ir_emit.c
@@ -244,32 +244,30 @@ static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs
ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)];
type = arg->type;
if (IR_IS_TYPE_INT(type)) {
- if (arg->op == IR_ARGVAL) {
- continue;
- } else if (int_param < int_reg_params_count) {
+ if (int_param < int_reg_params_count && arg->op != IR_ARGVAL) {
regs[j] = int_reg_params[int_param];
count = j + 1;
+ int_param++;
+#ifdef _WIN64
+ /* WIN64 calling convention use common couter for int and fp registers */
+ fp_param++;
+#endif
} else {
regs[j] = IR_REG_NONE;
}
- int_param++;
-#ifdef _WIN64
- /* WIN64 calling convention use common couter for int and fp registers */
- fp_param++;
-#endif
} else {
IR_ASSERT(IR_IS_TYPE_FP(type));
if (fp_param < fp_reg_params_count) {
regs[j] = fp_reg_params[fp_param];
count = j + 1;
+ fp_param++;
+#ifdef _WIN64
+ /* WIN64 calling convention use common couter for int and fp registers */
+ int_param++;
+#endif
} else {
regs[j] = IR_REG_NONE;
}
- fp_param++;
-#ifdef _WIN64
- /* WIN64 calling convention use common couter for int and fp registers */
- int_param++;
-#endif
}
}
return count;
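The rewrite fixes the argument-register counters: previously an ARGVAL argument bypassed the regs[] assignment entirely, the counters advanced even for arguments that received no register, and the Win64 cross-increment ran unconditionally. Win64 matters here because its calling convention uses one positional window for the first four arguments regardless of class, so consuming an integer slot must also consume the matching fp slot and vice versa:

    /* Microsoft x64 calling convention (for reference): */
    void f(int a, double b, int c, double d);
    /* a -> RCX (slot 0), b -> XMM1 (slot 1), c -> R8 (slot 2), d -> XMM3 (slot 3) */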
@@ -426,7 +424,7 @@ typedef struct _ir_common_backend_data {
ir_bitset emit_constants;
} ir_common_backend_data;
-static int ir_const_label(ir_ctx *ctx, ir_ref ref)
+static int ir_get_const_label(ir_ctx *ctx, ir_ref ref)
{
ir_common_backend_data *data = ctx->data;
int label = ctx->cfg_blocks_count - ref;
@@ -1015,11 +1013,16 @@ int ir_match(ir_ctx *ctx)
entries_count++;
}
ctx->rules[start] = IR_SKIPPED | IR_NOP;
+ if (ctx->ir_base[start].op == IR_BEGIN && ctx->ir_base[start].op2) {
+ ctx->flags2 |= IR_HAS_BLOCK_ADDR;
+ }
ref = bb->end;
if (bb->successors_count == 1) {
insn = &ctx->ir_base[ref];
if (insn->op == IR_END || insn->op == IR_LOOP_END) {
- ctx->rules[ref] = insn->op;
+ if (!ctx->rules[ref]) {
+ ctx->rules[ref] = insn->op;
+ }
ref = prev_ref[ref];
if (ref == start && ctx->cfg_edges[bb->successors] != b) {
if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) {
diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h
index 74f7818d747..bab6b291607 100644
--- a/ext/opcache/jit/ir/ir_fold.h
+++ b/ext/opcache/jit/ir/ir_fold.h
@@ -755,8 +755,35 @@ IR_FOLD(NEG(C_FLOAT))
}
IR_FOLD(ABS(C_I8))
+{
+ IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+ if (op1_insn->val.i64 >= 0) {
+ IR_FOLD_COPY(op1);
+ } else {
+ IR_FOLD_CONST_I(-op1_insn->val.i8);
+ }
+}
+
IR_FOLD(ABS(C_I16))
+{
+ IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+ if (op1_insn->val.i64 >= 0) {
+ IR_FOLD_COPY(op1);
+ } else {
+ IR_FOLD_CONST_I(-op1_insn->val.i16);
+ }
+}
+
IR_FOLD(ABS(C_I32))
+{
+ IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+ if (op1_insn->val.i64 >= 0) {
+ IR_FOLD_COPY(op1);
+ } else {
+ IR_FOLD_CONST_I((int32_t)-op1_insn->val.u32);
+ }
+}
+
IR_FOLD(ABS(C_I64))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
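The new ABS folds mirror the existing C_I64 case shown in context here. Note the C_I32 variant negates through the unsigned field, (int32_t)-op1_insn->val.u32: negating INT_MIN as a signed value is undefined behavior in C, while the unsigned negation wraps, so ABS(INT32_MIN) folds to INT32_MIN exactly as the hardware instruction would compute it. The same idiom in isolation:

    #include <stdint.h>

    int32_t abs_i32(int32_t x)
    {
        /* -x would be signed overflow (UB) for x == INT32_MIN; the unsigned detour wraps */
        return (x >= 0) ? x : (int32_t)-(uint32_t)x;
    }
    /* abs_i32(-5) == 5;  abs_i32(INT32_MIN) == INT32_MIN (wraps, matching the fold) */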
@@ -847,7 +874,7 @@ IR_FOLD(MUL_OV(C_U64, C_U64))
uint64_t res;
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
res = op1_insn->val.u64 * op2_insn->val.u64;
- if (op1_insn->val.u64 != 0 && res / op1_insn->val.u64 != op2_insn->val.u64 && res <= max) {
+ if ((op1_insn->val.u64 != 0 && res / op1_insn->val.u64 != op2_insn->val.u64) || res > max) {
IR_FOLD_NEXT;
}
IR_FOLD_CONST_U(res);
@@ -864,7 +891,7 @@ IR_FOLD(MUL_OV(C_I64, C_I64))
int64_t res;
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
res = op1_insn->val.u64 * op2_insn->val.u64;
- if (op1_insn->val.i64 != 0 && res / op1_insn->val.i64 != op2_insn->val.i64 && res >= min && res <= max) {
+ if ((op1_insn->val.i64 != 0 && res / op1_insn->val.i64 != op2_insn->val.i64) || res < min || res > max) {
IR_FOLD_NEXT;
}
IR_FOLD_CONST_U(res);
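This corrects an inverted overflow guard in the MUL_OV folds: the old condition (op1 != 0 && res / op1 != op2 && res <= max) required both a wrapped 64-bit product and an in-range result before refusing to fold, so an out-of-range product for the narrower types slipped through and was folded to a wrong constant. The fixed form skips folding (IR_FOLD_NEXT) whenever the multiply wraps or the result leaves the type's range, i.e. the standard division-based check:

    #include <stdbool.h>
    #include <stdint.h>

    /* true iff a * b does not fit in uint64_t -- the test the fold now applies */
    static bool mul_overflows_u64(uint64_t a, uint64_t b)
    {
        uint64_t res = a * b;           /* wraps on overflow */
        return a != 0 && res / a != b;  /* recovering b fails exactly when it wrapped */
    }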
@@ -1037,220 +1064,220 @@ IR_FOLD(SHL(C_U8, C_U8))
IR_FOLD(SHL(C_CHAR, C_CHAR))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(op1_insn->val.u8 << op2_insn->val.u8);
+ IR_FOLD_CONST_U(op1_insn->val.u8 << (op2_insn->val.u8 & 0x7));
}
IR_FOLD(SHL(C_I8, C_I8))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int8_t)(op1_insn->val.u8 << op2_insn->val.u8));
+ IR_FOLD_CONST_I((int8_t)(op1_insn->val.u8 << (op2_insn->val.u8 & 0x7)));
}
IR_FOLD(SHL(C_U16, C_U16))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(op1_insn->val.u16 << op2_insn->val.u16);
+ IR_FOLD_CONST_U(op1_insn->val.u16 << (op2_insn->val.u16 & 0xf));
}
IR_FOLD(SHL(C_I16, C_I16))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int16_t)(op1_insn->val.u16 << op2_insn->val.u16));
+ IR_FOLD_CONST_I((int16_t)(op1_insn->val.u16 << (op2_insn->val.u16 & 0xf)));
}
IR_FOLD(SHL(C_U32, C_U32))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(op1_insn->val.u32 << op2_insn->val.u32);
+ IR_FOLD_CONST_U(op1_insn->val.u32 << (op2_insn->val.u32 & 0x1f));
}
IR_FOLD(SHL(C_I32, C_I32))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int32_t)(op1_insn->val.u32 << op2_insn->val.u32));
+ IR_FOLD_CONST_I((int32_t)(op1_insn->val.u32 << (op2_insn->val.u32 & 0x1f)));
}
IR_FOLD(SHL(C_U64, C_U64))
IR_FOLD(SHL(C_I64, C_I64))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(op1_insn->val.u64 << op2_insn->val.u64);
+ IR_FOLD_CONST_U(op1_insn->val.u64 << (op2_insn->val.u64 & 0x3f));
}
IR_FOLD(SHR(C_U8, C_U8))
IR_FOLD(SHR(C_CHAR, C_CHAR))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(op1_insn->val.u8 >> op2_insn->val.u8);
+ IR_FOLD_CONST_U(op1_insn->val.u8 >> (op2_insn->val.u8 & 0x7));
}
IR_FOLD(SHR(C_I8, C_I8))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int8_t)(op1_insn->val.u8 >> op2_insn->val.u8));
+ IR_FOLD_CONST_I((int8_t)(op1_insn->val.u8 >> (op2_insn->val.u8 & 0x7)));
}
IR_FOLD(SHR(C_U16, C_U16))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(op1_insn->val.u16 >> op2_insn->val.u16);
+ IR_FOLD_CONST_U(op1_insn->val.u16 >> (op2_insn->val.u16 & 0xf));
}
IR_FOLD(SHR(C_I16, C_I16))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int16_t)(op1_insn->val.u16 >> op2_insn->val.u16));
+ IR_FOLD_CONST_I((int16_t)(op1_insn->val.u16 >> (op2_insn->val.u16 & 0xf)));
}
IR_FOLD(SHR(C_U32, C_U32))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(op1_insn->val.u32 >> op2_insn->val.u32);
+ IR_FOLD_CONST_U(op1_insn->val.u32 >> (op2_insn->val.u32 & 0x1f));
}
IR_FOLD(SHR(C_I32, C_I32))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int32_t)(op1_insn->val.u32 >> op2_insn->val.u32));
+ IR_FOLD_CONST_I((int32_t)(op1_insn->val.u32 >> (op2_insn->val.u32 & 0x1f)));
}
IR_FOLD(SHR(C_U64, C_U64))
IR_FOLD(SHR(C_I64, C_I64))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(op1_insn->val.u64 >> op2_insn->val.u64);
+ IR_FOLD_CONST_U(op1_insn->val.u64 >> (op2_insn->val.u64 & 0x3f));
}
IR_FOLD(SAR(C_U8, C_U8))
IR_FOLD(SAR(C_CHAR, C_CHAR))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U((uint8_t)(op1_insn->val.i8 >> op2_insn->val.i8));
+ IR_FOLD_CONST_U((uint8_t)(op1_insn->val.i8 >> (op2_insn->val.i8 & 0x7)));
}
IR_FOLD(SAR(C_I8, C_I8))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I(op1_insn->val.i8 >> op2_insn->val.i8);
+ IR_FOLD_CONST_I(op1_insn->val.i8 >> (op2_insn->val.i8 & 0x7));
}
IR_FOLD(SAR(C_U16, C_U16))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U((uint16_t)(op1_insn->val.i16 >> op2_insn->val.i16));
+ IR_FOLD_CONST_U((uint16_t)(op1_insn->val.i16 >> (op2_insn->val.i16 & 0xf)));
}
IR_FOLD(SAR(C_I16, C_I16))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I(op1_insn->val.i16 >> op2_insn->val.i16);
+ IR_FOLD_CONST_I(op1_insn->val.i16 >> (op2_insn->val.i16 & 0xf));
}
IR_FOLD(SAR(C_U32, C_U32))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U((uint32_t)(op1_insn->val.i32 >> op2_insn->val.i32));
+ IR_FOLD_CONST_U((uint32_t)(op1_insn->val.i32 >> (op2_insn->val.i32 & 0x1f)));
}
IR_FOLD(SAR(C_I32, C_I32))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I(op1_insn->val.i32 >> op2_insn->val.i32);
+ IR_FOLD_CONST_I(op1_insn->val.i32 >> (op2_insn->val.i32 & 0x1f));
}
IR_FOLD(SAR(C_U64, C_U64))
IR_FOLD(SAR(C_I64, C_I64))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I(op1_insn->val.i64 >> op2_insn->val.i64);
+ IR_FOLD_CONST_I(op1_insn->val.i64 >> (op2_insn->val.i64 & 0x3f));
}
IR_FOLD(ROL(C_U8, C_U8))
IR_FOLD(ROL(C_CHAR, C_CHAR))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(ir_rol8(op1_insn->val.u8, op2_insn->val.u8));
+ IR_FOLD_CONST_U(ir_rol8(op1_insn->val.u8, (op2_insn->val.u8 & 0x7)));
}
IR_FOLD(ROL(C_I8, C_I8))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int8_t)ir_rol8(op1_insn->val.u8, op2_insn->val.u8));
+ IR_FOLD_CONST_I((int8_t)ir_rol8(op1_insn->val.u8, (op2_insn->val.u8 & 0x7)));
}
IR_FOLD(ROL(C_U16, C_U16))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(ir_rol16(op1_insn->val.u16, op2_insn->val.u16));
+ IR_FOLD_CONST_U(ir_rol16(op1_insn->val.u16, (op2_insn->val.u16 & 0xf)));
}
IR_FOLD(ROL(C_I16, C_I16))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int16_t)ir_rol16(op1_insn->val.u16, op2_insn->val.u16));
+ IR_FOLD_CONST_I((int16_t)ir_rol16(op1_insn->val.u16, (op2_insn->val.u16 & 0xf)));
}
IR_FOLD(ROL(C_U32, C_U32))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(ir_rol32(op1_insn->val.u32, op2_insn->val.u32));
+ IR_FOLD_CONST_U(ir_rol32(op1_insn->val.u32, (op2_insn->val.u32 & 0x1f)));
}
IR_FOLD(ROL(C_I32, C_I32))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int32_t)ir_rol32(op1_insn->val.u32, op2_insn->val.u32));
+ IR_FOLD_CONST_I((int32_t)ir_rol32(op1_insn->val.u32, (op2_insn->val.u32 & 0x1f)));
}
IR_FOLD(ROL(C_U64, C_U64))
IR_FOLD(ROL(C_I64, C_I64))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(ir_rol64(op1_insn->val.u64, op2_insn->val.u64));
+ IR_FOLD_CONST_U(ir_rol64(op1_insn->val.u64, (op2_insn->val.u64 & 0x3f)));
}
IR_FOLD(ROR(C_U8, C_U8))
IR_FOLD(ROR(C_CHAR, C_CHAR))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(ir_ror8(op1_insn->val.u8, op2_insn->val.u8));
+ IR_FOLD_CONST_U(ir_ror8(op1_insn->val.u8, (op2_insn->val.u8 & 0x7)));
}
IR_FOLD(ROR(C_I8, C_I8))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int8_t)ir_ror8(op1_insn->val.u8, op2_insn->val.u8));
+ IR_FOLD_CONST_I((int8_t)ir_ror8(op1_insn->val.u8, (op2_insn->val.u8 & 0x7)));
}
IR_FOLD(ROR(C_U16, C_U16))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(ir_ror16(op1_insn->val.u16, op2_insn->val.u16));
+ IR_FOLD_CONST_U(ir_ror16(op1_insn->val.u16, (op2_insn->val.u16 & 0xf)));
}
IR_FOLD(ROR(C_I16, C_I16))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int16_t)ir_ror16(op1_insn->val.u16, op2_insn->val.u16));
+ IR_FOLD_CONST_I((int16_t)ir_ror16(op1_insn->val.u16, (op2_insn->val.u16 & 0xf)));
}
IR_FOLD(ROR(C_U32, C_U32))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(ir_ror32(op1_insn->val.u32, op2_insn->val.u32));
+ IR_FOLD_CONST_U(ir_ror32(op1_insn->val.u32, (op2_insn->val.u32 & 0x1f)));
}
IR_FOLD(ROR(C_I32, C_I32))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_I((int32_t)ir_ror32(op1_insn->val.u32, op2_insn->val.u32));
+ IR_FOLD_CONST_I((int32_t)ir_ror32(op1_insn->val.u32, (op2_insn->val.u32 & 0x1f)));
}
IR_FOLD(ROR(C_U64, C_U64))
IR_FOLD(ROR(C_I64, C_I64))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- IR_FOLD_CONST_U(ir_ror64(op1_insn->val.u64, op2_insn->val.u64));
+ IR_FOLD_CONST_U(ir_ror64(op1_insn->val.u64, (op2_insn->val.u64 & 0x3f)));
}
//IR_FOLD(BSWAP(CONST))
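Why the new masks: the shift/rotate fold rules now reduce a constant count modulo the
operand width (0x7/0xf/0x1f/0x3f). This keeps the folder itself free of C undefined
behavior -- shifting an N-bit value by N or more is UB -- and, for 32- and 64-bit
operands, coincides with the count masking x86 shifts perform at run time. A standalone
sketch (not IR code):

    #include <stdint.h>
    #include <stdio.h>

    /* The same reduction the SHR fold rules apply for 32-bit operands. */
    static uint32_t fold_shr32(uint32_t x, uint32_t count)
    {
        return x >> (count & 0x1f);
    }

    int main(void)
    {
        /* Without the mask, x >> 33 would be UB inside the constant
         * folder; with it, folding matches the hardware (>> 1 on x86). */
        printf("0x%x\n", fold_shr32(0x80000000u, 33)); /* 0x40000000 */
        return 0;
    }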
@@ -1392,6 +1419,9 @@ IR_FOLD(TRUNC(C_U64))
IR_FOLD_CONST_U(op1_insn->val.u16);
case IR_U32:
IR_FOLD_CONST_U(op1_insn->val.u32);
+ case IR_ADDR:
+ IR_ASSERT(sizeof(void*) == 4);
+ IR_FOLD_CONST_U(op1_insn->val.u32);
}
}
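The new IR_ADDR case makes TRUNC of a 64-bit constant foldable to an address on ILP32
targets, where it behaves exactly like the IR_U32 case above it; the assert documents
that the case is unreachable on 64-bit builds. Distilled:

    #include <stdint.h>
    #include <assert.h>

    /* Sketch: truncating a u64 constant to a 32-bit pointer-sized value
     * keeps the low 32 bits, mirroring the IR_ASSERT + u32 fold above. */
    static uint32_t trunc_const_to_addr(uint64_t v)
    {
        assert(sizeof(void*) == 4);
        return (uint32_t)v;
    }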
@@ -1545,7 +1575,7 @@ IR_FOLD(FP2FP(C_DOUBLE))
IR_FOLD(COPY(_))
{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
- if (!op2) {
+ if (!(op2 & IR_COPY_HARD)) {
IR_FOLD_COPY(op1);
}
/* skip CSE */
@@ -2075,23 +2105,23 @@ IR_FOLD(SUB(ADD, ADD))
IR_FOLD_CONST_U(0);
} else if (op1_insn->op1 == op2_insn->op1) {
/* (a + b) - (a + c) => b - c */
- op1 = op1_insn->op2;
- op2 = op2_insn->op2;
+ op1 = _ir_fold_cast(ctx, op1_insn->op2, IR_OPT_TYPE(opt));
+ op2 = _ir_fold_cast(ctx, op2_insn->op2, IR_OPT_TYPE(opt));
IR_FOLD_RESTART;
} else if (op1_insn->op1 == op2_insn->op2) {
/* (a + b) - (c + a) => b - c */
- op1 = op1_insn->op2;
- op2 = op2_insn->op1;
+ op1 = _ir_fold_cast(ctx, op1_insn->op2, IR_OPT_TYPE(opt));
+ op2 = _ir_fold_cast(ctx, op2_insn->op1, IR_OPT_TYPE(opt));
IR_FOLD_RESTART;
} else if (op1_insn->op2 == op2_insn->op1) {
/* (a + b) - (b + c) => a - c */
- op1 = op1_insn->op1;
- op2 = op2_insn->op2;
+ op1 = _ir_fold_cast(ctx, op1_insn->op1, IR_OPT_TYPE(opt));
+ op2 = _ir_fold_cast(ctx, op2_insn->op2, IR_OPT_TYPE(opt));
IR_FOLD_RESTART;
} else if (op1_insn->op2 == op2_insn->op2) {
/* (a + b) - (c + b) => a - c */
- op1 = op1_insn->op1;
- op2 = op2_insn->op1;
+ op1 = _ir_fold_cast(ctx, op1_insn->op1, IR_OPT_TYPE(opt));
+ op2 = _ir_fold_cast(ctx, op2_insn->op1, IR_OPT_TYPE(opt));
IR_FOLD_RESTART;
}
}
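The four rewrites cancel the shared term of two ADDs, but the surviving operands may
not have the SUB's result type (typical when the ADDs are IR_ADDR pointer arithmetic
and the SUB computes an integer difference), so each one goes through _ir_fold_cast():
a no-op on matching types, constant re-typing for non-symbolic constants, otherwise an
inserted BITCAST. A C-level analogue of the pattern being folded:

    #include <stdint.h>

    /* (base + b) - (base + c) folds to b - c, but only after both
     * operands are brought to the subtraction's (integer) type. */
    static intptr_t offset_diff(const char *base, uintptr_t b, uintptr_t c)
    {
        uintptr_t p = (uintptr_t)base + b;   /* ADD of IR_ADDR type */
        uintptr_t q = (uintptr_t)base + c;
        return (intptr_t)(p - q);            /* folds to (intptr_t)b - (intptr_t)c */
    }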
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
index 043e1e7bdd8..e6486ba64a1 100644
--- a/ext/opcache/jit/ir/ir_gcm.c
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -361,20 +361,20 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
j = ctx->cfg_blocks[j].idom;
}
+ clone = ir_hashtab_find(&hash, j);
+ if (clone == IR_INVALID_VAL) {
+ clone = clones_count++;
+ ir_hashtab_add(&hash, j, clone);
+ clones[clone].block = j;
+ clones[clone].use_count = 0;
+ clones[clone].use = -1;
+ }
+ uses[uses_count].ref = use;
+ uses[uses_count].block = i;
+ uses[uses_count].next = clones[clone].use;
+ clones[clone].use_count++;
+ clones[clone].use = uses_count++;
}
- clone = ir_hashtab_find(&hash, j);
- if (clone == IR_INVALID_VAL) {
- clone = clones_count++;
- ir_hashtab_add(&hash, j, clone);
- clones[clone].block = j;
- clones[clone].use_count = 0;
- clones[clone].use = -1;
- }
- uses[uses_count].ref = use;
- uses[uses_count].block = i;
- uses[uses_count].next = clones[clone].use;
- clones[clone].use_count++;
- clones[clone].use = uses_count++;
}
}
@@ -1007,7 +1007,11 @@ int ir_schedule(ir_ctx *ctx)
start = i = bb->start;
_xlat[i] = bb->start = insns_count;
insn = &ctx->ir_base[i];
- if (insn->op == IR_CASE_VAL) {
+ if (insn->op == IR_BEGIN) {
+ if (insn->op2) {
+ consts_count += ir_count_constant(_xlat, insn->op2);
+ }
+ } else if (insn->op == IR_CASE_VAL) {
IR_ASSERT(insn->op2 < IR_TRUE);
consts_count += ir_count_constant(_xlat, insn->op2);
} else if (insn->op == IR_CASE_RANGE) {
@@ -1255,7 +1259,7 @@ int ir_schedule(ir_ctx *ctx)
const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
new_insn->proto = ir_strl(&new_ctx, proto, len);
}
- } else if (new_insn->op == IR_SYM || new_insn->op == IR_STR) {
+ } else if (new_insn->op == IR_SYM || new_insn->op == IR_STR || new_insn->op == IR_LABEL) {
size_t len;
const char *str = ir_get_strl(ctx, new_insn->val.name, &len);
new_insn->val.u64 = ir_strl(&new_ctx, str, len);
@@ -1292,7 +1296,7 @@ int ir_schedule(ir_ctx *ctx)
} else {
new_insn->proto = 0;
}
- } else if (insn->op == IR_SYM || insn->op == IR_STR) {
+ } else if (insn->op == IR_SYM || insn->op == IR_STR || insn->op == IR_LABEL) {
size_t len;
const char *str = ir_get_strl(ctx, insn->val.name, &len);
new_insn->val.u64 = ir_strl(&new_ctx, str, len);
@@ -1364,6 +1368,8 @@ int ir_schedule(ir_ctx *ctx)
size_t len;
const char *str = ir_get_strl(ctx, insn->op2, &len);
new_insn->op2 = ir_strl(&new_ctx, str, len);
+ } else if (new_insn->op == IR_BEGIN && insn->op2) {
+ new_insn->op2 = _xlat[insn->op2];
} else {
new_insn->op2 = insn->op2;
}
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h
index 2f457cbc993..dbacc3967d0 100644
--- a/ext/opcache/jit/ir/ir_private.h
+++ b/ext/opcache/jit/ir/ir_private.h
@@ -887,7 +887,7 @@ void ir_print_escaped_str(const char *s, size_t len, FILE *f);
#define IR_IS_CONST_OP(op) ((op) > IR_NOP && (op) <= IR_C_FLOAT)
#define IR_IS_FOLDABLE_OP(op) ((op) <= IR_LAST_FOLDABLE_OP)
-#define IR_IS_SYM_CONST(op) ((op) == IR_STR || (op) == IR_SYM || (op) == IR_FUNC)
+#define IR_IS_SYM_CONST(op) ((op) == IR_STR || (op) == IR_SYM || (op) == IR_FUNC || (op) == IR_LABEL)
ir_ref ir_const_ex(ir_ctx *ctx, ir_val val, uint8_t type, uint32_t optx);
@@ -946,12 +946,13 @@ IR_ALWAYS_INLINE bool ir_ref_is_true(ir_ctx *ctx, ir_ref ref)
#define IR_OPND_UNUSED 0x0
#define IR_OPND_DATA 0x1
#define IR_OPND_CONTROL 0x2
-#define IR_OPND_CONTROL_DEP 0x3
-#define IR_OPND_CONTROL_REF 0x4
-#define IR_OPND_STR 0x5
-#define IR_OPND_NUM 0x6
-#define IR_OPND_PROB 0x7
-#define IR_OPND_PROTO 0x8
+#define IR_OPND_LABEL_REF 0x3
+#define IR_OPND_CONTROL_DEP 0x4
+#define IR_OPND_CONTROL_REF 0x5
+#define IR_OPND_STR 0x6
+#define IR_OPND_NUM 0x7
+#define IR_OPND_PROB 0x8
+#define IR_OPND_PROTO 0x9
#define IR_OP_FLAGS(op_flags, op1_flags, op2_flags, op3_flags) \
((op_flags) | ((op1_flags) << 20) | ((op2_flags) << 24) | ((op3_flags) << 28))
@@ -1013,6 +1014,7 @@ IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn)
#define IR_HAS_VA_ARG_FP (1<<9)
#define IR_HAS_FP_RET_SLOT (1<<10)
#define IR_16B_FRAME_ALIGNMENT (1<<11)
+#define IR_HAS_BLOCK_ADDR (1<<12)
/* Temporary: MEM2SSA -> SCCP */
#define IR_MEM2SSA_VARS (1<<25)
@@ -1248,11 +1250,10 @@ struct _ir_live_range {
#define IR_LIVE_INTERVAL_HAS_HINT_REGS (1<<2)
#define IR_LIVE_INTERVAL_HAS_HINT_REFS (1<<3)
#define IR_LIVE_INTERVAL_MEM_PARAM (1<<4)
-#define IR_LIVE_INTERVAL_MEM_LOAD (1<<5)
-#define IR_LIVE_INTERVAL_COALESCED (1<<6)
-#define IR_LIVE_INTERVAL_SPILL_SPECIAL (1<<7) /* spill slot is pre-allocated in a special area (see ir_ctx.spill_reserved_base) */
-#define IR_LIVE_INTERVAL_SPILLED (1<<8)
-#define IR_LIVE_INTERVAL_SPLIT_CHILD (1<<9)
+#define IR_LIVE_INTERVAL_COALESCED (1<<5)
+#define IR_LIVE_INTERVAL_SPILL_SPECIAL (1<<6) /* spill slot is pre-allocated in a special area (see ir_ctx.spill_reserved_base) */
+#define IR_LIVE_INTERVAL_SPILLED (1<<7)
+#define IR_LIVE_INTERVAL_SPLIT_CHILD (1<<8)
struct _ir_live_interval {
uint8_t type;
diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c
index 21c7ee3ac64..2e8a8e3f34f 100644
--- a/ext/opcache/jit/ir/ir_ra.c
+++ b/ext/opcache/jit/ir/ir_ra.c
@@ -776,9 +776,6 @@ int ir_compute_live_ranges(ir_ctx *ctx)
if (insn->op == IR_PARAM) {
/* We may reuse parameter stack slot for spilling */
ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
- } else if (insn->op == IR_VLOAD) {
- /* Load may be fused into the usage instruction */
- ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD;
}
def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
}
@@ -845,11 +842,17 @@ int ir_compute_live_ranges(ir_ctx *ctx)
ival = ctx->live_intervals[v];
}
ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
- } else if (ctx->rules) {
- if (ctx->rules[input] & IR_FUSED) {
- ir_add_fusion_ranges(ctx, ref, input, bb, live);
- } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
- ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
+ } else {
+ if (ctx->rules) {
+ if ((ctx->rules[input] & (IR_FUSED|IR_SKIPPED)) == IR_FUSED) {
+ ir_add_fusion_ranges(ctx, ref, input, bb, live);
+ } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
+ ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
+ }
+ }
+ if (reg != IR_REG_NONE) {
+ use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
+ ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
}
}
} else if (reg != IR_REG_NONE) {
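Both restructured branches of ir_compute_live_ranges() make the same two changes: the
fusion test now demands IR_FUSED set with IR_SKIPPED clear (a plain (rules[input] &
IR_FUSED) test also matched nodes carrying both flags), and a fixed live range is now
reserved whenever the operand has a register hint, even on this no-vreg path. The flag
test, sketched with stand-in values:

    /* Stand-ins for IR_FUSED/IR_SKIPPED; the idiom is a masked equality
     * requiring one flag present and the other absent at once. */
    enum { FUSED = 1 << 0, SKIPPED = 1 << 1 };

    static int fused_not_skipped(unsigned rules)
    {
        return (rules & (FUSED | SKIPPED)) == FUSED;
    }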
@@ -1396,9 +1399,6 @@ int ir_compute_live_ranges(ir_ctx *ctx)
if (insn->op == IR_PARAM) {
/* We may reuse parameter stack slot for spilling */
ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
- } else if (insn->op == IR_VLOAD) {
- /* Load may be fused into the usage instruction */
- ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD;
}
def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
}
@@ -1465,17 +1465,17 @@ int ir_compute_live_ranges(ir_ctx *ctx)
ival = ctx->live_intervals[v];
}
ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
- } else if (ctx->rules) {
- if (ctx->rules[input] & IR_FUSED) {
- ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b);
- } else {
- if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
+ } else {
+ if (ctx->rules) {
+ if ((ctx->rules[input] & (IR_FUSED|IR_SKIPPED)) == IR_FUSED) {
+ ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b);
+ } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
}
- if (reg != IR_REG_NONE) {
- use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
- ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
- }
+ }
+ if (reg != IR_REG_NONE) {
+ use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
+ ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
}
}
} else if (reg != IR_REG_NONE) {
@@ -1605,7 +1605,7 @@ static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2)
}
while (*prev && ((*prev)->pos < use_pos->pos ||
((*prev)->pos == use_pos->pos &&
- (use_pos->op_num == 0 || (*prev)->op_num < use_pos->op_num)))) {
+ (use_pos->op_num == 0 || ((*prev)->op_num != 0 && (*prev)->op_num < use_pos->op_num))))) {
if ((*prev)->hint_ref > 0 && ctx->vregs[(*prev)->hint_ref] == r2) {
(*prev)->hint_ref = 0;
}
@@ -1627,9 +1627,6 @@ static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2)
ctx->live_intervals[r1]->flags |=
IR_LIVE_INTERVAL_COALESCED | (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS));
- if (ctx->ir_base[IR_LIVE_POS_TO_REF(ctx->live_intervals[r1]->use_pos->pos)].op != IR_VLOAD) {
- ctx->live_intervals[r1]->flags &= ~IR_LIVE_INTERVAL_MEM_LOAD;
- }
if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
IR_ASSERT(!(ctx->live_intervals[r1]->flags & IR_LIVE_INTERVAL_MEM_PARAM));
ctx->live_intervals[r1]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
@@ -2343,16 +2340,6 @@ static ir_live_pos ir_first_use_pos_after(ir_live_interval *ival, ir_live_pos po
return p ? p->pos : 0x7fffffff;
}
-static ir_live_pos ir_first_use_pos(ir_live_interval *ival, uint8_t flags)
-{
- ir_use_pos *p = ival->use_pos;
-
- while (p && !(p->flags & flags)) {
- p = p->next;
- }
- return p ? p->pos : 0x7fffffff;
-}
-
static ir_block *ir_block_from_live_pos(ir_ctx *ctx, ir_live_pos pos)
{
ir_ref ref = IR_LIVE_POS_TO_REF(pos);
@@ -3194,7 +3181,6 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
/* split current before its first use position that requires a register */
ir_live_pos split_pos;
-spill_current:
if (next_use_pos == ival->range.start) {
IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0);
/* split right after definition */
@@ -3228,7 +3214,6 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
return IR_REG_NONE;
}
if (split_pos >= blockPos[reg]) {
-try_next_available_register:
IR_REGSET_EXCL(available, reg);
if (IR_REGSET_IS_EMPTY(available)) {
fprintf(stderr, "LSRA Internal Error: Unsolvable conflict. Allocation is not possible\n");
@@ -3274,23 +3259,6 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
}
IR_LOG_LSRA(" ---- Finish", other, "");
} else {
- if (ir_first_use_pos(other, IR_USE_MUST_BE_IN_REG) <= other->end) {
- if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
- next_use_pos = ir_first_use_pos(ival, IR_USE_MUST_BE_IN_REG);
- if (next_use_pos == ival->range.start) {
- IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0);
- /* split right after definition */
- split_pos = next_use_pos + 1;
- } else {
- split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1);
- }
-
- if (split_pos > ival->range.start) {
- goto spill_current;
- }
- }
- goto try_next_available_register;
- }
child = other;
other->reg = IR_REG_NONE;
if (prev) {
@@ -3400,12 +3368,13 @@ static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_reg_alloc_data *data)
{
ir_use_pos *use_pos = ival->use_pos;
- ir_insn *insn;
if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
IR_ASSERT(!ival->next && use_pos && use_pos->op_num == 0);
- insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)];
+#if IR_DEBUG
+ ir_insn *insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)];
IR_ASSERT(insn->op == IR_PARAM);
+#endif
use_pos = use_pos->next;
if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) {
return 0;
@@ -3418,38 +3387,6 @@ static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_
}
}
- return 1;
- } else if (ival->flags & IR_LIVE_INTERVAL_MEM_LOAD) {
- insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)];
- IR_ASSERT(insn->op == IR_VLOAD);
- IR_ASSERT(ctx->ir_base[insn->op2].op == IR_VAR);
- use_pos = use_pos->next;
- if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) {
- return 0;
- }
-
- if (use_pos) {
- ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos);
- if (bb->loop_depth && bb != ir_block_from_live_pos(ctx, ival->use_pos->pos)) {
- return 0;
- }
- /* check if VAR may be clobbered between VLOAD and use */
- ir_use_list *use_list = &ctx->use_lists[insn->op2];
- ir_ref n = use_list->count;
- ir_ref *p = &ctx->use_edges[use_list->refs];
- for (; n > 0; p++, n--) {
- ir_ref use = *p;
- if (ctx->ir_base[use].op == IR_VSTORE) {
- if (use > IR_LIVE_POS_TO_REF(ival->use_pos->pos) && use < IR_LIVE_POS_TO_REF(use_pos->pos)) {
- return 0;
- }
- } else if (ctx->ir_base[use].op == IR_VADDR) {
- return 0;
- }
- }
- }
- ival->stack_spill_pos = ctx->ir_base[insn->op2].op3;
-
return 1;
}
return 0;
@@ -3554,7 +3491,7 @@ static int ir_linear_scan(ir_ctx *ctx)
for (j = ctx->vregs_count; j != 0; j--) {
ival = ctx->live_intervals[j];
if (ival) {
- if (!(ival->flags & (IR_LIVE_INTERVAL_MEM_PARAM|IR_LIVE_INTERVAL_MEM_LOAD))
+ if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
|| !ir_ival_spill_for_fuse_load(ctx, ival, &data)) {
ir_add_to_unhandled(&unhandled, ival);
}
diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c
index 5ba986fadd4..dd955172950 100644
--- a/ext/opcache/jit/ir/ir_save.c
+++ b/ext/opcache/jit/ir/ir_save.c
@@ -40,6 +40,11 @@ void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, c
} else if (flags & IR_BUILTIN_FUNC) {
fprintf(f, " __builtin");
}
+ if (flags & IR_CONST_FUNC) {
+ fprintf(f, " __const");
+ } else if (flags & IR_PURE_FUNC) {
+ fprintf(f, " __pure");
+ }
}
static void ir_save_dessa_moves(const ir_ctx *ctx, int b, ir_block *bb, FILE *f)
@@ -109,6 +114,10 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
fprintf(f, "sym(%s%s)",
(save_flags & IR_SAVE_SAFE_NAMES) ? "@" : "",
ir_get_str(ctx, insn->val.name));
+ } else if (insn->op == IR_LABEL) {
+ fprintf(f, "label(%s%s)",
+ (save_flags & IR_SAVE_SAFE_NAMES) ? "@" : "",
+ ir_get_str(ctx, insn->val.name));
} else if (insn->op == IR_FUNC_ADDR) {
fprintf(f, "func *");
ir_print_const(ctx, insn, f, true);
@@ -272,6 +281,13 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
fprintf(f, "%s%d", first ? "(" : ", ", ref);
first = 0;
break;
+ case IR_OPND_LABEL_REF:
+ if (ref) {
+ IR_ASSERT(IR_IS_CONST_REF(ref));
+ fprintf(f, "%sc_%d", first ? "(" : ", ", -ref);
+ first = 0;
+ }
+ break;
}
} else if (opnd_kind == IR_OPND_NUM) {
fprintf(f, "%s%d", first ? "(" : ", ", ref);
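Together the two ir_save.c hunks give label constants a textual form: the constant
prints as label(<name>) (with a leading @ under IR_SAVE_SAFE_NAMES) and an operand of
kind IR_OPND_LABEL_REF prints as c_<n>, n being the negated constant ref. Distilling
the two new fprintf branches into a runnable sketch:

    #include <stdio.h>

    int main(void)
    {
        const char *name = "resume";  /* hypothetical label name */
        int ref = -2, first = 1;      /* constants have negative refs */
        printf("label(%s)\n", name);                    /* label(resume) */
        printf("%sc_%d)\n", first ? "(" : ", ", -ref);  /* (c_2) */
        return 0;
    }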
diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c
index 45df92ec2be..e2f38a058ae 100644
--- a/ext/opcache/jit/ir/ir_sccp.c
+++ b/ext/opcache/jit/ir/ir_sccp.c
@@ -1508,8 +1508,8 @@ static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref)
switch (insn->op) {
case IR_FP2FP:
return 1;
- case IR_INT2FP:
- return ctx->use_lists[ref].count == 1;
+// case IR_INT2FP:
+// return ctx->use_lists[ref].count == 1;
case IR_NEG:
case IR_ABS:
return ctx->use_lists[ref].count == 1 &&
@@ -2110,7 +2110,9 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
&& !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
} else {
- ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
+ ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
+ use_insn = &ctx->ir_base[use];
+ use_insn->op1 = tmp;
}
ir_bitqueue_add(worklist, use);
}
@@ -2119,7 +2121,9 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
&& !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
} else {
- ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
+ ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
+ use_insn = &ctx->ir_base[use];
+ use_insn->op2 = tmp;
}
ir_bitqueue_add(worklist, use);
}
@@ -2147,7 +2151,9 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
&& !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
} else {
- ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
+ ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
+ use_insn = &ctx->ir_base[use];
+ use_insn->op1 = tmp;
}
ir_bitqueue_add(worklist, use);
}
@@ -2156,7 +2162,9 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
&& !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
} else {
- ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
+ ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
+ use_insn = &ctx->ir_base[use];
+ use_insn->op2 = tmp;
}
ir_bitqueue_add(worklist, use);
}
@@ -2178,7 +2186,8 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
&& !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) {
ctx->ir_base[phi_ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type);
} else {
- ctx->ir_base[phi_ref].op2 = ir_ext_ref(ctx, phi_ref, phi_insn->op2, op, type, worklist);
+ ir_ref tmp = ir_ext_ref(ctx, phi_ref, phi_insn->op2, op, type, worklist);
+ ctx->ir_base[phi_ref].op2 = tmp;
}
return 1;
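All four ir_sccp.c hunks above fix the same C pitfall: in an assignment like
ctx->ir_base[use].op2 = ir_ext_ref(...), the two sides are unsequenced, so the
destination address may be computed before the call runs -- and ir_ext_ref() can grow
the instruction buffer and reallocate ctx->ir_base, leaving that address dangling.
Storing the result in a temporary and re-deriving the pointer afterwards is the safe
pattern. A distilled sketch of the hazard:

    #include <stdlib.h>

    typedef struct { int op1, op2; } insn;
    typedef struct { insn *base; int count, cap; } ctx_t;

    /* May grow and realloc c->base, invalidating derived pointers
     * (error handling omitted in this sketch). */
    static int append(ctx_t *c, insn v)
    {
        if (c->count == c->cap) {
            c->cap = c->cap ? c->cap * 2 : 4;
            c->base = realloc(c->base, (size_t)c->cap * sizeof(insn));
        }
        c->base[c->count] = v;
        return c->count++;
    }

    static void fixup(ctx_t *c, int use)
    {
        /* Hazardous: c->base[use].op2 = append(c, (insn){0, 0});
         * &c->base[use].op2 may be evaluated before append() runs.
         * Safe, as in the hunks above: */
        int tmp = append(c, (insn){0, 0});
        c->base[use].op2 = tmp;
    }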
@@ -2251,42 +2260,6 @@ static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *
ir_ref prev, next;
ir_use_list *use_list;
- if (ctx->use_lists[begin].count > 1) {
- ir_ref *p, n, i, use;
- ir_insn *use_insn;
- ir_ref region = end;
- ir_ref next = IR_UNUSED;
-
- while (!IR_IS_BB_START(ctx->ir_base[region].op)) {
- region = ctx->ir_base[region].op1;
- }
-
- use_list = &ctx->use_lists[begin];
- n = use_list->count;
- for (p = &ctx->use_edges[use_list->refs], i = 0; i < n; p++, i++) {
- use = *p;
- use_insn = &ctx->ir_base[use];
- if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) {
- IR_ASSERT(!next);
- next = use;
- } else {
- IR_ASSERT(use_insn->op == IR_VAR);
- IR_ASSERT(use_insn->op1 == begin);
- use_insn->op1 = region;
- if (ir_use_list_add(ctx, region, use)) {
- /* restore after reallocation */
- use_list = &ctx->use_lists[begin];
- n = use_list->count;
- p = &ctx->use_edges[use_list->refs + i];
- }
- }
- }
-
- IR_ASSERT(next);
- ctx->use_edges[use_list->refs] = next;
- use_list->count = 1;
- }
-
IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN);
IR_ASSERT(ctx->ir_base[end].op == IR_END);
IR_ASSERT(ctx->ir_base[begin].op1 == end);
@@ -3595,7 +3568,10 @@ void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
if (!(ctx->flags & IR_OPT_CFG)) {
/* pass */
} else if (insn->op == IR_BEGIN) {
- if (insn->op1 && ctx->ir_base[insn->op1].op == IR_END) {
+ if (insn->op1
+ && !insn->op2 /* no computed goto label */
+ && ctx->use_lists[i].count == 1
+ && ctx->ir_base[insn->op1].op == IR_END) {
ir_merge_blocks(ctx, insn->op1, i, worklist);
}
} else if (insn->op == IR_MERGE) {
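A BEGIN whose op2 carries an IR_LABEL constant is a block whose address has been taken,
so it must keep its identity: merging it into the preceding END would invalidate the
label an IGOTO may later target. The extra use_lists count == 1 guard likewise stops
merging blocks with secondary users, replacing the VAR-rewriting special case deleted
from ir_merge_blocks() above. The whole mechanism exists for GNU C computed gotos; a
minimal example of the construct (GNU extension, GCC/Clang):

    /* &&label yields an IR_LABEL constant; goto * becomes IGOTO. */
    int dispatch(int op)
    {
        static void *table[] = { &&op_add, &&op_ret };
        int acc = 1;
        goto *table[op];
    op_add:                    /* a BEGIN with op2 = label constant */
        acc += 41;
    op_ret:
        return acc;            /* dispatch(0) == 42, dispatch(1) == 1 */
    }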
diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc
index 42e4eee7da0..7f714dd11d2 100644
--- a/ext/opcache/jit/ir/ir_x86.dasc
+++ b/ext/opcache/jit/ir/ir_x86.dasc
@@ -66,7 +66,7 @@ IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_
#define IR_SPILL_POS_TO_OFFSET(offset) \
((ctx->flags & IR_USE_FRAME_POINTER) ? \
- ((offset) - (ctx->stack_frame_size - ctx->stack_frame_alignment)) : \
+ ((offset) - ctx->stack_frame_size) : \
((offset) + ctx->call_stack_size))
|.macro ASM_EXPAND_OP_MEM, MACRO, op, type, op1
@@ -892,6 +892,9 @@ typedef struct _ir_backend_data {
bool double_abs_const;
bool float_abs_const;
bool double_zero_const;
+ bool u2d_const;
+ bool u2f_const;
+ bool resolved_label_syms;
} ir_backend_data;
#define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \
@@ -1087,6 +1090,7 @@ const char *ir_reg_name(int8_t reg, ir_type type)
_(SSE_TRUNC) \
_(SSE_NEARBYINT) \
_(BIT_OP) \
+ _(IGOTO_DUP) \
#define IR_LEA_FIRST IR_LEA_OB
#define IR_LEA_LAST IR_LEA_O_SYM
@@ -1110,35 +1114,24 @@ const char *ir_rule_name[IR_LAST_OP] = {
static bool ir_may_fuse_addr(ir_ctx *ctx, const ir_insn *addr_insn)
{
- if (sizeof(void*) == 4) {
- return 1;
+ if (addr_insn->op == IR_LABEL) {
+ return 0;
} else if (IR_IS_SYM_CONST(addr_insn->op)) {
void *addr = ir_sym_addr(ctx, addr_insn);
if (!addr) {
return 0;
}
- return IR_IS_SIGNED_32BIT((int64_t)(intptr_t)addr);
+ return (sizeof(void*) == 4) || IR_IS_SIGNED_32BIT((int64_t)(intptr_t)addr);
} else {
- return IR_IS_SIGNED_32BIT(addr_insn->val.i64);
+ return (sizeof(void*) == 4) || IR_IS_SIGNED_32BIT(addr_insn->val.i64);
}
}
static bool ir_may_fuse_imm(ir_ctx *ctx, const ir_insn *val_insn)
{
if (val_insn->type == IR_ADDR) {
- if (sizeof(void*) == 4) {
- return 1;
- } else if (IR_IS_SYM_CONST(val_insn->op)) {
- void *addr = ir_sym_addr(ctx, val_insn);
-
- if (!addr) {
- return 0;
- }
- return IR_IS_SIGNED_32BIT((intptr_t)addr);
- } else {
- return IR_IS_SIGNED_32BIT(val_insn->val.i64);
- }
+ return ir_may_fuse_addr(ctx, val_insn);
} else {
return (ir_type_size[val_insn->type] <= 4 || IR_IS_SIGNED_32BIT(val_insn->val.i64));
}
@@ -1517,6 +1510,11 @@ op2_const:
constraints->tmp_regs[0] = IR_TMP_REG(1, ctx->ir_base[insn->op1].type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n = 1;
}
+ if (IR_IS_TYPE_UNSIGNED(ctx->ir_base[insn->op1].type)
+ && ir_type_size[ctx->ir_base[insn->op1].type] >= sizeof(void*)) {
+ constraints->tmp_regs[n] = IR_TMP_REG(2, ctx->ir_base[insn->op1].type, IR_USE_SUB_REF, IR_DEF_SUB_REF);
+ n++;
+ }
break;
case IR_ABS_INT:
flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
@@ -1542,6 +1540,7 @@ op2_const:
case IR_GUARD_NOT:
flags = IR_OP2_SHOULD_BE_IN_REG;
break;
+ case IR_IGOTO:
case IR_IJMP:
flags = IR_OP2_SHOULD_BE_IN_REG;
break;
@@ -1574,7 +1573,7 @@ op2_const:
}
break;
case IR_VA_ARG:
- flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
+ flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_DEF_CONFLICTS_WITH_INPUT_REGS;
constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
n = 1;
insn = &ctx->ir_base[ref];
@@ -1669,7 +1668,9 @@ static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref)
do {
ir_insn *insn = &ctx->ir_base[*p];
- if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) {
+ if (insn->op != IR_LOAD
+ && insn->op != IR_LOAD_v
+ && ((insn->op != IR_STORE && insn->op != IR_STORE_v) || insn->op3 == addr_ref)) {
return;
}
p++;
@@ -1752,7 +1753,7 @@ static bool ir_match_has_mem_deps(ir_ctx *ctx, ir_ref ref, ir_ref root)
do {
ir_insn *insn = &ctx->ir_base[pos];
- if (insn->op == IR_STORE) {
+ if (insn->op == IR_STORE || insn->op == IR_STORE_v || insn->op == IR_VSTORE || insn->op == IR_VSTORE_v) {
// TODO: check if LOAD and STORE addresses may alias
return 1;
} else if (insn->op == IR_CALL) {
@@ -1766,8 +1767,9 @@ static bool ir_match_has_mem_deps(ir_ctx *ctx, ir_ref ref, ir_ref root)
static void ir_match_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root)
{
- if (ir_in_same_block(ctx, ref)
- && ctx->ir_base[ref].op == IR_LOAD) {
+ if (ir_in_same_block(ctx, ref) &&
+ (ctx->ir_base[ref].op == IR_LOAD || ctx->ir_base[ref].op == IR_LOAD_v ||
+ ctx->ir_base[ref].op == IR_VLOAD || ctx->ir_base[ref].op == IR_VLOAD_v)) {
if (ctx->use_lists[ref].count == 2
&& !ir_match_has_mem_deps(ctx, ref, root)) {
ir_ref addr_ref = ctx->ir_base[ref].op2;
@@ -1792,7 +1794,7 @@ static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root)
ir_insn *insn = &ctx->ir_base[ref];
if (ir_in_same_block(ctx, ref)
- && insn->op == IR_LOAD) {
+ && (insn->op == IR_LOAD || insn->op == IR_LOAD_v || insn->op == IR_VLOAD || insn->op == IR_VLOAD_v)) {
if (ctx->use_lists[ref].count == 2
&& !ir_match_has_mem_deps(ctx, ref, root)) {
ir_ref addr_ref = ctx->ir_base[ref].op2;
@@ -1814,8 +1816,6 @@ static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root)
&& ir_get_param_reg(ctx, ref) == IR_REG_NONE) {
return 1;
}
- } else if (ctx->ir_base[ref].op == IR_VLOAD) {
- return 1;
}
return 0;
}
@@ -2462,8 +2462,21 @@ binop_fp:
case IR_IJMP:
ir_match_fuse_load(ctx, insn->op2, ref);
return insn->op;
+ case IR_IGOTO:
+ if (ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) {
+ ir_insn *merge = &ctx->ir_base[insn->op1];
+ ir_ref *p, n = merge->inputs_count;
+
+ for (p = merge->ops + 1; n > 0; p++, n--) {
+ ir_ref input = *p;
+ IR_ASSERT(ctx->ir_base[input].op == IR_END || ctx->ir_base[input].op == IR_LOOP_END);
+ ctx->rules[input] = IR_IGOTO_DUP;
+ }
+ }
+ ir_match_fuse_load(ctx, insn->op2, ref);
+ return insn->op;
case IR_VAR:
- return IR_SKIPPED | IR_VAR;
+ return IR_STATIC_ALLOCA;
case IR_PARAM:
#ifndef _WIN64
if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
@@ -2617,7 +2630,15 @@ store_int:
return IR_VSTORE_FP;
}
break;
+ case IR_VSTORE_v:
+ if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
+ return IR_VSTORE_INT;
+ } else {
+ return IR_VSTORE_FP;
+ }
+ break;
case IR_LOAD:
+ case IR_LOAD_v:
ir_match_fuse_addr(ctx, insn->op2);
if (IR_IS_TYPE_INT(insn->type)) {
return IR_LOAD_INT;
@@ -2635,6 +2656,14 @@ store_int:
return IR_STORE_FP;
}
break;
+ case IR_STORE_v:
+ ir_match_fuse_addr(ctx, insn->op2);
+ if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
+ return IR_STORE_INT;
+ } else {
+ return IR_STORE_FP;
+ }
+ break;
case IR_RLOAD:
if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) {
return IR_SKIPPED | IR_RLOAD;
@@ -3175,7 +3204,7 @@ static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref sr
| xorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
}
} else {
- label = ir_const_label(ctx, src);
+ label = ir_get_const_label(ctx, src);
| ASM_FP_REG_TXT_OP movs, type, reg, [=>label]
}
}
@@ -3229,6 +3258,38 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src)
}
}
+static void ir_resolve_label_syms(ir_ctx *ctx)
+{
+ uint32_t b;
+ ir_block *bb;
+
+ for (b = 1, bb = &ctx->cfg_blocks[b]; b <= ctx->cfg_blocks_count; bb++, b++) {
+ ir_insn *insn = &ctx->ir_base[bb->start];
+
+ if (insn->op == IR_BEGIN && insn->op2) {
+ IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL);
+ ctx->ir_base[insn->op2].val.u32_hi = b;
+ }
+ }
+}
+
+static void ir_emit_load_label_addr(ir_ctx *ctx, ir_reg reg, ir_insn *label)
+{
+ ir_backend_data *data = ctx->data;
+ dasm_State **Dst = &data->dasm_state;
+
+ if (!data->resolved_label_syms) {
+ data->resolved_label_syms = 1;
+ ir_resolve_label_syms(ctx);
+ }
+
+ IR_ASSERT(label->op == IR_LABEL);
+ int b = label->val.u32_hi;
+
+ b = ir_skip_empty_target_blocks(ctx, b);
+ | lea Ra(reg), aword [=>b]
+}
+
static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
if (IR_IS_CONST_REF(src)) {
@@ -3241,9 +3302,11 @@ static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
} else if (insn->op == IR_STR) {
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
- int label = ir_const_label(ctx, src);
+ int label = ir_get_const_label(ctx, src);
| lea Ra(reg), aword [=>label]
+ } else if (insn->op == IR_LABEL) {
+ ir_emit_load_label_addr(ctx, reg, insn);
} else {
ir_emit_load_imm_int(ctx, type, reg, insn->val.i64);
}
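ir_resolve_label_syms() runs lazily, on the first label use during emission, and caches
each referenced label's basic-block number in the upper half of the LABEL constant's
64-bit value (val.u32_hi); the lower half keeps holding the interned name, which is why
the post-emission loops further down can still read val.str after zeroing u32_hi. The
inferred packing -- an assumption, the authoritative layout is whatever ir_val defines:

    #include <stdint.h>

    /* Hypothetical mirror of the ir_val fields used on this code path
     * (little-endian field order assumed). */
    typedef union {
        uint64_t u64;
        struct {
            uint32_t name;     /* interned string id of the label     */
            uint32_t u32_hi;   /* cached block number during emission */
        } parts;
    } label_val;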
@@ -3289,7 +3352,7 @@ static void ir_emit_store_mem_int_const(ir_ctx *ctx, ir_type type, ir_mem mem, i
IR_ASSERT(IR_IS_CONST_REF(src));
if (val_insn->op == IR_STR) {
- int label = ir_const_label(ctx, src);
+ int label = ir_get_const_label(ctx, src);
IR_ASSERT(tmp_reg != IR_REG_NONE);
|.if X64
@@ -3298,6 +3361,11 @@ static void ir_emit_store_mem_int_const(ir_ctx *ctx, ir_type type, ir_mem mem, i
|.else
| ASM_TMEM_TXT_OP mov, aword, mem, =>label
|.endif
+ } else if (val_insn->op == IR_LABEL) {
+ IR_ASSERT(tmp_reg != IR_REG_NONE);
+ tmp_reg = IR_REG_NUM(tmp_reg);
+ ir_emit_load_label_addr(ctx, tmp_reg, val_insn);
+ ir_emit_store_mem_int(ctx, type, mem, tmp_reg);
} else {
int64_t val = val_insn->val.i64;
@@ -3726,7 +3794,8 @@ static ir_mem ir_fuse_load(ir_ctx *ctx, ir_ref root, ir_ref ref)
ir_insn *load_insn = &ctx->ir_base[ref];
ir_reg reg;
- IR_ASSERT(load_insn->op == IR_LOAD);
+ IR_ASSERT(load_insn->op == IR_LOAD || load_insn->op == IR_LOAD_v ||
+ load_insn->op == IR_VLOAD || load_insn->op == IR_VLOAD_v);
if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) {
reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2);
} else {
@@ -3762,9 +3831,11 @@ static void ir_emit_load_ex(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src, i
} else if (insn->op == IR_STR) {
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
- int label = ir_const_label(ctx, src);
+ int label = ir_get_const_label(ctx, src);
| lea Ra(reg), aword [=>label]
+ } else if (insn->op == IR_LABEL) {
+ ir_emit_load_label_addr(ctx, reg, insn);
} else {
ir_emit_load_imm_int(ctx, type, reg, insn->val.i64);
}
@@ -3862,7 +3933,7 @@ static void ir_emit_prologue(ir_ctx *ctx)
if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
- offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size);
+ offset = -(ctx->stack_frame_size - ctx->locals_area_size);
} else {
fp = IR_REG_STACK_POINTER;
offset = ctx->locals_area_size + ctx->call_stack_size;
@@ -5607,7 +5678,7 @@ static void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
break;
}
} else if (IR_IS_CONST_REF(op2)) {
- int label = ir_const_label(ctx, op2);
+ int label = ir_get_const_label(ctx, op2);
switch (insn->op) {
default:
@@ -5714,7 +5785,7 @@ static void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn)
break;
}
} else if (IR_IS_CONST_REF(op2)) {
- int label = ir_const_label(ctx, op2);
+ int label = ir_get_const_label(ctx, op2);
switch (insn->op) {
default:
@@ -6126,7 +6197,7 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_
}
| ASM_FP_REG_REG_OP ucomis, type, op1_reg, op2_reg
} else if (IR_IS_CONST_REF(op2)) {
- int label = ir_const_label(ctx, op2);
+ int label = ir_get_const_label(ctx, op2);
| ASM_FP_REG_TXT_OP ucomis, type, op1_reg, [=>label]
} else {
@@ -6975,7 +7046,7 @@ static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
} else if ((type == IR_FLOAT && value->val.f == 1.0) || (type == IR_DOUBLE && value->val.d == 1.0)) {
| fld1
} else {
- int label = ir_const_label(ctx, insn->op2);
+ int label = ir_get_const_label(ctx, insn->op2);
if (type == IR_DOUBLE) {
| fld qword [=>label]
@@ -7260,7 +7331,20 @@ static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_emit_load(ctx, src_type, op1_reg, insn->op1);
}
if (op1_reg != def_reg) {
+#ifdef IR_TARGET_X86
+ if (ir_type_size[dst_type] == 1
+ && (op1_reg == IR_REG_RBP || op1_reg == IR_REG_RSI || op1_reg == IR_REG_RDI)) {
+ ir_backend_data *data = ctx->data;
+ dasm_State **Dst = &data->dasm_state;
+
+ ir_emit_mov(ctx, src_type, def_reg, op1_reg);
+ | and Rb(def_reg), 0xff
+ } else {
+ ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
+ }
+#else
ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
+#endif
}
} else {
ir_emit_load_ex(ctx, dst_type, def_reg, insn->op1, def);
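The IR_TARGET_X86 special case works around an ISA restriction: on IA-32 only EAX, ECX,
EDX and EBX have addressable low-byte subregisters, while EBP, ESI and EDI do not, so
an 8-bit TRUNC whose source was allocated to one of the latter is materialized as a
full-width mov into the destination plus a low-byte mask. The mask is just the C
truncation made explicit:

    #include <stdint.h>

    /* What "mov + and 0xff" computes when no byte subregister exists. */
    static uint8_t trunc8(uint32_t x)
    {
        return (uint8_t)(x & 0xff);
    }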
@@ -7385,7 +7469,7 @@ static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
} else if (IR_IS_CONST_REF(insn->op1)) {
- int label = ir_const_label(ctx, insn->op1);
+ int label = ir_get_const_label(ctx, insn->op1);
| ASM_FP_REG_TXT_OP movs, dst_type, def_reg, [=>label]
} else {
@@ -7417,13 +7501,80 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
IR_ASSERT(IR_IS_TYPE_INT(src_type));
IR_ASSERT(IR_IS_TYPE_FP(dst_type));
IR_ASSERT(def_reg != IR_REG_NONE);
+
+ if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
+ op1_reg = IR_REG_NUM(op1_reg);
+ ir_emit_load(ctx, src_type, op1_reg, insn->op1);
+ }
+
+ if (IR_IS_TYPE_UNSIGNED(src_type) && ir_type_size[src_type] >= sizeof(void*)) {
+ ir_reg tmp_reg = ctx->regs[def][2];
+
+ IR_ASSERT(tmp_reg != IR_REG_NONE);
+ if (op1_reg == IR_REG_NONE) {
+ if (IR_IS_CONST_REF(insn->op1)) {
+ IR_ASSERT(0);
+ } else {
+ ir_mem mem;
+
+ if (ir_rule(ctx, insn->op1) & IR_FUSED) {
+ mem = ir_fuse_load(ctx, def, insn->op1);
+ } else {
+ mem = ir_ref_spill_slot(ctx, insn->op1);
+ }
+ ir_emit_load_mem_int(ctx, src_type, tmp_reg, mem);
+ op1_reg = tmp_reg;
+ }
+ }
+ if (sizeof(void*) == 4) {
+ if (tmp_reg == op1_reg) {
+ | add Rd(op1_reg), 0x80000000
+ } else {
+ | lea Rd(tmp_reg), dword [Rd(op1_reg)+0x80000000]
+ op1_reg = tmp_reg;
+ }
+ } else {
+|.if X64
+ | test Rq(op1_reg), Rq(op1_reg)
+ | js >1
+ |.cold_code
+ |1:
+ if (tmp_reg != op1_reg) {
+ | mov Rq(tmp_reg), Rq(op1_reg)
+ }
+ | shr Rq(tmp_reg), 1
+ | adc Rq(tmp_reg), 0
+ if (dst_type == IR_DOUBLE) {
+ if (ctx->mflags & IR_X86_AVX) {
+ | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
+ | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(tmp_reg)
+ | vaddsd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
+ } else {
+ | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
+ | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rq(tmp_reg)
+ | addsd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
+ }
+ } else {
+ IR_ASSERT(dst_type == IR_FLOAT);
+ if (ctx->mflags & IR_X86_AVX) {
+ | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
+ | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(tmp_reg)
+ | vaddss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
+ } else {
+ | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
+ | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rq(tmp_reg)
+ | addss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
+ }
+ }
+ | jmp >2
+ |.code
+|.endif
+ }
+ }
+
if (op1_reg != IR_REG_NONE) {
bool src64 = 0;
- if (IR_REG_SPILLED(op1_reg)) {
- op1_reg = IR_REG_NUM(op1_reg);
- ir_emit_load(ctx, src_type, op1_reg, insn->op1);
- }
if (IR_IS_TYPE_SIGNED(src_type)) {
if (ir_type_size[src_type] < 4) {
|.if X64
@@ -7462,7 +7613,6 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|| }
|.endif
} else {
- // TODO: uint64_t -> double
src64 = 1;
}
}
@@ -7508,6 +7658,40 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
|.endif
}
+ |2:
+ if (sizeof(void*) == 4 && IR_IS_TYPE_UNSIGNED(src_type) && ir_type_size[src_type] >= sizeof(void*)) {
+ if (dst_type == IR_DOUBLE) {
+ uint32_t c = (sizeof(void*) == 4) ? 0x41e00000 : 0x43e00000;
+ if (!data->u2d_const) {
+ data->u2d_const = 1;
+ ir_rodata(ctx);
+ |.align 8
+ |->u2d_const:
+ |.dword 0, c
+ |.code
+ }
+ if (ctx->mflags & IR_X86_AVX) {
+ | vaddsd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [->u2d_const]
+ } else {
+ | addsd xmm(def_reg-IR_REG_FP_FIRST), qword [->u2d_const]
+ }
+ } else {
+ uint32_t c = (sizeof(void*) == 4) ? 0x4f000000 : 0x5f000000;
+ if (!data->u2f_const) {
+ data->u2f_const = 1;
+ ir_rodata(ctx);
+ |.align 4
+ |->u2f_const:
+ |.dword c
+ |.code
+ }
+ if (ctx->mflags & IR_X86_AVX) {
+ | vaddss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [->u2f_const]
+ } else {
+ | addss xmm(def_reg-IR_REG_FP_FIRST), dword [->u2f_const]
+ }
+ }
+ }
} else if (IR_IS_CONST_REF(insn->op1)) {
IR_ASSERT(0);
} else {
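The rodata constants complete the unsigned-to-FP conversion: 0x41e00000:00000000
encodes the double 2^31 (0x4f000000 the float 2^31; the 0x43e00000/0x5f000000 variants
are the 2^63 equivalents), added back after the 32-bit path biased its input into
signed range. The x86-64 path above instead handles a u64 with the sign bit set by
halving it with shr/adc, converting as signed, and doubling -- x86 has no unsigned
scalar convert before AVX-512. Both tricks in portable C (two's-complement narrowing
assumed):

    #include <stdint.h>

    /* 32-bit path: bias into signed range, convert, add 2^31 back. */
    static double u32_to_double(uint32_t x)
    {
        int32_t biased = (int32_t)(x + 0x80000000u);  /* add 0x80000000 */
        return (double)biased + 2147483648.0;         /* addsd 2^31     */
    }

    /* 64-bit path: halve with the dropped bit carried back in,
     * convert as signed, then double the result. */
    static double u64_to_double(uint64_t x)
    {
        if ((int64_t)x >= 0) {
            return (double)(int64_t)x;                /* hot path   */
        }
        uint64_t half = (x >> 1) + (x & 1);           /* shr; adc 0 */
        return (double)(int64_t)half * 2.0;           /* addsd self */
    }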
@@ -7625,7 +7809,7 @@ static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|.endif
}
} else if (IR_IS_CONST_REF(insn->op1)) {
- int label = ir_const_label(ctx, insn->op1);
+ int label = ir_get_const_label(ctx, insn->op1);
if (!dst64) {
if (src_type == IR_DOUBLE) {
@@ -7746,7 +7930,7 @@ static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
} else if (IR_IS_CONST_REF(insn->op1)) {
- int label = ir_const_label(ctx, insn->op1);
+ int label = ir_get_const_label(ctx, insn->op1);
if (src_type == IR_DOUBLE) {
if (ctx->mflags & IR_X86_AVX) {
@@ -8429,7 +8613,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
- reg_save_area_offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size);
+ reg_save_area_offset = -(ctx->stack_frame_size - ctx->locals_area_size);
overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size;
} else {
fp = IR_REG_STACK_POINTER;
@@ -8588,11 +8772,11 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
| add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*))
} else {
- int size = (uint32_t)insn->op3 >> 3;
+ int size = IR_VA_ARG_SIZE(insn->op3);
if (def_reg != IR_REG_NONE) {
IR_ASSERT(type == IR_ADDR);
- int align = 1U << (insn->op3 & 0x7);
+ int align = IR_VA_ARG_ALIGN(insn->op3);
if (align > (int)sizeof(void*)) {
| add Ra(tmp_reg), (align-1)
@@ -8604,7 +8788,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
#endif
| mov aword [Ra(op2_reg)+offset], Ra(tmp_reg)
- if (def_reg && IR_REG_SPILLED(ctx->regs[def][0])) {
+ if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, type, def, def_reg);
}
#elif defined(IR_TARGET_X64)
@@ -8632,8 +8816,8 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
if (insn->op3) {
/* long struct argument */
IR_ASSERT(type == IR_ADDR);
- int align = 1U << (insn->op3 & 0x7);
- int size = (uint32_t)insn->op3 >> 3;
+ int align = IR_VA_ARG_ALIGN(insn->op3);
+ int size = IR_VA_ARG_SIZE(insn->op3);
| mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))]
if (align > (int)sizeof(void*)) {
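Judging by the open-coded expressions they replace ((uint32_t)insn->op3 >> 3 and
1U << (insn->op3 & 0x7)), the new macros presumably unpack op3 as log2(alignment) in
the low 3 bits with the byte size above them -- hypothetical definitions, the real
ones live elsewhere in the IR headers:

    #define IR_VA_ARG_SIZE(op3)   ((uint32_t)(op3) >> 3)
    #define IR_VA_ARG_ALIGN(op3)  (1U << ((op3) & 0x7))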
@@ -9701,6 +9885,19 @@ static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_reg op2_reg = ctx->regs[def][2];
if (IR_IS_CONST_REF(insn->op2)) {
+ if (ctx->ir_base[insn->op2].op == IR_LABEL) {
+ if (!data->resolved_label_syms) {
+ data->resolved_label_syms = 1;
+ ir_resolve_label_syms(ctx);
+ }
+
+ uint32_t target = ctx->ir_base[insn->op2].val.u32_hi;
+ target = ir_skip_empty_target_blocks(ctx, target);
+
+ | jmp =>target
+ return;
+ }
+
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) {
@@ -10478,6 +10675,7 @@ static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_re
{
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+ offset = IR_SPILL_POS_TO_OFFSET(offset);
IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE);
if (IR_IS_TYPE_INT(type)) {
@@ -10518,6 +10716,7 @@ static void ir_emit_load_params(ir_ctx *ctx)
const int8_t *int_reg_params = _ir_int_reg_params;
const int8_t *fp_reg_params = _ir_fp_reg_params;
int32_t stack_offset = 0;
+ int32_t stack_start = 0;
#ifdef IR_TARGET_X86
if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
@@ -10529,9 +10728,11 @@ static void ir_emit_load_params(ir_ctx *ctx)
#endif
if (ctx->flags & IR_USE_FRAME_POINTER) {
- stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */
+ /* skip old frame pointer and return address */
+ stack_start = sizeof(void*) * 2 + ctx->stack_frame_size;
} else {
- stack_offset = sizeof(void*) + ctx->stack_frame_size + ctx->call_stack_size; /* skip return address */
+ /* skip return address */
+ stack_start = sizeof(void*) + ctx->stack_frame_size;
}
n = use_list->count;
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
@@ -10573,12 +10774,9 @@ static void ir_emit_load_params(ir_ctx *ctx)
if (ctx->vregs[use]) {
dst_reg = IR_REG_NUM(ctx->regs[use][0]);
IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE ||
- stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos +
- ((ctx->flags & IR_USE_FRAME_POINTER) ?
- -(ctx->stack_frame_size - ctx->stack_frame_alignment) :
- ctx->call_stack_size));
+ stack_start + stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos);
if (src_reg != dst_reg) {
- ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset);
+ ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_start + stack_offset);
}
if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) {
ir_emit_store(ctx, insn->type, use, dst_reg);
@@ -10665,7 +10863,7 @@ static void ir_fix_param_spills(ir_ctx *ctx)
if (ctx->flags & IR_USE_FRAME_POINTER) {
/* skip old frame pointer and return address */
- stack_start = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment);
+ stack_start = sizeof(void*) * 2 + ctx->stack_frame_size;
} else {
/* skip return address */
stack_start = sizeof(void*) + ctx->stack_frame_size;
@@ -10786,6 +10984,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
case IR_MERGE:
case IR_LOOP_BEGIN:
case IR_LOOP_END:
+ case IR_IGOTO_DUP:
break;
#ifndef IR_REG_FP_RET1
case IR_CALL:
@@ -10810,7 +11009,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
IR_REGSET_EXCL(available, reg);
ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
} else if (def_flags & IR_USE_MUST_BE_IN_REG) {
- if (insn->op == IR_VLOAD
+ if ((insn->op == IR_VLOAD || insn->op == IR_VLOAD_v)
&& ctx->live_intervals[ctx->vregs[i]]
&& ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1
&& ir_is_same_mem_var(ctx, i, ctx->ir_base[insn->op2].op3)) {
@@ -10850,7 +11049,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
use = *p;
use_insn = &ctx->ir_base[use];
- if (use_insn->op == IR_VLOAD) {
+ if (use_insn->op == IR_VLOAD || use_insn->op == IR_VLOAD_v) {
if (ctx->vregs[use]
&& !ctx->live_intervals[ctx->vregs[use]]) {
ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
@@ -10861,7 +11060,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
ival->vreg = ctx->vregs[use];
ival->stack_spill_pos = stack_spill_pos;
}
- } else if (use_insn->op == IR_VSTORE) {
+ } else if (use_insn->op == IR_VSTORE || use_insn->op == IR_VSTORE_v) {
if (!IR_IS_CONST_REF(use_insn->op3)
&& ctx->vregs[use_insn->op3]
&& !ctx->live_intervals[ctx->vregs[use_insn->op3]]) {
@@ -11006,7 +11205,6 @@ void ir_fix_stack_frame(ir_ctx *ctx)
ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*));
ctx->stack_frame_size += additional_size;
- ctx->stack_frame_alignment = 0;
ctx->call_stack_size = 0;
if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) {
@@ -11014,12 +11212,10 @@ void ir_fix_stack_frame(ir_ctx *ctx)
if (!(ctx->flags & IR_FUNCTION)) {
while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) {
ctx->stack_frame_size += sizeof(void*);
- ctx->stack_frame_alignment += sizeof(void*);
}
} else if (ctx->flags & IR_USE_FRAME_POINTER) {
while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) {
ctx->stack_frame_size += sizeof(void*);
- ctx->stack_frame_alignment += sizeof(void*);
}
} else {
if (!(ctx->flags & IR_NO_STACK_COMBINE)) {
@@ -11028,7 +11224,6 @@ void ir_fix_stack_frame(ir_ctx *ctx)
while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*), 16) !=
ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*)) {
ctx->stack_frame_size += sizeof(void*);
- ctx->stack_frame_alignment += sizeof(void*);
}
}
}
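With these hunks ctx->stack_frame_alignment is gone entirely: alignment padding now
accumulates straight into stack_frame_size, which is why every
"- ctx->stack_frame_alignment" correction elsewhere in this diff (spill offsets,
prologue, va_start, param spills) simply disappears. The surviving invariant, sketched
under the assumption that IR_ALIGNED_SIZE rounds up to a power-of-two multiple:

    #include <stddef.h>

    #define ALIGNED_SIZE(size, align)  (((size) + (align) - 1) & ~((size_t)(align) - 1))

    /* Frame-pointer case: frame + saved FP + return address must end up
     * 16-byte aligned; the padding lands in the frame size itself. */
    static size_t align_frame(size_t frame_size)
    {
        while (ALIGNED_SIZE(frame_size + 2 * sizeof(void*), 16)
                != frame_size + 2 * sizeof(void*)) {
            frame_size += sizeof(void*);
        }
        return frame_size;
    }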
@@ -11061,6 +11256,8 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
int ret;
void *entry;
size_t size;
+ ir_ref igoto_dup_ref = IR_UNUSED;
+ uint32_t igoto_dup_block = 0;
data.ra_data.unused_slot_4 = 0;
data.ra_data.unused_slot_2 = 0;
@@ -11073,11 +11270,13 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
data.double_abs_const = 0;
data.float_abs_const = 0;
data.double_zero_const = 0;
+ data.u2d_const = 0;
+ data.u2f_const = 0;
+ data.resolved_label_syms = 0;
ctx->data = &data;
if (!ctx->live_intervals) {
ctx->stack_frame_size = 0;
- ctx->stack_frame_alignment = 0;
ctx->call_stack_size = 0;
ctx->used_preserved_regs = 0;
ir_allocate_unique_spill_slots(ctx);
@@ -11099,7 +11298,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
}
ctx->stack_frame_size = ctx->fixed_stack_frame_size;
ctx->call_stack_size = ctx->fixed_call_stack_size;
- ctx->stack_frame_alignment = 0;
}
Dst = &data.dasm_state;
@@ -11420,6 +11618,35 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
case IR_TAILCALL:
ir_emit_tailcall(ctx, i, insn);
break;
+ case IR_IGOTO_DUP:
+ if (bb->flags & IR_BB_DESSA_MOVES) {
+ ir_emit_dessa_moves(ctx, b, bb);
+ }
+ IR_ASSERT(!igoto_dup_ref && !igoto_dup_block);
+ igoto_dup_ref = i;
+ igoto_dup_block = b;
+ b = ctx->cfg_edges[bb->successors];
+ bb = &ctx->cfg_blocks[b];
+ i = bb->start;
+ insn = &ctx->ir_base[i];
+ rule = &ctx->rules[i];
+ break;
+ case IR_IGOTO:
+ if ((ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN)
+ && (ctx->rules[ctx->ir_base[insn->op1].op1] & IR_RULE_MASK) == IR_IGOTO_DUP
+ && igoto_dup_ref) {
+ ir_emit_ijmp(ctx, i, insn);
+ b = igoto_dup_block;
+ bb = &ctx->cfg_blocks[b];
+ i = igoto_dup_ref;
+ insn = &ctx->ir_base[i];
+ rule = &ctx->rules[i];
+ igoto_dup_block = 0;
+ igoto_dup_ref = 0;
+ break;
+ }
+ IR_ASSERT(!igoto_dup_ref && !igoto_dup_block);
+ IR_FALLTHROUGH;
case IR_IJMP:
ir_emit_ijmp(ctx, i, insn);
break;
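IR_IGOTO_DUP implements tail-duplication of an indirect goto: while emitting a
predecessor block whose END was re-tagged by the matcher above, the emitter detours
into the IGOTO block (IR_IGOTO_DUP case), emits the indirect jump inline there
(IR_IGOTO case), and then resumes at the saved position -- so every predecessor ends
in its own jump-through-register instead of funneling into one shared dispatch block.
The classic beneficiary is a direct-threaded interpreter, where separate indirect-branch
sites predict far better than a single shared one (GNU C sketch):

    /* After duplication, each handler ends in its own "goto *". */
    long run(const unsigned char *pc, long acc)
    {
        static void *op[] = { &&op_inc, &&op_dbl, &&op_ret };
        goto *op[*pc++];
    op_inc:
        acc += 1;
        goto *op[*pc++];       /* duplicated dispatch */
    op_dbl:
        acc *= 2;
        goto *op[*pc++];       /* duplicated dispatch */
    op_ret:
        return acc;            /* run("\0\1\2", 3) == 8 */
    }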
@@ -11449,6 +11676,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
ir_emit_vaddr(ctx, i, insn);
break;
case IR_VLOAD:
+ case IR_VLOAD_v:
ir_emit_vload(ctx, i, insn);
break;
case IR_VSTORE_INT:
@@ -11691,6 +11919,28 @@ next_block:;
} while (i != 0);
}
+ if ((ctx->flags2 & IR_HAS_BLOCK_ADDR) && ctx->loader && ctx->loader->add_label) {
+ for (b = 1, bb = &ctx->cfg_blocks[b]; b <= ctx->cfg_blocks_count; bb++, b++) {
+ ir_insn *insn = &ctx->ir_base[bb->start];
+
+ if (insn->op == IR_BEGIN && insn->op2) {
+ IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL);
+ ctx->ir_base[insn->op2].val.u32_hi = 0;
+ ctx->loader->add_label(ctx->loader, ir_get_str(ctx, ctx->ir_base[insn->op2].val.str),
+ (char*)entry + dasm_getpclabel(&data.dasm_state, ir_skip_empty_target_blocks(ctx, b)));
+ }
+ }
+ } else if (data.resolved_label_syms) {
+ for (b = 1, bb = &ctx->cfg_blocks[b]; b <= ctx->cfg_blocks_count; bb++, b++) {
+ ir_insn *insn = &ctx->ir_base[bb->start];
+
+ if (insn->op == IR_BEGIN && insn->op2) {
+ IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL);
+ ctx->ir_base[insn->op2].val.u32_hi = 0;
+ }
+ }
+ }
+
dasm_free(&data.dasm_state);
ir_mem_flush(entry, size);