Commit dd9421d8258 for php.net
commit dd9421d8258a0833258721397bce6f28631e7cf2
Author: Dmitry Stogov <dmitry@php.net>
Date: Tue Feb 10 01:34:09 2026 +0300
Update IR (#21183)
IR commit: a098f9ed6c2f1c2852d6c0921283212aafb4afed
diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c
index 745a66b2163..3476b9bb061 100644
--- a/ext/opcache/jit/ir/ir.c
+++ b/ext/opcache/jit/ir/ir.c
@@ -858,7 +858,7 @@ ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
static ir_ref _ir_fold_cse(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
{
ir_ref ref = ctx->prev_insn_chain[opt & IR_OPT_OP_MASK];
- ir_insn *insn;
+ const ir_insn *insn;
if (ref) {
ir_ref limit = ctx->fold_cse_limit;
@@ -954,7 +954,8 @@ IR_ALWAYS_INLINE ir_ref _ir_fold_cast(ir_ctx *ctx, ir_ref ref, ir_type type)
* ANY and UNUSED ops are represented by 0
*/
-ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn)
+ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3,
+ const ir_insn *op1_insn, const ir_insn *op2_insn, const ir_insn *op3_insn)
{
uint8_t op;
ir_ref ref;
@@ -1136,9 +1137,9 @@ void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val)
ir_insn_set_op(insn, n, val);
}
-ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n)
+ir_ref ir_get_op(const ir_ctx *ctx, ir_ref ref, int32_t n)
{
- ir_insn *insn = &ctx->ir_base[ref];
+ const ir_insn *insn = &ctx->ir_base[ref];
#ifdef IR_DEBUG
if (n > 3) {
@@ -2025,7 +2026,7 @@ static ir_alias ir_check_aliasing(ir_ctx *ctx, ir_ref addr1, ir_ref addr2)
ir_alias ir_check_partial_aliasing(const ir_ctx *ctx, ir_ref addr1, ir_ref addr2, ir_type type1, ir_type type2)
{
- ir_insn *insn1, *insn2;
+ const ir_insn *insn1, *insn2;
ir_ref base1, base2, off1, off2;
/* this must be already check */
@@ -2117,9 +2118,9 @@ ir_alias ir_check_partial_aliasing(const ir_ctx *ctx, ir_ref addr1, ir_ref addr2
return IR_MAY_ALIAS;
}
-IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr, ir_ref limit)
+IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr, ir_ref limit)
{
- ir_insn *insn;
+ const ir_insn *insn;
uint32_t modified_regset = 0;
while (ref > limit) {
@@ -2159,7 +2160,7 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(ir_ctx *ctx, ir_ref ref, ir_type
} else if (insn->op == IR_RSTORE) {
modified_regset |= (1 << insn->op3);
} else if (insn->op == IR_CALL) {
- ir_insn *func = &ctx->ir_base[insn->op2];
+ const ir_insn *func = &ctx->ir_base[insn->op2];
ir_ref func_proto;
const ir_proto_t *proto;
@@ -2186,14 +2187,14 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(ir_ctx *ctx, ir_ref ref, ir_type
return IR_UNUSED;
}
-ir_ref ir_find_aliasing_load(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr)
+ir_ref ir_find_aliasing_load(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr)
{
return ir_find_aliasing_load_i(ctx, ref, type, addr, (addr > 0 && addr < ref) ? addr : 1);
}
-IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
+IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
{
- ir_insn *insn;
+ const ir_insn *insn;
while (ref > var) {
insn = &ctx->ir_base[ref];
@@ -2224,7 +2225,7 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ
}
}
} else if (insn->op == IR_CALL) {
- ir_insn *func = &ctx->ir_base[insn->op2];
+ const ir_insn *func = &ctx->ir_base[insn->op2];
ir_ref func_proto;
const ir_proto_t *proto;
@@ -2251,7 +2252,7 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ
return IR_UNUSED;
}
-ir_ref ir_find_aliasing_vload(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
+ir_ref ir_find_aliasing_vload(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
{
return ir_find_aliasing_vload_i(ctx, ref, type, var);
}
@@ -2547,12 +2548,12 @@ void _ir_BEGIN(ir_ctx *ctx, ir_ref src)
}
}
-static ir_ref _ir_fold_condition(ir_ctx *ctx, ir_ref ref)
+static ir_ref _ir_fold_condition(const ir_ctx *ctx, ir_ref ref)
{
- ir_insn *insn = &ctx->ir_base[ref];
+ const ir_insn *insn = &ctx->ir_base[ref];
if (insn->op == IR_NE && IR_IS_CONST_REF(insn->op2)) {
- ir_insn *op2_insn = &ctx->ir_base[insn->op2];
+ const ir_insn *op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_TYPE_INT(op2_insn->type) && op2_insn->val.u64 == 0) {
ref = insn->op1;
@@ -2565,7 +2566,7 @@ static ir_ref _ir_fold_condition(ir_ctx *ctx, ir_ref ref)
ref = insn->op1;
insn = &ctx->ir_base[ref];
} else if (insn->op == IR_EQ && insn->op2 == IR_NULL) {
- ir_insn *op1_insn = &ctx->ir_base[insn->op1];
+ const ir_insn *op1_insn = &ctx->ir_base[insn->op1];
if (op1_insn->op == IR_ALLOCA || op1_insn->op == IR_VADDR) {
return IR_FALSE;
}
@@ -2577,10 +2578,10 @@ static ir_ref _ir_fold_condition(ir_ctx *ctx, ir_ref ref)
return ref;
}
-IR_ALWAYS_INLINE ir_ref ir_check_dominating_predicates_i(ir_ctx *ctx, ir_ref ref, ir_ref condition, ir_ref limit)
+IR_ALWAYS_INLINE ir_ref ir_check_dominating_predicates_i(const ir_ctx *ctx, ir_ref ref, ir_ref condition, ir_ref limit)
{
- ir_insn *prev = NULL;
- ir_insn *insn;
+ const ir_insn *prev = NULL;
+ const ir_insn *insn;
while (ref > limit) {
insn = &ctx->ir_base[ref];
@@ -2610,7 +2611,7 @@ IR_ALWAYS_INLINE ir_ref ir_check_dominating_predicates_i(ir_ctx *ctx, ir_ref ref
return condition;
}
-ir_ref ir_check_dominating_predicates(ir_ctx *ctx, ir_ref ref, ir_ref condition)
+ir_ref ir_check_dominating_predicates(const ir_ctx *ctx, ir_ref ref, ir_ref condition)
{
IR_ASSERT(!IR_IS_CONST_REF(condition));
return ir_check_dominating_predicates_i(ctx, ref, condition, (condition < ref) ? condition : 1);
@@ -2751,7 +2752,7 @@ void _ir_MERGE_LIST(ir_ctx *ctx, ir_ref list)
/* count inputs count */
do {
- ir_insn *insn = &ctx->ir_base[ref];
+ const ir_insn *insn = &ctx->ir_base[ref];
IR_ASSERT(insn->op == IR_END);
ref = insn->op2;
@@ -2781,8 +2782,10 @@ void _ir_MERGE_LIST(ir_ctx *ctx, ir_ref list)
ir_ref _ir_PHI_LIST(ir_ctx *ctx, ir_ref list)
{
- ir_insn *merge, *end;
- ir_ref phi, *ops, i;
+ const ir_insn *merge;
+ const ir_ref *ops;
+ ir_insn *end;
+ ir_ref phi, i;
ir_type type;
if (list == IR_UNUSED) {
@@ -3246,7 +3249,8 @@ ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var)
if (EXPECTED(ctx->flags & IR_OPT_FOLDING)) {
ref = ir_find_aliasing_vload_i(ctx, ctx->control, type, var);
if (ref) {
- ir_insn *insn = &ctx->ir_base[ref];
+ const ir_insn *insn = &ctx->ir_base[ref];
+
if (insn->type == type) {
return ref;
} else if (ir_type_size[insn->type] == ir_type_size[type]) {
@@ -3312,7 +3316,8 @@ ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr)
}
ref = ir_find_aliasing_load_i(ctx, ctx->control, type, addr, (addr > 0) ? addr : 1);
if (ref) {
- ir_insn *insn = &ctx->ir_base[ref];
+ const ir_insn *insn = &ctx->ir_base[ref];
+
if (insn->type == type) {
return ref;
} else if (ir_type_size[insn->type] == ir_type_size[type]) {
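Almost all of the ir.c hunks above only add const qualifiers to helpers that read the IR without mutating it (CSE lookup, aliasing load/vload search, condition folding, dominating-predicate checks), together with const ir_insn pointers inside them. A minimal standalone sketch of the same pattern, using made-up types rather than the real ir_ctx layout:

    #include <stddef.h>

    typedef struct { int op; int op1, op2; } node;
    typedef struct { node *base; size_t count; } graph;

    /* Read-only lookup: taking `const graph *` documents that the walk cannot
     * modify the IR and lets const contexts be passed straight through. */
    static int find_same_op(const graph *g, int op, size_t start)
    {
        size_t i;

        for (i = start; i < g->count; i++) {
            const node *n = &g->base[i];   /* const propagates to the insn pointer */

            if (n->op == op) {
                return (int)i;
            }
        }
        return -1;
    }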
diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h
index a274ceb5b16..b0a96b511bd 100644
--- a/ext/opcache/jit/ir/ir.h
+++ b/ext/opcache/jit/ir/ir.h
@@ -569,8 +569,6 @@ void ir_strtab_free(ir_strtab *strtab);
#define IR_OPT_CFG (1<<21) /* merge BBs, by remove END->BEGIN nodes during CFG construction */
#define IR_OPT_MEM2SSA (1<<22)
#define IR_OPT_CODEGEN (1<<23)
-#define IR_GEN_NATIVE (1<<24)
-#define IR_GEN_CODE (1<<25)
/* debug related */
#ifdef IR_DEBUG
@@ -771,7 +769,7 @@ ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count);
void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val);
-ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n);
+ir_ref ir_get_op(const ir_ctx *ctx, ir_ref ref, int32_t n);
IR_ALWAYS_INLINE void ir_set_op1(ir_ctx *ctx, ir_ref ref, ir_ref val)
{
@@ -865,13 +863,13 @@ int ir_reg_alloc(ir_ctx *ctx);
int ir_regs_number(void);
bool ir_reg_is_int(int32_t reg);
const char *ir_reg_name(int8_t reg, ir_type type);
-int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref);
+int32_t ir_get_spill_slot_offset(const ir_ctx *ctx, ir_ref ref);
/* Target CPU instruction selection and code generation (see ir_x86.c) */
int ir_match(ir_ctx *ctx);
void *ir_emit_code(ir_ctx *ctx, size_t *size);
-bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr);
+bool ir_needs_thunk(const ir_code_buffer *code_buffer, void *addr);
void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr);
void ir_fix_thunk(void *thunk_entry, void *addr);
@@ -947,13 +945,14 @@ int ir_load_llvm_asm(ir_loader *loader, const char *filename);
#define IR_SAVE_REGS (1<<4) /* add info about selected registers */
#define IR_SAVE_SAFE_NAMES (1<<5) /* add '@' prefix to symbol names */
+void ir_print_func_proto(const ir_ctx *ctx, const char *name, bool prefix, FILE *f);
void ir_print_proto(const ir_ctx *ctx, ir_ref proto, FILE *f);
void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f);
void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f);
/* IR debug dump API (implementation in ir_dump.c) */
void ir_dump(const ir_ctx *ctx, FILE *f);
-void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f);
+void ir_dump_dot(const ir_ctx *ctx, const char *name, const char *comments, FILE *f);
void ir_dump_use_lists(const ir_ctx *ctx, FILE *f);
void ir_dump_cfg(ir_ctx *ctx, FILE *f);
void ir_dump_cfg_map(const ir_ctx *ctx, FILE *f);
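ir.h also gains ir_print_func_proto() and an extra comments argument on ir_dump_dot(), while the IR_GEN_NATIVE/IR_GEN_CODE flags are dropped. A hedged usage sketch for the new ir_dump_dot() signature (the wrapper name is invented; passing NULL is fine because the ir_dump.c hunk below prints the comment only when it is non-NULL):

    #include <stdio.h>
    #include "ir.h"   /* assumed include path for the declarations above */

    /* Hypothetical debug helper, not part of this commit. */
    static void debug_dump(const ir_ctx *ctx, FILE *f)
    {
        ir_dump_dot(ctx, "my_func", "after folding", f); /* label gets " # after folding" */
        ir_dump_dot(ctx, "my_func", NULL, f);            /* NULL keeps the bare label     */
    }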
diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
index 88996cb6f98..5a6718b77c1 100644
--- a/ext/opcache/jit/ir/ir_aarch64.dasc
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -60,7 +60,7 @@ IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_
#define ADR_IMM (1<<20) // signed imm21
#define ADRP_IMM (1LL<<32) // signed imm21 * 4096
-static bool aarch64_may_use_b(ir_code_buffer *code_buffer, const void *addr)
+static bool aarch64_may_use_b(const ir_code_buffer *code_buffer, const void *addr)
{
if (code_buffer) {
if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
@@ -824,6 +824,34 @@ static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type)
}
}
+static bool all_usages_are_fusable(ir_ctx *ctx, ir_ref ref)
+{
+ ir_insn *insn = &ctx->ir_base[ref];
+
+ if (insn->op >= IR_EQ && insn->op <= IR_UNORDERED) {
+ ir_use_list *use_list = &ctx->use_lists[ref];
+ ir_ref n = use_list->count;
+
+ if (n > 0) {
+ ir_ref *p = ctx->use_edges + use_list->refs;
+
+ do {
+ insn = &ctx->ir_base[*p];
+ if (insn->op != IR_IF
+ && insn->op != IR_GUARD
+ && insn->op != IR_GUARD_NOT) {
+ return 0;
+ }
+ p++;
+ n--;
+ } while (n);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
{
ir_insn *op2_insn;
@@ -1145,7 +1173,7 @@ binop_fp:
return IR_RETURN_FP;
}
case IR_IF:
- if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+ if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
@@ -1168,13 +1196,13 @@ binop_fp:
}
case IR_GUARD:
case IR_GUARD_NOT:
- if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+ if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
op2_insn = &ctx->ir_base[insn->op2];
- if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED
+ if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
// TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
- && (insn->op2 == ref - 1 ||
- (insn->op2 == ctx->prev_ref[ref] - 1
- && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
+//??? && (insn->op2 == ref - 1 ||
+//??? (insn->op2 == ctx->prev_ref[ref] - 1
+//??? && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
return IR_GUARD_CMP_INT;
@@ -3084,7 +3112,7 @@ static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
-static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn)
+static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_insn *cmp_insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
@@ -3093,16 +3121,12 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_ins
ir_ref op1, op2;
ir_reg op1_reg, op2_reg;
- if (op == IR_LT || op == IR_LE) {
- /* swap operands to avoid P flag check */
- op ^= 3;
- op1 = cmp_insn->op2;
- op2 = cmp_insn->op1;
- op1_reg = ctx->regs[cmp_ref][2];
- op2_reg = ctx->regs[cmp_ref][1];
+ op1 = cmp_insn->op1;
+ op2 = cmp_insn->op2;
+ if (UNEXPECTED(ctx->rules[cmp_ref] & IR_FUSED_REG)) {
+ op1_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 1);
+ op2_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 2);
} else {
- op1 = cmp_insn->op1;
- op2 = cmp_insn->op2;
op1_reg = ctx->regs[cmp_ref][1];
op2_reg = ctx->regs[cmp_ref][2];
}
@@ -3131,7 +3155,7 @@ static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
- ir_op op = ir_emit_cmp_fp_common(ctx, def, insn);
+ ir_op op = ir_emit_cmp_fp_common(ctx, def, def, insn);
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
//??? ir_reg tmp_reg = ctx->regs[def][3]; // TODO: take into account vs flag
@@ -3348,8 +3372,15 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i
ir_type type = ctx->ir_base[cmp_insn->op1].type;
ir_ref op1 = cmp_insn->op1;
ir_ref op2 = cmp_insn->op2;
- ir_reg op1_reg = ctx->regs[insn->op2][1];
- ir_reg op2_reg = ctx->regs[insn->op2][2];
+ ir_reg op1_reg, op2_reg;
+
+ if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
+ op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
+ op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
+ } else {
+ op1_reg = ctx->regs[insn->op2][1];
+ op2_reg = ctx->regs[insn->op2][2];
+ }
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
@@ -3390,7 +3421,7 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i
static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
{
- ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
+ ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 0);
}
@@ -3459,14 +3490,14 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
op3_reg = op2_reg;
}
}
- if (op3 != op2 && IR_REG_SPILLED(op3_reg)) {
+ if (IR_REG_SPILLED(op3_reg)) {
op3_reg = IR_REG_NUM(op3_reg);
ir_emit_load(ctx, type, op3_reg, op3);
- if (op1 == op2) {
+ if (op1 == op3) {
op1_reg = op3_reg;
}
}
- if (op1 != op2 && op1 != op3 && IR_REG_SPILLED(op1_reg)) {
+ if (IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
ir_emit_load(ctx, op1_type, op1_reg, op1);
}
@@ -5682,9 +5713,16 @@ static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
ir_type type = ctx->ir_base[cmp_insn->op1].type;
ir_ref op1 = cmp_insn->op1;
ir_ref op2 = cmp_insn->op2;
- ir_reg op1_reg = ctx->regs[insn->op2][1];
- ir_reg op2_reg = ctx->regs[insn->op2][2];
void *addr;
+ ir_reg op1_reg, op2_reg;
+
+ if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
+ op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
+ op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
+ } else {
+ op1_reg = ctx->regs[insn->op2][1];
+ op2_reg = ctx->regs[insn->op2][2];
+ }
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
@@ -5738,7 +5776,7 @@ static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
- ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
+ ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]);
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
if (insn->op == IR_GUARD) {
@@ -7143,7 +7181,7 @@ static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, ui
return n;
}
-bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr)
+bool ir_needs_thunk(const ir_code_buffer *code_buffer, void *addr)
{
return !aarch64_may_use_b(code_buffer, addr);
}
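The aarch64 changes above relax compare fusion: a comparison used by several IF/GUARD/GUARD_NOT nodes may now be fused into each of them (all_usages_are_fusable), so the branch emitters fetch per-user operand registers through ir_get_fused_reg() whenever IR_FUSED_REG is set on the compare. A rough standalone model of the use-list check, with simplified types standing in for the real ir_ctx:

    #include <stdbool.h>

    enum op { OP_CMP, OP_IF, OP_GUARD, OP_GUARD_NOT, OP_OTHER };

    typedef struct {
        const enum op *user_ops;   /* op of every user of the compare */
        int            user_count;
    } uses;

    /* Fusing is only safe if every user is a branch-like consumer, because
     * each of them will emit the compare itself right before its branch. */
    static bool all_users_are_branches(const uses *u)
    {
        int i;

        if (u->user_count == 0) {
            return false;
        }
        for (i = 0; i < u->user_count; i++) {
            enum op op = u->user_ops[i];

            if (op != OP_IF && op != OP_GUARD && op != OP_GUARD_NOT) {
                return false;
            }
        }
        return true;
    }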
diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c
index 46755067b24..bd314dcedb1 100644
--- a/ext/opcache/jit/ir/ir_cfg.c
+++ b/ext/opcache/jit/ir/ir_cfg.c
@@ -77,12 +77,86 @@ void ir_reset_cfg(ir_ctx *ctx)
}
}
+static void ir_remove_phis_inputs(ir_ctx *ctx, ir_use_list *use_list, int new_inputs_count, ir_bitset life_inputs)
+{
+ ir_ref i, j, n, k, *p, *q, use;
+ ir_insn *use_insn;
+
+ if (new_inputs_count == 1) {
+ for (k = use_list->count, p = q = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
+ use = *p;
+ use_insn = &ctx->ir_base[use];
+ if (use_insn->op == IR_PHI) {
+ /* Convert PHI to COPY */
+ n = use_insn->inputs_count;
+ i = 2;
+ for (j = 2; j <= n; j++) {
+ ir_ref input = ir_insn_op(use_insn, j);
+
+ if (ir_bitset_in(life_inputs, j - 1)) {
+ use_insn->op1 = ir_insn_op(use_insn, j);
+ } else if (input > 0) {
+ ir_use_list_remove_one(ctx, input, use);
+ }
+ }
+ use_insn->op = IR_COPY;
+ use_insn->inputs_count = 1;
+ for (j = 2; j <= n; j++) {
+ ir_insn_set_op(use_insn, j, IR_UNUSED);
+ }
+ continue;
+ }
+
+ /*compact use list */
+ if (p != q){
+ *q = use;
+ }
+ q++;
+ }
+
+ if (p != q) {
+ use_list->count -= (p - q);
+ do {
+ *q = IR_UNUSED; /* clean-up the removed tail */
+ q++;
+ } while (p != q);
+ }
+ } else {
+ for (k = use_list->count, p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
+ use = *p;
+ use_insn = &ctx->ir_base[use];
+ if (use_insn->op == IR_PHI) {
+ n = use_insn->inputs_count;
+ i = 2;
+ for (j = 2; j <= n; j++) {
+ ir_ref input = ir_insn_op(use_insn, j);
+
+ if (ir_bitset_in(life_inputs, j - 1)) {
+ IR_ASSERT(input);
+ if (i != j) {
+ ir_insn_set_op(use_insn, i, input);
+ }
+ i++;
+ } else if (input > 0) {
+ ir_use_list_remove_one(ctx, input, use);
+ }
+ }
+ use_insn->inputs_count = i - 1;
+ for (j = i; j <= n; j++) {
+ ir_insn_set_op(use_insn, j, IR_UNUSED);
+ }
+ }
+ }
+ }
+}
+
static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t *_blocks, ir_block *blocks, uint32_t bb_count)
{
uint32_t b, count = 0;
ir_block *bb = blocks + 1;
ir_insn *insn;
ir_ref i, j, n, *ops, input;
+ ir_bitset life_inputs = NULL;
for (b = 1; b <= bb_count; b++, bb++) {
bb->successors = count;
@@ -96,12 +170,27 @@ static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t
for (i = 1, j = 1; i <= n; i++) {
input = ops[i];
if (_blocks[input]) {
+ if (life_inputs) {
+ ir_bitset_incl(life_inputs, i);
+ }
if (i != j) {
ops[j] = ops[i];
}
j++;
- } else if (input > 0) {
- ir_use_list_remove_one(ctx, input, bb->start);
+ } else {
+ if (ctx->use_lists[bb->start].count > 1) {
+ /* Some inputs of this MERGE are deleted and we have to update the dependent PHIs */
+ if (!life_inputs) {
+ int k;
+ life_inputs = ir_bitset_malloc(n + 1);
+ for (k = 1; k < i; k++) {
+ ir_bitset_incl(life_inputs, k);
+ }
+ }
+ }
+ if (input > 0) {
+ ir_use_list_remove_one(ctx, input, bb->start);
+ }
}
}
j--;
@@ -115,6 +204,10 @@ static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t
for (;j <= n; j++) {
ops[j] = IR_UNUSED;
}
+ if (life_inputs) {
+ ir_remove_phis_inputs(ctx, &ctx->use_lists[bb->start], insn->inputs_count, life_inputs);
+ ir_mem_free(life_inputs);
+ }
}
}
count += bb->predecessors_count;
@@ -375,8 +468,7 @@ static void ir_remove_predecessor(ir_ctx *ctx, ir_block *bb, uint32_t from)
static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
{
- ir_ref i, j, n, k, *p, *q, use;
- ir_insn *use_insn;
+ ir_ref i, j, n;
ir_use_list *use_list;
ir_bitset life_inputs;
ir_insn *insn = &ctx->ir_base[merge];
@@ -402,80 +494,14 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
}
if (i == 1) {
insn->op = IR_BEGIN;
- insn->inputs_count = 1;
- use_list = &ctx->use_lists[merge];
- if (use_list->count > 1) {
- n++;
- for (k = use_list->count, p = q = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
- use = *p;
- use_insn = &ctx->ir_base[use];
- if (use_insn->op == IR_PHI) {
- /* Convert PHI to COPY */
- i = 2;
- for (j = 2; j <= n; j++) {
- ir_ref input = ir_insn_op(use_insn, j);
-
- if (ir_bitset_in(life_inputs, j - 1)) {
- use_insn->op1 = ir_insn_op(use_insn, j);
- } else if (input > 0) {
- ir_use_list_remove_one(ctx, input, use);
- }
- }
- use_insn->op = IR_COPY;
- use_insn->inputs_count = 1;
- for (j = 2; j <= n; j++) {
- ir_insn_set_op(use_insn, j, IR_UNUSED);
- }
- continue;
- }
-
- /*compact use list */
- if (p != q){
- *q = use;
- }
- q++;
- }
-
- if (p != q) {
- use_list->count -= (p - q);
- do {
- *q = IR_UNUSED; /* clenu-op the removed tail */
- q++;
- } while (p != q);
- }
- }
- } else {
- insn->inputs_count = i;
+ }
+ insn->inputs_count = i;
- use_list = &ctx->use_lists[merge];
- if (use_list->count > 1) {
- n++;
- for (k = use_list->count, p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
- use = *p;
- use_insn = &ctx->ir_base[use];
- if (use_insn->op == IR_PHI) {
- i = 2;
- for (j = 2; j <= n; j++) {
- ir_ref input = ir_insn_op(use_insn, j);
-
- if (ir_bitset_in(life_inputs, j - 1)) {
- IR_ASSERT(input);
- if (i != j) {
- ir_insn_set_op(use_insn, i, input);
- }
- i++;
- } else if (input > 0) {
- ir_use_list_remove_one(ctx, input, use);
- }
- }
- use_insn->inputs_count = i - 1;
- for (j = i; j <= n; j++) {
- ir_insn_set_op(use_insn, j, IR_UNUSED);
- }
- }
- }
- }
+ use_list = &ctx->use_lists[merge];
+ if (use_list->count > 1) {
+ ir_remove_phis_inputs(ctx, use_list, i, life_inputs);
}
+
ir_mem_free(life_inputs);
ir_use_list_remove_all(ctx, from, merge);
}
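ir_cfg.c factors the PHI fix-up loop out of ir_remove_merge_input() into the new ir_remove_phis_inputs(), so the same code also runs when ir_cfg_remove_dead_inputs() drops unreachable MERGE inputs: surviving operands are compacted according to a bitset of live inputs, and a MERGE left with a single predecessor converts its PHIs into one-operand COPYs. The compaction step itself boils down to the following, shown on a plain array (hypothetical helper, 0 standing in for IR_UNUSED):

    #include <assert.h>
    #include <stdbool.h>

    /* Keep only the PHI operands whose MERGE input survived, shift them left,
     * clear the tail and return the new operand count. */
    static int compact_phi_inputs(int *ops, int n, const bool *alive)
    {
        int i, j = 0;

        for (i = 0; i < n; i++) {
            if (alive[i]) {
                ops[j++] = ops[i];
            }
        }
        for (i = j; i < n; i++) {
            ops[i] = 0;               /* IR_UNUSED */
        }
        return j;
    }

    int main(void)
    {
        int  ops[4]   = {10, 20, 30, 40};
        bool alive[4] = {true, false, true, false};

        assert(compact_phi_inputs(ops, 4, alive) == 2);
        assert(ops[0] == 10 && ops[1] == 30 && ops[2] == 0 && ops[3] == 0);
        return 0;
    }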
diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c
index 92962313d99..037003f021a 100644
--- a/ext/opcache/jit/ir/ir_dump.c
+++ b/ext/opcache/jit/ir/ir_dump.c
@@ -60,7 +60,7 @@ void ir_dump(const ir_ctx *ctx, FILE *f)
}
}
-void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f)
+void ir_dump_dot(const ir_ctx *ctx, const char *name, const char *comments, FILE *f)
{
int DATA_WEIGHT = 0;
int CONTROL_WEIGHT = 5;
@@ -70,6 +70,13 @@ void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f)
uint32_t flags;
fprintf(f, "digraph %s {\n", name);
+ fprintf(f, "\tlabelloc=t;\n");
+ fprintf(f, "\tlabel=\"");
+ ir_print_func_proto(ctx, name, 0, f);
+ if (comments) {
+ fprintf(f, " # %s", comments);
+ }
+ fprintf(f, "\"\n");
fprintf(f, "\trankdir=TB;\n");
for (i = 1 - ctx->consts_count, insn = ctx->ir_base + i; i < IR_UNUSED; i++, insn++) {
fprintf(f, "\tc%d [label=\"C%d: CONST %s(", -i, -i, ir_type_name[insn->type]);
diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c
index a6dfde77f57..1cadb099bce 100644
--- a/ext/opcache/jit/ir/ir_emit.c
+++ b/ext/opcache/jit/ir/ir_emit.c
@@ -971,7 +971,7 @@ int ir_match(ir_ctx *ctx)
return 1;
}
-int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref)
+int32_t ir_get_spill_slot_offset(const ir_ctx *ctx, ir_ref ref)
{
int32_t offset;
diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h
index bab6b291607..136bbb0e08e 100644
--- a/ext/opcache/jit/ir/ir_fold.h
+++ b/ext/opcache/jit/ir/ir_fold.h
@@ -3439,5 +3439,84 @@ IR_FOLD(COND(_, _)) // TODO: COND(_, _, _)
if (op2 == op3) {
IR_FOLD_COPY(op2);
}
+
+ if (op1_insn->type == IR_BOOL) {
+ if (op2 == IR_TRUE) {
+ if (op3 == IR_FALSE) {
+ /* a ? true : false => a */
+ IR_FOLD_COPY(op1);
+ } else {
+ /* a ? true : b => a | b */
+ opt = IR_OPT(IR_OR, IR_BOOL);
+ op2 = op3;
+ op3 = IR_UNUSED;
+ IR_FOLD_RESTART;
+ }
+ } else if (op3 == IR_FALSE) {
+ /* a ? b : false => a & b */
+ opt = IR_OPT(IR_AND, IR_BOOL);
+ op3 = IR_UNUSED;
+ IR_FOLD_RESTART;
+ } else if (op2 == IR_FALSE) {
+ if (op3 == IR_TRUE) {
+ /* a ? false : true => !a */
+ opt = IR_OPT(IR_NOT, IR_BOOL);
+ op2 = IR_UNUSED;
+ op3 = IR_UNUSED;
+ IR_FOLD_RESTART;
+ }
+ } else if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))
+ && IR_IS_CONST_REF(op2)
+ && IR_IS_CONST_REF(op3)
+ && op2_insn->val.u64 == 1
+ && op3_insn->val.u64 == 0) {
+ if (ir_type_size[IR_OPT_TYPE(opt)] > 1) {
+ /* a ? 1 : 0 => ZEXT(a) */
+ opt = IR_OPT(IR_ZEXT, IR_OPT_TYPE(opt));
+ } else {
+ /* a ? 1 : 0 => BITCAST(a) */
+ opt = IR_OPT(IR_BITCAST, IR_OPT_TYPE(opt));
+ }
+ op2 = IR_UNUSED;
+ op3 = IR_UNUSED;
+ IR_FOLD_RESTART;
+ }
+ } else if (IR_IS_TYPE_INT(op1_insn->type)) {
+ if (op2 == IR_TRUE) {
+ if (op3 == IR_FALSE) {
+ opt = IR_OPT(IR_NE, IR_BOOL);
+ val.u64 = 0;
+ op2 = ir_const(ctx, val, op1_insn->type);
+ op3 = IR_UNUSED;
+ IR_FOLD_RESTART;
+ }
+ } else if (op2 == IR_FALSE) {
+ if (op3 == IR_TRUE) {
+ opt = IR_OPT(IR_EQ, IR_BOOL);
+ val.u64 = 0;
+ op2 = ir_const(ctx, val, op1_insn->type);
+ op3 = IR_UNUSED;
+ IR_FOLD_RESTART;
+ }
+ }
+ }
+
+ if (op1_insn->op == IR_NE) {
+ if (IR_IS_CONST_REF(op1_insn->op2)
+ && IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op2].type)
+ && ctx->ir_base[op1_insn->op2].val.u64 == 0) {
+ op1 = op1_insn->op1;
+ IR_FOLD_RESTART;
+ }
+ } else if (op1_insn->op == IR_EQ) {
+ if (IR_IS_CONST_REF(op1_insn->op2)
+ && IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op2].type)
+ && ctx->ir_base[op1_insn->op2].val.u64 == 0) {
+ op1 = op1_insn->op1;
+ SWAP_REFS(op2, op3);
+ IR_FOLD_RESTART;
+ }
+ }
+
IR_FOLD_NEXT;
}
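The new COND folding rules above rewrite selects with boolean or constant arms into cheaper operations (OR, AND, NOT, ZEXT/BITCAST, or an EQ/NE against zero for integer conditions). The boolean equivalences are easy to sanity-check in plain C; a small self-contained check with ordinary ints standing in for IR booleans:

    #include <assert.h>

    int main(void)
    {
        int a, b;

        for (a = 0; a <= 1; a++) {
            for (b = 0; b <= 1; b++) {
                assert((a ? 1 : 0) == a);         /* a ? true  : false => a     */
                assert((a ? 1 : b) == (a | b));   /* a ? true  : b     => a | b */
                assert((a ? b : 0) == (a & b));   /* a ? b     : false => a & b */
                assert((a ? 0 : 1) == !a);        /* a ? false : true  => !a    */
            }
        }
        return 0;
    }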
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
index 67c97611eaa..c644c188dca 100644
--- a/ext/opcache/jit/ir/ir_gcm.c
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -262,7 +262,7 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
#endif
/* 1.2. Iteratively check the predecessors of already found TOTALLY_USEFUL blocks and
- * add them into TOTALLY_USEFUL set if all of their sucessors are already there.
+ * add them into TOTALLY_USEFUL set if all of their successors are already there.
*/
IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
_push_predecessors(ctx, &ctx->cfg_blocks[i], data);
@@ -788,7 +788,7 @@ IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref start)
{
- ir_insn *insn = &ctx->ir_base[start];
+ ir_insn *insn = &ctx->ir_base[start];
uint32_t n = insn->inputs_count;
ir_ref *p = insn->ops + 1;
@@ -924,25 +924,120 @@ static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *
ctx->cfg_blocks = new_blocks;
}
+#if IR_DEBUG
+static void ir_schedule_print_list(const ir_ctx *ctx, uint32_t b, const ir_ref *_next,
+ ir_ref start, ir_ref end, const char *label)
+{
+ ir_ref ref;
+
+ fprintf(stderr, " %s [%d", label, start);
+ ref = _next[start];
+ while (ref != end) {
+ fprintf(stderr, ",%d", ref);
+ ref = _next[ref];
+ }
+ fprintf(stderr, ",%d]\n", ref);
+}
+#endif
+
+/* Simple Stable Topological Sort */
+static void ir_schedule_topsort(const ir_ctx *ctx, uint32_t b, const ir_block *bb,
+ ir_ref *_xlat, ir_ref *_next, ir_ref *_prev,
+ ir_ref ref, ir_ref end,
+ ir_ref *insns_count, ir_ref *consts_count)
+{
+ ir_ref i = ref;
+ const ir_insn *insn;
+
+ if (bb->successors_count > 1) {
+ ir_ref input, j = bb->end;
+ ir_insn *end = &ctx->ir_base[j];
+
+ if (end->op == IR_IF) {
+ /* Move condition closer to IF */
+ input = end->op2;
+ if (input > 0
+ && ctx->cfg_map[input] == b
+ && !_xlat[input]
+ && _prev[j] != input
+ && (!(ir_op_flags[ctx->ir_base[input].op] & IR_OP_FLAG_CONTROL) || end->op1 == input)) {
+ if (input == i) {
+ i = _next[i];
+ insn = &ctx->ir_base[i];
+ }
+ /* remove "input" */
+ _prev[_next[input]] = _prev[input];
+ _next[_prev[input]] = _next[input];
+ /* insert before "j" */
+ _prev[input] = _prev[j];
+ _next[input] = j;
+ _next[_prev[j]] = input;
+ _prev[j] = input;
+ }
+ }
+ }
+
+ while (i != end) {
+ ir_ref n, j, input;
+ const ir_ref *p;
+
+restart:
+ IR_ASSERT(ctx->cfg_map[i] == b);
+ insn = &ctx->ir_base[i];
+ n = insn->inputs_count;
+ for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
+ input = *p;
+ if (!_xlat[input]) {
+ /* input is not scheduled yet */
+ if (input > 0) {
+ if (ctx->cfg_map[input] == b) {
+ /* "input" should be before "i" to satisfy dependency */
+#ifdef IR_DEBUG
+ if (ctx->flags & IR_DEBUG_SCHEDULE) {
+ fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
+ }
+#endif
+ /* remove "input" */
+ _prev[_next[input]] = _prev[input];
+ _next[_prev[input]] = _next[input];
+ /* insert before "i" */
+ _prev[input] = _prev[i];
+ _next[input] = i;
+ _next[_prev[i]] = input;
+ _prev[i] = input;
+ /* restart from "input" */
+ i = input;
+ goto restart;
+ }
+ } else if (input < IR_TRUE) {
+ *consts_count += ir_count_constant(_xlat, input);
+ }
+ }
+ }
+
+ _xlat[i] = *insns_count;
+ *insns_count += ir_insn_inputs_to_len(n);
+ IR_ASSERT(_next[i] != IR_UNUSED);
+ i = _next[i];
+ }
+}
+
int ir_schedule(ir_ctx *ctx)
{
- ir_ctx new_ctx;
ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
ir_ref *_xlat;
ir_ref *edges;
ir_ref prev_b_end;
uint32_t b;
- uint32_t *_blocks = ctx->cfg_map;
ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
ir_block *bb;
- ir_insn *insn, *new_insn;
+ ir_insn *insn, *new_insn, *base;
ir_use_list *lists, *use_list, *new_list;
bool bad_bb_order = 0;
-
/* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
- IR_ASSERT(_blocks[1] == 1);
+ IR_ASSERT(ctx->cfg_map[1] == 1);
/* link BB boundaries */
_prev[1] = 0;
@@ -950,30 +1045,34 @@ int ir_schedule(ir_ctx *ctx)
_next[1] = prev_b_end;
_prev[prev_b_end] = 1;
for (b = 2, bb = ctx->cfg_blocks + 2; b <= ctx->cfg_blocks_count; b++, bb++) {
- _next[prev_b_end] = bb->start;
- _prev[bb->start] = prev_b_end;
- _next[bb->start] = bb->end;
- _prev[bb->end] = bb->start;
- prev_b_end = bb->end;
- if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) {
+ ir_ref start = bb->start;
+ ir_ref end = bb->end;
+ _next[prev_b_end] = start;
+ _prev[start] = prev_b_end;
+ _next[start] = end;
+ _prev[end] = start;
+ prev_b_end = end;
+ if (!ir_is_good_bb_order(ctx, b, bb, start)) {
bad_bb_order = 1;
}
}
_next[prev_b_end] = 0;
/* insert intermediate BB nodes */
- for (i = 2, j = 1; i < ctx->insns_count; i++) {
- b = _blocks[i];
+ use_edges_count = ctx->use_lists[1].count;
+ for (i = 2, use_list = &ctx->use_lists[i]; i < ctx->insns_count; use_list++, i++) {
+ b = ctx->cfg_map[i];
if (!b) continue;
+ use_edges_count += use_list->count;
bb = &ctx->cfg_blocks[b];
if (i != bb->start && i != bb->end) {
/* insert before "end" */
- ir_ref n = bb->end;
- ir_ref p = _prev[n];
- _prev[i] = p;
- _next[i] = n;
- _next[p] = i;
- _prev[n] = i;
+ ir_ref next = bb->end;
+ ir_ref prev = _prev[next];
+ _prev[i] = prev;
+ _next[i] = next;
+ _next[prev] = i;
+ _prev[next] = i;
}
}
@@ -981,15 +1080,6 @@ int ir_schedule(ir_ctx *ctx)
ir_fix_bb_order(ctx, _prev, _next);
}
-#ifdef IR_DEBUG
- if (ctx->flags & IR_DEBUG_SCHEDULE) {
- fprintf(stderr, "Before Schedule\n");
- for (i = 1; i != 0; i = _next[i]) {
- fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
- }
- }
-#endif
-
_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
_xlat += ctx->consts_count;
_xlat[IR_TRUE] = IR_TRUE;
@@ -999,10 +1089,17 @@ int ir_schedule(ir_ctx *ctx)
insns_count = 1;
consts_count = -(IR_TRUE - 1);
- /* Topological sort according dependencies inside each basic block */
+ /* Schedule instructions inside each BB (now just topological sort according to dependencies) */
for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
ir_ref start;
+#ifdef IR_DEBUG
+ if (ctx->flags & IR_DEBUG_SCHEDULE) {
+ fprintf(stderr, "BB%d\n", b);
+ ir_schedule_print_list(ctx, b, _next, bb->start, bb->end, "INITIAL");
+ }
+#endif
+
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
/* Schedule BB start */
start = i = bb->start;
@@ -1062,8 +1159,8 @@ int ir_schedule(ir_ctx *ctx)
for (p = &ctx->use_edges[use_list->refs]; count > 0; p++, count--) {
ir_ref use = *p;
ir_insn *use_insn = &ctx->ir_base[use];
- if (!_xlat[use] && (_blocks[use] || use_insn->op == IR_PARAM)) {
- IR_ASSERT(_blocks[use] == b || use_insn->op == IR_PARAM);
+ if (!_xlat[use] && ctx->cfg_map[use]) {
+ IR_ASSERT(ctx->cfg_map[use] == b);
if (use_insn->op == IR_PARAM
|| use_insn->op == IR_VAR
|| use_insn->op == IR_PI
@@ -1100,76 +1197,20 @@ int ir_schedule(ir_ctx *ctx)
insn = &ctx->ir_base[i];
}
}
- if (bb->successors_count > 1) {
- ir_ref input, j = bb->end;
- ir_insn *end = &ctx->ir_base[j];
-
- if (end->op == IR_IF) {
- /* Move condition closer to IF */
- input = end->op2;
- if (input > 0
- && _blocks[input] == b
- && !_xlat[input]
- && _prev[j] != input
- && (!(ir_op_flags[ctx->ir_base[input].op] & IR_OP_FLAG_CONTROL) || end->op1 == input)) {
- if (input == i) {
- i = _next[i];
- insn = &ctx->ir_base[i];
- }
- /* remove "input" */
- _prev[_next[input]] = _prev[input];
- _next[_prev[input]] = _next[input];
- /* insert before "j" */
- _prev[input] = _prev[j];
- _next[input] = j;
- _next[_prev[j]] = input;
- _prev[j] = input;
- }
- }
+
+ if (i != bb->end) {
+ ir_schedule_topsort(ctx, b, bb, _xlat, _next, _prev, i, bb->end, &insns_count, &consts_count);
}
- while (i != bb->end) {
- ir_ref n, j, *p, input;
-restart:
- IR_ASSERT(_blocks[i] == b);
- n = insn->inputs_count;
- for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
- input = *p;
- if (!_xlat[input]) {
- /* input is not scheduled yet */
- if (input > 0) {
- if (_blocks[input] == b) {
- /* "input" should be before "i" to satisfy dependency */
#ifdef IR_DEBUG
- if (ctx->flags & IR_DEBUG_SCHEDULE) {
- fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
- }
-#endif
- /* remove "input" */
- _prev[_next[input]] = _prev[input];
- _next[_prev[input]] = _next[input];
- /* insert before "i" */
- _prev[input] = _prev[i];
- _next[input] = i;
- _next[_prev[i]] = input;
- _prev[i] = input;
- /* restart from "input" */
- i = input;
- insn = &ctx->ir_base[i];
- goto restart;
- }
- } else if (input < IR_TRUE) {
- consts_count += ir_count_constant(_xlat, input);
- }
- }
- }
- _xlat[i] = insns_count;
- insns_count += ir_insn_inputs_to_len(n);
- IR_ASSERT(_next[i] != IR_UNUSED);
- i = _next[i];
- insn = &ctx->ir_base[i];
+ if (ctx->flags & IR_DEBUG_SCHEDULE) {
+ ir_schedule_print_list(ctx, b, _next, start, bb->end, " FINAL");
}
+#endif
+
/* Schedule BB end */
+ i = bb->end;
+ insn = &ctx->ir_base[i];
_xlat[i] = bb->end = insns_count;
insns_count++;
if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) {
@@ -1179,15 +1220,6 @@ int ir_schedule(ir_ctx *ctx)
}
}
-#ifdef IR_DEBUG
- if (ctx->flags & IR_DEBUG_SCHEDULE) {
- fprintf(stderr, "After Schedule\n");
- for (i = 1; i != 0; i = _next[i]) {
- fprintf(stderr, "%d -> %d (%d)\n", i, _blocks[i], _xlat[i]);
- }
- }
-#endif
-
#if 1
/* Check if scheduling didn't make any modifications */
if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) {
@@ -1215,113 +1247,55 @@ int ir_schedule(ir_ctx *ctx)
ir_mem_free(_prev);
- ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
- new_ctx.insns_count = insns_count;
- new_ctx.flags2 = ctx->flags2;
- new_ctx.ret_type = ctx->ret_type;
- new_ctx.value_params = ctx->value_params;
- new_ctx.mflags = ctx->mflags;
- new_ctx.spill_base = ctx->spill_base;
- new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
- new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size;
- new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size;
- new_ctx.fixed_regset = ctx->fixed_regset;
- new_ctx.fixed_save_regset = ctx->fixed_save_regset;
- new_ctx.entries_count = ctx->entries_count;
-#if defined(IR_TARGET_AARCH64)
- new_ctx.deoptimization_exits = ctx->deoptimization_exits;
- new_ctx.get_exit_addr = ctx->get_exit_addr;
- new_ctx.get_veneer = ctx->get_veneer;
- new_ctx.set_veneer = ctx->set_veneer;
-#endif
- new_ctx.loader = ctx->loader;
+ uint32_t *map = ir_mem_calloc(insns_count, sizeof(uint32_t));
+ _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
+ lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
+ ir_ref *use_edges = edges = ir_mem_malloc(use_edges_count * sizeof(ir_ref));
+ base = ir_mem_malloc((consts_count + insns_count) * sizeof(ir_insn));
+ base += consts_count;
/* Copy constants */
- if (consts_count == ctx->consts_count) {
- new_ctx.consts_count = consts_count;
- ref = 1 - consts_count;
- insn = &ctx->ir_base[ref];
- new_insn = &new_ctx.ir_base[ref];
-
- memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref));
- if (ctx->strtab.data) {
- while (ref != IR_TRUE) {
- if (new_insn->op == IR_FUNC_ADDR) {
- if (new_insn->proto) {
- size_t len;
- const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
- new_insn->proto = ir_strl(&new_ctx, proto, len);
- }
- } else if (new_insn->op == IR_FUNC) {
- size_t len;
- const char *name = ir_get_strl(ctx, new_insn->val.name, &len);
- new_insn->val.u64 = ir_strl(&new_ctx, name, len);
- if (new_insn->proto) {
- const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
- new_insn->proto = ir_strl(&new_ctx, proto, len);
- }
- } else if (new_insn->op == IR_SYM || new_insn->op == IR_STR || new_insn->op == IR_LABEL) {
- size_t len;
- const char *str = ir_get_strl(ctx, new_insn->val.name, &len);
- new_insn->val.u64 = ir_strl(&new_ctx, str, len);
- }
- new_insn++;
- ref++;
- }
+ if (ctx->consts_count == consts_count) {
+ memcpy(base - consts_count + 1, ctx->ir_base - consts_count + 1, sizeof(ir_insn) * consts_count);
+ for (j = -consts_count + 1; j < IR_TRUE; j++) {
+ _xlat[j] = j;
}
} else {
- new_ref = -new_ctx.consts_count;
- new_insn = &new_ctx.ir_base[new_ref];
- for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) {
- if (!_xlat[ref]) {
- continue;
+ ir_insn *src = ctx->ir_base - ctx->consts_count + 1;
+ ir_insn *dst = base - consts_count + 1;
+
+ i = -ctx->consts_count + 1;
+ j = -consts_count + 1;
+ while (i < IR_TRUE) {
+ if (_xlat[i]) {
+ *dst = *src;
+ dst->prev_const = 0;
+ _xlat[i] = j;
+ dst++;
+ j++;
}
- new_insn->optx = insn->optx;
- new_insn->prev_const = 0;
- if (insn->op == IR_FUNC_ADDR) {
- new_insn->val.u64 = insn->val.u64;
- if (insn->proto) {
- size_t len;
- const char *proto = ir_get_strl(ctx, insn->proto, &len);
- new_insn->proto = ir_strl(&new_ctx, proto, len);
- } else {
- new_insn->proto = 0;
- }
- } else if (insn->op == IR_FUNC) {
- size_t len;
- const char *name = ir_get_strl(ctx, insn->val.name, &len);
- new_insn->val.u64 = ir_strl(&new_ctx, name, len);
- if (insn->proto) {
- const char *proto = ir_get_strl(ctx, insn->proto, &len);
- new_insn->proto = ir_strl(&new_ctx, proto, len);
- } else {
- new_insn->proto = 0;
- }
- } else if (insn->op == IR_SYM || insn->op == IR_STR || insn->op == IR_LABEL) {
- size_t len;
- const char *str = ir_get_strl(ctx, insn->val.name, &len);
- new_insn->val.u64 = ir_strl(&new_ctx, str, len);
- } else {
- new_insn->val.u64 = insn->val.u64;
- }
- _xlat[ref] = new_ref;
- new_ref--;
- new_insn--;
+ src++;
+ i++;
}
- new_ctx.consts_count = -new_ref;
+ IR_ASSERT(j == IR_TRUE);
+ base[IR_TRUE].optx = IR_OPT(IR_C_BOOL, IR_BOOL);
+ base[IR_TRUE].val.u64 = 1;
+ base[IR_FALSE].optx = IR_OPT(IR_C_BOOL, IR_BOOL);
+ base[IR_FALSE].val.u64 = 0;
+ base[IR_NULL].optx = IR_OPT(IR_C_ADDR, IR_ADDR);
+ base[IR_NULL].val.u64 = 0;
+ MAKE_NOP(&base[IR_UNUSED]);
}
- new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
- new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
- new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
- new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref));
-
/* Copy instructions, use lists and use edges */
+#ifdef IR_DEBUG
+ ir_ref orig_use_edges_count = use_edges_count;
+#endif
prev_ref = 0;
use_edges_count = 0;
for (i = 1; i != 0; i = _next[i]) {
new_ref = _xlat[i];
- new_ctx.cfg_map[new_ref] = _blocks[i];
+ map[new_ref] = ctx->cfg_map[i];
_prev[new_ref] = prev_ref;
prev_ref = new_ref;
@@ -1330,7 +1304,7 @@ int ir_schedule(ir_ctx *ctx)
k = 0;
if (n == 1) {
ref = ctx->use_edges[use_list->refs];
- if (_xlat[ref]) {
+ if (EXPECTED(_xlat[ref])) {
*edges = _xlat[ref];
edges++;
k = 1;
@@ -1339,7 +1313,7 @@ int ir_schedule(ir_ctx *ctx)
p = &ctx->use_edges[use_list->refs];
while (n--) {
ref = *p;
- if (_xlat[ref]) {
+ if (EXPECTED(_xlat[ref])) {
*edges = _xlat[ref];
edges++;
k++;
@@ -1353,7 +1327,7 @@ int ir_schedule(ir_ctx *ctx)
new_list->count = k;
insn = &ctx->ir_base[i];
- new_insn = &new_ctx.ir_base[new_ref];
+ new_insn = &base[new_ref];
new_insn->optx = insn->optx;
n = new_insn->inputs_count;
@@ -1365,11 +1339,7 @@ int ir_schedule(ir_ctx *ctx)
break;
case 1:
new_insn->op1 = _xlat[insn->op1];
- if (new_insn->op == IR_PARAM || new_insn->op == IR_VAR || new_insn->op == IR_PROTO) {
- size_t len;
- const char *str = ir_get_strl(ctx, insn->op2, &len);
- new_insn->op2 = ir_strl(&new_ctx, str, len);
- } else if (new_insn->op == IR_BEGIN && insn->op2) {
+ if (new_insn->op == IR_BEGIN && insn->op2) {
new_insn->op2 = _xlat[insn->op2];
} else {
new_insn->op2 = insn->op2;
@@ -1428,12 +1398,12 @@ int ir_schedule(ir_ctx *ctx)
}
/* Update list of terminators (IR_OPND_CONTROL_REF) */
- insn = &new_ctx.ir_base[1];
+ insn = &base[1];
ref = insn->op1;
if (ref) {
insn->op1 = ref = _xlat[ref];
while (1) {
- insn = &new_ctx.ir_base[ref];
+ insn = &base[ref];
ref = insn->op3;
if (!ref) {
break;
@@ -1442,36 +1412,33 @@ int ir_schedule(ir_ctx *ctx)
}
}
- IR_ASSERT(ctx->use_edges_count >= use_edges_count);
- new_ctx.use_edges_count = use_edges_count;
- new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref));
-
if (ctx->binding) {
ir_xlat_binding(ctx, _xlat);
- new_ctx.binding = ctx->binding;
- ctx->binding = NULL;
}
_xlat -= ctx->consts_count;
ir_mem_free(_xlat);
+ ir_mem_free(_next);
- new_ctx.cfg_blocks_count = ctx->cfg_blocks_count;
- new_ctx.cfg_edges_count = ctx->cfg_edges_count;
- new_ctx.cfg_blocks = ctx->cfg_blocks;
- new_ctx.cfg_edges = ctx->cfg_edges;
- ctx->cfg_blocks = NULL;
- ctx->cfg_edges = NULL;
- ctx->value_params = NULL;
- ir_code_buffer *saved_code_buffer = ctx->code_buffer;
-
- ir_free(ctx);
- IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
- IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
- memcpy(ctx, &new_ctx, sizeof(ir_ctx));
- ctx->code_buffer = saved_code_buffer;
- ctx->flags2 |= IR_LINEAR;
+ /* Switch to new IR buffer */
+ ir_mem_free(ctx->ir_base - ctx->consts_limit);
+ ctx->ir_base = base;
+ ctx->insns_count = ctx->insns_limit = insns_count;
+ ctx->consts_count = ctx->consts_limit = consts_count;
- ir_mem_free(_next);
+ ir_mem_free(ctx->use_lists);
+ ir_mem_free(ctx->use_edges);
+ IR_ASSERT(orig_use_edges_count >= use_edges_count);
+ ctx->use_lists = lists;
+ ctx->use_edges = use_edges;
+ ctx->use_edges_count = use_edges_count;
+
+ ir_mem_free(ctx->cfg_map);
+ ctx->cfg_map = map;
+
+ ctx->prev_ref = _prev;
+
+ ctx->flags2 |= IR_LINEAR;
return 1;
}
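ir_schedule() is restructured above: the per-block stable topological sort moves into ir_schedule_topsort(), and instead of building a brand-new ir_ctx the rewritten constant/instruction buffer, use lists, use edges and cfg_map are swapped into the existing context. The sort itself works on the intrusive order expressed by the _prev/_next index arrays: when an operand turns out to be listed after its user, it is unlinked and re-inserted just before that user. A tiny standalone model of that splice (the caller, as in the diff, must guarantee x is not already immediately before y):

    #include <assert.h>

    /* Unlink node x from a circular doubly-linked list expressed as index
     * arrays, then re-insert it immediately before node y. */
    static void move_before(int *prev, int *next, int x, int y)
    {
        /* remove x */
        prev[next[x]] = prev[x];
        next[prev[x]] = next[x];
        /* insert before y */
        prev[x] = prev[y];
        next[x] = y;
        next[prev[y]] = x;
        prev[y] = x;
    }

    int main(void)
    {
        /* 0 <-> 1 <-> 2 <-> 3, with 0 acting as the sentinel */
        int next[4] = {1, 2, 3, 0};
        int prev[4] = {3, 0, 1, 2};

        move_before(prev, next, 3, 1);   /* 3 must now come before 1 */
        assert(next[0] == 3 && next[3] == 1 && next[1] == 2 && next[2] == 0);
        assert(prev[1] == 3 && prev[3] == 0);
        return 0;
    }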
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h
index acd7e41a3e9..115c5121d75 100644
--- a/ext/opcache/jit/ir/ir_private.h
+++ b/ext/opcache/jit/ir/ir_private.h
@@ -908,7 +908,7 @@ IR_ALWAYS_INLINE bool ir_const_is_true(const ir_insn *v)
return 0;
}
-IR_ALWAYS_INLINE bool ir_ref_is_true(ir_ctx *ctx, ir_ref ref)
+IR_ALWAYS_INLINE bool ir_ref_is_true(const ir_ctx *ctx, ir_ref ref)
{
if (ref == IR_TRUE) {
return 1;
@@ -1096,6 +1096,7 @@ void ir_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref);
void ir_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val);
/*** Iterative Optimization ***/
+void ir_iter_add_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist);
void ir_iter_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist);
void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir_bitqueue *worklist);
void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist);
@@ -1179,16 +1180,17 @@ typedef enum _ir_fold_action {
IR_FOLD_DO_CONST
} ir_fold_action;
-ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn);
+ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3,
+ const ir_insn *op1_insn, const ir_insn *op2_insn, const ir_insn *op3_insn);
/*** Alias Analyzes (see ir.c) ***/
-ir_ref ir_find_aliasing_load(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr);
-ir_ref ir_find_aliasing_vload(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var);
+ir_ref ir_find_aliasing_load(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr);
+ir_ref ir_find_aliasing_vload(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var);
ir_ref ir_find_aliasing_store(ir_ctx *ctx, ir_ref ref, ir_ref addr, ir_ref val);
ir_ref ir_find_aliasing_vstore(ir_ctx *ctx, ir_ref ref, ir_ref addr, ir_ref val);
/*** Predicates (see ir.c) ***/
-ir_ref ir_check_dominating_predicates(ir_ctx *ctx, ir_ref ref, ir_ref condition);
+ir_ref ir_check_dominating_predicates(const ir_ctx *ctx, ir_ref ref, ir_ref condition);
/*** IR Live Info ***/
typedef ir_ref ir_live_pos;
@@ -1468,9 +1470,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
void ir_fix_stack_frame(ir_ctx *ctx);
/* Utility */
-ir_type ir_get_return_type(ir_ctx *ctx);
const ir_proto_t *ir_call_proto(const ir_ctx *ctx, const ir_insn *insn);
-void ir_print_call_conv(uint32_t flags, FILE *f);
//#define IR_BITSET_LIVENESS
diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c
index 23f44482cb8..4a893410d49 100644
--- a/ext/opcache/jit/ir/ir_ra.c
+++ b/ext/opcache/jit/ir/ir_ra.c
@@ -3761,14 +3761,13 @@ static void ir_set_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op, int8_t
{
char key[10];
- IR_ASSERT(reg != IR_REG_NONE);
if (!ctx->fused_regs) {
ctx->fused_regs = ir_mem_malloc(sizeof(ir_strtab));
ir_strtab_init(ctx->fused_regs, 8, 128);
}
memcpy(key, &root, sizeof(ir_ref));
memcpy(key + 4, &ref_and_op, sizeof(ir_ref));
- ir_strtab_lookup(ctx->fused_regs, key, 8, 0x10000000 | reg);
+ ir_strtab_lookup(ctx->fused_regs, key, 8, 0x10000000 | (uint8_t)reg);
}
static void assign_regs(ir_ctx *ctx)
@@ -3874,93 +3873,88 @@ static void assign_regs(ir_ctx *ctx)
}
prev_use_ref = ref;
}
- } else if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
- && needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {
- if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG)
- && use_pos->hint != reg
-// && ctx->ir_base[ref].op != IR_CALL
-// && ctx->ir_base[ref].op != IR_TAILCALL) {
- && ctx->ir_base[ref].op != IR_SNAPSHOT
- && !needs_spill_load(ctx, ival, use_pos)) {
- /* fuse spill load (valid only when register is not reused) */
- reg = IR_REG_NONE;
- if (use_pos->next
- && use_pos->op_num == 1
- && use_pos->next->pos == use_pos->pos
- && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
- /* Support for R2 = BINOP(R1, R1) */
- if (use_pos->hint_ref < 0) {
- ref = -use_pos->hint_ref;
+ } else {
+ if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
+ && needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {
+ if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG)
+ && use_pos->hint != reg
+// && ctx->ir_base[ref].op != IR_CALL
+// && ctx->ir_base[ref].op != IR_TAILCALL) {
+ && ctx->ir_base[ref].op != IR_SNAPSHOT
+ && !needs_spill_load(ctx, ival, use_pos)) {
+ /* fuse spill load (valid only when register is not reused) */
+ reg = IR_REG_NONE;
+ if (use_pos->next
+ && use_pos->op_num == 1
+ && use_pos->next->pos == use_pos->pos
+ && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
+ /* Support for R2 = BINOP(R1, R1) */
+ if (use_pos->hint_ref < 0) {
+ ref = -use_pos->hint_ref;
+ }
+ ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
+ use_pos = use_pos->next;
}
- ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
- use_pos = use_pos->next;
- }
- } else {
- if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
- reg |= IR_REG_SPILL_SPECIAL;
} else {
- reg |= IR_REG_SPILL_LOAD;
- }
- if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) {
- uint32_t use_b = ctx->cfg_map[ref];
+ if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
+ reg |= IR_REG_SPILL_SPECIAL;
+ } else {
+ reg |= IR_REG_SPILL_LOAD;
+ }
+ if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) {
+ uint32_t use_b = ctx->cfg_map[ref];
- if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
- ir_bitset_incl(available, use_b);
+ if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
+ ir_bitset_incl(available, use_b);
+ }
+ prev_use_ref = ref;
}
- prev_use_ref = ref;
}
+ } else {
+ /* reuse register without spill load */
}
- if (use_pos->hint_ref < 0
- && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
- if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
- reg |= IR_REG_SPILL_SPECIAL;
+
+ if (use_pos->hint_ref < 0) {
+ if (use_pos->flags & IR_PHI_USE) {
+ IR_ASSERT(use_pos->hint_ref < 0);
+ IR_ASSERT(ctx->vregs[-use_pos->hint_ref]);
+ IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]);
+ if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) {
+ /* Spilled PHI var is passed through memory */
+ reg = IR_REG_NONE;
+ }
} else {
- reg |= IR_REG_SPILL_LOAD;
- }
- if (reg != old_reg) {
IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
- ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
- ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
- use_pos = use_pos->next;
- continue;
+ old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num);
+ if ((old_reg != IR_REG_NONE && reg != old_reg) || reg == IR_REG_NONE) {
+ ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
+ ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
+ use_pos = use_pos->next;
+ continue;
+ }
}
+ ref = -use_pos->hint_ref;
}
- } else if (use_pos->flags & IR_PHI_USE) {
- IR_ASSERT(use_pos->hint_ref < 0);
- IR_ASSERT(ctx->vregs[-use_pos->hint_ref]);
- IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]);
- if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) {
- /* Spilled PHI var is passed through memory */
- reg = IR_REG_NONE;
- }
- } else if (use_pos->hint_ref < 0
- && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
- if (reg != old_reg) {
- IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
- ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
- ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
- use_pos = use_pos->next;
- continue;
- }
- } else {
- /* reuse register without spill load */
- }
- if (use_pos->hint_ref < 0) {
- ref = -use_pos->hint_ref;
}
+
ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
use_pos = use_pos->next;
}
- } else if (!(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
+ } else {
use_pos = ival->use_pos;
while (use_pos) {
ref = IR_LIVE_POS_TO_REF(use_pos->pos);
- if (ctx->ir_base[ref].op == IR_SNAPSHOT) {
+ if (ctx->ir_base[ref].op == IR_SNAPSHOT
+ && !(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
IR_ASSERT(use_pos->hint_ref >= 0);
/* A reference to a CPU spill slot */
reg = IR_REG_SPILL_STORE | IR_REG_STACK_POINTER;
ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
+ } else if (use_pos->hint_ref < 0 && !(use_pos->flags & IR_PHI_USE)) {
+ IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
+ ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
+ ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, IR_REG_NONE);
}
use_pos = use_pos->next;
}
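In ir_ra.c a fused operand may now be recorded with no register at all (IR_REG_NONE, a negative value), e.g. when the operand is left in its spill slot instead of being reloaded, so ir_set_fused_reg() drops its assertion and masks the register to uint8_t before OR-ing it into the 0x10000000-tagged strtab value. Without the mask a negative register would sign-extend and wipe out the tag; a two-assert demonstration of the arithmetic:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int8_t reg = -1;                                   /* e.g. IR_REG_NONE */

        assert((0x10000000 | reg) == -1);                  /* sign-extension destroys the tag */
        assert((0x10000000 | (uint8_t)reg) == 0x100000ff); /* masked to 8 bits, tag survives  */
        return 0;
    }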
diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c
index 51d7f96e518..3f1d943c687 100644
--- a/ext/opcache/jit/ir/ir_save.c
+++ b/ext/opcache/jit/ir/ir_save.c
@@ -18,7 +18,7 @@ void ir_print_proto(const ir_ctx *ctx, ir_ref func_proto, FILE *f)
}
}
-void ir_print_call_conv(uint32_t flags, FILE *f)
+static void ir_print_call_conv(uint32_t flags, FILE *f)
{
switch (flags & IR_CALL_CONV_MASK) {
case IR_CC_BUILTIN:
@@ -75,6 +75,38 @@ void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, c
}
}
+void ir_print_func_proto(const ir_ctx *ctx, const char *name, bool prefix, FILE *f)
+{
+ if (ctx->flags & IR_STATIC) {
+ fprintf(f, "static ");
+ }
+ fprintf(f, "func %s%s(",
+ prefix ? "@" : "",
+ name);
+ if (ctx->ir_base[2].op == IR_PARAM) {
+ ir_insn *insn = &ctx->ir_base[2];
+
+ fprintf(f, "%s", ir_type_cname[insn->type]);
+ insn++;
+ while (insn->op == IR_PARAM) {
+ fprintf(f, ", %s", ir_type_cname[insn->type]);
+ insn++;
+ }
+ if (ctx->flags & IR_VARARG_FUNC) {
+ fprintf(f, ", ...");
+ }
+ } else if (ctx->flags & IR_VARARG_FUNC) {
+ fprintf(f, "...");
+ }
+ fprintf(f, "): %s", ir_type_cname[ctx->ret_type != (ir_type)-1 ? ctx->ret_type : IR_VOID]);
+ ir_print_call_conv(ctx->flags, f);
+ if (ctx->flags & IR_CONST_FUNC) {
+ fprintf(f, " __const");
+ } else if (ctx->flags & IR_PURE_FUNC) {
+ fprintf(f, " __pure");
+ }
+}
+
static void ir_save_dessa_moves(const ir_ctx *ctx, int b, ir_block *bb, FILE *f)
{
uint32_t succ;
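ir_print_func_proto(), added above, assembles a one-line textual prototype from the leading IR_PARAM instructions and the context flags (static, vararg, calling convention, __const/__pure); ir_dump_dot() uses it for the graph label. An illustrative call (the function name is invented, and the exact type spellings come from ir_type_cname[], so they may differ):

    #include <stdio.h>
    #include "ir.h"   /* assumed include path for the declaration above */

    static void show_proto(const ir_ctx *ctx, FILE *f)
    {
        /* For a static two-parameter int32 function this should print roughly:
         *     static func @square_add(int32_t, int32_t): int32_t            */
        ir_print_func_proto(ctx, "square_add", 1 /* prefix with '@' */, f);
        fputc('\n', f);
    }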
diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c
index e2f38a058ae..bfec32b568f 100644
--- a/ext/opcache/jit/ir/ir_sccp.c
+++ b/ext/opcache/jit/ir/ir_sccp.c
@@ -19,7 +19,6 @@
#define IR_TOP IR_UNUSED
#define IR_BOTTOM IR_LAST_OP
-#define IR_MAKE_TOP(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0)
#define IR_MAKE_BOTTOM(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0)
#define IR_IS_TOP(ref) (ref >= 0 && _values[ref].op == IR_TOP)
@@ -27,17 +26,57 @@
#define IR_IS_REACHABLE(ref) _ir_is_reachable_ctrl(ctx, _values, ref)
#define IR_IS_CONST(ref) (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op))
-IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(ir_ctx *ctx, ir_insn *_values, ir_ref ref)
+typedef struct {
+ union {
+ struct {
+ IR_STRUCT_LOHI(
+ union {
+ IR_STRUCT_LOHI(
+ union {
+ IR_STRUCT_LOHI(
+ uint8_t op, /* {IR_TOP - unreachable, IR_BOTTOM - reachable} for control */
+ /* {IR_TOP | IR_COPY() | IR_CONST() | IR_BOTTOM} for data */
+ /* {IR_TOP | IR_MERGE() | IR_BOTTOM} for IR_MERGE */
+ /* {IR_TOP | IR_IF() | IR_BOTTOM} for IR_IF and IR_SWITCH */
+ uint8_t type
+ );
+ uint16_t opt;
+ },
+ uint16_t _space_1
+ );
+ uint32_t optx;
+ },
+ union {
+ ir_ref copy; /* identity for IR_COPY */
+ ir_ref unfeasible_inputs; /* number of unfeasible inputs for IR_MERGE */
+ ir_ref single_output; /* reachable output for IR_IF */
+ ir_ref visited; /* for IR_TOP */
+ }
+ );
+ union {
+ struct {
+ ir_ref next; /* double-linked identities list for IR_COPY */
+ ir_ref prev; /* double-linked identities list for IR_COPY */
+ };
+ ir_val val; /* constant value for IR_CONST */
+ };
+ };
+ ir_insn insn; /* constant insn for IR_CONST */
+ };
+} ir_sccp_val;
+
+IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref)
{
IR_ASSERT(!IR_IS_CONST_REF(ref));
IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL);
return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */
}
-IR_ALWAYS_INLINE void ir_sccp_add_uses(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+IR_ALWAYS_INLINE void ir_sccp_add_uses(const ir_ctx *ctx, const ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
- ir_use_list *use_list;
- ir_ref n, *p, use;
+ const ir_use_list *use_list;
+ const ir_ref *p;
+ ir_ref n, use;
IR_ASSERT(!IR_IS_CONST_REF(ref));
use_list = &ctx->use_lists[ref];
@@ -50,23 +89,23 @@ IR_ALWAYS_INLINE void ir_sccp_add_uses(ir_ctx *ctx, ir_insn *_values, ir_bitqueu
}
}
-IR_ALWAYS_INLINE void ir_sccp_add_input(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+IR_ALWAYS_INLINE void ir_sccp_add_input(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
IR_ASSERT(!IR_IS_CONST_REF(ref));
IR_ASSERT(_values[ref].op == IR_TOP);
/* do backward propagaton only once */
- if (!_values[ref].op1) {
- _values[ref].op1 = 1;
+ if (!_values[ref].visited) {
+ _values[ref].visited = 1;
ir_bitqueue_add(worklist, ref);
}
}
#if IR_COMBO_COPY_PROPAGATION
-IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a)
+IR_ALWAYS_INLINE ir_ref ir_sccp_identity(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a)
{
if (a > 0 && _values[a].op == IR_COPY) {
do {
- a = _values[a].op1;
+ a = _values[a].copy;
IR_ASSERT(a > 0);
} while (_values[a].op == IR_COPY);
IR_ASSERT(_values[a].op == IR_BOTTOM);
@@ -75,7 +114,7 @@ IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a
}
#if 0
-static void CHECK_LIST(ir_insn *_values, ir_ref ref)
+static void CHECK_LIST(ir_sccp_val *_values, ir_ref ref)
{
ir_ref member = _values[ref].op2;
while (member != ref) {
@@ -88,44 +127,44 @@ static void CHECK_LIST(ir_insn *_values, ir_ref ref)
# define CHECK_LIST(_values, ref)
#endif
-static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst)
+static void ir_sccp_add_identity(const ir_ctx *ctx, ir_sccp_val *_values, ir_ref src, ir_ref dst)
{
IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY);
IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY)));
IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst);
_values[dst].optx = IR_COPY;
- _values[dst].op1 = src;
+ _values[dst].copy = src;
if (_values[src].op == IR_BOTTOM) {
/* initialize empty double-linked list */
- if (_values[src].op1 != src) {
- _values[src].op1 = src;
- _values[src].op2 = src;
- _values[src].op3 = src;
+ if (_values[src].copy != src) {
+ _values[src].copy = src;
+ _values[src].next = src;
+ _values[src].prev = src;
}
} else {
src = ir_sccp_identity(ctx, _values, src);
}
/* insert into circular double-linked list */
- ir_ref prev = _values[src].op3;
- _values[dst].op2 = src;
- _values[dst].op3 = prev;
- _values[src].op3 = dst;
- _values[prev].op2 = dst;
+ ir_ref prev = _values[src].prev;
+ _values[dst].next = src;
+ _values[dst].prev = prev;
+ _values[src].prev = dst;
+ _values[prev].next = dst;
CHECK_LIST(_values, dst);
}
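/*
 * Illustrative sketch only (not from the commit): the identity partition is a
 * circular doubly-linked list threaded through the `next`/`prev` fields, so
 * linking a new member `dst` in front of the partition head `src` is the usual
 * four-pointer update; the helper name sccp_list_insert_before is hypothetical.
 */
static void sccp_list_insert_before(ir_sccp_val *_values, ir_ref src, ir_ref dst)
{
	ir_ref prev = _values[src].prev;

	_values[dst].next = src;
	_values[dst].prev = prev;
	_values[src].prev = dst;
	_values[prev].next = dst;
}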
-static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+static void ir_sccp_split_partition(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
ir_ref member, head, tail, next, prev;
CHECK_LIST(_values, ref);
IR_MAKE_BOTTOM(ref);
- _values[ref].op1 = ref;
+ _values[ref].copy = ref;
- member = _values[ref].op2;
+ member = _values[ref].next;
head = tail = IR_UNUSED;
while (member != ref) {
if (_values[member].op != IR_BOTTOM) {
@@ -133,19 +172,19 @@ static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *
}
ir_sccp_add_uses(ctx, _values, worklist, member);
- next = _values[member].op2;
+ next = _values[member].next;
if (ir_sccp_identity(ctx, _values, member) == ref) {
/* remove "member" from the old circular double-linked list */
- prev = _values[member].op3;
- _values[prev].op2 = next;
- _values[next].op3 = prev;
+ prev = _values[member].prev;
+ _values[prev].next = next;
+ _values[next].prev = prev;
/* insert "member" into the new double-linked list */
if (!head) {
head = tail = member;
} else {
- _values[tail].op2 = member;
- _values[member].op3 = tail;
+ _values[tail].next = member;
+ _values[member].prev = tail;
tail = member;
}
}
@@ -153,26 +192,26 @@ static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *
}
/* remove "ref" from the old circular double-linked list */
- next = _values[ref].op2;
- prev = _values[ref].op3;
- _values[prev].op2 = next;
- _values[next].op3 = prev;
+ next = _values[ref].next;
+ prev = _values[ref].prev;
+ _values[prev].next = next;
+ _values[next].prev = prev;
CHECK_LIST(_values, next);
/* close the new circle */
if (head) {
- _values[ref].op2 = head;
- _values[ref].op3 = tail;
- _values[tail].op2 = ref;
- _values[head].op3 = ref;
+ _values[ref].next = head;
+ _values[ref].prev = tail;
+ _values[tail].next = ref;
+ _values[head].prev = ref;
} else {
- _values[ref].op2 = ref;
- _values[ref].op3 = ref;
+ _values[ref].next = ref;
+ _values[ref].prev = ref;
}
CHECK_LIST(_values, ref);
}
-IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
if (_values[ref].op == IR_COPY) {
ir_sccp_split_partition(ctx, _values, worklist, ref);
@@ -187,7 +226,7 @@ IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(ir_ctx *ctx, ir_insn *_values, ir_b
# define IR_MAKE_BOTTOM_EX(ref) IR_MAKE_BOTTOM(ref)
#endif
-IR_ALWAYS_INLINE bool ir_sccp_meet_const(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *val_insn)
+IR_ALWAYS_INLINE bool ir_sccp_meet_const(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, const ir_insn *val_insn)
{
IR_ASSERT(IR_IS_CONST_OP(val_insn->op) || IR_IS_SYM_CONST(val_insn->op));
@@ -207,46 +246,51 @@ IR_ALWAYS_INLINE bool ir_sccp_meet_const(ir_ctx *ctx, ir_insn *_values, ir_bitqu
return 1;
}
-IR_ALWAYS_INLINE bool ir_sccp_meet(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
+IR_ALWAYS_INLINE bool ir_sccp_meet_copy(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
+{
+#if IR_COMBO_COPY_PROPAGATION
+ if (_values[ref].op == IR_COPY) {
+ /* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
+ * (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL)) ? COPY(OLD_VAL) : BOTTOM */
+ if (ir_sccp_identity(ctx, _values, ref) == ir_sccp_identity(ctx, _values, val)) {
+ return 0; /* not changed */
+ }
+ ir_sccp_split_partition(ctx, _values, worklist, ref);
+ return 1;
+ } else {
+ IR_ASSERT(_values[ref].op != IR_BOTTOM);
+ /* TOP meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
+ /* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
+ ir_sccp_add_identity(ctx, _values, val, ref);
+ return 1;
+ }
+#endif
+ IR_MAKE_BOTTOM(ref);
+ return 1;
+}
+
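/*
 * Illustrative example only (not part of this commit), with hypothetical refs
 * r3 and r4 that have different identities:
 *
 *   TOP       meet COPY(r3)  ->  COPY(r3)   (changed)
 *   COPY(r3)  meet COPY(r3)  ->  COPY(r3)   (unchanged: same identity)
 *   COPY(r3)  meet COPY(r4)  ->  BOTTOM     (the partition of r3 is split)
 */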
+#if 0
+IR_ALWAYS_INLINE bool ir_sccp_meet(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
{
- ir_ref val_identity = ir_sccp_identity(ctx, _values, val);
- ir_insn *val_insn;
+ const ir_insn *val_insn;
- if (IR_IS_CONST_REF(val_identity)) {
- val_insn = &ctx->ir_base[val_identity];
+ if (IR_IS_CONST_REF(val)) {
+ val_insn = &ctx->ir_base[val];
} else {
- val_insn = &_values[val_identity];
+ val_insn = &_values[val].insn;
if (!IR_IS_CONST_OP(val_insn->op) && !IR_IS_SYM_CONST(val_insn->op)) {
-#if IR_COMBO_COPY_PROPAGATION
- if (_values[ref].op == IR_COPY) {
- /* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
- * (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL) ? COPY(OLD_VAL) ? BOTTOM */
- if (ir_sccp_identity(ctx, _values, ref) == val_identity) {
- return 0; /* not changed */
- }
- ir_sccp_split_partition(ctx, _values, worklist, ref);
- return 1;
- } else {
- IR_ASSERT(_values[ref].op != IR_BOTTOM);
- /* TOP meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
- /* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
- ir_sccp_add_identity(ctx, _values, val, ref);
- return 1;
- }
-#endif
-
- IR_MAKE_BOTTOM(ref);
- return 1;
+ return ir_sccp_meet_copy(ctx, _values, worklist, ref, val);
}
}
return ir_sccp_meet_const(ctx, _values, worklist, ref, val_insn);
}
+#endif
-static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *insn)
+static ir_ref ir_sccp_fold(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, const ir_insn *insn)
{
- ir_insn *op1_insn, *op2_insn, *op3_insn;
+ const ir_insn *op1_insn, *op2_insn, *op3_insn;
ir_ref op1, op2, op3, copy;
uint32_t opt = insn->opt;
@@ -255,11 +299,11 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist,
op3 = ir_sccp_identity(ctx, _values, insn->op3);
restart:
- op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
- op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2;
- op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3;
+ op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? &_values[op1].insn : ctx->ir_base + op1;
+ op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? &_values[op2].insn : ctx->ir_base + op2;
+ op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? &_values[op3].insn : ctx->ir_base + op3;
- switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
+ switch (ir_folding((ir_ctx*)ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
case IR_FOLD_DO_RESTART:
opt = ctx->fold_insn.optx;
op1 = ctx->fold_insn.op1;
@@ -272,19 +316,30 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist,
return 1;
case IR_FOLD_DO_COPY:
copy = ctx->fold_insn.op1;
- return ir_sccp_meet(ctx, _values, worklist, ref, copy);
+ if (IR_IS_CONST_REF(copy)) {
+ insn = &ctx->ir_base[copy];
+ } else {
+ insn = &_values[copy].insn;
+ if (!IR_IS_CONST_OP(insn->op) && !IR_IS_SYM_CONST(insn->op)) {
+ return ir_sccp_meet_copy(ctx, _values, worklist, ref, copy);
+ }
+ }
+ goto meet_const;
case IR_FOLD_DO_CONST:
- return ir_sccp_meet_const(ctx, _values, worklist, ref, &ctx->fold_insn);
+ insn = &ctx->fold_insn;
+meet_const:
+ return ir_sccp_meet_const(ctx, _values, worklist, ref, insn);
default:
IR_ASSERT(0);
return 0;
}
}
-static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref i, ir_insn *insn)
+static bool ir_sccp_analyze_phi(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref i, const ir_insn *insn)
{
- ir_ref j, n, input, *merge_input, *p;
- ir_insn *v, *new_const = NULL;
+ ir_ref j, n, input;
+ const ir_ref *merge_input, *p;
+ const ir_insn *v, *new_const = NULL;
#if IR_COMBO_COPY_PROPAGATION
ir_ref new_copy = IR_UNUSED;
ir_ref new_copy_identity = IR_UNUSED;
@@ -315,7 +370,7 @@ static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *work
} else if (input == i) {
continue;
} else {
- v = &_values[input];
+ v = &_values[input].insn;
if (v->op == IR_TOP) {
ir_sccp_add_input(ctx, _values, worklist, input);
continue;
@@ -369,7 +424,7 @@ static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *work
} else if (input == i) {
continue;
} else {
- v = &_values[input];
+ v = &_values[input].insn;
if (v->op == IR_TOP) {
ir_sccp_add_input(ctx, _values, worklist, input);
continue;
@@ -398,7 +453,9 @@ static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *work
#if IR_COMBO_COPY_PROPAGATION
if (new_copy) {
- return ir_sccp_meet(ctx, _values, worklist, i, new_copy);
+ IR_ASSERT(!IR_IS_CONST_REF(new_copy));
+ IR_ASSERT(!IR_IS_CONST_OP(_values[new_copy].op) && !IR_IS_SYM_CONST(_values[new_copy].op));
+ return ir_sccp_meet_copy(ctx, _values, worklist, i, new_copy);
}
#endif
@@ -409,7 +466,7 @@ static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *work
return 1;
}
-static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn)
+static bool ir_is_dead_load_ex(const ir_ctx *ctx, ir_ref ref, uint32_t flags, const ir_insn *insn)
{
if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
return ctx->use_lists[ref].count == 1;
@@ -419,10 +476,10 @@ static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn
return 0;
}
-static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref)
+static bool ir_is_dead_load(const ir_ctx *ctx, ir_ref ref)
{
if (ctx->use_lists[ref].count == 1) {
- ir_insn *insn = &ctx->ir_base[ref];
+ const ir_insn *insn = &ctx->ir_base[ref];
uint32_t flags = ir_op_flags[insn->op];
if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
@@ -434,7 +491,7 @@ static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref)
return 0;
}
-static bool ir_is_dead(ir_ctx *ctx, ir_ref ref)
+static bool ir_is_dead(const ir_ctx *ctx, ir_ref ref)
{
if (ctx->use_lists[ref].count == 0) {
return IR_IS_FOLDABLE_OP(ctx->ir_base[ref].op);
@@ -444,28 +501,28 @@ static bool ir_is_dead(ir_ctx *ctx, ir_ref ref)
return 0;
}
-static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a)
+static bool ir_sccp_is_true(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a)
{
- ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
+ const ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
return ir_const_is_true(v);
}
-static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b)
+static bool ir_sccp_is_equal(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a, ir_ref b)
{
- ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
- ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
+ const ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
+ const ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b].insn;
IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
return v1->val.u64 == v2->val.u64;
}
-static bool ir_sccp_in_range(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b, ir_ref c)
+static bool ir_sccp_in_range(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a, ir_ref b, ir_ref c)
{
- ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
- ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
- ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c];
+ const ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
+ const ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b].insn;
+ const ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c].insn;
IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
@@ -478,13 +535,13 @@ static bool ir_sccp_in_range(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b,
}
#ifdef IR_SCCP_TRACE
-static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i)
+static void ir_sccp_trace_val(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
{
if (IR_IS_BOTTOM(i)) {
fprintf(stderr, "BOTTOM");
} else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
fprintf(stderr, "CONST(");
- ir_print_const(ctx, &_values[i], stderr, true);
+ ir_print_const(ctx, &_values[i].insn, stderr, true);
fprintf(stderr, ")");
#if IR_COMBO_COPY_PROPAGATION
} else if (_values[i].op == IR_COPY) {
@@ -501,13 +558,13 @@ static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i)
}
}
-static void ir_sccp_trace_start(ir_ctx *ctx, ir_insn *_values, ir_ref i)
+static void ir_sccp_trace_start(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
{
fprintf(stderr, "%d. ", i);
ir_sccp_trace_val(ctx, _values, i);
}
-static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i)
+static void ir_sccp_trace_end(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
{
fprintf(stderr, " -> ");
ir_sccp_trace_val(ctx, _values, i);
@@ -518,11 +575,12 @@ static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i)
# define ir_sccp_trace_end(c, v, i)
#endif
-static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
+static IR_NEVER_INLINE void ir_sccp_analyze(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
{
- ir_ref i, j, n, *p, use;
- ir_use_list *use_list;
- ir_insn *insn, *use_insn;
+ ir_ref i, j, n, use;
+ const ir_ref *p;
+ const ir_use_list *use_list;
+ const ir_insn *insn, *use_insn;
uint32_t flags;
/* A slightly modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */
@@ -610,7 +668,7 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
}
}
for (p = insn->ops + 1; n > 0; p++, n--) {
- ir_ref input = *p;
+ const ir_ref input = *p;
IR_ASSERT(input > 0);
if (!IR_IS_REACHABLE(input)) {
unfeasible_inputs++;
@@ -618,9 +676,9 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
}
if (unfeasible_inputs == 0) {
IR_MAKE_BOTTOM(i);
- } else if (_values[i].op != IR_MERGE || _values[i].op1 != unfeasible_inputs) {
+ } else if (_values[i].op != IR_MERGE || _values[i].unfeasible_inputs != unfeasible_inputs) {
_values[i].optx = IR_MERGE;
- _values[i].op1 = unfeasible_inputs;
+ _values[i].unfeasible_inputs = unfeasible_inputs;
} else {
continue;
}
@@ -674,10 +732,10 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
}
if (_values[i].op == IR_TOP) {
_values[i].optx = IR_IF;
- _values[i].op1 = use;
+ _values[i].single_output = use;
ir_bitqueue_add(worklist, use);
continue;
- } else if (_values[i].op == IR_IF && _values[i].op1 == use) {
+ } else if (_values[i].op == IR_IF && _values[i].single_output == use) {
continue;
}
}
@@ -715,10 +773,10 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
use_insn = &ctx->ir_base[use_case];
if (_values[i].op == IR_TOP) {
_values[i].optx = IR_IF;
- _values[i].op1 = use_case;
+ _values[i].single_output = use_case;
ir_bitqueue_add(worklist, use_case);
continue;
- } else if (_values[i].op == IR_IF || _values[i].op1 == use_case) {
+ } else if (_values[i].op == IR_IF || _values[i].single_output == use_case) {
continue;
}
}
@@ -768,18 +826,20 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
for (i = 1; i < ctx->insns_count; i++) {
if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
fprintf(stderr, "%d. CONST(", i);
- ir_print_const(ctx, &_values[i], stderr, true);
+ ir_print_const(ctx, &_values[i].insn, stderr, true);
fprintf(stderr, ")\n");
#if IR_COMBO_COPY_PROPAGATION
} else if (_values[i].op == IR_COPY) {
- fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1);
+ fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].copy);
#endif
} else if (IR_IS_TOP(i)) {
- fprintf(stderr, "%d. TOP\n", i);
+ if (ctx->ir_base[i].op != IR_TOP) {
+ fprintf(stderr, "%d. TOP\n", i);
+ }
} else if (_values[i].op == IR_IF) {
- fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1);
+ fprintf(stderr, "%d. IF(%d)\n", i, _values[i].single_output);
} else if (_values[i].op == IR_MERGE) {
- fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1);
+ fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].unfeasible_inputs);
} else if (!IR_IS_BOTTOM(i)) {
fprintf(stderr, "%d. %d\n", i, _values[i].op);
}
@@ -806,7 +866,7 @@ static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
}
}
-static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist)
+static void ir_sccp_remove_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_bitqueue *worklist)
{
ir_ref j, n, *p;
ir_insn *insn;
@@ -829,7 +889,7 @@ static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bi
}
}
-static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
+static void ir_sccp_replace_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
{
ir_ref j, n, *p, use, i;
ir_insn *insn;
@@ -907,7 +967,7 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r
CLEAR_USES(ref);
}
-static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst)
+static void ir_sccp_remove_if(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_ref dst)
{
ir_ref next;
ir_insn *insn, *next_insn;
@@ -1054,10 +1114,10 @@ static bool ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_ref ref, ir_i
return 1;
}
-static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
+static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, const ir_sccp_val *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
{
ir_ref i, j;
- ir_insn *value;
+ const ir_sccp_val *value;
for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) {
if (value->op == IR_BOTTOM) {
@@ -1072,7 +1132,7 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
#if IR_COMBO_COPY_PROPAGATION
} else if (value->op == IR_COPY) {
- ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->op1), iter_worklist);
+ ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->copy), iter_worklist);
#endif
} else if (value->op == IR_TOP) {
/* remove unreachable instruction */
@@ -1104,7 +1164,7 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
}
} else if (value->op == IR_IF) {
/* remove one way IF/SWITCH */
- ir_sccp_remove_if(ctx, _values, i, value->op1);
+ ir_sccp_remove_if(ctx, _values, i, value->single_output);
} else if (value->op == IR_MERGE) {
/* schedule merge to remove unfeasible MERGE inputs */
ir_bitqueue_add(worklist, i);
@@ -1121,6 +1181,16 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
/* Iterative Optimizations */
/***************************/
+void ir_iter_add_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
+{
+ ir_use_list *use_list = &ctx->use_lists[ref];
+ ir_ref *p, n = use_list->count;
+
+ for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
+ ir_bitqueue_add(worklist, *p);
+ }
+}
+
/* Modification of some instruction may open new optimization opportunities for other
* instructions that use this one.
*
@@ -1132,16 +1202,16 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
*
* TODO: Think about a more general solution ???
*/
-static void ir_iter_add_related_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
+static void ir_iter_add_related_uses(const ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
- ir_insn *insn = &ctx->ir_base[ref];
+ const ir_insn *insn = &ctx->ir_base[ref];
if (insn->op == IR_ADD || insn->op == IR_SUB) {
- ir_use_list *use_list = &ctx->use_lists[ref];
+ const ir_use_list *use_list = &ctx->use_lists[ref];
if (use_list->count == 1) {
ir_ref use = ctx->use_edges[use_list->refs];
- ir_insn *use_insn = &ctx->ir_base[ref];
+ const ir_insn *use_insn = &ctx->ir_base[ref];
if (use_insn->op == IR_ADD || use_insn->op == IR_SUB) {
ir_bitqueue_add(worklist, use);
@@ -1266,16 +1336,17 @@ void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir
}
}
-static ir_ref ir_iter_find_cse1(ir_ctx *ctx, uint32_t optx, ir_ref op1)
+static ir_ref ir_iter_find_cse1(const ir_ctx *ctx, uint32_t optx, ir_ref op1)
{
IR_ASSERT(!IR_IS_CONST_REF(op1));
- ir_use_list *use_list = &ctx->use_lists[op1];
- ir_ref *p, n = use_list->count;
+ const ir_use_list *use_list = &ctx->use_lists[op1];
+ const ir_ref *p;
+ ir_ref n = use_list->count;
for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
ir_ref use = *p;
- ir_insn *use_insn = &ctx->ir_base[use];
+ const ir_insn *use_insn = &ctx->ir_base[use];
if (use_insn->optx == optx) {
IR_ASSERT(use_insn->op1 == op1);
@@ -1285,12 +1356,13 @@ static ir_ref ir_iter_find_cse1(ir_ctx *ctx, uint32_t optx, ir_ref op1)
return IR_UNUSED;
}
-static ir_ref ir_iter_find_cse(ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist)
+static ir_ref ir_iter_find_cse(const ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist)
{
uint32_t n = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
- ir_use_list *use_list = NULL;
- ir_ref *p, use;
- ir_insn *use_insn;
+ const ir_use_list *use_list = NULL;
+ const ir_ref *p;
+ ir_ref use;
+ const ir_insn *use_insn;
if (n == 2) {
if (!IR_IS_CONST_REF(op1)) {
@@ -1373,7 +1445,8 @@ static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
uint32_t opt;
ir_ref op1, op2, op3, copy;
- ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;
+ const ir_insn *op1_insn, *op2_insn, *op3_insn;
+ ir_insn *insn;
insn = &ctx->ir_base[ref];
opt = insn->opt;
@@ -1408,9 +1481,6 @@ static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
|| insn->op2 != ctx->fold_insn.op2
|| insn->op3 != ctx->fold_insn.op3) {
- ir_use_list *use_list;
- ir_ref n, j, *p, use;
-
insn->optx = ctx->fold_insn.opt;
IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK]));
insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
@@ -1442,12 +1512,7 @@ static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
insn->op2 = ctx->fold_insn.op2;
insn->op3 = ctx->fold_insn.op3;
- use_list = &ctx->use_lists[ref];
- n = use_list->count;
- for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
- use = *p;
- ir_bitqueue_add(worklist, use);
- }
+ ir_iter_add_uses(ctx, ref, worklist);
}
break;
case IR_FOLD_DO_COPY:
@@ -1464,9 +1529,9 @@ static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
}
}
-static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref)
+static bool ir_may_promote_d2f(const ir_ctx *ctx, ir_ref ref)
{
- ir_insn *insn = &ctx->ir_base[ref];
+ const ir_insn *insn = &ctx->ir_base[ref];
IR_ASSERT(insn->type == IR_DOUBLE);
if (IR_IS_CONST_REF(ref)) {
@@ -1497,9 +1562,9 @@ static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref)
return 0;
}
-static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref)
+static bool ir_may_promote_f2d(const ir_ctx *ctx, ir_ref ref)
{
- ir_insn *insn = &ctx->ir_base[ref];
+ const ir_insn *insn = &ctx->ir_base[ref];
IR_ASSERT(insn->type == IR_FLOAT);
if (IR_IS_CONST_REF(ref)) {
@@ -1668,10 +1733,11 @@ static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *w
return ref;
}
-static bool ir_may_promote_trunc(ir_ctx *ctx, ir_type type, ir_ref ref)
+static bool ir_may_promote_trunc(const ir_ctx *ctx, ir_type type, ir_ref ref)
{
- ir_insn *insn = &ctx->ir_base[ref];
- ir_ref *p, n, input;
+ const ir_insn *insn = &ctx->ir_base[ref];
+ const ir_ref *p;
+ ir_ref n, input;
if (IR_IS_CONST_REF(ref)) {
return !IR_IS_SYM_CONST(insn->op);
@@ -1777,6 +1843,7 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
}
}
insn->type = type;
+ ir_iter_add_uses(ctx, ref, worklist);
return ref;
}
@@ -1857,7 +1924,7 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
return ref;
}
-static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type type)
+static ir_ref ir_ext_const(ir_ctx *ctx, const ir_insn *val_insn, ir_op op, ir_type type)
{
ir_val new_val;
@@ -1921,10 +1988,11 @@ static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op,
return ref;
}
-static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val, ir_ref loop)
+static uint32_t _ir_estimated_control(const ir_ctx *ctx, ir_ref val, ir_ref loop)
{
- ir_insn *insn;
- ir_ref n, *p, input, result, ctrl;
+ const ir_insn *insn;
+ const ir_ref *p;
+ ir_ref n, input, result, ctrl;
if (IR_IS_CONST_REF(val)) {
return 1; /* IR_START */
@@ -1955,18 +2023,18 @@ static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val, ir_ref loop)
return result;
}
-static bool ir_is_loop_invariant(ir_ctx *ctx, ir_ref ref, ir_ref loop)
+static bool ir_is_loop_invariant(const ir_ctx *ctx, ir_ref ref, ir_ref loop)
{
ref = _ir_estimated_control(ctx, ref, loop);
return ref < loop; // TODO: check dominance instead of order
}
-static bool ir_is_cheaper_ext(ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
+static bool ir_is_cheaper_ext(const ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
{
if (IR_IS_CONST_REF(ref)) {
return 1;
} else {
- ir_insn *insn = &ctx->ir_base[ref];
+ const ir_insn *insn = &ctx->ir_base[ref];
if (insn->op == IR_LOAD) {
if (ir_is_loop_invariant(ctx, ref, loop)) {
@@ -1982,7 +2050,7 @@ static bool ir_is_cheaper_ext(ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_r
for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
use = *p;
if (use != ext_ref) {
- ir_insn *use_insn = &ctx->ir_base[use];
+ const ir_insn *use_insn = &ctx->ir_base[use];
if (use_insn->op != op
&& (!(ir_op_flags[use_insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM))
@@ -2018,7 +2086,7 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
if (use == op_ref || use == ext_ref) {
continue;
} else {
- ir_insn *use_insn = &ctx->ir_base[use];
+ const ir_insn *use_insn = &ctx->ir_base[use];
if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
if (use_insn->op1 == phi_ref) {
@@ -2057,7 +2125,7 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
if (use == phi_ref || use == ext_ref) {
continue;
} else {
- ir_insn *use_insn = &ctx->ir_base[use];
+ const ir_insn *use_insn = &ctx->ir_base[use];
if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
if (use_insn->op1 == phi_ref) {
@@ -2194,7 +2262,7 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
}
static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
- {
+{
ir_ref ref = insn->op1;
/* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
@@ -2445,7 +2513,7 @@ static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn,
}
}
-static bool ir_is_zero(ir_ctx *ctx, ir_ref ref)
+static bool ir_is_zero(const ir_ctx *ctx, ir_ref ref)
{
return IR_IS_CONST_REF(ref)
&& !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
@@ -2470,7 +2538,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
ir_ref root_ref = start1->op1;
ir_insn *root = &ctx->ir_base[root_ref];
- if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2) && ctx->use_lists[root->op2].count == 1) {
+ if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2)) {
ir_ref cond_ref = root->op2;
ir_insn *cond = &ctx->ir_base[cond_ref];
ir_type type = insn->type;
@@ -2550,7 +2618,11 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
ir_use_list_remove_all(ctx, insn->op2, cond_ref);
}
- MAKE_NOP(cond); CLEAR_USES(cond_ref);
+ if (ctx->use_lists[cond_ref].count == 1) {
+ MAKE_NOP(cond); CLEAR_USES(cond_ref);
+ } else {
+ ir_use_list_remove_one(ctx, cond_ref, root_ref);
+ }
MAKE_NOP(root); CLEAR_USES(root_ref);
MAKE_NOP(start1); CLEAR_USES(start1_ref);
MAKE_NOP(start2); CLEAR_USES(start2_ref);
@@ -2636,7 +2708,11 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
ir_use_list_remove_all(ctx, insn->op1, cond_ref);
}
- MAKE_NOP(cond); CLEAR_USES(cond_ref);
+ if (ctx->use_lists[cond_ref].count == 1) {
+ MAKE_NOP(cond); CLEAR_USES(cond_ref);
+ } else {
+ ir_use_list_remove_one(ctx, cond_ref, root_ref);
+ }
MAKE_NOP(root); CLEAR_USES(root_ref);
MAKE_NOP(start1); CLEAR_USES(start1_ref);
MAKE_NOP(start2); CLEAR_USES(start2_ref);
@@ -2650,8 +2726,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
}
return 1;
-#if 0
- } else {
+ } else if (cond->op != IR_OVERFLOW && insn->op2 <= cond_ref && insn->op3 <= cond_ref) {
/* COND
*
* prev prev
@@ -2705,12 +2780,12 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
MAKE_NOP(end2); CLEAR_USES(end2_ref);
MAKE_NOP(merge); CLEAR_USES(merge_ref);
+ ir_bitqueue_add(worklist, ref);
if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
ir_bitqueue_add(worklist, next->op1);
}
return 1;
-#endif
}
}
}
@@ -2719,7 +2794,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
return 0;
}
-static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2)
+static bool ir_cmp_is_true(ir_op op, const ir_insn *op1, const ir_insn *op2)
{
IR_ASSERT(op1->type == op2->type);
if (IR_IS_TYPE_INT(op1->type)) {
@@ -3246,7 +3321,7 @@ static void ir_iter_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge
}
}
-static ir_ref ir_find_ext_use(ir_ctx *ctx, ir_ref ref)
+static ir_ref ir_find_ext_use(const ir_ctx *ctx, ir_ref ref)
{
ir_use_list *use_list = &ctx->use_lists[ref];
ir_ref *p, n, use;
@@ -3628,6 +3703,7 @@ void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
insn->op1 = val;
insn->op2 = IR_UNUSED;
ir_bitqueue_add(worklist, i);
+ ir_iter_add_uses(ctx, i, worklist);
}
}
} else if (insn->op == IR_STORE) {
@@ -3677,11 +3753,11 @@ void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
int ir_sccp(ir_ctx *ctx)
{
ir_bitqueue sccp_worklist, iter_worklist;
- ir_insn *_values;
+ ir_sccp_val *_values;
ir_bitqueue_init(&iter_worklist, ctx->insns_count);
ir_bitqueue_init(&sccp_worklist, ctx->insns_count);
- _values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));
+ _values = ir_mem_calloc(ctx->insns_count, sizeof(ir_sccp_val));
ctx->flags2 |= IR_OPT_IN_SCCP;
ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist);
diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc
index 9072b0dd591..9b369fadbcc 100644
--- a/ext/opcache/jit/ir/ir_x86.dasc
+++ b/ext/opcache/jit/ir/ir_x86.dasc
@@ -1167,6 +1167,7 @@ const ir_call_conv_dsc ir_call_conv_x86_fastcall = {
_(CMP_AND_BRANCH_FP) \
_(TEST_AND_BRANCH_INT) \
_(JCC_INT) \
+ _(COND_TEST_INT) \
_(COND_CMP_INT) \
_(COND_CMP_FP) \
_(GUARD_CMP_INT) \
@@ -1405,6 +1406,7 @@ op2_const:
}
IR_FALLTHROUGH;
case IR_COND_CMP_INT:
+ case IR_COND_TEST_INT:
insn = &ctx->ir_base[ref];
if (IR_IS_TYPE_INT(insn->type)) {
if (IR_IS_CONST_REF(insn->op3) || ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
@@ -2125,6 +2127,34 @@ static uint32_t ir_match_builtin_call(ir_ctx *ctx, const ir_insn *func)
return 0;
}
+static bool all_usages_are_fusable(ir_ctx *ctx, ir_ref ref)
+{
+ ir_insn *insn = &ctx->ir_base[ref];
+
+ if (insn->op >= IR_EQ && insn->op <= IR_UNORDERED) {
+ ir_use_list *use_list = &ctx->use_lists[ref];
+ ir_ref n = use_list->count;
+
+ if (n > 0) {
+ ir_ref *p = ctx->use_edges + use_list->refs;
+
+ do {
+ insn = &ctx->ir_base[*p];
+ if (insn->op != IR_IF
+ && insn->op != IR_GUARD
+ && insn->op != IR_GUARD_NOT
+ && (insn->op != IR_COND || insn->op2 == ref || insn->op3 == ref)) {
+ return 0;
+ }
+ p++;
+ n--;
+ } while (n);
+ return 1;
+ }
+ }
+ return 0;
+}
+
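/*
 * Illustrative example only (not part of this commit): with the relaxed checks
 * below, one compare may be fused into several consumers, e.g.
 *
 *   c = CMP(x, y)
 *   GUARD(c, ...)   ; matched as GUARD_CMP_INT, the CMP is re-emitted here
 *   IF(c)           ; matched as CMP_AND_BRANCH_INT, the CMP is re-emitted again
 *
 * while load fusion into the compare itself stays limited to the single-use
 * case (the ctx->use_lists[...].count == 1 guards added below).
 */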
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
{
ir_insn *op2_insn;
@@ -2877,7 +2907,7 @@ store_int:
return IR_RETURN_FP;
}
case IR_IF:
- if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+ if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
@@ -2889,7 +2919,9 @@ store_int:
if (op1_insn->op == IR_AND && ctx->use_lists[op2_insn->op1].count == 1) {
/* v = AND(_, _); c = CMP(v, 0) ... IF(c) => SKIP_TEST; SKIP ... TEST_AND_BRANCH */
- ir_match_fuse_load_test_int(ctx, op1_insn, ref);
+ if (ctx->use_lists[insn->op2].count == 1) {
+ ir_match_fuse_load_test_int(ctx, op1_insn, ref);
+ }
ctx->rules[op2_insn->op1] = IR_FUSED | IR_TEST_INT;
ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_NOP;
return IR_TEST_AND_BRANCH_INT;
@@ -2901,10 +2933,14 @@ store_int:
op2_insn->op == IR_LT || op2_insn->op == IR_GE)))) {
/* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */
if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
- ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
+ if (ctx->use_lists[insn->op2].count == 1) {
+ ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
+ }
ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
} else {
- ir_match_fuse_load(ctx, op1_insn->op2, ref);
+ if (ctx->use_lists[insn->op2].count == 1) {
+ ir_match_fuse_load(ctx, op1_insn->op2, ref);
+ }
ctx->rules[op2_insn->op1] = IR_BINOP_INT;
}
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
@@ -2912,12 +2948,16 @@ store_int:
}
}
/* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */
- ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
+ if (ctx->use_lists[insn->op2].count == 1) {
+ ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
+ }
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
return IR_CMP_AND_BRANCH_INT;
} else {
/* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */
- ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
+ if (ctx->use_lists[insn->op2].count == 1) {
+ ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
+ }
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
return IR_CMP_AND_BRANCH_FP;
}
@@ -3005,31 +3045,43 @@ store_int:
break;
}
case IR_COND:
- if (!IR_IS_CONST_REF(insn->op1) && ctx->use_lists[insn->op1].count == 1) {
+ if (!IR_IS_CONST_REF(insn->op1) && (ctx->use_lists[insn->op1].count == 1 || all_usages_are_fusable(ctx, insn->op1))) {
ir_insn *op1_insn = &ctx->ir_base[insn->op1];
if (op1_insn->op >= IR_EQ && op1_insn->op <= IR_UNORDERED) {
if (IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op1].type)) {
- ir_match_fuse_load_cmp_int(ctx, op1_insn, ref);
+ if (ctx->use_lists[insn->op1].count == 1) {
+ ir_match_fuse_load_cmp_int(ctx, op1_insn, ref);
+ }
ctx->rules[insn->op1] = IR_FUSED | IR_CMP_INT;
return IR_COND_CMP_INT;
} else {
- ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref);
+ if (ctx->use_lists[insn->op1].count == 1) {
+ ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref);
+ }
ctx->rules[insn->op1] = IR_FUSED | IR_CMP_FP;
return IR_COND_CMP_FP;
}
+ } else if (op1_insn->op == IR_AND) {
+ /* c = AND(_, _) ... COND(c, _, _) => SKIP_TEST ... COND_TEST */
+ ir_match_fuse_load_test_int(ctx, op1_insn, ref);
+ ctx->rules[insn->op1] = IR_FUSED | IR_TEST_INT;
+ return IR_COND_TEST_INT;
}
}
+ if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
+ ir_match_fuse_load(ctx, insn->op1, ref);
+ }
return IR_COND;
case IR_GUARD:
case IR_GUARD_NOT:
- if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+ if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
op2_insn = &ctx->ir_base[insn->op2];
- if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED
+ if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
// TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
- && (insn->op2 == ref - 1 ||
- (insn->op2 == ctx->prev_ref[ref] - 1
- && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
+//??? && (insn->op2 == ref - 1 ||
+//??? (insn->op2 == ctx->prev_ref[ref] - 1
+//??? && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
if (IR_IS_CONST_REF(op2_insn->op2)
&& !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op)
@@ -3043,10 +3095,14 @@ store_int:
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE ||
op2_insn->op == IR_LT || op2_insn->op == IR_GE))) {
if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
- ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
+ if (ctx->use_lists[insn->op2].count == 1) {
+ ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
+ }
ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
} else {
- ir_match_fuse_load(ctx, op1_insn->op2, ref);
+ if (ctx->use_lists[insn->op2].count == 1) {
+ ir_match_fuse_load(ctx, op1_insn->op2, ref);
+ }
ctx->rules[op2_insn->op1] = IR_BINOP_INT;
}
/* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... GUARD_JCC */
@@ -3054,6 +3110,7 @@ store_int:
return IR_GUARD_JCC_INT;
}
} else if ((ctx->flags & IR_OPT_CODEGEN)
+ && ctx->use_lists[insn->op2].count == 1
&& op2_insn->op1 == insn->op2 - 2 /* before previous instruction */
&& ir_in_same_block(ctx, op2_insn->op1)
&& ctx->use_lists[op2_insn->op1].count == 2) {
@@ -3101,12 +3158,16 @@ store_int:
}
}
/* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */
- ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
+ if (ctx->use_lists[insn->op2].count == 1) {
+ ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
+ }
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
return IR_GUARD_CMP_INT;
} else {
/* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */
- ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
+ if (ctx->use_lists[insn->op2].count == 1) {
+ ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
+ }
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
return IR_GUARD_CMP_FP;
}
@@ -6051,8 +6112,15 @@ static void ir_emit_cmp_int_common2(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_ins
ir_type type = ctx->ir_base[cmp_insn->op1].type;
ir_ref op1 = cmp_insn->op1;
ir_ref op2 = cmp_insn->op2;
- ir_reg op1_reg = ctx->regs[ref][1];
- ir_reg op2_reg = ctx->regs[ref][2];
+ ir_reg op1_reg, op2_reg;
+
+ if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) {
+ op1_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 1);
+ op2_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2);
+ } else {
+ op1_reg = ctx->regs[ref][1];
+ op2_reg = ctx->regs[ref][2];
+ }
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
@@ -6218,8 +6286,15 @@ static void ir_emit_test_int_common(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_op
ir_type type = binop_insn->type;
ir_ref op1 = binop_insn->op1;
ir_ref op2 = binop_insn->op2;
- ir_reg op1_reg = ctx->regs[ref][1];
- ir_reg op2_reg = ctx->regs[ref][2];
+ ir_reg op1_reg, op2_reg;
+
+ if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) {
+ op1_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 1);
+ op2_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2);
+ } else {
+ op1_reg = ctx->regs[ref][1];
+ op2_reg = ctx->regs[ref][2];
+ }
IR_ASSERT(binop_insn->op == IR_AND);
if (op1_reg != IR_REG_NONE) {
@@ -6329,8 +6404,13 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_
op1 = cmp_insn->op1;
op2 = cmp_insn->op2;
- op1_reg = ctx->regs[cmp_ref][1];
- op2_reg = ctx->regs[cmp_ref][2];
+ if (UNEXPECTED(ctx->rules[cmp_ref] & IR_FUSED_REG)) {
+ op1_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 1);
+ op2_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 2);
+ } else {
+ op1_reg = ctx->regs[cmp_ref][1];
+ op2_reg = ctx->regs[cmp_ref][2];
+ }
if (op1_reg == IR_REG_NONE && op2_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) {
ir_reg tmp_reg;
@@ -6603,8 +6683,15 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i
ir_type type = ctx->ir_base[cmp_insn->op1].type;
ir_ref op1 = cmp_insn->op1;
ir_ref op2 = cmp_insn->op2;
- ir_reg op1_reg = ctx->regs[insn->op2][1];
- ir_reg op2_reg = ctx->regs[insn->op2][2];
+ ir_reg op1_reg, op2_reg;
+
+ if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
+ op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
+ op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
+ } else {
+ op1_reg = ctx->regs[insn->op2][1];
+ op2_reg = ctx->regs[insn->op2][2];
+ }
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
@@ -6735,37 +6822,24 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
IR_ASSERT(def_reg != IR_REG_NONE);
- if (op2 != op3) {
- if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
- op2_reg = IR_REG_NUM(op2_reg);
- ir_emit_load(ctx, type, op2_reg, op2);
- if (op1 == op2) {
- op1_reg = op2_reg;
- }
- }
- if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
- op3_reg = IR_REG_NUM(op3_reg);
- ir_emit_load(ctx, type, op3_reg, op3);
- if (op1 == op2) {
- op1_reg = op3_reg;
- }
- }
- } else if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
+ if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, type, op2_reg, op2);
- op3_reg = op2_reg;
if (op1 == op2) {
op1_reg = op2_reg;
}
- } else if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
+ if (op3 == op2) {
+ op3_reg = op2_reg;
+ }
+ }
+ if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
op3_reg = IR_REG_NUM(op3_reg);
ir_emit_load(ctx, type, op3_reg, op3);
- op2_reg = op3_reg;
if (op1 == op3) {
- op1_reg = op3_reg;
+ op1_reg = op2_reg;
}
}
- if (op1_reg != IR_REG_NONE && op1 != op2 && op1 != op3 && IR_REG_SPILLED(op1_reg)) {
+ if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
ir_emit_load(ctx, op1_type, op1_reg, op1);
}
@@ -6774,7 +6848,13 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
if (op1_reg != IR_REG_NONE) {
| ASM_REG_REG_OP test, op1_type, op1_reg, op1_reg
} else {
- ir_mem mem = ir_ref_spill_slot(ctx, op1);
+ ir_mem mem;
+
+ if (ir_rule(ctx, insn->op1) & IR_FUSED) {
+ mem = ir_fuse_load(ctx, def, insn->op1);
+ } else {
+ mem = ir_ref_spill_slot(ctx, insn->op1);
+ }
| ASM_MEM_IMM_OP cmp, op1_type, mem, 0
}
@@ -6864,6 +6944,115 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
+static void ir_emit_cond_test_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+ ir_backend_data *data = ctx->data;
+ dasm_State **Dst = &data->dasm_state;
+ ir_type type = insn->type;
+ ir_ref op2 = insn->op2;
+ ir_ref op3 = insn->op3;
+ ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+ ir_reg op2_reg = ctx->regs[def][2];
+ ir_reg op3_reg = ctx->regs[def][3];
+
+ if (op2 != op3) {
+ if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
+ op2_reg = IR_REG_NUM(op2_reg);
+ ir_emit_load(ctx, type, op2_reg, op2);
+ }
+ if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
+ op3_reg = IR_REG_NUM(op3_reg);
+ ir_emit_load(ctx, type, op3_reg, op3);
+ }
+ } else if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
+ op2_reg = IR_REG_NUM(op2_reg);
+ ir_emit_load(ctx, type, op2_reg, op2);
+ op3_reg = op2_reg;
+ } else if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
+ op3_reg = IR_REG_NUM(op3_reg);
+ ir_emit_load(ctx, type, op3_reg, op3);
+ op2_reg = op3_reg;
+ }
+
+ ir_emit_test_int_common(ctx, def, insn->op1, IR_NE);
+
+ if (IR_IS_TYPE_INT(type)) {
+ bool eq = 0;
+
+ if (op3_reg != IR_REG_NONE) {
+ if (op3_reg == def_reg) {
+ IR_ASSERT(op2_reg != IR_REG_NONE);
+ op3_reg = op2_reg;
+ eq = 1; // reverse
+ } else {
+ if (op2_reg != IR_REG_NONE) {
+ if (def_reg != op2_reg) {
+// if (IR_IS_TYPE_INT(type)) {
+ ir_emit_mov(ctx, type, def_reg, op2_reg);
+// } else {
+// ir_emit_fp_mov(ctx, type, def_reg, op2_reg);
+// }
+ }
+ } else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)) {
+ /* prevent "xor" and flags clobbering */
+ ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op2].val.i64);
+ } else {
+ ir_emit_load_ex(ctx, type, def_reg, op2, def);
+ }
+ }
+ } else {
+ IR_ASSERT(op2_reg != IR_REG_NONE && op2_reg != def_reg);
+ if (IR_IS_CONST_REF(op3) && !IR_IS_SYM_CONST(ctx->ir_base[op3].op)) {
+ /* prevent "xor" and flags clobbering */
+ ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op3].val.i64);
+ } else {
+ ir_emit_load_ex(ctx, type, def_reg, op3, def);
+ }
+ op3_reg = op2_reg;
+ eq = 1; // reverse
+ }
+
+ if (eq) {
+ | ASM_REG_REG_OP2 cmovne, type, def_reg, op3_reg
+ } else {
+ | ASM_REG_REG_OP2 cmove, type, def_reg, op3_reg
+ }
+ } else {
+ | jne >2
+ |1:
+
+ if (op2_reg != IR_REG_NONE) {
+ if (def_reg != op2_reg) {
+ if (IR_IS_TYPE_INT(type)) {
+ ir_emit_mov(ctx, type, def_reg, op2_reg);
+ } else {
+ ir_emit_fp_mov(ctx, type, def_reg, op2_reg);
+ }
+ }
+ } else {
+ ir_emit_load_ex(ctx, type, def_reg, op2, def);
+ }
+ | jmp >3
+ |2:
+ if (op3_reg != IR_REG_NONE) {
+ if (def_reg != op3_reg) {
+ if (IR_IS_TYPE_INT(type)) {
+ ir_emit_mov(ctx, type, def_reg, op3_reg);
+ } else {
+ ir_emit_fp_mov(ctx, type, def_reg, op3_reg);
+ }
+ }
+ } else {
+ ir_emit_load_ex(ctx, type, def_reg, op3, def);
+ }
+ |3:
+ }
+
+ if (IR_REG_SPILLED(ctx->regs[def][0])) {
+ ir_emit_store(ctx, type, def, def_reg);
+ }
+}
+
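/*
 * Illustrative example only (not part of this commit): for an integer COND
 * whose condition is an AND, the new COND_TEST_INT rule lets the backend
 * select roughly
 *
 *   v = COND(AND(a, b), x, y)   =>   test  a, b
 *                                    mov   v, x      ; mov preserves the flags
 *                                    cmove v, y
 *
 * instead of materializing the AND result and comparing it against zero.
 */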
static void ir_emit_cond_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
@@ -10454,9 +10643,16 @@ static bool ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
ir_type type = ctx->ir_base[cmp_insn->op1].type;
ir_ref op1 = cmp_insn->op1;
ir_ref op2 = cmp_insn->op2;
- ir_reg op1_reg = ctx->regs[insn->op2][1];
- ir_reg op2_reg = ctx->regs[insn->op2][2];
void *addr;
+ ir_reg op1_reg, op2_reg;
+
+ if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
+ op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
+ op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
+ } else {
+ op1_reg = ctx->regs[insn->op2][1];
+ op2_reg = ctx->regs[insn->op2][2];
+ }
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
@@ -11714,6 +11910,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
case IR_COND:
ir_emit_cond(ctx, i, insn);
break;
+ case IR_COND_TEST_INT:
+ ir_emit_cond_test_int(ctx, i, insn);
+ break;
case IR_COND_CMP_INT:
ir_emit_cond_cmp_int(ctx, i, insn);
break;
@@ -12180,7 +12379,7 @@ const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_pe
return entry;
}
-bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr)
+bool ir_needs_thunk(const ir_code_buffer *code_buffer, void *addr)
{
return sizeof(void*) == 8 && !IR_MAY_USE_32BIT_ADDR(code_buffer, addr);
}