Commit 8fa692b0898 for php.net
commit 8fa692b0898341dfc47667c25d8e648c5d611129
Author: Dmitry Stogov <dmitry@php.net>
Date: Tue May 12 13:57:41 2026 +0300
Update IR (#22019)
IR commit: d2439a528cfb0c4b1607a7771f7889bbe8d35814
diff --git a/ext/opcache/jit/ir/.gitignore b/ext/opcache/jit/ir/.gitignore
index 7a37a4fd059..367a68671bc 100644
--- a/ext/opcache/jit/ir/.gitignore
+++ b/ext/opcache/jit/ir/.gitignore
@@ -20,3 +20,6 @@ tests/**/*.log
win32/vcpkg
win32/build_*
+
+fuzz/build/
+fuzz/corpus/
diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c
index a02332e0d39..f6a0cb60af9 100644
--- a/ext/opcache/jit/ir/ir.c
+++ b/ext/opcache/jit/ir/ir.c
@@ -161,6 +161,8 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted
case IR_CHAR:
if (insn->val.c == '\\') {
fprintf(f, "'\\\\'");
+ } else if (insn->val.c == '\'') {
+ fprintf(f, "'\\\''");
} else if (insn->val.c >= ' ') {
fprintf(f, "'%c'", insn->val.c);
} else if (insn->val.c == '\t') {
@@ -283,6 +285,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted
#define ir_op_kind_src IR_OPND_CONTROL
#define ir_op_kind_reg IR_OPND_CONTROL_DEP
#define ir_op_kind_ret IR_OPND_CONTROL_REF
+#define ir_op_kind_grd IR_OPND_CONTROL_GUARD
#define ir_op_kind_str IR_OPND_STR
#define ir_op_kind_num IR_OPND_NUM
#define ir_op_kind_fld IR_OPND_STR
@@ -1843,7 +1846,7 @@ int ir_mem_unprotect(void *ptr, size_t size)
int ir_mem_flush(void *ptr, size_t size)
{
- return 1;
+ return FlushInstructionCache(GetCurrentProcess(), ptr, size) == TRUE ? 1 : 0;
}
#else
@@ -2168,7 +2171,10 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(const ir_ctx *ctx, ir_ref ref, i
if (!(proto->flags & (IR_CONST_FUNC|IR_PURE_FUNC))) {
break;
}
- } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_VSTORE) {
+ } else if (insn->op == IR_MERGE
+ || insn->op == IR_LOOP_BEGIN
+ || insn->op == IR_VSTORE
+ || (insn->op == IR_BEGIN && insn->op2)) {
return IR_UNUSED;
}
ref = insn->op1;
@@ -2233,7 +2239,10 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(const ir_ctx *ctx, ir_ref ref,
if (!(proto->flags & (IR_CONST_FUNC|IR_PURE_FUNC))) {
break;
}
- } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_STORE) {
+ } else if (insn->op == IR_MERGE
+ || insn->op == IR_LOOP_BEGIN
+ || insn->op == IR_STORE
+ || (insn->op == IR_BEGIN && insn->op2)) {
break;
}
ref = insn->op1;
@@ -2326,7 +2335,15 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_store_i(ir_ctx *ctx, ir_ref ref, ir_ref
}
} else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
guarded = 1;
- } else if (insn->op >= IR_START || insn->op == IR_CALL) {
+ } else if (insn->op >= IR_START) {
+ if (insn->op == IR_BEGIN && insn->op1 && !insn->op2) {
+ /* skip END */
+ ref = insn->op1;
+ insn = &ctx->ir_base[ref];
+ } else {
+ break;
+ }
+ } else if (insn->op == IR_CALL) {
break;
}
next = ref;
@@ -2407,7 +2424,15 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vstore_i(ir_ctx *ctx, ir_ref ref, ir_re
}
} else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
guarded = 1;
- } else if (insn->op >= IR_START || insn->op == IR_CALL || insn->op == IR_LOAD || insn->op == IR_STORE) {
+ } else if (insn->op >= IR_START) {
+ if (insn->op == IR_BEGIN && insn->op1 && !insn->op2) {
+ /* skip END */
+ ref = insn->op1;
+ insn = &ctx->ir_base[ref];
+ } else {
+ break;
+ }
+ } else if (insn->op == IR_CALL || insn->op == IR_LOAD || insn->op == IR_STORE) {
break;
}
next = ref;
@@ -2422,6 +2447,37 @@ ir_ref ir_find_aliasing_vstore(ir_ctx *ctx, ir_ref ref, ir_ref var, ir_ref val)
}
/* IR Construction API */
+static ir_ref ir_last_guard(ir_ctx *ctx)
+{
+ ir_ref ref;
+ ir_insn *insn;
+
+ IR_ASSERT(ctx->control);
+ ref = ctx->control;
+ while (1) {
+ insn = &ctx->ir_base[ref];
+ if (IR_IS_BB_START(insn->op) || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
+ if (insn->op == IR_START) ref = IR_UNUSED;
+ break;
+ }
+ ref = insn->op1;
+ }
+ return ref;
+}
+
+ir_ref _ir_DIV(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2)
+{
+ ir_ref guard = (IR_IS_TYPE_FP(type) || (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 != 0)) ?
+ IR_UNUSED : ir_last_guard(ctx);
+ return ir_fold3(ctx, IR_OPT(IR_DIV, type), op1, op2, guard);
+}
+
+ir_ref _ir_MOD(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2)
+{
+ ir_ref guard = (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 != 0) ?
+ IR_UNUSED : ir_last_guard(ctx);
+ return ir_fold3(ctx, IR_OPT(IR_MOD, type), op1, op2, guard);
+}
ir_ref _ir_PARAM(ir_ctx *ctx, ir_type type, const char* name, ir_ref num)
{
diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h
index b0a96b511bd..01db4ecf6b1 100644
--- a/ext/opcache/jit/ir/ir.h
+++ b/ext/opcache/jit/ir/ir.h
@@ -210,6 +210,7 @@ typedef enum _ir_type {
* arg - argument reference CALL/TAILCALL/CARG->CARG
* src - reference to a previous control region (IF, IF_TRUE, IF_FALSE, MERGE, LOOP_BEGIN, LOOP_END, RETURN)
* reg - data-control dependency on region (PHI, VAR, PARAM)
+ * grd - optional data-control dependency guard (DIV, MOD)
* ret - reference to a previous RETURN instruction (RETURN)
* str - string: variable/argument name (VAR, PARAM, CALL, TAILCALL)
* num - number: argument number (PARAM)
@@ -265,8 +266,8 @@ typedef enum _ir_type {
_(ADD, d2C, def, def, ___) /* addition */ \
_(SUB, d2, def, def, ___) /* subtraction (must be ADD+1) */ \
_(MUL, d2C, def, def, ___) /* multiplication */ \
- _(DIV, d2, def, def, ___) /* division */ \
- _(MOD, d2, def, def, ___) /* modulo */ \
+ _(DIV, d3, def, def, grd) /* division */ \
+ _(MOD, d3, def, def, grd) /* modulo */ \
_(NEG, d1, def, ___, ___) /* change sign */ \
_(ABS, d1, def, ___, ___) /* absolute value */ \
/* (LDEXP, MIN, MAX, FPMATH) */ \
@@ -383,6 +384,14 @@ typedef enum _ir_type {
_(RETURN, T2X1, src, def, ret) /* function return */ \
_(UNREACHABLE, T1X2, src, ___, ret) /* unreachable (tailcall, etc) */ \
\
+ /* inline assembler */ \
+ _(ASM, xN, src, def, def) /* GCC inline assembler */ \
+ /* op2 - asm template string */ \
+ /* op3 - asm constraint string */ \
+ /* opN - asm input argument */ \
+ _(ASM_OUT, x1, src, ___, ___) /* ASM data output projection */ \
+ _(ASM_GOTO, E1, src, ___, ___) /* ASM goto (bb end after ASM) */ \
+ \
/* deoptimization helper */ \
_(EXITCALL, x2, src, def, ___) /* save CPU regs and call op2 */ \
diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
index bdf6b027b9f..fc4bb84f1e0 100644
--- a/ext/opcache/jit/ir/ir_aarch64.dasc
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -402,6 +402,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
const ir_proto_t *proto;
const ir_call_conv_dsc *cc;
+ ir_ref next;
constraints->def_reg = IR_REG_NONE;
constraints->hints_count = 0;
@@ -562,11 +563,13 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n = 1;
}
- if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
- insn = &ctx->ir_base[insn->op2];
- if (IR_IS_SYM_CONST(insn->op) || !aarch64_may_encode_imm12(insn->val.u64)) {
- constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
- n++;
+ if (IR_IS_CONST_REF(insn->op2)) {
+ if (insn->op1 != insn->op2) {
+ insn = &ctx->ir_base[insn->op2];
+ if (IR_IS_SYM_CONST(insn->op) || !aarch64_may_encode_imm12(insn->val.u64)) {
+ constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
+ n++;
+ }
}
} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
@@ -751,6 +754,10 @@ get_arg_hints:
break;
case IR_SNAPSHOT:
flags = 0;
+ next = ir_next_control(ctx, ref);
+ if (ctx->ir_base[next].op == IR_GUARD || ctx->ir_base[next].op == IR_GUARD_NOT) {
+ flags = IR_EXTEND_INPUTS_TO_NEXT;
+ }
break;
case IR_VA_START:
flags = IR_OP2_MUST_BE_IN_REG;
@@ -1199,10 +1206,6 @@ binop_fp:
if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
- // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
-//??? && (insn->op2 == ref - 1 ||
-//??? (insn->op2 == ctx->prev_ref[ref] - 1
-//??? && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
return IR_GUARD_CMP_INT;
@@ -1265,6 +1268,12 @@ binop_fp:
return IR_FUSED | IR_ARGVAL;
case IR_NOP:
return IR_SKIPPED | IR_NOP;
+ case IR_ASM:
+ case IR_ASM_OUT:
+ case IR_ASM_GOTO:
+ fprintf(stderr, "ERROR: IR_ASM is not implemented yet\n");
+ exit(1);
+ return IR_SKIPPED | IR_NOP;
default:
break;
}
@@ -4996,7 +5005,8 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
| .addr &addr
- if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) {
+ if (ctx->ir_base[bb->start].op1 == def
+ && ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) {
bb->flags |= IR_BB_EMPTY;
}
continue;
diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h
index 084216a0634..9492945b136 100644
--- a/ext/opcache/jit/ir/ir_builder.h
+++ b/ext/opcache/jit/ir/ir_builder.h
@@ -118,31 +118,31 @@ extern "C" {
#define ir_MUL_D(_op1, _op2) ir_BINARY_OP_D(IR_MUL, (_op1), (_op2))
#define ir_MUL_F(_op1, _op2) ir_BINARY_OP_F(IR_MUL, (_op1), (_op2))
-#define ir_DIV(_type, _op1, _op2) ir_BINARY_OP(IR_DIV, (_type), (_op1), (_op2))
-#define ir_DIV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_DIV, (_op1), (_op2))
-#define ir_DIV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_DIV, (_op1), (_op2))
-#define ir_DIV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_DIV, (_op1), (_op2))
-#define ir_DIV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_DIV, (_op1), (_op2))
-#define ir_DIV_A(_op1, _op2) ir_BINARY_OP_A(IR_DIV, (_op1), (_op2))
-#define ir_DIV_C(_op1, _op2) ir_BINARY_OP_C(IR_DIV, (_op1), (_op2))
-#define ir_DIV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_DIV, (_op1), (_op2))
-#define ir_DIV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_DIV, (_op1), (_op2))
-#define ir_DIV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_DIV, (_op1), (_op2))
-#define ir_DIV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_DIV, (_op1), (_op2))
+#define ir_DIV(_type, _op1, _op2) _ir_DIV(_ir_CTX, (_type), (_op1), (_op2))
+#define ir_DIV_U8(_op1, _op2) ir_DIV(IR_U8, (_op1), (_op2))
+#define ir_DIV_U16(_op1, _op2) ir_DIV(IR_U16, (_op1), (_op2))
+#define ir_DIV_U32(_op1, _op2) ir_DIV(IR_U32, (_op1), (_op2))
+#define ir_DIV_U64(_op1, _op2) ir_DIV(IR_U64, (_op1), (_op2))
+#define ir_DIV_A(_op1, _op2) ir_DIV(IR_ADDR, (_op1), (_op2))
+#define ir_DIV_C(_op1, _op2) ir_DIV(IR_CHAR, (_op1), (_op2))
+#define ir_DIV_I8(_op1, _op2) ir_DIV(IR_I8, (_op1), (_op2))
+#define ir_DIV_I16(_op1, _op2) ir_DIV(IR_I16, (_op1), (_op2))
+#define ir_DIV_I32(_op1, _op2) ir_DIV(IR_I32, (_op1), (_op2))
+#define ir_DIV_I64(_op1, _op2) ir_DIV(IR_I64, (_op1), (_op2))
#define ir_DIV_D(_op1, _op2) ir_BINARY_OP_D(IR_DIV, (_op1), (_op2))
#define ir_DIV_F(_op1, _op2) ir_BINARY_OP_F(IR_DIV, (_op1), (_op2))
-#define ir_MOD(_type, _op1, _op2) ir_BINARY_OP(IR_MOD, (_type), (_op1), (_op2))
-#define ir_MOD_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MOD, (_op1), (_op2))
-#define ir_MOD_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MOD, (_op1), (_op2))
-#define ir_MOD_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MOD, (_op1), (_op2))
-#define ir_MOD_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MOD, (_op1), (_op2))
-#define ir_MOD_A(_op1, _op2) ir_BINARY_OP_A(IR_MOD, (_op1), (_op2))
-#define ir_MOD_C(_op1, _op2) ir_BINARY_OP_C(IR_MOD, (_op1), (_op2))
-#define ir_MOD_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MOD, (_op1), (_op2))
-#define ir_MOD_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MOD, (_op1), (_op2))
-#define ir_MOD_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MOD, (_op1), (_op2))
-#define ir_MOD_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MOD, (_op1), (_op2))
+#define ir_MOD(_type, _op1, _op2) _ir_MOD(_ir_CTX, (_type), (_op1), (_op2))
+#define ir_MOD_U8(_op1, _op2) ir_MOD(IR_U8, (_op1), (_op2))
+#define ir_MOD_U16(_op1, _op2) ir_MOD(IR_U16, (_op1), (_op2))
+#define ir_MOD_U32(_op1, _op2) ir_MOD(IR_U32, (_op1), (_op2))
+#define ir_MOD_U64(_op1, _op2) ir_MOD(IR_U64, (_op1), (_op2))
+#define ir_MOD_A(_op1, _op2) ir_MOD(IR_ADDR, (_op1), (_op2))
+#define ir_MOD_C(_op1, _op2) ir_MOD(IR_CHAR, (_op1), (_op2))
+#define ir_MOD_I8(_op1, _op2) ir_MOD(IR_I8, (_op1), (_op2))
+#define ir_MOD_I16(_op1, _op2) ir_MOD(IR_I16, (_op1), (_op2))
+#define ir_MOD_I32(_op1, _op2) ir_MOD(IR_I32, (_op1), (_op2))
+#define ir_MOD_I64(_op1, _op2) ir_MOD(IR_I64, (_op1), (_op2))
#define ir_NEG(_type, _op1) ir_UNARY_OP(IR_NEG, (_type), (_op1))
#define ir_NEG_C(_op1) ir_UNARY_OP_C(IR_NEG, (_op1))
@@ -633,6 +633,8 @@ extern "C" {
#define ir_MERGE_WITH_EMPTY_TRUE(_if) do {ir_ref end = ir_END(); ir_IF_TRUE(_if); ir_MERGE_2(end, ir_END());} while (0)
#define ir_MERGE_WITH_EMPTY_FALSE(_if) do {ir_ref end = ir_END(); ir_IF_FALSE(_if); ir_MERGE_2(end, ir_END());} while (0)
+ir_ref _ir_DIV(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2);
+ir_ref _ir_MOD(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2);
ir_ref _ir_ADD_OFFSET(ir_ctx *ctx, ir_ref addr, uintptr_t offset);
ir_ref _ir_PHI_2(ir_ctx *ctx, ir_type type, ir_ref src1, ir_ref src2);
ir_ref _ir_PHI_N(ir_ctx *ctx, ir_type type, ir_ref n, ir_ref *inputs);
diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c
index 40041004c56..92042ea8cbb 100644
--- a/ext/opcache/jit/ir/ir_cfg.c
+++ b/ext/opcache/jit/ir/ir_cfg.c
@@ -1502,6 +1502,23 @@ static bool ir_is_merged_loop_back_edge(ir_ctx *ctx, uint32_t hdr, uint32_t b)
}
#endif
+static bool ir_should_align_loop(ir_ctx *ctx, ir_chain *chains, uint32_t b, ir_block *bb)
+{
+ uint32_t n = bb->predecessors_count;
+ uint32_t *p = ctx->cfg_edges + bb->predecessors;
+
+ for (; n > 0; p++, n--) {
+ uint32_t pred = *p;
+ if (chains[pred].head) {
+ if (ir_chain_head(chains, pred) == b) return 1;
+ } else {
+ if (ir_should_align_loop(ctx, chains, b, &ctx->cfg_blocks[pred])) return 1;
+ }
+ }
+
+ return 0;
+}
+
static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
{
uint32_t max_edges_count = ctx->cfg_edges_count / 2;
@@ -1862,7 +1879,7 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
if (chains[b].head == b) {
bb = &ctx->cfg_blocks[b];
if (bb->loop_depth) {
- if ((bb->flags & IR_BB_LOOP_HEADER) || ir_chain_head(chains, bb->loop_header) == b) {
+ if (ir_should_align_loop(ctx, chains, b, bb)) {
bb->flags |= IR_BB_ALIGN_LOOP;
}
}
diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c
index ee951291b1b..e1be7f6544d 100644
--- a/ext/opcache/jit/ir/ir_check.c
+++ b/ext/opcache/jit/ir/ir_check.c
@@ -148,6 +148,12 @@ bool ir_check(const ir_ctx *ctx)
bool ok = 1;
ir_check_ctx check_ctx;
+ if (ctx->insns_count < 1 || ctx->ir_base[1].op != IR_START) {
+ fprintf(stderr, "ir_base[1].op invalid opcode (%d)\n",
+ (ctx->insns_count < 1) ? IR_NOP : ctx->ir_base[0].op);
+ ok = 0;
+ }
+
check_ctx.arena = NULL;
check_ctx.use_set = NULL;
check_ctx.input_set = NULL;
@@ -297,6 +303,14 @@ bool ir_check(const ir_ctx *ctx)
ok = 0;
}
break;
+ case IR_OPND_CONTROL_GUARD:
+ if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_BB_START)
+ && use_insn->op != IR_GUARD
+ && use_insn->op != IR_GUARD_NOT) {
+ fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be BB_START or GUARD\n", i, j, use);
+ ok = 0;
+ }
+ break;
default:
fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) of unsupported kind\n", i, j, use);
ok = 0;
@@ -306,6 +320,8 @@ bool ir_check(const ir_ctx *ctx)
/* pass (function returns void) */
} else if (insn->op == IR_BEGIN && j == 1) {
/* pass (start of unreachable basic block) */
+ } else if (IR_OPND_KIND(flags, j) == IR_OPND_CONTROL_GUARD) {
+ /* reference to control guard is optional */
} else if (IR_OPND_KIND(flags, j) != IR_OPND_CONTROL_REF
&& (insn->op != IR_SNAPSHOT || j == 1)) {
fprintf(stderr, "ir_base[%d].ops[%d] missing reference (%d)\n", i, j, use);
@@ -413,6 +429,7 @@ bool ir_check(const ir_ctx *ctx)
}
break;
case IR_IGOTO:
+ case IR_ASM_GOTO:
break;
default:
/* skip data references */
@@ -464,6 +481,10 @@ bool ir_check(const ir_ctx *ctx)
// if (!ok) {
// ir_dump_codegen(ctx, stderr);
// }
+
+#ifndef IR_CHECK_NO_ABORT
IR_ASSERT(ok);
+#endif
+
return ok;
}
diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c
index 037003f021a..3b34294d1c7 100644
--- a/ext/opcache/jit/ir/ir_dump.c
+++ b/ext/opcache/jit/ir/ir_dump.c
@@ -142,6 +142,7 @@ void ir_dump_dot(const ir_ctx *ctx, const char *name, const char *comments, FILE
break;
case IR_OPND_CONTROL_DEP:
case IR_OPND_CONTROL_REF:
+ case IR_OPND_CONTROL_GUARD:
fprintf(f, "\tn%d -> n%d [style=dashed,dir=back,weight=%d];\n", ref, i, REF_WEIGHT);
break;
case IR_OPND_LABEL_REF:
@@ -650,6 +651,7 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f)
case IR_OPND_CONTROL:
case IR_OPND_CONTROL_DEP:
case IR_OPND_CONTROL_REF:
+ case IR_OPND_CONTROL_GUARD:
fprintf(f, "%sl_%d", first ? "(" : ", ", ref);
first = 0;
break;
@@ -680,6 +682,8 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f)
} else if (opnd_kind == IR_OPND_NUM) {
fprintf(f, "%s%d", first ? "(" : ", ", ref);
first = 0;
+ } else if (opnd_kind == IR_OPND_CONTROL_GUARD) {
+ /* skip */
} else if (j != n &&
(IR_IS_REF_OPND_KIND(opnd_kind) || (opnd_kind == IR_OPND_UNUSED && p[n-j]))) {
fprintf(f, "%snull", first ? "(" : ", ");
diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h
index 136bbb0e08e..cbe049be932 100644
--- a/ext/opcache/jit/ir/ir_fold.h
+++ b/ext/opcache/jit/ir/ir_fold.h
@@ -1679,44 +1679,6 @@ IR_FOLD(EQ(SEXT, C_I16))
IR_FOLD(EQ(SEXT, C_I32))
IR_FOLD(EQ(SEXT, C_I64))
IR_FOLD(EQ(SEXT, C_ADDR))
-{
- if (ctx->use_lists && ctx->use_lists[op1_insn->op1].count != 1) {
- /* pass */
- } else if (op2_insn->val.u64 == 0 && ctx->ir_base[op1_insn->op1].type == IR_BOOL) {
- opt = IR_OPT(IR_NOT, IR_BOOL);
- op1 = op1_insn->op1;
- op2 = IR_UNUSED;
- IR_FOLD_RESTART;
- } else {
- ir_type type = ctx->ir_base[op1_insn->op1].type;
-
- if (op1_insn->op == IR_ZEXT
- && (op2_insn->val.u64 >> (ir_type_size[type] * 8)) != 0) {
- IR_FOLD_NEXT;
- }
- if (IR_IS_TYPE_SIGNED(type)) {
- switch (ir_type_size[type]) {
- case 1: val.i64 = op2_insn->val.i8; break;
- case 2: val.i64 = op2_insn->val.i16; break;
- case 4: val.i64 = op2_insn->val.i32; break;
- default: val.u64 = op2_insn->val.u64; break;
- }
- } else {
- switch (ir_type_size[type]) {
- case 1: val.u64 = op2_insn->val.u8; break;
- case 2: val.u64 = op2_insn->val.u16; break;
- case 4: val.u64 = op2_insn->val.u32; break;
- default: val.u64 = op2_insn->val.u64; break;
- }
- }
- op1 = op1_insn->op1;
- op2 = ir_const(ctx, val, type);
- IR_FOLD_RESTART;
- }
-
- IR_FOLD_NEXT;
-}
-
IR_FOLD(NE(ZEXT, C_U16))
IR_FOLD(NE(ZEXT, C_U32))
IR_FOLD(NE(ZEXT, C_U64))
@@ -1731,16 +1693,93 @@ IR_FOLD(NE(SEXT, C_I16))
IR_FOLD(NE(SEXT, C_I32))
IR_FOLD(NE(SEXT, C_I64))
IR_FOLD(NE(SEXT, C_ADDR))
-{
- if (ctx->use_lists && ctx->use_lists[op1_insn->op1].count != 1) {
+IR_FOLD(ULT(ZEXT, C_U16))
+IR_FOLD(ULT(ZEXT, C_U32))
+IR_FOLD(ULT(ZEXT, C_U64))
+IR_FOLD(ULT(ZEXT, C_I16))
+IR_FOLD(ULT(ZEXT, C_I32))
+IR_FOLD(ULT(ZEXT, C_I64))
+IR_FOLD(ULT(ZEXT, C_ADDR))
+IR_FOLD(UGE(ZEXT, C_U16))
+IR_FOLD(UGE(ZEXT, C_U32))
+IR_FOLD(UGE(ZEXT, C_U64))
+IR_FOLD(UGE(ZEXT, C_I16))
+IR_FOLD(UGE(ZEXT, C_I32))
+IR_FOLD(UGE(ZEXT, C_I64))
+IR_FOLD(UGE(ZEXT, C_ADDR))
+IR_FOLD(ULE(ZEXT, C_U16))
+IR_FOLD(ULE(ZEXT, C_U32))
+IR_FOLD(ULE(ZEXT, C_U64))
+IR_FOLD(ULE(ZEXT, C_I16))
+IR_FOLD(ULE(ZEXT, C_I32))
+IR_FOLD(ULE(ZEXT, C_I64))
+IR_FOLD(ULE(ZEXT, C_ADDR))
+IR_FOLD(UGT(ZEXT, C_U16))
+IR_FOLD(UGT(ZEXT, C_U32))
+IR_FOLD(UGT(ZEXT, C_U64))
+IR_FOLD(UGT(ZEXT, C_I16))
+IR_FOLD(UGT(ZEXT, C_I32))
+IR_FOLD(UGT(ZEXT, C_I64))
+IR_FOLD(UGT(ZEXT, C_ADDR))
+IR_FOLD(LT(SEXT, C_U16))
+IR_FOLD(LT(SEXT, C_U32))
+IR_FOLD(LT(SEXT, C_U64))
+IR_FOLD(LT(SEXT, C_I16))
+IR_FOLD(LT(SEXT, C_I32))
+IR_FOLD(LT(SEXT, C_I64))
+IR_FOLD(LT(SEXT, C_ADDR))
+IR_FOLD(GE(SEXT, C_U16))
+IR_FOLD(GE(SEXT, C_U32))
+IR_FOLD(GE(SEXT, C_U64))
+IR_FOLD(GE(SEXT, C_I16))
+IR_FOLD(GE(SEXT, C_I32))
+IR_FOLD(GE(SEXT, C_I64))
+IR_FOLD(GE(SEXT, C_ADDR))
+IR_FOLD(LE(SEXT, C_U16))
+IR_FOLD(LE(SEXT, C_U32))
+IR_FOLD(LE(SEXT, C_U64))
+IR_FOLD(LE(SEXT, C_I16))
+IR_FOLD(LE(SEXT, C_I32))
+IR_FOLD(LE(SEXT, C_I64))
+IR_FOLD(LE(SEXT, C_ADDR))
+IR_FOLD(GT(SEXT, C_U16))
+IR_FOLD(GT(SEXT, C_U32))
+IR_FOLD(GT(SEXT, C_U64))
+IR_FOLD(GT(SEXT, C_I16))
+IR_FOLD(GT(SEXT, C_I32))
+IR_FOLD(GT(SEXT, C_I64))
+IR_FOLD(GT(SEXT, C_ADDR))
+{
+ if (ctx->use_lists && ctx->use_lists[op1].count != 1) {
/* pass */
- } else if (op2_insn->val.u64 == 0 && ctx->ir_base[op1_insn->op1].type == IR_BOOL) {
- IR_FOLD_COPY(op1_insn->op1);
} else {
ir_type type = ctx->ir_base[op1_insn->op1].type;
- if (op1_insn->op == IR_ZEXT
- && (op2_insn->val.u64 >> (ir_type_size[type] * 8)) != 0) {
+ if (type == IR_BOOL && op2_insn->val.u64 == 0) {
+ if ((opt & IR_OPT_OP_MASK) == IR_EQ) {
+ opt = IR_OPT(IR_NOT, IR_BOOL);
+ op1 = op1_insn->op1;
+ op2 = IR_UNUSED;
+ IR_FOLD_RESTART;
+ } else if ((opt & IR_OPT_OP_MASK) == IR_NE) {
+ IR_FOLD_COPY(op1_insn->op1);
+ }
+ }
+ if ((op2_insn->val.u64 >> (ir_type_size[type] * 8)) != 0
+ && (op1_insn->op != IR_SEXT || (op2_insn->val.i64 >> (ir_type_size[type] * 8)) != -1)) {
+ if ((opt & IR_OPT_OP_MASK) == IR_EQ
+ || (opt & IR_OPT_OP_MASK) == IR_UGT
+ || (opt & IR_OPT_OP_MASK) == IR_UGE) {
+ IR_FOLD_COPY(IR_FALSE);
+ } else if ((opt & IR_OPT_OP_MASK) == IR_NE
+ || (opt & IR_OPT_OP_MASK) == IR_ULT
+ || (opt & IR_OPT_OP_MASK) == IR_ULE) {
+ IR_FOLD_COPY(IR_TRUE);
+ } else if ((opt & IR_OPT_OP_MASK) == IR_GT || (opt & IR_OPT_OP_MASK) == IR_GE) {
+ IR_FOLD_COPY(op2_insn->val.i64 >= 0 ? IR_FALSE : IR_TRUE);
+ } else if ((opt & IR_OPT_OP_MASK) == IR_LT || (opt & IR_OPT_OP_MASK) == IR_LE) {
+ IR_FOLD_COPY(op2_insn->val.i64 >= 0 ? IR_TRUE : IR_FALSE);
+ }
IR_FOLD_NEXT;
}
if (IR_IS_TYPE_SIGNED(type)) {
@@ -1765,6 +1804,43 @@ IR_FOLD(NE(SEXT, C_ADDR))
IR_FOLD_NEXT;
}
+IR_FOLD(EQ(ZEXT, ZEXT))
+IR_FOLD(NE(ZEXT, ZEXT))
+IR_FOLD(ULT(ZEXT, ZEXT))
+IR_FOLD(UGE(ZEXT, ZEXT))
+IR_FOLD(ULE(ZEXT, ZEXT))
+IR_FOLD(UGT(ZEXT, ZEXT))
+IR_FOLD(EQ(SEXT, SEXT))
+IR_FOLD(NE(SEXT, SEXT))
+IR_FOLD(LT(SEXT, SEXT))
+IR_FOLD(GE(SEXT, SEXT))
+IR_FOLD(LE(SEXT, SEXT))
+IR_FOLD(GT(SEXT, SEXT))
+{
+ if (ctx->ir_base[op1_insn->op1].type == ctx->ir_base[op2_insn->op1].type
+ && (!ctx->use_lists || (ctx->use_lists[op1].count == 1 && ctx->use_lists[op2].count == 1))) {
+ op1 = op1_insn->op1;
+ op2 = op2_insn->op1;
+ IR_FOLD_RESTART;
+ }
+ IR_FOLD_NEXT;
+}
+
+IR_FOLD(LT(ZEXT, ZEXT))
+IR_FOLD(GE(ZEXT, ZEXT))
+IR_FOLD(LE(ZEXT, ZEXT))
+IR_FOLD(GT(ZEXT, ZEXT))
+{
+ if (ctx->ir_base[op1_insn->op1].type == ctx->ir_base[op2_insn->op1].type
+ && (!ctx->use_lists || (ctx->use_lists[op1].count == 1 && ctx->use_lists[op2].count == 1))) {
+ op1 = op1_insn->op1;
+ op2 = op2_insn->op1;
+ opt += 4; /* LT -> ULT, ... */
+ IR_FOLD_RESTART;
+ }
+ IR_FOLD_NEXT;
+}
+
IR_FOLD(NOT(EQ))
IR_FOLD(NOT(NE))
IR_FOLD(NOT(LT))
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
index 1b45eb834ce..b194eeb8177 100644
--- a/ext/opcache/jit/ir/ir_gcm.c
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -148,7 +148,7 @@ static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca)
}
#if IR_GCM_SPLIT
-/* Partially Dead Code Elimination through splitting the node and sunking the clones
+/* Partially Dead Code Elimination through splitting the node and sinking the clones
*
* This code is based on the Benedikt Meurer's idea first implemented in V8.
* See: https://codereview.chromium.org/899433005
@@ -309,6 +309,7 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
struct {
ir_ref ref;
uint32_t block;
+ uint32_t lca;
uint32_t use_count;
uint32_t use;
} *clones = ir_mem_malloc(sizeof(*clones) * use_list->count);
@@ -344,8 +345,11 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
clone = clones_count++;
ir_hashtab_add(&hash, j, clone);
clones[clone].block = j;
+ clones[clone].lca = i;
clones[clone].use_count = 0;
clones[clone].use = (uint32_t)-1;
+ } else {
+ clones[clone].lca = ir_gcm_find_lca(ctx, clones[clone].lca, i);
}
uses[uses_count].ref = use;
uses[uses_count].block = i;
@@ -367,8 +371,11 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
clone = clones_count++;
ir_hashtab_add(&hash, j, clone);
clones[clone].block = j;
+ clones[clone].lca = i;
clones[clone].use_count = 0;
clones[clone].use = -1;
+ } else {
+ clones[clone].lca = ir_gcm_find_lca(ctx, clones[clone].lca, i);
}
uses[uses_count].ref = use;
uses[uses_count].block = i;
@@ -378,6 +385,42 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
}
}
+ /* Select best blocks to insert clones */
+ for (i = 0; i < clones_count; i++) {
+ uint32_t b0 = clones[i].block;
+ uint32_t lca = clones[i].lca;
+
+ if (b0 != lca) {
+ ir_block *bb = &ctx->cfg_blocks[lca];
+ uint32_t loop_depth = bb->loop_depth;
+
+ if (loop_depth) {
+ uint32_t b;
+ uint32_t best;
+
+ best = b = lca;
+ do {
+ b = bb->dom_parent;
+ bb = &ctx->cfg_blocks[b];
+ if (bb->loop_depth < loop_depth) {
+ if (!bb->loop_depth) {
+ best = b;
+ break;
+ }
+ loop_depth = bb->loop_depth;
+ best = b;
+ }
+ } while (b != b0);
+ lca = best;
+ }
+ clones[i].block = lca;
+ }
+ }
+
+ // TODO: instead of inserting clone into the block where the expression is partially available,
+ // we should insert PHI and the actual clones into the block sources where it's not available
+ // (similar to SSAPRE)
+
#ifdef IR_DEBUG
if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
for (i = 0; i < clones_count; i++) {
@@ -1170,11 +1213,11 @@ int ir_schedule(ir_ctx *ctx)
ir_ref use = *p;
ir_insn *use_insn = &ctx->ir_base[use];
if (!_xlat[use] && ctx->cfg_map[use]) {
- IR_ASSERT(ctx->cfg_map[use] == b);
if (use_insn->op == IR_PARAM
|| use_insn->op == IR_VAR
|| use_insn->op == IR_PI
|| use_insn->op == IR_PHI) {
+ IR_ASSERT(ctx->cfg_map[use] == b);
if (_prev[use] != phis) {
/* remove "use" */
_prev[_next[use]] = _prev[use];
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h
index 9e3a3a171b4..3e1051ca337 100644
--- a/ext/opcache/jit/ir/ir_private.h
+++ b/ext/opcache/jit/ir/ir_private.h
@@ -949,10 +949,11 @@ IR_ALWAYS_INLINE bool ir_ref_is_true(const ir_ctx *ctx, ir_ref ref)
#define IR_OPND_LABEL_REF 0x3
#define IR_OPND_CONTROL_DEP 0x4
#define IR_OPND_CONTROL_REF 0x5
-#define IR_OPND_STR 0x6
-#define IR_OPND_NUM 0x7
-#define IR_OPND_PROB 0x8
-#define IR_OPND_PROTO 0x9
+#define IR_OPND_CONTROL_GUARD 0x6
+#define IR_OPND_STR 0x7
+#define IR_OPND_NUM 0x8
+#define IR_OPND_PROB 0x9
+#define IR_OPND_PROTO 0xa
#define IR_OP_FLAGS(op_flags, op1_flags, op2_flags, op3_flags) \
((op_flags) | ((op1_flags) << 20) | ((op2_flags) << 24) | ((op3_flags) << 28))
@@ -966,7 +967,7 @@ IR_ALWAYS_INLINE bool ir_ref_is_true(const ir_ctx *ctx, ir_ref ref)
(((flags) >> (16 + (4 * (((i) > 3) ? 3 : (i))))) & 0xf)
#define IR_IS_REF_OPND_KIND(kind) \
- ((kind) >= IR_OPND_DATA && (kind) <= IR_OPND_CONTROL_REF)
+ ((kind) >= IR_OPND_DATA && (kind) <= IR_OPND_CONTROL_GUARD)
IR_ALWAYS_INLINE ir_ref ir_operands_count(const ir_ctx *ctx, const ir_insn *insn)
{
@@ -1223,6 +1224,7 @@ typedef struct _ir_use_pos ir_use_pos;
#define IR_USE_SHOULD_BE_IN_REG (1<<1)
#define IR_DEF_REUSES_OP1_REG (1<<2)
#define IR_DEF_CONFLICTS_WITH_INPUT_REGS (1<<3)
+#define IR_EXTEND_INPUTS_TO_NEXT (1<<4) /* used for SNAPSHOT followed by GUARD */
#define IR_FUSED_USE (1<<6)
#define IR_PHI_USE (1<<7)
diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c
index aff9aa7bab3..f22e0608378 100644
--- a/ext/opcache/jit/ir/ir_ra.c
+++ b/ext/opcache/jit/ir/ir_ra.c
@@ -799,6 +799,34 @@ int ir_compute_live_ranges(ir_ctx *ctx)
ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
continue;
}
+ } else if (def_flags & IR_EXTEND_INPUTS_TO_NEXT) {
+ ir_ref next = ir_next_control(ctx, ref);
+ ir_live_pos use_pos;
+
+ IR_ASSERT(insn->op == IR_SNAPSHOT);
+ j = 2;
+ p = insn->ops + 2;
+ for (; j <= insn->inputs_count; j++, p++) {
+ ir_ref input = *p;
+ uint32_t v;
+
+ if (input > 0) {
+ v = ctx->vregs[input];
+ IR_ASSERT(v);
+ use_pos = IR_USE_LIVE_POS_FROM_REF(next);
+ if (!ir_bitset_in(live, v)) {
+ /* live.add(opd) */
+ ir_bitset_incl(live, v);
+ /* intervals[opd].addRange(b.from, op.id) */
+ ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
+ } else {
+ ival = ctx->live_intervals[v];
+ }
+ use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
+ ir_add_use(ctx, ival, j, use_pos, IR_REG_NONE, 0, IR_UNUSED);
+ }
+ }
+ continue;
}
IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
@@ -1418,6 +1446,34 @@ int ir_compute_live_ranges(ir_ctx *ctx)
ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
continue;
}
+ } else if (def_flags & IR_EXTEND_INPUTS_TO_NEXT) {
+ ir_ref next = ir_next_control(ctx, ref);
+ ir_live_pos use_pos;
+
+ IR_ASSERT(insn->op == IR_SNAPSHOT);
+ j = 2;
+ p = insn->ops + 2;
+ for (; j <= insn->inputs_count; j++, p++) {
+ ir_ref input = *p;
+ uint32_t v;
+
+ if (input > 0) {
+ v = ctx->vregs[input];
+ IR_ASSERT(v);
+ use_pos = IR_USE_LIVE_POS_FROM_REF(next);
+ if (!IS_LIVE_IN_BLOCK(v, b)) {
+ /* live.add(opd) */
+ SET_LIVE_IN_BLOCK(v, b);
+ /* intervals[opd].addRange(b.from, op.id) */
+ ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
+ } else {
+ ival = ctx->live_intervals[v];
+ }
+ use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
+ ir_add_use(ctx, ival, j, use_pos, IR_REG_NONE, 0, IR_UNUSED);
+ }
+ }
+ continue;
}
IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
@@ -3004,6 +3060,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
{
ir_live_pos nextUsePos[IR_REG_NUM];
ir_live_pos blockPos[IR_REG_NUM];
+ int score, best_score, scores[IR_REG_NUM];
int i, reg;
ir_live_pos pos, next_use_pos;
ir_live_interval *other, *prev;
@@ -3032,6 +3089,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
nextUsePos[i] = 0x7fffffff;
blockPos[i] = 0x7fffffff;
+ scores[i] = 0;
}
} else {
available = IR_REGSET_GP;
@@ -3050,6 +3108,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
nextUsePos[i] = 0x7fffffff;
blockPos[i] = 0x7fffffff;
+ scores[i] = 0;
}
}
@@ -3080,6 +3139,8 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
if (pos < nextUsePos[reg]) {
nextUsePos[reg] = pos;
+ /* Prefer splitting interval that was already split before */
+ scores[reg] = (other->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) ? 1 : 0;
}
}
}
@@ -3100,6 +3161,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
IR_REGSET_FOREACH(regset, reg) {
if (overlap < nextUsePos[reg]) {
nextUsePos[reg] = overlap;
+ scores[reg] = 0;
}
if (overlap < blockPos[reg]) {
blockPos[reg] = overlap;
@@ -3113,6 +3175,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
if (overlap < nextUsePos[reg]) {
nextUsePos[reg] = overlap;
+ scores[reg] = 0;
}
if (overlap < blockPos[reg]) {
blockPos[reg] = overlap;
@@ -3122,6 +3185,8 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
if (pos < nextUsePos[reg]) {
nextUsePos[reg] = pos;
+ /* Prefer splitting interval that was already split before */
+ scores[reg] = (other->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) ? 1 : 0;
}
}
}
@@ -3141,12 +3206,17 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
/* reg = register with highest nextUsePos */
pos = nextUsePos[reg];
+ best_score = (scores[reg] << 28) + nextUsePos[reg];
tmp_regset = available;
IR_REGSET_EXCL(tmp_regset, reg);
IR_REGSET_FOREACH(tmp_regset, i) {
if (nextUsePos[i] > pos) {
pos = nextUsePos[i];
+ }
+ score = (scores[i] << 28) + nextUsePos[i];
+ if (score > best_score) {
reg = i;
+ best_score = score;
}
} IR_REGSET_FOREACH_END();
diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c
index 3f1d943c687..8b3f3b5c6b5 100644
--- a/ext/opcache/jit/ir/ir_save.c
+++ b/ext/opcache/jit/ir/ir_save.c
@@ -283,7 +283,7 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
n = ir_operands_count(ctx, insn);
if ((insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) && n != 2) {
fprintf(f, "/%d", n);
- } else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL) && n != 2) {
+ } else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL || insn->op == IR_ASM) && n != 2) {
fprintf(f, "/%d", n - 2);
} else if (insn->op == IR_PHI && n != 3) {
fprintf(f, "/%d", n - 1);
@@ -321,6 +321,7 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
case IR_OPND_CONTROL:
case IR_OPND_CONTROL_DEP:
case IR_OPND_CONTROL_REF:
+ case IR_OPND_CONTROL_GUARD:
fprintf(f, "%sl_%d", first ? "(" : ", ", ref);
first = 0;
break;
@@ -352,6 +353,8 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
} else if (opnd_kind == IR_OPND_NUM) {
fprintf(f, "%s%d", first ? "(" : ", ", ref);
first = 0;
+ } else if (opnd_kind == IR_OPND_CONTROL_GUARD) {
+ /* skip */
} else if (j != n &&
(IR_IS_REF_OPND_KIND(opnd_kind) || (opnd_kind == IR_OPND_UNUSED && p[n-j]))) {
fprintf(f, "%snull", first ? "(" : ", ");
diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c
index 921790fd92b..f2b8616e2af 100644
--- a/ext/opcache/jit/ir/ir_sccp.c
+++ b/ext/opcache/jit/ir/ir_sccp.c
@@ -609,6 +609,10 @@ static IR_NEVER_INLINE void ir_sccp_analyze(const ir_ctx *ctx, ir_sccp_val *_val
IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
n = IR_INPUT_EDGES_COUNT(flags);
+ if (insn->op == IR_DIV || insn->op == IR_MOD) {
+ /* skip data-control guard edge */
+ n--;
+ }
for (p = insn->ops + 1; n > 0; p++, n--) {
ir_ref input = *p;
if (input > 0) {
@@ -1419,7 +1423,7 @@ static ir_ref ir_iter_find_cse(const ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_r
if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
use_list = &ctx->use_lists[op2];
}
- if (!IR_IS_CONST_REF(op3) && (!use_list || use_list->count > ctx->use_lists[op3].count)) {
+ if (op3 > 0 && (!use_list || use_list->count > ctx->use_lists[op3].count)) {
use_list = &ctx->use_lists[op3];
}
if (use_list) {
@@ -1907,6 +1911,46 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref, worklist);
}
insn->type = type;
+ if (IR_IS_TYPE_SIGNED(type)) {
+ ir_insn *cond = &ctx->ir_base[insn->op1];
+ if (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE) {
+ if (cond->op1 == insn->op2 && cond->op2 == insn->op3) {
+ insn->op = (cond->op == IR_LT || cond->op == IR_LE) ? IR_MIN : IR_MAX;
+ ir_use_list_remove_one(ctx, insn->op1, ref);
+ ir_bitqueue_add(worklist, insn->op1);
+ insn->op1 = insn->op2;
+ insn->op2 = insn->op3;
+ insn->op3 = IR_UNUSED;
+ } else if (cond->op1 == insn->op3 && cond->op2 == insn->op1) {
+ insn->op = (cond->op == IR_LT || cond->op == IR_LE) ? IR_MAX : IR_MIN;
+ ir_use_list_remove_one(ctx, insn->op1, ref);
+ ir_bitqueue_add(worklist, insn->op1);
+ insn->op1 = insn->op2;
+ insn->op2 = insn->op3;
+ insn->op3 = IR_UNUSED;
+ }
+ }
+ } else {
+ IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
+ ir_insn *cond = &ctx->ir_base[insn->op1];
+ if (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE) {
+ if (cond->op1 == insn->op2 && cond->op2 == insn->op3) {
+ insn->op = (cond->op == IR_ULT || cond->op == IR_ULE) ? IR_MIN : IR_MAX;
+ ir_use_list_remove_one(ctx, insn->op1, ref);
+ ir_bitqueue_add(worklist, insn->op1);
+ insn->op1 = insn->op2;
+ insn->op2 = insn->op3;
+ insn->op3 = IR_UNUSED;
+ } else if (cond->op1 == insn->op3 && cond->op2 == insn->op1) {
+ insn->op = (cond->op == IR_ULT || cond->op == IR_ULE) ? IR_MAX : IR_MIN;
+ ir_use_list_remove_one(ctx, insn->op1, ref);
+ ir_bitqueue_add(worklist, insn->op1);
+ insn->op1 = insn->op2;
+ insn->op2 = insn->op3;
+ insn->op3 = IR_UNUSED;
+ }
+ }
+ }
return ref;
case IR_PHI:
for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
@@ -1995,7 +2039,7 @@ static uint32_t _ir_estimated_control(const ir_ctx *ctx, ir_ref val, ir_ref loop
const ir_ref *p;
ir_ref n, input, result, ctrl;
- if (IR_IS_CONST_REF(val)) {
+ if (val <= 0) { /* constant or IR_UNUSED */
return 1; /* IR_START */
}
@@ -2129,14 +2173,14 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
const ir_insn *use_insn = &ctx->ir_base[use];
if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
- if (use_insn->op1 == phi_ref) {
+ if (use_insn->op1 == op_ref) {
if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) {
return 0;
}
if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
continue;
}
- } else if (use_insn->op2 == phi_ref) {
+ } else if (use_insn->op2 == op_ref) {
if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) {
return 0;
}
@@ -2521,6 +2565,52 @@ static bool ir_is_zero(const ir_ctx *ctx, ir_ref ref)
&& ctx->ir_base[ref].val.u32 == 0;
}
+static bool ir_fix_min_max_const(ir_ctx *ctx, ir_insn *cond, ir_ref ref)
+{
+ if (cond->op == IR_ULE) {
+ /* (x <= 3 ? 4 : x) => (x < 4 ? 4 : x) => max(x, 4) */
+ /* (x <= 3 ? x : 4) => (x < 4 ? x : 4) => min(x, 4) */
+ if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
+ && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
+ && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 - 1
+ && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
+ cond->op2 = ref;
+ return 1;
+ }
+ } else if (cond->op == IR_UGE) {
+ /* (x >= 3 ? 2 : x) => (x > 2 ? 2 : x) => min(x, 2) */
+ /* (x >= 3 ? x : 2) => (x > 2 ? x : 2) => max(x, 2) */
+ if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
+ && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
+ && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 + 1
+ && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
+ cond->op2 = ref;
+ return 1;
+ }
+ } else if (cond->op == IR_LE) {
+ /* (x <= 3 ? 4 : x) => (x < 4 ? 4 : x) => max(x, 4) */
+ /* (x <= 3 ? x : 4) => (x < 4 ? x : 4) => min(x, 4) */
+ if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
+ && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
+ && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 - 1
+ && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
+ cond->op2 = ref;
+ return 1;
+ }
+ } else if (cond->op == IR_GE) {
+ /* (x >= 3 ? 2 : x) => (x > 2 ? 2 : x) => min(x, 2) */
+ /* (x >= 3 ? x : 2) => (x > 2 ? x : 2) => max(x, 2) */
+ if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
+ && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
+ && ctx->ir_base[cond->op2].val.i64 == ctx->ir_base[ref].val.i64 + 1
+ && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
+ cond->op2 = ref;
+ return 1;
+ }
+ }
+ return 0;
+}
+
static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
{
IR_ASSERT(insn->inputs_count == 3);
@@ -2560,8 +2650,18 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
}
if (is_cmp
- && ((insn->op2 == cond->op1 && insn->op3 == cond->op2)
- || (insn->op2 == cond->op2 && insn->op3 == cond->op1))) {
+ && ((insn->op2 == cond->op1
+ && (insn->op3 == cond->op2
+ || (IR_IS_CONST_REF(cond->op2)
+ && (IR_IS_CONST_REF(insn->op3)
+ && IR_IS_TYPE_INT(insn->type)
+ && ir_fix_min_max_const(ctx, cond, insn->op3)))))
+ || (insn->op3 == cond->op1
+ && (insn->op2 == cond->op2
+ || (IR_IS_CONST_REF(cond->op2)
+ && (IR_IS_CONST_REF(insn->op2)
+ && IR_IS_TYPE_INT(insn->type)
+ && ir_fix_min_max_const(ctx, cond, insn->op2))))))) {
/* MAX/MIN
*
* prev prev
@@ -2612,14 +2712,14 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
next->op1 = root->op1;
ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
- if (!IR_IS_CONST_REF(insn->op1)) {
- ir_use_list_remove_one(ctx, insn->op1, cond_ref);
- }
- if (!IR_IS_CONST_REF(insn->op2)) {
- ir_use_list_remove_one(ctx, insn->op2, cond_ref);
- }
if (ctx->use_lists[cond_ref].count == 1) {
+ if (!IR_IS_CONST_REF(insn->op1)) {
+ ir_use_list_remove_one(ctx, insn->op1, cond_ref);
+ }
+ if (!IR_IS_CONST_REF(insn->op2)) {
+ ir_use_list_remove_one(ctx, insn->op2, cond_ref);
+ }
MAKE_NOP(cond); CLEAR_USES(cond_ref);
} else {
ir_use_list_remove_one(ctx, cond_ref, root_ref);
@@ -2705,11 +2805,11 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
next->op1 = root->op1;
ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
ir_use_list_remove_one(ctx, insn->op1, neg_ref);
- if (!IR_IS_CONST_REF(insn->op1)) {
- ir_use_list_remove_one(ctx, insn->op1, cond_ref);
- }
if (ctx->use_lists[cond_ref].count == 1) {
+ if (!IR_IS_CONST_REF(insn->op1)) {
+ ir_use_list_remove_one(ctx, insn->op1, cond_ref);
+ }
MAKE_NOP(cond); CLEAR_USES(cond_ref);
} else {
ir_use_list_remove_one(ctx, cond_ref, root_ref);
@@ -2727,7 +2827,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
}
return 1;
- } else if (insn->op2 <= cond_ref && insn->op3 <= cond_ref
+ } else if (insn->op2 <= root_ref && insn->op3 <= root_ref
&& cond->op != IR_OVERFLOW
// TODO: temporary disable IF-conversion for RLOAD.
// We don't track anti-dependencies in GCM and Local Scheduling.
@@ -3437,6 +3537,13 @@ static ir_ref ir_iter_optimize_condition(ir_ctx *ctx, ir_ref control, ir_ref con
}
}
+ if (condition_insn->op == IR_SHL && IR_IS_CONST_REF(condition_insn->op1)) {
+ ir_insn *val_insn = &ctx->ir_base[condition_insn->op1];
+ if (!IR_IS_SYM_CONST(val_insn->op) && val_insn->val.u64 == 1) {
+ return IR_TRUE;
+ }
+ }
+
while ((condition_insn->op == IR_BITCAST
|| condition_insn->op == IR_ZEXT
|| condition_insn->op == IR_SEXT)
diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc
index 9cd41c37ffe..ca42001a881 100644
--- a/ext/opcache/jit/ir/ir_x86.dasc
+++ b/ext/opcache/jit/ir/ir_x86.dasc
@@ -1273,6 +1273,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
const ir_proto_t *proto;
const ir_call_conv_dsc *cc;
+ ir_ref next;
constraints->def_reg = IR_REG_NONE;
constraints->hints_count = 0;
@@ -1345,9 +1346,11 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
op2_const:
insn = &ctx->ir_base[ref];
- if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
- constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
- n++;
+ if (IR_IS_CONST_REF(insn->op2)) {
+ if (insn->op1 != insn->op2) {
+ constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
+ n++;
+ }
} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n = 1;
@@ -1712,6 +1715,10 @@ get_arg_hints:
break;
case IR_SNAPSHOT:
flags = 0;
+ next = ir_next_control(ctx, ref);
+ if (ctx->ir_base[next].op == IR_GUARD || ctx->ir_base[next].op == IR_GUARD_NOT) {
+ flags = IR_EXTEND_INPUTS_TO_NEXT;
+ }
break;
case IR_VA_START:
flags = IR_OP2_MUST_BE_IN_REG;
@@ -3078,10 +3085,6 @@ store_int:
if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
- // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
-//??? && (insn->op2 == ref - 1 ||
-//??? (insn->op2 == ctx->prev_ref[ref] - 1
-//??? && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
if (IR_IS_CONST_REF(op2_insn->op2)
&& !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op)
@@ -3262,6 +3265,12 @@ store_int:
return IR_FUSED | IR_ARGVAL;
case IR_NOP:
return IR_SKIPPED | IR_NOP;
+ case IR_ASM:
+ case IR_ASM_OUT:
+ case IR_ASM_GOTO:
+ fprintf(stderr, "ERROR: IR_ASM is not implemented yet\n");
+ exit(1);
+ return IR_SKIPPED | IR_NOP;
default:
break;
}
@@ -9429,7 +9438,8 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
| .aword &addr
- if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) {
+ if (ctx->ir_base[bb->start].op1 == def
+ && ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) {
bb->flags |= IR_BB_EMPTY;
}
continue;