Commit 8fa692b0898 for php.net

commit 8fa692b0898341dfc47667c25d8e648c5d611129
Author: Dmitry Stogov <dmitry@php.net>
Date:   Tue May 12 13:57:41 2026 +0300

    Update IR (#22019)

    IR commit: d2439a528cfb0c4b1607a7771f7889bbe8d35814

diff --git a/ext/opcache/jit/ir/.gitignore b/ext/opcache/jit/ir/.gitignore
index 7a37a4fd059..367a68671bc 100644
--- a/ext/opcache/jit/ir/.gitignore
+++ b/ext/opcache/jit/ir/.gitignore
@@ -20,3 +20,6 @@ tests/**/*.log

 win32/vcpkg
 win32/build_*
+
+fuzz/build/
+fuzz/corpus/
diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c
index a02332e0d39..f6a0cb60af9 100644
--- a/ext/opcache/jit/ir/ir.c
+++ b/ext/opcache/jit/ir/ir.c
@@ -161,6 +161,8 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted
 		case IR_CHAR:
 			if (insn->val.c == '\\') {
 				fprintf(f, "'\\\\'");
+			} else if (insn->val.c == '\'') {
+				fprintf(f, "'\\\''");
 			} else if (insn->val.c >= ' ') {
 				fprintf(f, "'%c'", insn->val.c);
 			} else if (insn->val.c == '\t') {
@@ -283,6 +285,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted
 #define ir_op_kind_src     IR_OPND_CONTROL
 #define ir_op_kind_reg     IR_OPND_CONTROL_DEP
 #define ir_op_kind_ret     IR_OPND_CONTROL_REF
+#define ir_op_kind_grd     IR_OPND_CONTROL_GUARD
 #define ir_op_kind_str     IR_OPND_STR
 #define ir_op_kind_num     IR_OPND_NUM
 #define ir_op_kind_fld     IR_OPND_STR
@@ -1843,7 +1846,7 @@ int ir_mem_unprotect(void *ptr, size_t size)

 int ir_mem_flush(void *ptr, size_t size)
 {
-	return 1;
+	return FlushInstructionCache(GetCurrentProcess(), ptr, size) == TRUE ? 1 : 0;
 }
 #else

@@ -2168,7 +2171,10 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(const ir_ctx *ctx, ir_ref ref, i
 			if (!(proto->flags & (IR_CONST_FUNC|IR_PURE_FUNC))) {
 				break;
 			}
-		} else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_VSTORE) {
+		} else if (insn->op == IR_MERGE
+				|| insn->op == IR_LOOP_BEGIN
+				|| insn->op == IR_VSTORE
+				|| (insn->op == IR_BEGIN && insn->op2)) {
 			return IR_UNUSED;
 		}
 		ref = insn->op1;
@@ -2233,7 +2239,10 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(const ir_ctx *ctx, ir_ref ref,
 			if (!(proto->flags & (IR_CONST_FUNC|IR_PURE_FUNC))) {
 				break;
 			}
-		} else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_STORE) {
+		} else if (insn->op == IR_MERGE
+				|| insn->op == IR_LOOP_BEGIN
+				|| insn->op == IR_STORE
+				|| (insn->op == IR_BEGIN && insn->op2)) {
 			break;
 		}
 		ref = insn->op1;
@@ -2326,7 +2335,15 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_store_i(ir_ctx *ctx, ir_ref ref, ir_ref
 			}
 		} else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
 			guarded = 1;
-		} else if (insn->op >= IR_START || insn->op == IR_CALL) {
+		} else if (insn->op >= IR_START) {
+			if (insn->op == IR_BEGIN && insn->op1 && !insn->op2) {
+				/* skip END */
+				ref = insn->op1;
+				insn = &ctx->ir_base[ref];
+			} else {
+				break;
+			}
+		} else if (insn->op == IR_CALL) {
 			break;
 		}
 		next = ref;
@@ -2407,7 +2424,15 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vstore_i(ir_ctx *ctx, ir_ref ref, ir_re
 			}
 		} else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
 			guarded = 1;
-		} else if (insn->op >= IR_START || insn->op == IR_CALL || insn->op == IR_LOAD || insn->op == IR_STORE) {
+		} else if (insn->op >= IR_START) {
+			if (insn->op == IR_BEGIN && insn->op1 && !insn->op2) {
+				/* skip END */
+				ref = insn->op1;
+				insn = &ctx->ir_base[ref];
+			} else {
+				break;
+			}
+		} else if (insn->op == IR_CALL || insn->op == IR_LOAD || insn->op == IR_STORE) {
 			break;
 		}
 		next = ref;
@@ -2422,6 +2447,37 @@ ir_ref ir_find_aliasing_vstore(ir_ctx *ctx, ir_ref ref, ir_ref var, ir_ref val)
 }

 /* IR Construction API */
+static ir_ref ir_last_guard(ir_ctx *ctx)
+{
+	ir_ref ref;
+	ir_insn *insn;
+
+	IR_ASSERT(ctx->control);
+	ref = ctx->control;
+	while (1) {
+		insn = &ctx->ir_base[ref];
+		if (IR_IS_BB_START(insn->op) || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
+			if (insn->op == IR_START) ref = IR_UNUSED;
+			break;
+		}
+		ref = insn->op1;
+	}
+	return ref;
+}
+
+ir_ref _ir_DIV(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2)
+{
+	ir_ref guard = (IR_IS_TYPE_FP(type) || (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 != 0)) ?
+		IR_UNUSED : ir_last_guard(ctx);
+	return ir_fold3(ctx, IR_OPT(IR_DIV, type), op1, op2, guard);
+}
+
+ir_ref _ir_MOD(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2)
+{
+	ir_ref guard = (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 != 0) ?
+		IR_UNUSED : ir_last_guard(ctx);
+	return ir_fold3(ctx, IR_OPT(IR_MOD, type), op1, op2, guard);
+}

 ir_ref _ir_PARAM(ir_ctx *ctx, ir_type type, const char* name, ir_ref num)
 {
diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h
index b0a96b511bd..01db4ecf6b1 100644
--- a/ext/opcache/jit/ir/ir.h
+++ b/ext/opcache/jit/ir/ir.h
@@ -210,6 +210,7 @@ typedef enum _ir_type {
  * arg - argument reference CALL/TAILCALL/CARG->CARG
  * src - reference to a previous control region (IF, IF_TRUE, IF_FALSE, MERGE, LOOP_BEGIN, LOOP_END, RETURN)
  * reg - data-control dependency on region (PHI, VAR, PARAM)
+ * grd - optional data-control dependency guard (DIV, MOD)
  * ret - reference to a previous RETURN instruction (RETURN)
  * str - string: variable/argument name (VAR, PARAM, CALL, TAILCALL)
  * num - number: argument number (PARAM)
@@ -265,8 +266,8 @@ typedef enum _ir_type {
 	_(ADD,          d2C,  def, def, ___) /* addition                    */ \
 	_(SUB,          d2,   def, def, ___) /* subtraction (must be ADD+1) */ \
 	_(MUL,          d2C,  def, def, ___) /* multiplication              */ \
-	_(DIV,          d2,   def, def, ___) /* division                    */ \
-	_(MOD,          d2,   def, def, ___) /* modulo                      */ \
+	_(DIV,          d3,   def, def, grd) /* division                    */ \
+	_(MOD,          d3,   def, def, grd) /* modulo                      */ \
 	_(NEG,          d1,   def, ___, ___) /* change sign                 */ \
 	_(ABS,          d1,   def, ___, ___) /* absolute value              */ \
 	/* (LDEXP, MIN, MAX, FPMATH)                                        */ \
@@ -383,6 +384,14 @@ typedef enum _ir_type {
 	_(RETURN,       T2X1, src, def, ret) /* function return             */ \
 	_(UNREACHABLE,  T1X2, src, ___, ret) /* unreachable (tailcall, etc) */ \
 	\
+	/* inline assembler                                                 */ \
+	_(ASM,          xN,   src, def, def) /* GCC inline assembler        */ \
+	                                     /* op2 - asm template string   */ \
+	                                     /* op3 - asm constraint string */ \
+	                                     /* opN - asm input argument    */ \
+	_(ASM_OUT,      x1,   src, ___, ___) /* ASM data output projection  */ \
+	_(ASM_GOTO,     E1,   src, ___, ___) /* ASM goto (bb end after ASM) */ \
+	\
 	/* deoptimization helper                                            */ \
 	_(EXITCALL,     x2,   src, def, ___) /* save CPU regs and call op2  */ \

diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
index bdf6b027b9f..fc4bb84f1e0 100644
--- a/ext/opcache/jit/ir/ir_aarch64.dasc
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -402,6 +402,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 	int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
 	const ir_proto_t *proto;
 	const ir_call_conv_dsc *cc;
+	ir_ref next;

 	constraints->def_reg = IR_REG_NONE;
 	constraints->hints_count = 0;
@@ -562,11 +563,13 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 				constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
 				n = 1;
 			}
-			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
-				insn = &ctx->ir_base[insn->op2];
-				if (IR_IS_SYM_CONST(insn->op) || !aarch64_may_encode_imm12(insn->val.u64)) {
-					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
-					n++;
+			if (IR_IS_CONST_REF(insn->op2)) {
+				if (insn->op1 != insn->op2) {
+					insn = &ctx->ir_base[insn->op2];
+					if (IR_IS_SYM_CONST(insn->op) || !aarch64_may_encode_imm12(insn->val.u64)) {
+						constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
+						n++;
+					}
 				}
 			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
 				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
@@ -751,6 +754,10 @@ get_arg_hints:
 			break;
 		case IR_SNAPSHOT:
 			flags = 0;
+			next = ir_next_control(ctx, ref);
+			if (ctx->ir_base[next].op == IR_GUARD || ctx->ir_base[next].op == IR_GUARD_NOT) {
+				flags = IR_EXTEND_INPUTS_TO_NEXT;
+			}
 			break;
 		case IR_VA_START:
 			flags = IR_OP2_MUST_BE_IN_REG;
@@ -1199,10 +1206,6 @@ binop_fp:
 			if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
 				op2_insn = &ctx->ir_base[insn->op2];
 				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
-					// TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
-//???				 && (insn->op2 == ref - 1 ||
-//???				     (insn->op2 == ctx->prev_ref[ref] - 1
-//???				   && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
 					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
 						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
 						return IR_GUARD_CMP_INT;
@@ -1265,6 +1268,12 @@ binop_fp:
 			return IR_FUSED | IR_ARGVAL;
 		case IR_NOP:
 			return IR_SKIPPED | IR_NOP;
+		case IR_ASM:
+		case IR_ASM_OUT:
+		case IR_ASM_GOTO:
+			fprintf(stderr, "ERROR: IR_ASM is not implemented yet\n");
+			exit(1);
+			return IR_SKIPPED | IR_NOP;
 		default:
 			break;
 	}
@@ -4996,7 +5005,8 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
 						void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);

 						|	.addr &addr
-						if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) {
+						if (ctx->ir_base[bb->start].op1 == def
+						 && ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) {
 							bb->flags |= IR_BB_EMPTY;
 						}
 						continue;
diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h
index 084216a0634..9492945b136 100644
--- a/ext/opcache/jit/ir/ir_builder.h
+++ b/ext/opcache/jit/ir/ir_builder.h
@@ -118,31 +118,31 @@ extern "C" {
 #define ir_MUL_D(_op1, _op2)              ir_BINARY_OP_D(IR_MUL, (_op1), (_op2))
 #define ir_MUL_F(_op1, _op2)              ir_BINARY_OP_F(IR_MUL, (_op1), (_op2))

-#define ir_DIV(_type, _op1, _op2)         ir_BINARY_OP(IR_DIV, (_type), (_op1), (_op2))
-#define ir_DIV_U8(_op1, _op2)             ir_BINARY_OP_U8(IR_DIV, (_op1), (_op2))
-#define ir_DIV_U16(_op1, _op2)            ir_BINARY_OP_U16(IR_DIV, (_op1), (_op2))
-#define ir_DIV_U32(_op1, _op2)            ir_BINARY_OP_U32(IR_DIV, (_op1), (_op2))
-#define ir_DIV_U64(_op1, _op2)            ir_BINARY_OP_U64(IR_DIV, (_op1), (_op2))
-#define ir_DIV_A(_op1, _op2)              ir_BINARY_OP_A(IR_DIV, (_op1), (_op2))
-#define ir_DIV_C(_op1, _op2)              ir_BINARY_OP_C(IR_DIV, (_op1), (_op2))
-#define ir_DIV_I8(_op1, _op2)             ir_BINARY_OP_I8(IR_DIV, (_op1), (_op2))
-#define ir_DIV_I16(_op1, _op2)            ir_BINARY_OP_I16(IR_DIV, (_op1), (_op2))
-#define ir_DIV_I32(_op1, _op2)            ir_BINARY_OP_I32(IR_DIV, (_op1), (_op2))
-#define ir_DIV_I64(_op1, _op2)            ir_BINARY_OP_I64(IR_DIV, (_op1), (_op2))
+#define ir_DIV(_type, _op1, _op2)         _ir_DIV(_ir_CTX, (_type), (_op1), (_op2))
+#define ir_DIV_U8(_op1, _op2)             ir_DIV(IR_U8, (_op1), (_op2))
+#define ir_DIV_U16(_op1, _op2)            ir_DIV(IR_U16, (_op1), (_op2))
+#define ir_DIV_U32(_op1, _op2)            ir_DIV(IR_U32, (_op1), (_op2))
+#define ir_DIV_U64(_op1, _op2)            ir_DIV(IR_U64, (_op1), (_op2))
+#define ir_DIV_A(_op1, _op2)              ir_DIV(IR_ADDR, (_op1), (_op2))
+#define ir_DIV_C(_op1, _op2)              ir_DIV(IR_CHAR, (_op1), (_op2))
+#define ir_DIV_I8(_op1, _op2)             ir_DIV(IR_I8, (_op1), (_op2))
+#define ir_DIV_I16(_op1, _op2)            ir_DIV(IR_I16, (_op1), (_op2))
+#define ir_DIV_I32(_op1, _op2)            ir_DIV(IR_I32, (_op1), (_op2))
+#define ir_DIV_I64(_op1, _op2)            ir_DIV(IR_I64, (_op1), (_op2))
 #define ir_DIV_D(_op1, _op2)              ir_BINARY_OP_D(IR_DIV, (_op1), (_op2))
 #define ir_DIV_F(_op1, _op2)              ir_BINARY_OP_F(IR_DIV, (_op1), (_op2))

-#define ir_MOD(_type, _op1, _op2)         ir_BINARY_OP(IR_MOD, (_type), (_op1), (_op2))
-#define ir_MOD_U8(_op1, _op2)             ir_BINARY_OP_U8(IR_MOD, (_op1), (_op2))
-#define ir_MOD_U16(_op1, _op2)            ir_BINARY_OP_U16(IR_MOD, (_op1), (_op2))
-#define ir_MOD_U32(_op1, _op2)            ir_BINARY_OP_U32(IR_MOD, (_op1), (_op2))
-#define ir_MOD_U64(_op1, _op2)            ir_BINARY_OP_U64(IR_MOD, (_op1), (_op2))
-#define ir_MOD_A(_op1, _op2)              ir_BINARY_OP_A(IR_MOD, (_op1), (_op2))
-#define ir_MOD_C(_op1, _op2)              ir_BINARY_OP_C(IR_MOD, (_op1), (_op2))
-#define ir_MOD_I8(_op1, _op2)             ir_BINARY_OP_I8(IR_MOD, (_op1), (_op2))
-#define ir_MOD_I16(_op1, _op2)            ir_BINARY_OP_I16(IR_MOD, (_op1), (_op2))
-#define ir_MOD_I32(_op1, _op2)            ir_BINARY_OP_I32(IR_MOD, (_op1), (_op2))
-#define ir_MOD_I64(_op1, _op2)            ir_BINARY_OP_I64(IR_MOD, (_op1), (_op2))
+#define ir_MOD(_type, _op1, _op2)         _ir_MOD(_ir_CTX, (_type), (_op1), (_op2))
+#define ir_MOD_U8(_op1, _op2)             ir_MOD(IR_U8, (_op1), (_op2))
+#define ir_MOD_U16(_op1, _op2)            ir_MOD(IR_U16, (_op1), (_op2))
+#define ir_MOD_U32(_op1, _op2)            ir_MOD(IR_U32, (_op1), (_op2))
+#define ir_MOD_U64(_op1, _op2)            ir_MOD(IR_U64, (_op1), (_op2))
+#define ir_MOD_A(_op1, _op2)              ir_MOD(IR_ADDR, (_op1), (_op2))
+#define ir_MOD_C(_op1, _op2)              ir_MOD(IR_CHAR, (_op1), (_op2))
+#define ir_MOD_I8(_op1, _op2)             ir_MOD(IR_I8, (_op1), (_op2))
+#define ir_MOD_I16(_op1, _op2)            ir_MOD(IR_I16, (_op1), (_op2))
+#define ir_MOD_I32(_op1, _op2)            ir_MOD(IR_I32, (_op1), (_op2))
+#define ir_MOD_I64(_op1, _op2)            ir_MOD(IR_I64, (_op1), (_op2))

 #define ir_NEG(_type, _op1)               ir_UNARY_OP(IR_NEG, (_type), (_op1))
 #define ir_NEG_C(_op1)                    ir_UNARY_OP_C(IR_NEG, (_op1))
@@ -633,6 +633,8 @@ extern "C" {
 #define ir_MERGE_WITH_EMPTY_TRUE(_if)     do {ir_ref end = ir_END(); ir_IF_TRUE(_if); ir_MERGE_2(end, ir_END());} while (0)
 #define ir_MERGE_WITH_EMPTY_FALSE(_if)    do {ir_ref end = ir_END(); ir_IF_FALSE(_if); ir_MERGE_2(end, ir_END());} while (0)

+ir_ref _ir_DIV(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2);
+ir_ref _ir_MOD(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2);
 ir_ref _ir_ADD_OFFSET(ir_ctx *ctx, ir_ref addr, uintptr_t offset);
 ir_ref _ir_PHI_2(ir_ctx *ctx, ir_type type, ir_ref src1, ir_ref src2);
 ir_ref _ir_PHI_N(ir_ctx *ctx, ir_type type, ir_ref n, ir_ref *inputs);
diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c
index 40041004c56..92042ea8cbb 100644
--- a/ext/opcache/jit/ir/ir_cfg.c
+++ b/ext/opcache/jit/ir/ir_cfg.c
@@ -1502,6 +1502,23 @@ static bool ir_is_merged_loop_back_edge(ir_ctx *ctx, uint32_t hdr, uint32_t b)
 }
 #endif

+static bool ir_should_align_loop(ir_ctx *ctx, ir_chain *chains, uint32_t b, ir_block *bb)
+{
+	uint32_t n = bb->predecessors_count;
+	uint32_t *p = ctx->cfg_edges + bb->predecessors;
+
+	for (; n > 0; p++, n--) {
+		uint32_t pred = *p;
+		if (chains[pred].head) {
+			if (ir_chain_head(chains, pred) == b) return 1;
+		} else {
+			if (ir_should_align_loop(ctx, chains, b, &ctx->cfg_blocks[pred])) return 1;
+		}
+	}
+
+	return 0;
+}
+
 static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
 {
 	uint32_t max_edges_count = ctx->cfg_edges_count / 2;
@@ -1862,7 +1879,7 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
 		if (chains[b].head == b) {
 			bb = &ctx->cfg_blocks[b];
 			if (bb->loop_depth) {
-				if ((bb->flags & IR_BB_LOOP_HEADER) || ir_chain_head(chains, bb->loop_header) == b) {
+				if (ir_should_align_loop(ctx, chains, b, bb)) {
 					bb->flags |= IR_BB_ALIGN_LOOP;
 				}
 			}
diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c
index ee951291b1b..e1be7f6544d 100644
--- a/ext/opcache/jit/ir/ir_check.c
+++ b/ext/opcache/jit/ir/ir_check.c
@@ -148,6 +148,12 @@ bool ir_check(const ir_ctx *ctx)
 	bool ok = 1;
 	ir_check_ctx check_ctx;

+	if (ctx->insns_count < 1 || ctx->ir_base[1].op != IR_START) {
+		fprintf(stderr, "ir_base[1].op invalid opcode (%d)\n",
+			(ctx->insns_count < 1) ? IR_NOP : ctx->ir_base[1].op);
+		ok = 0;
+	}
+
 	check_ctx.arena = NULL;
 	check_ctx.use_set = NULL;
 	check_ctx.input_set = NULL;
@@ -297,6 +303,14 @@ bool ir_check(const ir_ctx *ctx)
 								ok = 0;
 							}
 							break;
+						case IR_OPND_CONTROL_GUARD:
+							if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_BB_START)
+							 && use_insn->op != IR_GUARD
+							 && use_insn->op != IR_GUARD_NOT) {
+								fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be BB_START or GUARD\n", i, j, use);
+								ok = 0;
+							}
+							break;
 						default:
 							fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) of unsupported kind\n", i, j, use);
 							ok = 0;
@@ -306,6 +320,8 @@ bool ir_check(const ir_ctx *ctx)
 				/* pass (function returns void) */
 			} else if (insn->op == IR_BEGIN && j == 1) {
 				/* pass (start of unreachable basic block) */
+			} else if (IR_OPND_KIND(flags, j) == IR_OPND_CONTROL_GUARD) {
+				/* reference to control guard is optional */
 			} else if (IR_OPND_KIND(flags, j) != IR_OPND_CONTROL_REF
 					&& (insn->op != IR_SNAPSHOT || j == 1)) {
 				fprintf(stderr, "ir_base[%d].ops[%d] missing reference (%d)\n", i, j, use);
@@ -413,6 +429,7 @@ bool ir_check(const ir_ctx *ctx)
 						}
 						break;
 					case IR_IGOTO:
+					case IR_ASM_GOTO:
 						break;
 					default:
 						/* skip data references */
@@ -464,6 +481,10 @@ bool ir_check(const ir_ctx *ctx)
 //	if (!ok) {
 //		ir_dump_codegen(ctx, stderr);
 //	}
+
+#ifndef IR_CHECK_NO_ABORT
 	IR_ASSERT(ok);
+#endif
+
 	return ok;
 }
diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c
index 037003f021a..3b34294d1c7 100644
--- a/ext/opcache/jit/ir/ir_dump.c
+++ b/ext/opcache/jit/ir/ir_dump.c
@@ -142,6 +142,7 @@ void ir_dump_dot(const ir_ctx *ctx, const char *name, const char *comments, FILE
 						break;
 					case IR_OPND_CONTROL_DEP:
 					case IR_OPND_CONTROL_REF:
+					case IR_OPND_CONTROL_GUARD:
 						fprintf(f, "\tn%d -> n%d [style=dashed,dir=back,weight=%d];\n", ref, i, REF_WEIGHT);
 						break;
 					case IR_OPND_LABEL_REF:
@@ -650,6 +651,7 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f)
 						case IR_OPND_CONTROL:
 						case IR_OPND_CONTROL_DEP:
 						case IR_OPND_CONTROL_REF:
+						case IR_OPND_CONTROL_GUARD:
 							fprintf(f, "%sl_%d", first ? "(" : ", ", ref);
 							first = 0;
 							break;
@@ -680,6 +682,8 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f)
 				} else if (opnd_kind == IR_OPND_NUM) {
 					fprintf(f, "%s%d", first ? "(" : ", ", ref);
 					first = 0;
+				} else if (opnd_kind == IR_OPND_CONTROL_GUARD) {
+					/* skip */
 				} else if (j != n &&
 						(IR_IS_REF_OPND_KIND(opnd_kind) || (opnd_kind == IR_OPND_UNUSED && p[n-j]))) {
 					fprintf(f, "%snull", first ? "(" : ", ");
diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h
index 136bbb0e08e..cbe049be932 100644
--- a/ext/opcache/jit/ir/ir_fold.h
+++ b/ext/opcache/jit/ir/ir_fold.h
@@ -1679,44 +1679,6 @@ IR_FOLD(EQ(SEXT, C_I16))
 IR_FOLD(EQ(SEXT, C_I32))
 IR_FOLD(EQ(SEXT, C_I64))
 IR_FOLD(EQ(SEXT, C_ADDR))
-{
-	if (ctx->use_lists && ctx->use_lists[op1_insn->op1].count != 1) {
-		/* pass */
-	} else if (op2_insn->val.u64 == 0 && ctx->ir_base[op1_insn->op1].type == IR_BOOL) {
-		opt = IR_OPT(IR_NOT, IR_BOOL);
-		op1 = op1_insn->op1;
-		op2 = IR_UNUSED;
-		IR_FOLD_RESTART;
-	} else {
-		ir_type type = ctx->ir_base[op1_insn->op1].type;
-
-		if (op1_insn->op == IR_ZEXT
-		 && (op2_insn->val.u64 >> (ir_type_size[type] * 8)) != 0) {
-			IR_FOLD_NEXT;
-		}
-		if (IR_IS_TYPE_SIGNED(type)) {
-			switch (ir_type_size[type]) {
-				case 1:  val.i64 = op2_insn->val.i8;  break;
-				case 2:  val.i64 = op2_insn->val.i16; break;
-				case 4:  val.i64 = op2_insn->val.i32; break;
-				default: val.u64 = op2_insn->val.u64; break;
-			 }
-	    } else {
-			switch (ir_type_size[type]) {
-				case 1:  val.u64 = op2_insn->val.u8;  break;
-				case 2:  val.u64 = op2_insn->val.u16; break;
-				case 4:  val.u64 = op2_insn->val.u32; break;
-				default: val.u64 = op2_insn->val.u64; break;
-			 }
-		}
-		op1 = op1_insn->op1;
-		op2 = ir_const(ctx, val, type);
-		IR_FOLD_RESTART;
-	}
-
-	IR_FOLD_NEXT;
-}
-
 IR_FOLD(NE(ZEXT, C_U16))
 IR_FOLD(NE(ZEXT, C_U32))
 IR_FOLD(NE(ZEXT, C_U64))
@@ -1731,16 +1693,93 @@ IR_FOLD(NE(SEXT, C_I16))
 IR_FOLD(NE(SEXT, C_I32))
 IR_FOLD(NE(SEXT, C_I64))
 IR_FOLD(NE(SEXT, C_ADDR))
-{
-	if (ctx->use_lists && ctx->use_lists[op1_insn->op1].count != 1) {
+IR_FOLD(ULT(ZEXT, C_U16))
+IR_FOLD(ULT(ZEXT, C_U32))
+IR_FOLD(ULT(ZEXT, C_U64))
+IR_FOLD(ULT(ZEXT, C_I16))
+IR_FOLD(ULT(ZEXT, C_I32))
+IR_FOLD(ULT(ZEXT, C_I64))
+IR_FOLD(ULT(ZEXT, C_ADDR))
+IR_FOLD(UGE(ZEXT, C_U16))
+IR_FOLD(UGE(ZEXT, C_U32))
+IR_FOLD(UGE(ZEXT, C_U64))
+IR_FOLD(UGE(ZEXT, C_I16))
+IR_FOLD(UGE(ZEXT, C_I32))
+IR_FOLD(UGE(ZEXT, C_I64))
+IR_FOLD(UGE(ZEXT, C_ADDR))
+IR_FOLD(ULE(ZEXT, C_U16))
+IR_FOLD(ULE(ZEXT, C_U32))
+IR_FOLD(ULE(ZEXT, C_U64))
+IR_FOLD(ULE(ZEXT, C_I16))
+IR_FOLD(ULE(ZEXT, C_I32))
+IR_FOLD(ULE(ZEXT, C_I64))
+IR_FOLD(ULE(ZEXT, C_ADDR))
+IR_FOLD(UGT(ZEXT, C_U16))
+IR_FOLD(UGT(ZEXT, C_U32))
+IR_FOLD(UGT(ZEXT, C_U64))
+IR_FOLD(UGT(ZEXT, C_I16))
+IR_FOLD(UGT(ZEXT, C_I32))
+IR_FOLD(UGT(ZEXT, C_I64))
+IR_FOLD(UGT(ZEXT, C_ADDR))
+IR_FOLD(LT(SEXT, C_U16))
+IR_FOLD(LT(SEXT, C_U32))
+IR_FOLD(LT(SEXT, C_U64))
+IR_FOLD(LT(SEXT, C_I16))
+IR_FOLD(LT(SEXT, C_I32))
+IR_FOLD(LT(SEXT, C_I64))
+IR_FOLD(LT(SEXT, C_ADDR))
+IR_FOLD(GE(SEXT, C_U16))
+IR_FOLD(GE(SEXT, C_U32))
+IR_FOLD(GE(SEXT, C_U64))
+IR_FOLD(GE(SEXT, C_I16))
+IR_FOLD(GE(SEXT, C_I32))
+IR_FOLD(GE(SEXT, C_I64))
+IR_FOLD(GE(SEXT, C_ADDR))
+IR_FOLD(LE(SEXT, C_U16))
+IR_FOLD(LE(SEXT, C_U32))
+IR_FOLD(LE(SEXT, C_U64))
+IR_FOLD(LE(SEXT, C_I16))
+IR_FOLD(LE(SEXT, C_I32))
+IR_FOLD(LE(SEXT, C_I64))
+IR_FOLD(LE(SEXT, C_ADDR))
+IR_FOLD(GT(SEXT, C_U16))
+IR_FOLD(GT(SEXT, C_U32))
+IR_FOLD(GT(SEXT, C_U64))
+IR_FOLD(GT(SEXT, C_I16))
+IR_FOLD(GT(SEXT, C_I32))
+IR_FOLD(GT(SEXT, C_I64))
+IR_FOLD(GT(SEXT, C_ADDR))
+{
+	if (ctx->use_lists && ctx->use_lists[op1].count != 1) {
 		/* pass */
-	} else if (op2_insn->val.u64 == 0 && ctx->ir_base[op1_insn->op1].type == IR_BOOL) {
-		IR_FOLD_COPY(op1_insn->op1);
 	} else {
 		ir_type type = ctx->ir_base[op1_insn->op1].type;

-		if (op1_insn->op == IR_ZEXT
-		 && (op2_insn->val.u64 >> (ir_type_size[type] * 8)) != 0) {
+		if (type == IR_BOOL && op2_insn->val.u64 == 0) {
+			if ((opt & IR_OPT_OP_MASK) == IR_EQ) {
+				opt = IR_OPT(IR_NOT, IR_BOOL);
+				op1 = op1_insn->op1;
+				op2 = IR_UNUSED;
+				IR_FOLD_RESTART;
+			} else if ((opt & IR_OPT_OP_MASK) == IR_NE) {
+				IR_FOLD_COPY(op1_insn->op1);
+			}
+		}
+		if ((op2_insn->val.u64 >> (ir_type_size[type] * 8)) != 0
+		 && (op1_insn->op != IR_SEXT || (op2_insn->val.i64 >> (ir_type_size[type] * 8)) != -1)) {
+			if ((opt & IR_OPT_OP_MASK) == IR_EQ
+			 || (opt & IR_OPT_OP_MASK) == IR_UGT
+			 || (opt & IR_OPT_OP_MASK) == IR_UGE) {
+				IR_FOLD_COPY(IR_FALSE);
+			} else if ((opt & IR_OPT_OP_MASK) == IR_NE
+			 || (opt & IR_OPT_OP_MASK) == IR_ULT
+			 || (opt & IR_OPT_OP_MASK) == IR_ULE) {
+				IR_FOLD_COPY(IR_TRUE);
+			} else if ((opt & IR_OPT_OP_MASK) == IR_GT || (opt & IR_OPT_OP_MASK) == IR_GE) {
+				IR_FOLD_COPY(op2_insn->val.i64 >= 0 ? IR_FALSE : IR_TRUE);
+			} else if ((opt & IR_OPT_OP_MASK) == IR_LT || (opt & IR_OPT_OP_MASK) == IR_LE) {
+				IR_FOLD_COPY(op2_insn->val.i64 >= 0 ? IR_TRUE : IR_FALSE);
+			}
 			IR_FOLD_NEXT;
 		}
 		if (IR_IS_TYPE_SIGNED(type)) {
@@ -1765,6 +1804,43 @@ IR_FOLD(NE(SEXT, C_ADDR))
 	IR_FOLD_NEXT;
 }

+IR_FOLD(EQ(ZEXT, ZEXT))
+IR_FOLD(NE(ZEXT, ZEXT))
+IR_FOLD(ULT(ZEXT, ZEXT))
+IR_FOLD(UGE(ZEXT, ZEXT))
+IR_FOLD(ULE(ZEXT, ZEXT))
+IR_FOLD(UGT(ZEXT, ZEXT))
+IR_FOLD(EQ(SEXT, SEXT))
+IR_FOLD(NE(SEXT, SEXT))
+IR_FOLD(LT(SEXT, SEXT))
+IR_FOLD(GE(SEXT, SEXT))
+IR_FOLD(LE(SEXT, SEXT))
+IR_FOLD(GT(SEXT, SEXT))
+{
+	if (ctx->ir_base[op1_insn->op1].type == ctx->ir_base[op2_insn->op1].type
+	 && (!ctx->use_lists || (ctx->use_lists[op1].count == 1 && ctx->use_lists[op2].count == 1))) {
+		op1 = op1_insn->op1;
+		op2 = op2_insn->op1;
+		IR_FOLD_RESTART;
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(LT(ZEXT, ZEXT))
+IR_FOLD(GE(ZEXT, ZEXT))
+IR_FOLD(LE(ZEXT, ZEXT))
+IR_FOLD(GT(ZEXT, ZEXT))
+{
+	if (ctx->ir_base[op1_insn->op1].type == ctx->ir_base[op2_insn->op1].type
+	 && (!ctx->use_lists || (ctx->use_lists[op1].count == 1 && ctx->use_lists[op2].count == 1))) {
+		op1 = op1_insn->op1;
+		op2 = op2_insn->op1;
+		opt += 4; /* LT -> ULT, ... */
+		IR_FOLD_RESTART;
+	}
+	IR_FOLD_NEXT;
+}
+
 IR_FOLD(NOT(EQ))
 IR_FOLD(NOT(NE))
 IR_FOLD(NOT(LT))
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
index 1b45eb834ce..b194eeb8177 100644
--- a/ext/opcache/jit/ir/ir_gcm.c
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -148,7 +148,7 @@ static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca)
 }

 #if IR_GCM_SPLIT
-/* Partially Dead Code Elimination through splitting the node and sunking the clones
+/* Partially Dead Code Elimination through splitting the node and sinking the clones
  *
  * This code is based on the Benedikt Meurer's idea first implemented in V8.
  * See: https://codereview.chromium.org/899433005
@@ -309,6 +309,7 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
 	struct {
 		ir_ref   ref;
 		uint32_t block;
+		uint32_t lca;
 		uint32_t use_count;
 		uint32_t use;
 	} *clones = ir_mem_malloc(sizeof(*clones) * use_list->count);
@@ -344,8 +345,11 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
 						clone = clones_count++;
 						ir_hashtab_add(&hash, j, clone);
 						clones[clone].block = j;
+						clones[clone].lca = i;
 						clones[clone].use_count = 0;
 						clones[clone].use = (uint32_t)-1;
+					} else {
+						clones[clone].lca = ir_gcm_find_lca(ctx, clones[clone].lca, i);
 					}
 					uses[uses_count].ref = use;
 					uses[uses_count].block = i;
@@ -367,8 +371,11 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
 				clone = clones_count++;
 				ir_hashtab_add(&hash, j, clone);
 				clones[clone].block = j;
+				clones[clone].lca = i;
 				clones[clone].use_count = 0;
 				clones[clone].use = -1;
+			} else {
+				clones[clone].lca = ir_gcm_find_lca(ctx, clones[clone].lca, i);
 			}
 			uses[uses_count].ref = use;
 			uses[uses_count].block = i;
@@ -378,6 +385,42 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
 		}
 	}

+	/* Select best blocks to insert clones */
+	for (i = 0; i < clones_count; i++) {
+		uint32_t b0 = clones[i].block;
+		uint32_t lca = clones[i].lca;
+
+		if (b0 != lca) {
+			ir_block *bb = &ctx->cfg_blocks[lca];
+			uint32_t loop_depth = bb->loop_depth;
+
+			if (loop_depth) {
+				uint32_t b;
+				uint32_t best;
+
+				best = b = lca;
+				do {
+					b = bb->dom_parent;
+					bb = &ctx->cfg_blocks[b];
+					if (bb->loop_depth < loop_depth) {
+						if (!bb->loop_depth) {
+							best = b;
+							break;
+						}
+						loop_depth = bb->loop_depth;
+						best = b;
+					}
+				} while (b != b0);
+				lca = best;
+			}
+			clones[i].block = lca;
+		}
+	}
+
+	// TODO: instead of inserting clone into the block where the expression is partially available,
+	//       we should insert PHI and the actual clones into the block sources where it's not available
+	//       (similar to SSAPRE)
+
 #ifdef IR_DEBUG
 	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
 		for (i = 0; i < clones_count; i++) {
@@ -1170,11 +1213,11 @@ int ir_schedule(ir_ctx *ctx)
 					ir_ref use = *p;
 					ir_insn *use_insn = &ctx->ir_base[use];
 					if (!_xlat[use] && ctx->cfg_map[use]) {
-						IR_ASSERT(ctx->cfg_map[use] == b);
 						if (use_insn->op == IR_PARAM
 						 || use_insn->op == IR_VAR
 						 || use_insn->op == IR_PI
 						 || use_insn->op == IR_PHI) {
+							IR_ASSERT(ctx->cfg_map[use] == b);
 							if (_prev[use] != phis) {
 								/* remove "use" */
 								_prev[_next[use]] = _prev[use];
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h
index 9e3a3a171b4..3e1051ca337 100644
--- a/ext/opcache/jit/ir/ir_private.h
+++ b/ext/opcache/jit/ir/ir_private.h
@@ -949,10 +949,11 @@ IR_ALWAYS_INLINE bool ir_ref_is_true(const ir_ctx *ctx, ir_ref ref)
 #define IR_OPND_LABEL_REF         0x3
 #define IR_OPND_CONTROL_DEP       0x4
 #define IR_OPND_CONTROL_REF       0x5
-#define IR_OPND_STR               0x6
-#define IR_OPND_NUM               0x7
-#define IR_OPND_PROB              0x8
-#define IR_OPND_PROTO             0x9
+#define IR_OPND_CONTROL_GUARD     0x6
+#define IR_OPND_STR               0x7
+#define IR_OPND_NUM               0x8
+#define IR_OPND_PROB              0x9
+#define IR_OPND_PROTO             0xa

 #define IR_OP_FLAGS(op_flags, op1_flags, op2_flags, op3_flags) \
 	((op_flags) | ((op1_flags) << 20) | ((op2_flags) << 24) | ((op3_flags) << 28))
@@ -966,7 +967,7 @@ IR_ALWAYS_INLINE bool ir_ref_is_true(const ir_ctx *ctx, ir_ref ref)
 	(((flags) >> (16 + (4 * (((i) > 3) ? 3 : (i))))) & 0xf)

 #define IR_IS_REF_OPND_KIND(kind) \
-	((kind) >= IR_OPND_DATA && (kind) <= IR_OPND_CONTROL_REF)
+	((kind) >= IR_OPND_DATA && (kind) <= IR_OPND_CONTROL_GUARD)

 IR_ALWAYS_INLINE ir_ref ir_operands_count(const ir_ctx *ctx, const ir_insn *insn)
 {
@@ -1223,6 +1224,7 @@ typedef struct _ir_use_pos       ir_use_pos;
 #define IR_USE_SHOULD_BE_IN_REG          (1<<1)
 #define IR_DEF_REUSES_OP1_REG            (1<<2)
 #define IR_DEF_CONFLICTS_WITH_INPUT_REGS (1<<3)
+#define IR_EXTEND_INPUTS_TO_NEXT         (1<<4) /* used for SNAPSHOT followed by GUARD */

 #define IR_FUSED_USE                     (1<<6)
 #define IR_PHI_USE                       (1<<7)
diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c
index aff9aa7bab3..f22e0608378 100644
--- a/ext/opcache/jit/ir/ir_ra.c
+++ b/ext/opcache/jit/ir/ir_ra.c
@@ -799,6 +799,34 @@ int ir_compute_live_ranges(ir_ctx *ctx)
 					ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
 					continue;
 				}
+			} else if (def_flags & IR_EXTEND_INPUTS_TO_NEXT) {
+				ir_ref next = ir_next_control(ctx, ref);
+				ir_live_pos use_pos;
+
+				IR_ASSERT(insn->op == IR_SNAPSHOT);
+				j = 2;
+				p = insn->ops + 2;
+				for (; j <= insn->inputs_count; j++, p++) {
+					ir_ref input = *p;
+					uint32_t v;
+
+					if (input > 0) {
+						v = ctx->vregs[input];
+						IR_ASSERT(v);
+						use_pos = IR_USE_LIVE_POS_FROM_REF(next);
+						if (!ir_bitset_in(live, v)) {
+							/* live.add(opd) */
+							ir_bitset_incl(live, v);
+							/* intervals[opd].addRange(b.from, op.id) */
+							ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
+						} else {
+							ival = ctx->live_intervals[v];
+						}
+						use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
+						ir_add_use(ctx, ival, j, use_pos, IR_REG_NONE, 0, IR_UNUSED);
+					}
+				}
+				continue;
 			}

 			IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
@@ -1418,6 +1446,34 @@ int ir_compute_live_ranges(ir_ctx *ctx)
 					ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
 					continue;
 				}
+			} else if (def_flags & IR_EXTEND_INPUTS_TO_NEXT) {
+				ir_ref next = ir_next_control(ctx, ref);
+				ir_live_pos use_pos;
+
+				IR_ASSERT(insn->op == IR_SNAPSHOT);
+				j = 2;
+				p = insn->ops + 2;
+				for (; j <= insn->inputs_count; j++, p++) {
+					ir_ref input = *p;
+					uint32_t v;
+
+					if (input > 0) {
+						v = ctx->vregs[input];
+						IR_ASSERT(v);
+						use_pos = IR_USE_LIVE_POS_FROM_REF(next);
+						if (!IS_LIVE_IN_BLOCK(v, b)) {
+							/* live.add(opd) */
+							SET_LIVE_IN_BLOCK(v, b);
+							/* intervals[opd].addRange(b.from, op.id) */
+							ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
+						} else {
+							ival = ctx->live_intervals[v];
+						}
+						use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
+						ir_add_use(ctx, ival, j, use_pos, IR_REG_NONE, 0, IR_UNUSED);
+					}
+				}
+				continue;
 			}

 			IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
@@ -3004,6 +3060,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 {
 	ir_live_pos nextUsePos[IR_REG_NUM];
 	ir_live_pos blockPos[IR_REG_NUM];
+	int score, best_score, scores[IR_REG_NUM];
 	int i, reg;
 	ir_live_pos pos, next_use_pos;
 	ir_live_interval *other, *prev;
@@ -3032,6 +3089,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 		for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
 			nextUsePos[i] = 0x7fffffff;
 			blockPos[i] = 0x7fffffff;
+			scores[i] = 0;
 		}
 	} else {
 		available = IR_REGSET_GP;
@@ -3050,6 +3108,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 		for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
 			nextUsePos[i] = 0x7fffffff;
 			blockPos[i] = 0x7fffffff;
+			scores[i] = 0;
 		}
 	}

@@ -3080,6 +3139,8 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 					IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
 				if (pos < nextUsePos[reg]) {
 					nextUsePos[reg] = pos;
+					/* Prefer splitting an interval that was already split before */
+					scores[reg] = (other->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) ? 1 : 0;
 				}
 			}
 		}
@@ -3100,6 +3161,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 				IR_REGSET_FOREACH(regset, reg) {
 					if (overlap < nextUsePos[reg]) {
 						nextUsePos[reg] = overlap;
+						scores[reg] = 0;
 					}
 					if (overlap < blockPos[reg]) {
 						blockPos[reg] = overlap;
@@ -3113,6 +3175,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 				if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
 					if (overlap < nextUsePos[reg]) {
 						nextUsePos[reg] = overlap;
+						scores[reg] = 0;
 					}
 					if (overlap < blockPos[reg]) {
 						blockPos[reg] = overlap;
@@ -3122,6 +3185,8 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 						IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
 					if (pos < nextUsePos[reg]) {
 						nextUsePos[reg] = pos;
+						/* Prefer splitting an interval that was already split before */
+						scores[reg] = (other->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) ? 1 : 0;
 					}
 				}
 			}
@@ -3141,12 +3206,17 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li

 	/* reg = register with highest nextUsePos */
 	pos = nextUsePos[reg];
+	best_score = (scores[reg] << 28) + nextUsePos[reg];
 	tmp_regset = available;
 	IR_REGSET_EXCL(tmp_regset, reg);
 	IR_REGSET_FOREACH(tmp_regset, i) {
 		if (nextUsePos[i] > pos) {
 			pos = nextUsePos[i];
+		}
+		score = (scores[i] << 28) + nextUsePos[i];
+		if (score > best_score) {
 			reg = i;
+			best_score = score;
 		}
 	} IR_REGSET_FOREACH_END();

diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c
index 3f1d943c687..8b3f3b5c6b5 100644
--- a/ext/opcache/jit/ir/ir_save.c
+++ b/ext/opcache/jit/ir/ir_save.c
@@ -283,7 +283,7 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
 		n = ir_operands_count(ctx, insn);
 		if ((insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) && n != 2) {
 			fprintf(f, "/%d", n);
-		} else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL) && n != 2) {
+		} else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL || insn->op == IR_ASM) && n != 2) {
 			fprintf(f, "/%d", n - 2);
 		} else if (insn->op == IR_PHI && n != 3) {
 			fprintf(f, "/%d", n - 1);
@@ -321,6 +321,7 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
 					case IR_OPND_CONTROL:
 					case IR_OPND_CONTROL_DEP:
 					case IR_OPND_CONTROL_REF:
+					case IR_OPND_CONTROL_GUARD:
 						fprintf(f, "%sl_%d", first ? "(" : ", ", ref);
 						first = 0;
 						break;
@@ -352,6 +353,8 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
 			} else if (opnd_kind == IR_OPND_NUM) {
 				fprintf(f, "%s%d", first ? "(" : ", ", ref);
 				first = 0;
+			} else if (opnd_kind == IR_OPND_CONTROL_GUARD) {
+				/* skip */
 			} else if (j != n &&
 					(IR_IS_REF_OPND_KIND(opnd_kind) || (opnd_kind == IR_OPND_UNUSED && p[n-j]))) {
 				fprintf(f, "%snull", first ? "(" : ", ");
diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c
index 921790fd92b..f2b8616e2af 100644
--- a/ext/opcache/jit/ir/ir_sccp.c
+++ b/ext/opcache/jit/ir/ir_sccp.c
@@ -609,6 +609,10 @@ static IR_NEVER_INLINE void ir_sccp_analyze(const ir_ctx *ctx, ir_sccp_val *_val

 				IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
 				n = IR_INPUT_EDGES_COUNT(flags);
+				if (insn->op == IR_DIV || insn->op == IR_MOD) {
+					/* skip data-control guard edge */
+					n--;
+				}
 				for (p = insn->ops + 1; n > 0; p++, n--) {
 					ir_ref input = *p;
 					if (input > 0) {
@@ -1419,7 +1423,7 @@ static ir_ref ir_iter_find_cse(const ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_r
 		if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
 			use_list = &ctx->use_lists[op2];
 		}
-		if (!IR_IS_CONST_REF(op3) && (!use_list || use_list->count > ctx->use_lists[op3].count)) {
+		if (op3 > 0 && (!use_list || use_list->count > ctx->use_lists[op3].count)) {
 			use_list = &ctx->use_lists[op3];
 		}
 		if (use_list) {
@@ -1907,6 +1911,46 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
 					insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref, worklist);
 				}
 				insn->type = type;
+				if (IR_IS_TYPE_SIGNED(type)) {
+					ir_insn *cond = &ctx->ir_base[insn->op1];
+					if (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE) {
+						if (cond->op1 == insn->op2 && cond->op2 == insn->op3) {
+							insn->op = (cond->op == IR_LT || cond->op == IR_LE) ? IR_MIN : IR_MAX;
+							ir_use_list_remove_one(ctx, insn->op1, ref);
+							ir_bitqueue_add(worklist, insn->op1);
+							insn->op1 = insn->op2;
+							insn->op2 = insn->op3;
+							insn->op3 = IR_UNUSED;
+						} else if (cond->op1 == insn->op3 && cond->op2 == insn->op1) {
+							insn->op = (cond->op == IR_LT || cond->op == IR_LE) ? IR_MAX : IR_MIN;
+							ir_use_list_remove_one(ctx, insn->op1, ref);
+							ir_bitqueue_add(worklist, insn->op1);
+							insn->op1 = insn->op2;
+							insn->op2 = insn->op3;
+							insn->op3 = IR_UNUSED;
+						}
+					}
+				} else {
+					IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
+					ir_insn *cond = &ctx->ir_base[insn->op1];
+					if (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE) {
+						if (cond->op1 == insn->op2 && cond->op2 == insn->op3) {
+							insn->op = (cond->op == IR_ULT || cond->op == IR_ULE) ? IR_MIN : IR_MAX;
+							ir_use_list_remove_one(ctx, insn->op1, ref);
+							ir_bitqueue_add(worklist, insn->op1);
+							insn->op1 = insn->op2;
+							insn->op2 = insn->op3;
+							insn->op3 = IR_UNUSED;
+						} else if (cond->op1 == insn->op3 && cond->op2 == insn->op1) {
+							insn->op = (cond->op == IR_ULT || cond->op == IR_ULE) ? IR_MAX : IR_MIN;
+							ir_use_list_remove_one(ctx, insn->op1, ref);
+							ir_bitqueue_add(worklist, insn->op1);
+							insn->op1 = insn->op2;
+							insn->op2 = insn->op3;
+							insn->op3 = IR_UNUSED;
+						}
+					}
+				}
 				return ref;
 			case IR_PHI:
 				for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
@@ -1995,7 +2039,7 @@ static uint32_t _ir_estimated_control(const ir_ctx *ctx, ir_ref val, ir_ref loop
 	const ir_ref *p;
 	ir_ref n, input, result, ctrl;

-	if (IR_IS_CONST_REF(val)) {
+	if (val <= 0) { /* constant or IR_UNUSED */
 		return 1; /* IR_START */
 	}

@@ -2129,14 +2173,14 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
 				const ir_insn *use_insn = &ctx->ir_base[use];

 				if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
-					if (use_insn->op1 == phi_ref) {
+					if (use_insn->op1 == op_ref) {
 						if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) {
 							return 0;
 						}
 						if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
 							continue;
 					    }
-					} else if (use_insn->op2 == phi_ref) {
+					} else if (use_insn->op2 == op_ref) {
 						if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) {
 							return 0;
 						}
@@ -2521,6 +2565,52 @@ static bool ir_is_zero(const ir_ctx *ctx, ir_ref ref)
 		&& ctx->ir_base[ref].val.u32 == 0;
 }

+static bool ir_fix_min_max_const(ir_ctx *ctx, ir_insn *cond, ir_ref ref)
+{
+	if (cond->op == IR_ULE) {
+		/* (x <= 3 ? 4 : x) => (x < 4 ? 4 : x) =>  max(x, 4) */
+		/* (x <= 3 ? x : 4) => (x < 4 ? x : 4) =>  min(x, 4) */
+		if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
+		 && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
+		 && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 - 1
+		 && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
+			cond->op2 = ref;
+			return 1;
+		}
+	} else if (cond->op == IR_UGE) {
+		/* (x >= 3 ? 2 : x) => (x > 2 ? 2 : x) =>  min(x, 2) */
+		/* (x >= 3 ? x : 2) => (x > 2 ? x : 2) =>  max(x, 2) */
+		if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
+		 && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
+		 && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 + 1
+		 && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
+			cond->op2 = ref;
+			return 1;
+		}
+	} else if (cond->op == IR_LE) {
+		/* (x <= 3 ? 4 : x) => (x < 4 ? 4 : x) =>  max(x, 4) */
+		/* (x <= 3 ? x : 4) => (x < 4 ? x : 4) =>  min(x, 4) */
+		if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
+		 && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
+		 && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 - 1
+		 && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
+			cond->op2 = ref;
+			return 1;
+		}
+	} else if (cond->op == IR_GE) {
+		/* (x >= 3 ? 2 : x) => (x > 2 ? 2 : x) =>  min(x, 2) */
+		/* (x >= 3 ? x : 2) => (x > 2 ? x : 2) =>  max(x, 2) */
+		if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
+		 && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
+		 && ctx->ir_base[cond->op2].val.i64 == ctx->ir_base[ref].val.i64 + 1
+		 && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
+			cond->op2 = ref;
+			return 1;
+		}
+	}
+	return 0;
+}
+
 static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
 {
 	IR_ASSERT(insn->inputs_count == 3);
@@ -2560,8 +2650,18 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 				}

 				if (is_cmp
-				 && ((insn->op2 == cond->op1 && insn->op3 == cond->op2)
-				   || (insn->op2 == cond->op2 && insn->op3 == cond->op1))) {
+				 && ((insn->op2 == cond->op1
+				   && (insn->op3 == cond->op2
+				    || (IR_IS_CONST_REF(cond->op2)
+				     && (IR_IS_CONST_REF(insn->op3)
+					 && IR_IS_TYPE_INT(insn->type)
+				     && ir_fix_min_max_const(ctx, cond, insn->op3)))))
+				  || (insn->op3 == cond->op1
+				   && (insn->op2 == cond->op2
+				    || (IR_IS_CONST_REF(cond->op2)
+				     && (IR_IS_CONST_REF(insn->op2)
+					 && IR_IS_TYPE_INT(insn->type)
+				     && ir_fix_min_max_const(ctx, cond, insn->op2))))))) {
 					/* MAX/MIN
 					 *
 					 *    prev                     prev
@@ -2612,14 +2712,14 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re

 					next->op1 = root->op1;
 					ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
-					if (!IR_IS_CONST_REF(insn->op1)) {
-						ir_use_list_remove_one(ctx, insn->op1, cond_ref);
-					}
-					if (!IR_IS_CONST_REF(insn->op2)) {
-						ir_use_list_remove_one(ctx, insn->op2, cond_ref);
-					}

 					if (ctx->use_lists[cond_ref].count == 1) {
+						if (!IR_IS_CONST_REF(insn->op1)) {
+							ir_use_list_remove_one(ctx, insn->op1, cond_ref);
+						}
+						if (!IR_IS_CONST_REF(insn->op2)) {
+							ir_use_list_remove_one(ctx, insn->op2, cond_ref);
+						}
 						MAKE_NOP(cond);   CLEAR_USES(cond_ref);
 					} else {
 						ir_use_list_remove_one(ctx, cond_ref, root_ref);
@@ -2705,11 +2805,11 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 					next->op1 = root->op1;
 					ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
 					ir_use_list_remove_one(ctx, insn->op1, neg_ref);
-					if (!IR_IS_CONST_REF(insn->op1)) {
-						ir_use_list_remove_one(ctx, insn->op1, cond_ref);
-					}

 					if (ctx->use_lists[cond_ref].count == 1) {
+						if (!IR_IS_CONST_REF(insn->op1)) {
+							ir_use_list_remove_one(ctx, insn->op1, cond_ref);
+						}
 						MAKE_NOP(cond);   CLEAR_USES(cond_ref);
 					} else {
 						ir_use_list_remove_one(ctx, cond_ref, root_ref);
@@ -2727,7 +2827,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 					}

 					return 1;
-				} else if (insn->op2 <= cond_ref && insn->op3 <= cond_ref
+				} else if (insn->op2 <= root_ref && insn->op3 <= root_ref
 					&& cond->op != IR_OVERFLOW
 					// TODO: temporary disable IF-conversion for RLOAD.
 					// We don't track anti-dependencies in GCM and Local Scheduling.
@@ -3437,6 +3537,13 @@ static ir_ref ir_iter_optimize_condition(ir_ctx *ctx, ir_ref control, ir_ref con
 		}
 	}

+	if (condition_insn->op == IR_SHL && IR_IS_CONST_REF(condition_insn->op1)) {
+		ir_insn *val_insn = &ctx->ir_base[condition_insn->op1];
+		if (!IR_IS_SYM_CONST(val_insn->op) && val_insn->val.u64 == 1) {
+			return IR_TRUE;
+		}
+	}
+
 	while ((condition_insn->op == IR_BITCAST
 	  || condition_insn->op == IR_ZEXT
 	  || condition_insn->op == IR_SEXT)
diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc
index 9cd41c37ffe..ca42001a881 100644
--- a/ext/opcache/jit/ir/ir_x86.dasc
+++ b/ext/opcache/jit/ir/ir_x86.dasc
@@ -1273,6 +1273,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 	int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
 	const ir_proto_t *proto;
 	const ir_call_conv_dsc *cc;
+	ir_ref next;

 	constraints->def_reg = IR_REG_NONE;
 	constraints->hints_count = 0;
@@ -1345,9 +1346,11 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
 op2_const:
 			insn = &ctx->ir_base[ref];
-			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
-				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
-				n++;
+			if (IR_IS_CONST_REF(insn->op2)) {
+				if (insn->op1 != insn->op2) {
+					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
+					n++;
+				}
 			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
 				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
 				n = 1;
@@ -1712,6 +1715,10 @@ get_arg_hints:
 			break;
 		case IR_SNAPSHOT:
 			flags = 0;
+			next = ir_next_control(ctx, ref);
+			if (ctx->ir_base[next].op == IR_GUARD || ctx->ir_base[next].op == IR_GUARD_NOT) {
+				flags = IR_EXTEND_INPUTS_TO_NEXT;
+			}
 			break;
 		case IR_VA_START:
 			flags = IR_OP2_MUST_BE_IN_REG;
@@ -3078,10 +3085,6 @@ store_int:
 			if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
 				op2_insn = &ctx->ir_base[insn->op2];
 				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
-					// TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
-//???				 && (insn->op2 == ref - 1 ||
-//???				     (insn->op2 == ctx->prev_ref[ref] - 1
-//???				   && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
 					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
 						if (IR_IS_CONST_REF(op2_insn->op2)
 						 && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op)
@@ -3262,6 +3265,12 @@ store_int:
 			return IR_FUSED | IR_ARGVAL;
 		case IR_NOP:
 			return IR_SKIPPED | IR_NOP;
+		case IR_ASM:
+		case IR_ASM_OUT:
+		case IR_ASM_GOTO:
+			fprintf(stderr, "ERROR: IR_ASM is not implemented yet\n");
+			exit(1);
+			return IR_SKIPPED | IR_NOP;
 		default:
 			break;
 	}
@@ -9429,7 +9438,8 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
 						void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);

 						|	.aword &addr
-						if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) {
+						if (ctx->ir_base[bb->start].op1 == def
+						 && ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) {
 							bb->flags |= IR_BB_EMPTY;
 						}
 						continue;