Commit dd9421d8258 for php.net

commit dd9421d8258a0833258721397bce6f28631e7cf2
Author: Dmitry Stogov <dmitry@php.net>
Date:   Tue Feb 10 01:34:09 2026 +0300

    Update IR (#21183)

    IR commit: a098f9ed6c2f1c2852d6c0921283212aafb4afed

diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c
index 745a66b2163..3476b9bb061 100644
--- a/ext/opcache/jit/ir/ir.c
+++ b/ext/opcache/jit/ir/ir.c
@@ -858,7 +858,7 @@ ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
 static ir_ref _ir_fold_cse(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
 {
 	ir_ref ref = ctx->prev_insn_chain[opt & IR_OPT_OP_MASK];
-	ir_insn *insn;
+	const ir_insn *insn;

 	if (ref) {
 		ir_ref limit = ctx->fold_cse_limit;
@@ -954,7 +954,8 @@ IR_ALWAYS_INLINE ir_ref _ir_fold_cast(ir_ctx *ctx, ir_ref ref, ir_type type)
  * ANY and UNUSED ops are represented by 0
  */

-ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn)
+ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3,
+                  const ir_insn *op1_insn, const ir_insn *op2_insn, const ir_insn *op3_insn)
 {
 	uint8_t op;
 	ir_ref ref;
@@ -1136,9 +1137,9 @@ void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val)
 	ir_insn_set_op(insn, n, val);
 }

-ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n)
+ir_ref ir_get_op(const ir_ctx *ctx, ir_ref ref, int32_t n)
 {
-	ir_insn *insn = &ctx->ir_base[ref];
+	const ir_insn *insn = &ctx->ir_base[ref];

 #ifdef IR_DEBUG
 	if (n > 3) {
@@ -2025,7 +2026,7 @@ static ir_alias ir_check_aliasing(ir_ctx *ctx, ir_ref addr1, ir_ref addr2)

 ir_alias ir_check_partial_aliasing(const ir_ctx *ctx, ir_ref addr1, ir_ref addr2, ir_type type1, ir_type type2)
 {
-	ir_insn *insn1, *insn2;
+	const ir_insn *insn1, *insn2;
 	ir_ref base1, base2, off1, off2;

 	/* this must be already check */
@@ -2117,9 +2118,9 @@ ir_alias ir_check_partial_aliasing(const ir_ctx *ctx, ir_ref addr1, ir_ref addr2
 	return IR_MAY_ALIAS;
 }

-IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr, ir_ref limit)
+IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr, ir_ref limit)
 {
-	ir_insn *insn;
+	const ir_insn *insn;
 	uint32_t modified_regset = 0;

 	while (ref > limit) {
@@ -2159,7 +2160,7 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(ir_ctx *ctx, ir_ref ref, ir_type
 		} else if (insn->op == IR_RSTORE) {
 			modified_regset |= (1 << insn->op3);
 		} else if (insn->op == IR_CALL) {
-			ir_insn *func = &ctx->ir_base[insn->op2];
+			const ir_insn *func = &ctx->ir_base[insn->op2];
 			ir_ref func_proto;
 			const ir_proto_t *proto;

@@ -2186,14 +2187,14 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(ir_ctx *ctx, ir_ref ref, ir_type
 	return IR_UNUSED;
 }

-ir_ref ir_find_aliasing_load(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr)
+ir_ref ir_find_aliasing_load(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr)
 {
 	return ir_find_aliasing_load_i(ctx, ref, type, addr, (addr > 0 && addr < ref) ? addr : 1);
 }

-IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
+IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
 {
-	ir_insn *insn;
+	const ir_insn *insn;

 	while (ref > var) {
 		insn = &ctx->ir_base[ref];
@@ -2224,7 +2225,7 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ
 				}
 			}
 		} else if (insn->op == IR_CALL) {
-			ir_insn *func = &ctx->ir_base[insn->op2];
+			const ir_insn *func = &ctx->ir_base[insn->op2];
 			ir_ref func_proto;
 			const ir_proto_t *proto;

@@ -2251,7 +2252,7 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ
 	return IR_UNUSED;
 }

-ir_ref ir_find_aliasing_vload(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
+ir_ref ir_find_aliasing_vload(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
 {
 	return ir_find_aliasing_vload_i(ctx, ref, type, var);
 }
@@ -2547,12 +2548,12 @@ void _ir_BEGIN(ir_ctx *ctx, ir_ref src)
 	}
 }

-static ir_ref _ir_fold_condition(ir_ctx *ctx, ir_ref ref)
+static ir_ref _ir_fold_condition(const ir_ctx *ctx, ir_ref ref)
 {
-	ir_insn *insn = &ctx->ir_base[ref];
+	const ir_insn *insn = &ctx->ir_base[ref];

 	if (insn->op == IR_NE && IR_IS_CONST_REF(insn->op2)) {
-		ir_insn *op2_insn = &ctx->ir_base[insn->op2];
+		const ir_insn *op2_insn = &ctx->ir_base[insn->op2];

 		if (IR_IS_TYPE_INT(op2_insn->type) && op2_insn->val.u64 == 0) {
 			ref = insn->op1;
@@ -2565,7 +2566,7 @@ static ir_ref _ir_fold_condition(ir_ctx *ctx, ir_ref ref)
 		ref = insn->op1;
 		insn = &ctx->ir_base[ref];
 	} else if (insn->op == IR_EQ && insn->op2 == IR_NULL) {
-		ir_insn *op1_insn = &ctx->ir_base[insn->op1];
+		const ir_insn *op1_insn = &ctx->ir_base[insn->op1];
 		if (op1_insn->op == IR_ALLOCA || op1_insn->op == IR_VADDR) {
 			return IR_FALSE;
 		}
@@ -2577,10 +2578,10 @@ static ir_ref _ir_fold_condition(ir_ctx *ctx, ir_ref ref)
 	return ref;
 }

-IR_ALWAYS_INLINE ir_ref ir_check_dominating_predicates_i(ir_ctx *ctx, ir_ref ref, ir_ref condition, ir_ref limit)
+IR_ALWAYS_INLINE ir_ref ir_check_dominating_predicates_i(const ir_ctx *ctx, ir_ref ref, ir_ref condition, ir_ref limit)
 {
-	ir_insn *prev = NULL;
-	ir_insn *insn;
+	const ir_insn *prev = NULL;
+	const ir_insn *insn;

 	while (ref > limit) {
 		insn = &ctx->ir_base[ref];
@@ -2610,7 +2611,7 @@ IR_ALWAYS_INLINE ir_ref ir_check_dominating_predicates_i(ir_ctx *ctx, ir_ref ref
 	return condition;
 }

-ir_ref ir_check_dominating_predicates(ir_ctx *ctx, ir_ref ref, ir_ref condition)
+ir_ref ir_check_dominating_predicates(const ir_ctx *ctx, ir_ref ref, ir_ref condition)
 {
 	IR_ASSERT(!IR_IS_CONST_REF(condition));
 	return ir_check_dominating_predicates_i(ctx, ref, condition, (condition < ref) ? condition : 1);
@@ -2751,7 +2752,7 @@ void _ir_MERGE_LIST(ir_ctx *ctx, ir_ref list)

 		/* count inputs count */
 		do {
-			ir_insn *insn = &ctx->ir_base[ref];
+			const ir_insn *insn = &ctx->ir_base[ref];

 			IR_ASSERT(insn->op == IR_END);
 			ref = insn->op2;
@@ -2781,8 +2782,10 @@ void _ir_MERGE_LIST(ir_ctx *ctx, ir_ref list)

 ir_ref _ir_PHI_LIST(ir_ctx *ctx, ir_ref list)
 {
-	ir_insn *merge, *end;
-	ir_ref phi, *ops, i;
+	const ir_insn *merge;
+	const ir_ref *ops;
+	ir_insn *end;
+	ir_ref phi, i;
 	ir_type type;

 	if (list == IR_UNUSED) {
@@ -3246,7 +3249,8 @@ ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var)
 	if (EXPECTED(ctx->flags & IR_OPT_FOLDING)) {
 		ref = ir_find_aliasing_vload_i(ctx, ctx->control, type, var);
 		if (ref) {
-			ir_insn *insn = &ctx->ir_base[ref];
+			const ir_insn *insn = &ctx->ir_base[ref];
+
 			if (insn->type == type) {
 				return ref;
 			} else if (ir_type_size[insn->type] == ir_type_size[type]) {
@@ -3312,7 +3316,8 @@ ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr)
 		}
 		ref = ir_find_aliasing_load_i(ctx, ctx->control, type, addr, (addr > 0) ? addr : 1);
 		if (ref) {
-			ir_insn *insn = &ctx->ir_base[ref];
+			const ir_insn *insn = &ctx->ir_base[ref];
+
 			if (insn->type == type) {
 				return ref;
 			} else if (ir_type_size[insn->type] == ir_type_size[type]) {
diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h
index a274ceb5b16..b0a96b511bd 100644
--- a/ext/opcache/jit/ir/ir.h
+++ b/ext/opcache/jit/ir/ir.h
@@ -569,8 +569,6 @@ void ir_strtab_free(ir_strtab *strtab);
 #define IR_OPT_CFG             (1<<21) /* merge BBs, by remove END->BEGIN nodes during CFG construction */
 #define IR_OPT_MEM2SSA         (1<<22)
 #define IR_OPT_CODEGEN         (1<<23)
-#define IR_GEN_NATIVE          (1<<24)
-#define IR_GEN_CODE            (1<<25)

 /* debug related */
 #ifdef IR_DEBUG
@@ -771,7 +769,7 @@ ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);

 ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count);
 void   ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val);
-ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n);
+ir_ref ir_get_op(const ir_ctx *ctx, ir_ref ref, int32_t n);

 IR_ALWAYS_INLINE void ir_set_op1(ir_ctx *ctx, ir_ref ref, ir_ref val)
 {
@@ -865,13 +863,13 @@ int ir_reg_alloc(ir_ctx *ctx);
 int ir_regs_number(void);
 bool ir_reg_is_int(int32_t reg);
 const char *ir_reg_name(int8_t reg, ir_type type);
-int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref);
+int32_t ir_get_spill_slot_offset(const ir_ctx *ctx, ir_ref ref);

 /* Target CPU instruction selection and code generation (see ir_x86.c) */
 int ir_match(ir_ctx *ctx);
 void *ir_emit_code(ir_ctx *ctx, size_t *size);

-bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr);
+bool ir_needs_thunk(const ir_code_buffer *code_buffer, void *addr);
 void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr);
 void ir_fix_thunk(void *thunk_entry, void *addr);

@@ -947,13 +945,14 @@ int ir_load_llvm_asm(ir_loader *loader, const char *filename);
 #define IR_SAVE_REGS       (1<<4) /* add info about selected registers */
 #define IR_SAVE_SAFE_NAMES (1<<5) /* add '@' prefix to symbol names */

+void ir_print_func_proto(const ir_ctx *ctx, const char *name, bool prefix, FILE *f);
 void ir_print_proto(const ir_ctx *ctx, ir_ref proto, FILE *f);
 void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f);
 void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f);

 /* IR debug dump API (implementation in ir_dump.c) */
 void ir_dump(const ir_ctx *ctx, FILE *f);
-void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f);
+void ir_dump_dot(const ir_ctx *ctx, const char *name, const char *comments, FILE *f);
 void ir_dump_use_lists(const ir_ctx *ctx, FILE *f);
 void ir_dump_cfg(ir_ctx *ctx, FILE *f);
 void ir_dump_cfg_map(const ir_ctx *ctx, FILE *f);
diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
index 88996cb6f98..5a6718b77c1 100644
--- a/ext/opcache/jit/ir/ir_aarch64.dasc
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -60,7 +60,7 @@ IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_
 #define ADR_IMM         (1<<20)        // signed imm21
 #define ADRP_IMM        (1LL<<32)      // signed imm21 * 4096

-static bool aarch64_may_use_b(ir_code_buffer *code_buffer, const void *addr)
+static bool aarch64_may_use_b(const ir_code_buffer *code_buffer, const void *addr)
 {
 	if (code_buffer) {
 		if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
@@ -824,6 +824,34 @@ static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type)
 	}
 }

+static bool all_usages_are_fusable(ir_ctx *ctx, ir_ref ref)
+{
+	ir_insn *insn = &ctx->ir_base[ref];
+
+	if (insn->op >= IR_EQ && insn->op <= IR_UNORDERED) {
+		ir_use_list *use_list = &ctx->use_lists[ref];
+		ir_ref n = use_list->count;
+
+		if (n > 0) {
+			ir_ref *p = ctx->use_edges + use_list->refs;
+
+			do {
+				insn = &ctx->ir_base[*p];
+				if (insn->op != IR_IF
+				 && insn->op != IR_GUARD
+				 && insn->op != IR_GUARD_NOT) {
+					return 0;
+				}
+				p++;
+				n--;
+			} while (n);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+
 static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
 {
 	ir_insn *op2_insn;
@@ -1145,7 +1173,7 @@ binop_fp:
 				return IR_RETURN_FP;
 			}
 		case IR_IF:
-			if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+			if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
 				op2_insn = &ctx->ir_base[insn->op2];
 				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
 					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
@@ -1168,13 +1196,13 @@ binop_fp:
 			}
 		case IR_GUARD:
 		case IR_GUARD_NOT:
-			if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+			if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
 				op2_insn = &ctx->ir_base[insn->op2];
-				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED
+				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
 					// TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
-				 && (insn->op2 == ref - 1 ||
-				     (insn->op2 == ctx->prev_ref[ref] - 1
-				   && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
+//???				 && (insn->op2 == ref - 1 ||
+//???				     (insn->op2 == ctx->prev_ref[ref] - 1
+//???				   && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
 					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
 						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
 						return IR_GUARD_CMP_INT;
@@ -3084,7 +3112,7 @@ static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	}
 }

-static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn)
+static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_insn *cmp_insn)
 {
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
@@ -3093,16 +3121,12 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_ins
 	ir_ref op1, op2;
 	ir_reg op1_reg, op2_reg;

-	if (op == IR_LT || op == IR_LE) {
-		/* swap operands to avoid P flag check */
-		op ^= 3;
-		op1 = cmp_insn->op2;
-		op2 = cmp_insn->op1;
-		op1_reg = ctx->regs[cmp_ref][2];
-		op2_reg = ctx->regs[cmp_ref][1];
+	op1 = cmp_insn->op1;
+	op2 = cmp_insn->op2;
+	if (UNEXPECTED(ctx->rules[cmp_ref] & IR_FUSED_REG)) {
+		op1_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 1);
+		op2_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 2);
 	} else {
-		op1 = cmp_insn->op1;
-		op2 = cmp_insn->op2;
 		op1_reg = ctx->regs[cmp_ref][1];
 		op2_reg = ctx->regs[cmp_ref][2];
 	}
@@ -3131,7 +3155,7 @@ static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
-	ir_op op = ir_emit_cmp_fp_common(ctx, def, insn);
+	ir_op op = ir_emit_cmp_fp_common(ctx, def, def, insn);
 	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
 //???	ir_reg tmp_reg = ctx->regs[def][3]; // TODO: take into account vs flag

@@ -3348,8 +3372,15 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i
 	ir_type type = ctx->ir_base[cmp_insn->op1].type;
 	ir_ref op1 = cmp_insn->op1;
 	ir_ref op2 = cmp_insn->op2;
-	ir_reg op1_reg = ctx->regs[insn->op2][1];
-	ir_reg op2_reg = ctx->regs[insn->op2][2];
+	ir_reg op1_reg, op2_reg;
+
+	if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
+		op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
+		op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
+	} else {
+		op1_reg = ctx->regs[insn->op2][1];
+		op2_reg = ctx->regs[insn->op2][2];
+	}

 	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
 		op1_reg = IR_REG_NUM(op1_reg);
@@ -3390,7 +3421,7 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i

 static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
 {
-	ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
+	ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]);
 	ir_emit_jcc(ctx, b, def, insn, next_block, op, 0);
 }

@@ -3459,14 +3490,14 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 			op3_reg = op2_reg;
 		}
 	}
-	if (op3 != op2 && IR_REG_SPILLED(op3_reg)) {
+	if (IR_REG_SPILLED(op3_reg)) {
 		op3_reg = IR_REG_NUM(op3_reg);
 		ir_emit_load(ctx, type, op3_reg, op3);
-		if (op1 == op2) {
+		if (op1 == op3) {
 			op1_reg = op3_reg;
 		}
 	}
-	if (op1 != op2 && op1 != op3 && IR_REG_SPILLED(op1_reg)) {
+	if (IR_REG_SPILLED(op1_reg)) {
 		op1_reg = IR_REG_NUM(op1_reg);
 		ir_emit_load(ctx, op1_type, op1_reg, op1);
 	}
@@ -5682,9 +5713,16 @@ static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
 	ir_type type = ctx->ir_base[cmp_insn->op1].type;
 	ir_ref op1 = cmp_insn->op1;
 	ir_ref op2 = cmp_insn->op2;
-	ir_reg op1_reg = ctx->regs[insn->op2][1];
-	ir_reg op2_reg = ctx->regs[insn->op2][2];
 	void *addr;
+	ir_reg op1_reg, op2_reg;
+
+	if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
+		op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
+		op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
+	} else {
+		op1_reg = ctx->regs[insn->op2][1];
+		op2_reg = ctx->regs[insn->op2][2];
+	}

 	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
 		op1_reg = IR_REG_NUM(op1_reg);
@@ -5738,7 +5776,7 @@ static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *

 static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
 {
-	ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
+	ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]);
 	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);

 	if (insn->op == IR_GUARD) {
@@ -7143,7 +7181,7 @@ static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, ui
 	return n;
 }

-bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr)
+bool ir_needs_thunk(const ir_code_buffer *code_buffer, void *addr)
 {
 	return !aarch64_may_use_b(code_buffer, addr);
 }
diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c
index 46755067b24..bd314dcedb1 100644
--- a/ext/opcache/jit/ir/ir_cfg.c
+++ b/ext/opcache/jit/ir/ir_cfg.c
@@ -77,12 +77,86 @@ void ir_reset_cfg(ir_ctx *ctx)
 	}
 }

+static void ir_remove_phis_inputs(ir_ctx *ctx, ir_use_list *use_list, int new_inputs_count, ir_bitset life_inputs)
+{
+	ir_ref i, j, n, k, *p, *q, use;
+	ir_insn *use_insn;
+
+	if (new_inputs_count == 1) {
+		for (k = use_list->count, p = q = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
+			use = *p;
+			use_insn = &ctx->ir_base[use];
+			if (use_insn->op == IR_PHI) {
+				/* Convert PHI to COPY */
+				n = use_insn->inputs_count;
+				i = 2;
+				for (j = 2; j <= n; j++) {
+					ir_ref input = ir_insn_op(use_insn, j);
+
+					if (ir_bitset_in(life_inputs, j - 1)) {
+						use_insn->op1 = ir_insn_op(use_insn, j);
+					} else if (input > 0) {
+						ir_use_list_remove_one(ctx, input, use);
+					}
+				}
+				use_insn->op = IR_COPY;
+				use_insn->inputs_count = 1;
+				for (j = 2; j <= n; j++) {
+					ir_insn_set_op(use_insn, j, IR_UNUSED);
+				}
+				continue;
+			}
+
+			/* compact use list */
+			if (p != q){
+				*q = use;
+			}
+			q++;
+		}
+
+		if (p != q) {
+			use_list->count -= (p - q);
+			do {
+				*q = IR_UNUSED; /* clean up the removed tail */
+				q++;
+			} while (p != q);
+		}
+	} else {
+		for (k = use_list->count, p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
+			use = *p;
+			use_insn = &ctx->ir_base[use];
+			if (use_insn->op == IR_PHI) {
+				n = use_insn->inputs_count;
+				i = 2;
+				for (j = 2; j <= n; j++) {
+					ir_ref input = ir_insn_op(use_insn, j);
+
+					if (ir_bitset_in(life_inputs, j - 1)) {
+						IR_ASSERT(input);
+						if (i != j) {
+							ir_insn_set_op(use_insn, i, input);
+						}
+						i++;
+					} else if (input > 0) {
+						ir_use_list_remove_one(ctx, input, use);
+					}
+				}
+				use_insn->inputs_count = i - 1;
+				for (j = i; j <= n; j++) {
+					ir_insn_set_op(use_insn, j, IR_UNUSED);
+				}
+			}
+		}
+	}
+}
+
 static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t *_blocks, ir_block *blocks, uint32_t bb_count)
 {
 	uint32_t b, count = 0;
 	ir_block *bb = blocks + 1;
 	ir_insn *insn;
 	ir_ref i, j, n, *ops, input;
+	ir_bitset life_inputs = NULL;

 	for (b = 1; b <= bb_count; b++, bb++) {
 		bb->successors = count;
@@ -96,12 +170,27 @@ static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t
 			for (i = 1, j = 1; i <= n; i++) {
 				input = ops[i];
 				if (_blocks[input]) {
+					if (life_inputs) {
+						ir_bitset_incl(life_inputs, i);
+					}
 					if (i != j) {
 						ops[j] = ops[i];
 					}
 					j++;
-				} else if (input > 0) {
-					ir_use_list_remove_one(ctx, input, bb->start);
+				} else {
+					if (ctx->use_lists[bb->start].count > 1) {
+						/* Some inputs of this MERGE are deleted and we have to update the dependent PHIs */
+						if (!life_inputs) {
+							int k;
+							life_inputs = ir_bitset_malloc(n + 1);
+							for (k = 1; k < i; k++) {
+								ir_bitset_incl(life_inputs, k);
+							}
+						}
+					}
+					if (input > 0) {
+						ir_use_list_remove_one(ctx, input, bb->start);
+					}
 				}
 			}
 			j--;
@@ -115,6 +204,10 @@ static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t
 				for (;j <= n; j++) {
 					ops[j] = IR_UNUSED;
 				}
+				if (life_inputs) {
+					ir_remove_phis_inputs(ctx, &ctx->use_lists[bb->start], insn->inputs_count, life_inputs);
+					ir_mem_free(life_inputs);
+				}
 			}
 		}
 		count += bb->predecessors_count;
@@ -375,8 +468,7 @@ static void ir_remove_predecessor(ir_ctx *ctx, ir_block *bb, uint32_t from)

 static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
 {
-	ir_ref i, j, n, k, *p, *q, use;
-	ir_insn *use_insn;
+	ir_ref i, j, n;
 	ir_use_list *use_list;
 	ir_bitset life_inputs;
 	ir_insn *insn = &ctx->ir_base[merge];
@@ -402,80 +494,14 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
 	}
 	if (i == 1) {
 		insn->op = IR_BEGIN;
-		insn->inputs_count = 1;
-		use_list = &ctx->use_lists[merge];
-		if (use_list->count > 1) {
-			n++;
-			for (k = use_list->count, p = q = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
-				use = *p;
-				use_insn = &ctx->ir_base[use];
-				if (use_insn->op == IR_PHI) {
-					/* Convert PHI to COPY */
-					i = 2;
-					for (j = 2; j <= n; j++) {
-						ir_ref input = ir_insn_op(use_insn, j);
-
-						if (ir_bitset_in(life_inputs, j - 1)) {
-							use_insn->op1 = ir_insn_op(use_insn, j);
-						} else if (input > 0) {
-							ir_use_list_remove_one(ctx, input, use);
-						}
-					}
-					use_insn->op = IR_COPY;
-					use_insn->inputs_count = 1;
-					for (j = 2; j <= n; j++) {
-						ir_insn_set_op(use_insn, j, IR_UNUSED);
-					}
-					continue;
-				}
-
-				/*compact use list */
-				if (p != q){
-					*q = use;
-				}
-				q++;
-			}
-
-			if (p != q) {
-				use_list->count -= (p - q);
-				do {
-					*q = IR_UNUSED; /* clenu-op the removed tail */
-					q++;
-				} while (p != q);
-			}
-		}
-	} else {
-		insn->inputs_count = i;
+	}
+	insn->inputs_count = i;

-		use_list = &ctx->use_lists[merge];
-		if (use_list->count > 1) {
-			n++;
-			for (k = use_list->count, p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
-				use = *p;
-				use_insn = &ctx->ir_base[use];
-				if (use_insn->op == IR_PHI) {
-					i = 2;
-					for (j = 2; j <= n; j++) {
-						ir_ref input = ir_insn_op(use_insn, j);
-
-						if (ir_bitset_in(life_inputs, j - 1)) {
-							IR_ASSERT(input);
-							if (i != j) {
-								ir_insn_set_op(use_insn, i, input);
-							}
-							i++;
-						} else if (input > 0) {
-							ir_use_list_remove_one(ctx, input, use);
-						}
-					}
-					use_insn->inputs_count = i - 1;
-					for (j = i; j <= n; j++) {
-						ir_insn_set_op(use_insn, j, IR_UNUSED);
-					}
-				}
-			}
-		}
+	use_list = &ctx->use_lists[merge];
+	if (use_list->count > 1) {
+		ir_remove_phis_inputs(ctx, use_list, i, life_inputs);
 	}
+
 	ir_mem_free(life_inputs);
 	ir_use_list_remove_all(ctx, from, merge);
 }
diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c
index 92962313d99..037003f021a 100644
--- a/ext/opcache/jit/ir/ir_dump.c
+++ b/ext/opcache/jit/ir/ir_dump.c
@@ -60,7 +60,7 @@ void ir_dump(const ir_ctx *ctx, FILE *f)
 	}
 }

-void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f)
+void ir_dump_dot(const ir_ctx *ctx, const char *name, const char *comments, FILE *f)
 {
 	int DATA_WEIGHT    = 0;
 	int CONTROL_WEIGHT = 5;
@@ -70,6 +70,13 @@ void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f)
 	uint32_t flags;

 	fprintf(f, "digraph %s {\n", name);
+	fprintf(f, "\tlabelloc=t;\n");
+	fprintf(f, "\tlabel=\"");
+	ir_print_func_proto(ctx, name, 0, f);
+	if (comments) {
+		fprintf(f, " # %s", comments);
+	}
+	fprintf(f, "\"\n");
 	fprintf(f, "\trankdir=TB;\n");
 	for (i = 1 - ctx->consts_count, insn = ctx->ir_base + i; i < IR_UNUSED; i++, insn++) {
 		fprintf(f, "\tc%d [label=\"C%d: CONST %s(", -i, -i, ir_type_name[insn->type]);
diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c
index a6dfde77f57..1cadb099bce 100644
--- a/ext/opcache/jit/ir/ir_emit.c
+++ b/ext/opcache/jit/ir/ir_emit.c
@@ -971,7 +971,7 @@ int ir_match(ir_ctx *ctx)
 	return 1;
 }

-int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref)
+int32_t ir_get_spill_slot_offset(const ir_ctx *ctx, ir_ref ref)
 {
 	int32_t offset;

diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h
index bab6b291607..136bbb0e08e 100644
--- a/ext/opcache/jit/ir/ir_fold.h
+++ b/ext/opcache/jit/ir/ir_fold.h
@@ -3439,5 +3439,84 @@ IR_FOLD(COND(_, _)) // TODO: COND(_, _, _)
 	if (op2 == op3) {
 		IR_FOLD_COPY(op2);
 	}
+
+	if (op1_insn->type == IR_BOOL) {
+		if (op2 == IR_TRUE) {
+			if (op3 == IR_FALSE) {
+				/* a ? true : false => a */
+				IR_FOLD_COPY(op1);
+			} else {
+				/* a ? true : b => a | b */
+				opt = IR_OPT(IR_OR, IR_BOOL);
+				op2 = op3;
+				op3 = IR_UNUSED;
+				IR_FOLD_RESTART;
+			}
+		} else if (op3 == IR_FALSE) {
+			/* a ? b : false => a & b */
+			opt = IR_OPT(IR_AND, IR_BOOL);
+			op3 = IR_UNUSED;
+			IR_FOLD_RESTART;
+		} else if (op2 == IR_FALSE) {
+			if (op3 == IR_TRUE) {
+				/* a ? false : true => !a */
+				opt = IR_OPT(IR_NOT, IR_BOOL);
+				op2 = IR_UNUSED;
+				op3 = IR_UNUSED;
+				IR_FOLD_RESTART;
+			}
+		} else if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))
+		 && IR_IS_CONST_REF(op2)
+		 && IR_IS_CONST_REF(op3)
+		 && op2_insn->val.u64 == 1
+		 && op3_insn->val.u64 == 0) {
+			if (ir_type_size[IR_OPT_TYPE(opt)] > 1) {
+				/* a ? 1 : 0 => ZEXT(a) */
+				opt = IR_OPT(IR_ZEXT, IR_OPT_TYPE(opt));
+			} else {
+				/* a ? 1 : 0 => BITCAST(a) */
+				opt = IR_OPT(IR_BITCAST, IR_OPT_TYPE(opt));
+			}
+			op2 = IR_UNUSED;
+			op3 = IR_UNUSED;
+			IR_FOLD_RESTART;
+		}
+	} else if (IR_IS_TYPE_INT(op1_insn->type)) {
+		if (op2 == IR_TRUE) {
+			if (op3 == IR_FALSE) {
+				opt = IR_OPT(IR_NE, IR_BOOL);
+				val.u64 = 0;
+				op2 = ir_const(ctx, val, op1_insn->type);
+				op3 = IR_UNUSED;
+				IR_FOLD_RESTART;
+			}
+		} else if (op2 == IR_FALSE) {
+			if (op3 == IR_TRUE) {
+				opt = IR_OPT(IR_EQ, IR_BOOL);
+				val.u64 = 0;
+				op2 = ir_const(ctx, val, op1_insn->type);
+				op3 = IR_UNUSED;
+				IR_FOLD_RESTART;
+			}
+		}
+	}
+
+	if (op1_insn->op == IR_NE) {
+		if (IR_IS_CONST_REF(op1_insn->op2)
+		 && IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op2].type)
+		 && ctx->ir_base[op1_insn->op2].val.u64 == 0) {
+			op1 = op1_insn->op1;
+			IR_FOLD_RESTART;
+		}
+	} else if (op1_insn->op == IR_EQ) {
+		if (IR_IS_CONST_REF(op1_insn->op2)
+		 && IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op2].type)
+		 && ctx->ir_base[op1_insn->op2].val.u64 == 0) {
+			op1 = op1_insn->op1;
+			SWAP_REFS(op2, op3);
+			IR_FOLD_RESTART;
+		}
+	}
+
 	IR_FOLD_NEXT;
 }
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
index 67c97611eaa..c644c188dca 100644
--- a/ext/opcache/jit/ir/ir_gcm.c
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -262,7 +262,7 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
 #endif

 	/* 1.2. Iteratively check the predecessors of already found TOTALLY_USEFUL blocks and
-	 *      add them into TOTALLY_USEFUL set if all of their sucessors are already there.
+	 *      add them into TOTALLY_USEFUL set if all of their successors are already there.
 	 */
 	IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
 		_push_predecessors(ctx, &ctx->cfg_blocks[i], data);
@@ -788,7 +788,7 @@ IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)

 IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref start)
 {
-	ir_insn	*insn = &ctx->ir_base[start];
+	ir_insn *insn = &ctx->ir_base[start];
 	uint32_t n = insn->inputs_count;
 	ir_ref *p = insn->ops + 1;

@@ -924,25 +924,120 @@ static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *
 	ctx->cfg_blocks = new_blocks;
 }

+#ifdef IR_DEBUG
+static void ir_schedule_print_list(const ir_ctx *ctx, uint32_t b, const ir_ref *_next,
+                                   ir_ref start, ir_ref end, const char *label)
+{
+	ir_ref ref;
+
+	fprintf(stderr, "  %s [%d", label, start);
+	ref = _next[start];
+	while (ref != end) {
+		fprintf(stderr, ",%d", ref);
+		ref = _next[ref];
+	}
+	fprintf(stderr, ",%d]\n", ref);
+}
+#endif
+
+/* Simple Stable Topological Sort */
+static void ir_schedule_topsort(const ir_ctx *ctx, uint32_t b, const ir_block *bb,
+                                ir_ref *_xlat, ir_ref *_next, ir_ref *_prev,
+                                ir_ref ref, ir_ref end,
+                                ir_ref *insns_count, ir_ref *consts_count)
+{
+	ir_ref i = ref;
+	const ir_insn *insn;
+
+	if (bb->successors_count > 1) {
+		ir_ref input, j = bb->end;
+		ir_insn *end = &ctx->ir_base[j];
+
+		if (end->op == IR_IF) {
+			/* Move condition closer to IF */
+			input = end->op2;
+			if (input > 0
+			 && ctx->cfg_map[input] == b
+			 && !_xlat[input]
+			 && _prev[j] != input
+			 && (!(ir_op_flags[ctx->ir_base[input].op] & IR_OP_FLAG_CONTROL) || end->op1 == input)) {
+				if (input == i) {
+					i = _next[i];
+					insn = &ctx->ir_base[i];
+				}
+				/* remove "input" */
+				_prev[_next[input]] = _prev[input];
+				_next[_prev[input]] = _next[input];
+				/* insert before "j" */
+				_prev[input] = _prev[j];
+				_next[input] = j;
+				_next[_prev[j]] = input;
+				_prev[j] = input;
+			}
+		}
+	}
+
+	while (i != end) {
+		ir_ref n, j, input;
+		const ir_ref *p;
+
+restart:
+		IR_ASSERT(ctx->cfg_map[i] == b);
+		insn = &ctx->ir_base[i];
+		n = insn->inputs_count;
+		for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
+			input = *p;
+			if (!_xlat[input]) {
+				/* input is not scheduled yet */
+				if (input > 0) {
+					if (ctx->cfg_map[input] == b) {
+						/* "input" should be before "i" to satisfy dependency */
+#ifdef IR_DEBUG
+						if (ctx->flags & IR_DEBUG_SCHEDULE) {
+							fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
+						}
+#endif
+						/* remove "input" */
+						_prev[_next[input]] = _prev[input];
+						_next[_prev[input]] = _next[input];
+						/* insert before "i" */
+						_prev[input] = _prev[i];
+						_next[input] = i;
+						_next[_prev[i]] = input;
+						_prev[i] = input;
+						/* restart from "input" */
+						i = input;
+						goto restart;
+					}
+				} else if (input < IR_TRUE) {
+					*consts_count += ir_count_constant(_xlat, input);
+				}
+			}
+		}
+
+		_xlat[i] = *insns_count;
+		*insns_count += ir_insn_inputs_to_len(n);
+		IR_ASSERT(_next[i] != IR_UNUSED);
+		i = _next[i];
+	}
+}
+
 int ir_schedule(ir_ctx *ctx)
 {
-	ir_ctx new_ctx;
 	ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
 	ir_ref *_xlat;
 	ir_ref *edges;
 	ir_ref prev_b_end;
 	uint32_t b;
-	uint32_t *_blocks = ctx->cfg_map;
 	ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
 	ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
 	ir_block *bb;
-	ir_insn *insn, *new_insn;
+	ir_insn *insn, *new_insn, *base;
 	ir_use_list *lists, *use_list, *new_list;
 	bool bad_bb_order = 0;

-
 	/* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
-	IR_ASSERT(_blocks[1] == 1);
+	IR_ASSERT(ctx->cfg_map[1] == 1);

 	/* link BB boundaries */
 	_prev[1] = 0;
@@ -950,30 +1045,34 @@ int ir_schedule(ir_ctx *ctx)
 	_next[1] = prev_b_end;
 	_prev[prev_b_end] = 1;
 	for (b = 2, bb = ctx->cfg_blocks + 2; b <= ctx->cfg_blocks_count; b++, bb++) {
-		_next[prev_b_end] = bb->start;
-		_prev[bb->start] = prev_b_end;
-		_next[bb->start] = bb->end;
-		_prev[bb->end] = bb->start;
-		prev_b_end = bb->end;
-		if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) {
+		ir_ref start = bb->start;
+		ir_ref end = bb->end;
+		_next[prev_b_end] = start;
+		_prev[start] = prev_b_end;
+		_next[start] = end;
+		_prev[end] = start;
+		prev_b_end = end;
+		if (!ir_is_good_bb_order(ctx, b, bb, start)) {
 			bad_bb_order = 1;
 		}
 	}
 	_next[prev_b_end] = 0;

 	/* insert intermediate BB nodes */
-	for (i = 2, j = 1; i < ctx->insns_count; i++) {
-		b = _blocks[i];
+	use_edges_count = ctx->use_lists[1].count;
+	for (i = 2, use_list = &ctx->use_lists[i]; i < ctx->insns_count; use_list++, i++) {
+		b = ctx->cfg_map[i];
 		if (!b) continue;
+		use_edges_count += use_list->count;
 		bb = &ctx->cfg_blocks[b];
 		if (i != bb->start && i != bb->end) {
 			/* insert before "end" */
-			ir_ref n = bb->end;
-			ir_ref p = _prev[n];
-			_prev[i] = p;
-			_next[i] = n;
-			_next[p] = i;
-			_prev[n] = i;
+			ir_ref next = bb->end;
+			ir_ref prev = _prev[next];
+			_prev[i] = prev;
+			_next[i] = next;
+			_next[prev] = i;
+			_prev[next] = i;
 		}
 	}

@@ -981,15 +1080,6 @@ int ir_schedule(ir_ctx *ctx)
 		ir_fix_bb_order(ctx, _prev, _next);
 	}

-#ifdef IR_DEBUG
-	if (ctx->flags & IR_DEBUG_SCHEDULE) {
-		fprintf(stderr, "Before Schedule\n");
-		for (i = 1; i != 0; i = _next[i]) {
-			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
-		}
-	}
-#endif
-
 	_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
 	_xlat += ctx->consts_count;
 	_xlat[IR_TRUE] = IR_TRUE;
@@ -999,10 +1089,17 @@ int ir_schedule(ir_ctx *ctx)
 	insns_count = 1;
 	consts_count = -(IR_TRUE - 1);

-	/* Topological sort according dependencies inside each basic block */
+	/* Schedule instructions inside each BB (now just topological sort according to dependencies) */
 	for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
 		ir_ref start;

+#ifdef IR_DEBUG
+		if (ctx->flags & IR_DEBUG_SCHEDULE) {
+			fprintf(stderr, "BB%d\n", b);
+			ir_schedule_print_list(ctx, b, _next, bb->start, bb->end, "INITIAL");
+		}
+#endif
+
 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
 		/* Schedule BB start */
 		start = i = bb->start;
@@ -1062,8 +1159,8 @@ int ir_schedule(ir_ctx *ctx)
 				for (p = &ctx->use_edges[use_list->refs]; count > 0; p++, count--) {
 					ir_ref use = *p;
 					ir_insn *use_insn = &ctx->ir_base[use];
-					if (!_xlat[use] && (_blocks[use] || use_insn->op == IR_PARAM)) {
-						IR_ASSERT(_blocks[use] == b || use_insn->op == IR_PARAM);
+					if (!_xlat[use] && ctx->cfg_map[use]) {
+						IR_ASSERT(ctx->cfg_map[use] == b);
 						if (use_insn->op == IR_PARAM
 						 || use_insn->op == IR_VAR
 						 || use_insn->op == IR_PI
@@ -1100,76 +1197,20 @@ int ir_schedule(ir_ctx *ctx)
 				insn = &ctx->ir_base[i];
 			}
 		}
-		if (bb->successors_count > 1) {
-			ir_ref input, j = bb->end;
-			ir_insn *end = &ctx->ir_base[j];
-
-			if (end->op == IR_IF) {
-				/* Move condition closer to IF */
-				input = end->op2;
-				if (input > 0
-				 && _blocks[input] == b
-				 && !_xlat[input]
-				 && _prev[j] != input
-				 && (!(ir_op_flags[ctx->ir_base[input].op] & IR_OP_FLAG_CONTROL) || end->op1 == input)) {
-					if (input == i) {
-						i = _next[i];
-						insn = &ctx->ir_base[i];
-					}
-					/* remove "input" */
-					_prev[_next[input]] = _prev[input];
-					_next[_prev[input]] = _next[input];
-					/* insert before "j" */
-					_prev[input] = _prev[j];
-					_next[input] = j;
-					_next[_prev[j]] = input;
-					_prev[j] = input;
-				}
-			}
+
+		if (i != bb->end) {
+			ir_schedule_topsort(ctx, b, bb, _xlat, _next, _prev, i, bb->end, &insns_count, &consts_count);
 		}
-		while (i != bb->end) {
-			ir_ref n, j, *p, input;

-restart:
-			IR_ASSERT(_blocks[i] == b);
-			n = insn->inputs_count;
-			for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
-				input = *p;
-				if (!_xlat[input]) {
-					/* input is not scheduled yet */
-					if (input > 0) {
-						if (_blocks[input] == b) {
-							/* "input" should be before "i" to satisfy dependency */
 #ifdef IR_DEBUG
-							if (ctx->flags & IR_DEBUG_SCHEDULE) {
-								fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
-							}
-#endif
-							/* remove "input" */
-							_prev[_next[input]] = _prev[input];
-							_next[_prev[input]] = _next[input];
-							/* insert before "i" */
-							_prev[input] = _prev[i];
-							_next[input] = i;
-							_next[_prev[i]] = input;
-							_prev[i] = input;
-							/* restart from "input" */
-							i = input;
-							insn = &ctx->ir_base[i];
-							goto restart;
-						}
-					} else if (input < IR_TRUE) {
-						consts_count += ir_count_constant(_xlat, input);
-					}
-				}
-			}
-			_xlat[i] = insns_count;
-			insns_count += ir_insn_inputs_to_len(n);
-			IR_ASSERT(_next[i] != IR_UNUSED);
-			i = _next[i];
-			insn = &ctx->ir_base[i];
+		if (ctx->flags & IR_DEBUG_SCHEDULE) {
+			ir_schedule_print_list(ctx, b, _next, start, bb->end, "  FINAL");
 		}
+#endif
+
 		/* Schedule BB end */
+		i = bb->end;
+		insn = &ctx->ir_base[i];
 		_xlat[i] = bb->end = insns_count;
 		insns_count++;
 		if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) {
@@ -1179,15 +1220,6 @@ int ir_schedule(ir_ctx *ctx)
 		}
 	}

-#ifdef IR_DEBUG
-	if (ctx->flags & IR_DEBUG_SCHEDULE) {
-		fprintf(stderr, "After Schedule\n");
-		for (i = 1; i != 0; i = _next[i]) {
-			fprintf(stderr, "%d -> %d (%d)\n", i, _blocks[i], _xlat[i]);
-		}
-	}
-#endif
-
 #if 1
 	/* Check if scheduling didn't make any modifications */
 	if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) {
@@ -1215,113 +1247,55 @@ int ir_schedule(ir_ctx *ctx)

 	ir_mem_free(_prev);

-	ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
-	new_ctx.insns_count = insns_count;
-	new_ctx.flags2 = ctx->flags2;
-	new_ctx.ret_type = ctx->ret_type;
-	new_ctx.value_params = ctx->value_params;
-	new_ctx.mflags = ctx->mflags;
-	new_ctx.spill_base = ctx->spill_base;
-	new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
-	new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size;
-	new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size;
-	new_ctx.fixed_regset = ctx->fixed_regset;
-	new_ctx.fixed_save_regset = ctx->fixed_save_regset;
-	new_ctx.entries_count = ctx->entries_count;
-#if defined(IR_TARGET_AARCH64)
-	new_ctx.deoptimization_exits = ctx->deoptimization_exits;
-	new_ctx.get_exit_addr = ctx->get_exit_addr;
-	new_ctx.get_veneer = ctx->get_veneer;
-	new_ctx.set_veneer = ctx->set_veneer;
-#endif
-	new_ctx.loader = ctx->loader;
+	uint32_t *map = ir_mem_calloc(insns_count, sizeof(uint32_t));
+	_prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
+	lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
+	ir_ref *use_edges = edges = ir_mem_malloc(use_edges_count * sizeof(ir_ref));
+	base = ir_mem_malloc((consts_count + insns_count) * sizeof(ir_insn));
+	base += consts_count;

 	/* Copy constants */
-	if (consts_count == ctx->consts_count) {
-		new_ctx.consts_count = consts_count;
-		ref = 1 - consts_count;
-		insn = &ctx->ir_base[ref];
-		new_insn = &new_ctx.ir_base[ref];
-
-		memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref));
-		if (ctx->strtab.data) {
-			while (ref != IR_TRUE) {
-				if (new_insn->op == IR_FUNC_ADDR) {
-					if (new_insn->proto) {
-						size_t len;
-						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
-						new_insn->proto = ir_strl(&new_ctx, proto, len);
-					}
-				} else if (new_insn->op == IR_FUNC) {
-					size_t len;
-					const char *name = ir_get_strl(ctx, new_insn->val.name, &len);
-					new_insn->val.u64 = ir_strl(&new_ctx, name, len);
-					if (new_insn->proto) {
-						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
-						new_insn->proto = ir_strl(&new_ctx, proto, len);
-					}
-				} else if (new_insn->op == IR_SYM || new_insn->op == IR_STR || new_insn->op == IR_LABEL) {
-					size_t len;
-					const char *str = ir_get_strl(ctx, new_insn->val.name, &len);
-					new_insn->val.u64 = ir_strl(&new_ctx, str, len);
-				}
-				new_insn++;
-				ref++;
-			}
+	if (ctx->consts_count == consts_count) {
+		memcpy(base - consts_count + 1, ctx->ir_base - consts_count + 1, sizeof(ir_insn) * consts_count);
+		for (j = -consts_count + 1; j < IR_TRUE; j++) {
+			_xlat[j] = j;
 		}
 	} else {
-		new_ref = -new_ctx.consts_count;
-		new_insn = &new_ctx.ir_base[new_ref];
-		for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) {
-			if (!_xlat[ref]) {
-				continue;
+		ir_insn *src = ctx->ir_base - ctx->consts_count + 1;
+		ir_insn *dst = base - consts_count + 1;
+
+		i = -ctx->consts_count + 1;
+		j = -consts_count + 1;
+		while (i < IR_TRUE) {
+			if (_xlat[i]) {
+				*dst = *src;
+				dst->prev_const = 0;
+				_xlat[i] = j;
+				dst++;
+				j++;
 			}
-			new_insn->optx = insn->optx;
-			new_insn->prev_const = 0;
-			if (insn->op == IR_FUNC_ADDR) {
-				new_insn->val.u64 = insn->val.u64;
-				if (insn->proto) {
-					size_t len;
-					const char *proto = ir_get_strl(ctx, insn->proto, &len);
-					new_insn->proto = ir_strl(&new_ctx, proto, len);
-				} else {
-					new_insn->proto = 0;
-				}
-			} else if (insn->op == IR_FUNC) {
-				size_t len;
-				const char *name = ir_get_strl(ctx, insn->val.name, &len);
-				new_insn->val.u64 = ir_strl(&new_ctx, name, len);
-				if (insn->proto) {
-					const char *proto = ir_get_strl(ctx, insn->proto, &len);
-					new_insn->proto = ir_strl(&new_ctx, proto, len);
-				} else {
-					new_insn->proto = 0;
-				}
-			} else if (insn->op == IR_SYM || insn->op == IR_STR || insn->op == IR_LABEL) {
-				size_t len;
-				const char *str = ir_get_strl(ctx, insn->val.name, &len);
-				new_insn->val.u64 = ir_strl(&new_ctx, str, len);
-			} else {
-				new_insn->val.u64 = insn->val.u64;
-			}
-			_xlat[ref] = new_ref;
-			new_ref--;
-			new_insn--;
+			src++;
+			i++;
 		}
-		new_ctx.consts_count = -new_ref;
+		IR_ASSERT(j == IR_TRUE);
+		base[IR_TRUE].optx = IR_OPT(IR_C_BOOL, IR_BOOL);
+		base[IR_TRUE].val.u64 = 1;
+		base[IR_FALSE].optx = IR_OPT(IR_C_BOOL, IR_BOOL);
+		base[IR_FALSE].val.u64 = 0;
+		base[IR_NULL].optx = IR_OPT(IR_C_ADDR, IR_ADDR);
+		base[IR_NULL].val.u64 = 0;
+		MAKE_NOP(&base[IR_UNUSED]);
 	}

-	new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
-	new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
-	new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
-	new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref));
-
 	/* Copy instructions, use lists and use edges */
+#ifdef IR_DEBUG
+	ir_ref orig_use_edges_count = use_edges_count;
+#endif
 	prev_ref = 0;
 	use_edges_count = 0;
 	for (i = 1; i != 0; i = _next[i]) {
 		new_ref = _xlat[i];
-		new_ctx.cfg_map[new_ref] = _blocks[i];
+		map[new_ref] = ctx->cfg_map[i];
 		_prev[new_ref] = prev_ref;
 		prev_ref = new_ref;

@@ -1330,7 +1304,7 @@ int ir_schedule(ir_ctx *ctx)
 		k = 0;
 		if (n == 1) {
 			ref = ctx->use_edges[use_list->refs];
-			if (_xlat[ref]) {
+			if (EXPECTED(_xlat[ref])) {
 				*edges = _xlat[ref];
 				edges++;
 				k = 1;
@@ -1339,7 +1313,7 @@ int ir_schedule(ir_ctx *ctx)
 			p = &ctx->use_edges[use_list->refs];
 			while (n--) {
 				ref = *p;
-				if (_xlat[ref]) {
+				if (EXPECTED(_xlat[ref])) {
 					*edges = _xlat[ref];
 					edges++;
 					k++;
@@ -1353,7 +1327,7 @@ int ir_schedule(ir_ctx *ctx)
 		new_list->count = k;

 		insn = &ctx->ir_base[i];
-		new_insn = &new_ctx.ir_base[new_ref];
+		new_insn = &base[new_ref];

 		new_insn->optx = insn->optx;
 		n = new_insn->inputs_count;
@@ -1365,11 +1339,7 @@ int ir_schedule(ir_ctx *ctx)
 				break;
 			case 1:
 				new_insn->op1 = _xlat[insn->op1];
-				if (new_insn->op == IR_PARAM || new_insn->op == IR_VAR || new_insn->op == IR_PROTO) {
-					size_t len;
-					const char *str = ir_get_strl(ctx, insn->op2, &len);
-					new_insn->op2 = ir_strl(&new_ctx, str, len);
-				} else if (new_insn->op == IR_BEGIN && insn->op2) {
+				if (new_insn->op == IR_BEGIN && insn->op2) {
 					new_insn->op2 = _xlat[insn->op2];
 				} else {
 					new_insn->op2 = insn->op2;
@@ -1428,12 +1398,12 @@ int ir_schedule(ir_ctx *ctx)
 	}

 	/* Update list of terminators (IR_OPND_CONTROL_REF) */
-	insn = &new_ctx.ir_base[1];
+	insn = &base[1];
 	ref = insn->op1;
 	if (ref) {
 		insn->op1 = ref = _xlat[ref];
 		while (1) {
-			insn = &new_ctx.ir_base[ref];
+			insn = &base[ref];
 			ref = insn->op3;
 			if (!ref) {
 				break;
@@ -1442,36 +1412,33 @@ int ir_schedule(ir_ctx *ctx)
 		}
 	}

-	IR_ASSERT(ctx->use_edges_count >= use_edges_count);
-	new_ctx.use_edges_count = use_edges_count;
-	new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref));
-
 	if (ctx->binding) {
 		ir_xlat_binding(ctx, _xlat);
-		new_ctx.binding = ctx->binding;
-		ctx->binding = NULL;
 	}

 	_xlat -= ctx->consts_count;
 	ir_mem_free(_xlat);
+	ir_mem_free(_next);

-	new_ctx.cfg_blocks_count = ctx->cfg_blocks_count;
-	new_ctx.cfg_edges_count = ctx->cfg_edges_count;
-	new_ctx.cfg_blocks = ctx->cfg_blocks;
-	new_ctx.cfg_edges = ctx->cfg_edges;
-	ctx->cfg_blocks = NULL;
-	ctx->cfg_edges = NULL;
-	ctx->value_params = NULL;
-	ir_code_buffer *saved_code_buffer = ctx->code_buffer;
-
-	ir_free(ctx);
-	IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
-	IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
-	memcpy(ctx, &new_ctx, sizeof(ir_ctx));
-	ctx->code_buffer = saved_code_buffer;
-	ctx->flags2 |= IR_LINEAR;
+	/* Switch to new IR buffer */
+	ir_mem_free(ctx->ir_base - ctx->consts_limit);
+	ctx->ir_base = base;
+	ctx->insns_count = ctx->insns_limit = insns_count;
+	ctx->consts_count = ctx->consts_limit = consts_count;

-	ir_mem_free(_next);
+	ir_mem_free(ctx->use_lists);
+	ir_mem_free(ctx->use_edges);
+	IR_ASSERT(orig_use_edges_count >= use_edges_count);
+	ctx->use_lists = lists;
+	ctx->use_edges = use_edges;
+	ctx->use_edges_count = use_edges_count;
+
+	ir_mem_free(ctx->cfg_map);
+	ctx->cfg_map = map;
+
+	ctx->prev_ref = _prev;
+
+	ctx->flags2 |= IR_LINEAR;

 	return 1;
 }
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h
index acd7e41a3e9..115c5121d75 100644
--- a/ext/opcache/jit/ir/ir_private.h
+++ b/ext/opcache/jit/ir/ir_private.h
@@ -908,7 +908,7 @@ IR_ALWAYS_INLINE bool ir_const_is_true(const ir_insn *v)
 	return 0;
 }

-IR_ALWAYS_INLINE bool ir_ref_is_true(ir_ctx *ctx, ir_ref ref)
+IR_ALWAYS_INLINE bool ir_ref_is_true(const ir_ctx *ctx, ir_ref ref)
 {
 	if (ref == IR_TRUE) {
 		return 1;
@@ -1096,6 +1096,7 @@ void ir_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref);
 void ir_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val);

 /*** Iterative Optimization ***/
+void ir_iter_add_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist);
 void ir_iter_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist);
 void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir_bitqueue *worklist);
 void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist);
@@ -1179,16 +1180,17 @@ typedef enum _ir_fold_action {
 	IR_FOLD_DO_CONST
 } ir_fold_action;

-ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn);
+ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3,
+                  const ir_insn *op1_insn, const ir_insn *op2_insn, const ir_insn *op3_insn);

 /*** Alias Analyzes (see ir.c) ***/
-ir_ref ir_find_aliasing_load(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr);
-ir_ref ir_find_aliasing_vload(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var);
+ir_ref ir_find_aliasing_load(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr);
+ir_ref ir_find_aliasing_vload(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var);
 ir_ref ir_find_aliasing_store(ir_ctx *ctx, ir_ref ref, ir_ref addr, ir_ref val);
 ir_ref ir_find_aliasing_vstore(ir_ctx *ctx, ir_ref ref, ir_ref addr, ir_ref val);

 /*** Predicates (see ir.c) ***/
-ir_ref ir_check_dominating_predicates(ir_ctx *ctx, ir_ref ref, ir_ref condition);
+ir_ref ir_check_dominating_predicates(const ir_ctx *ctx, ir_ref ref, ir_ref condition);

 /*** IR Live Info ***/
 typedef ir_ref                   ir_live_pos;
@@ -1468,9 +1470,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 void ir_fix_stack_frame(ir_ctx *ctx);

 /* Utility */
-ir_type ir_get_return_type(ir_ctx *ctx);
 const ir_proto_t *ir_call_proto(const ir_ctx *ctx, const ir_insn *insn);
-void ir_print_call_conv(uint32_t flags, FILE *f);

 //#define IR_BITSET_LIVENESS

diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c
index 23f44482cb8..4a893410d49 100644
--- a/ext/opcache/jit/ir/ir_ra.c
+++ b/ext/opcache/jit/ir/ir_ra.c
@@ -3761,14 +3761,13 @@ static void ir_set_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op, int8_t
 {
 	char key[10];

-	IR_ASSERT(reg != IR_REG_NONE);
 	if (!ctx->fused_regs) {
 		ctx->fused_regs = ir_mem_malloc(sizeof(ir_strtab));
 		ir_strtab_init(ctx->fused_regs, 8, 128);
 	}
 	memcpy(key, &root, sizeof(ir_ref));
 	memcpy(key + 4, &ref_and_op, sizeof(ir_ref));
-	ir_strtab_lookup(ctx->fused_regs, key, 8, 0x10000000 | reg);
+	ir_strtab_lookup(ctx->fused_regs, key, 8, 0x10000000 | (uint8_t)reg);
 }

 static void assign_regs(ir_ctx *ctx)
@@ -3874,93 +3873,88 @@ static void assign_regs(ir_ctx *ctx)
 										}
 										prev_use_ref = ref;
 									}
-								} else if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
-								 && needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {
-									if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG)
-									 && use_pos->hint != reg
-//									 && ctx->ir_base[ref].op != IR_CALL
-//									 && ctx->ir_base[ref].op != IR_TAILCALL) {
-									 && ctx->ir_base[ref].op != IR_SNAPSHOT
-									 && !needs_spill_load(ctx, ival, use_pos)) {
-										/* fuse spill load (valid only when register is not reused) */
-										reg = IR_REG_NONE;
-										if (use_pos->next
-										 && use_pos->op_num == 1
-										 && use_pos->next->pos == use_pos->pos
-										 && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
-											/* Support for R2 = BINOP(R1, R1) */
-											if (use_pos->hint_ref < 0) {
-												ref = -use_pos->hint_ref;
+								} else {
+									if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
+									 && needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {
+										if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG)
+										 && use_pos->hint != reg
+//										 && ctx->ir_base[ref].op != IR_CALL
+//										 && ctx->ir_base[ref].op != IR_TAILCALL) {
+										 && ctx->ir_base[ref].op != IR_SNAPSHOT
+										 && !needs_spill_load(ctx, ival, use_pos)) {
+											/* fuse spill load (valid only when register is not reused) */
+											reg = IR_REG_NONE;
+											if (use_pos->next
+											 && use_pos->op_num == 1
+											 && use_pos->next->pos == use_pos->pos
+											 && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
+												/* Support for R2 = BINOP(R1, R1) */
+												if (use_pos->hint_ref < 0) {
+													ref = -use_pos->hint_ref;
+												}
+												ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
+												use_pos = use_pos->next;
 											}
-											ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
-											use_pos = use_pos->next;
-										}
-									} else {
-										if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
-											reg |= IR_REG_SPILL_SPECIAL;
 										} else {
-											reg |= IR_REG_SPILL_LOAD;
-										}
-										if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) {
-											uint32_t use_b = ctx->cfg_map[ref];
+											if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
+												reg |= IR_REG_SPILL_SPECIAL;
+											} else {
+												reg |= IR_REG_SPILL_LOAD;
+											}
+											if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) {
+												uint32_t use_b = ctx->cfg_map[ref];

-											if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
-												ir_bitset_incl(available, use_b);
+												if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
+													ir_bitset_incl(available, use_b);
+												}
+												prev_use_ref = ref;
 											}
-											prev_use_ref = ref;
 										}
+									} else {
+										/* reuse register without spill load */
 									}
-									if (use_pos->hint_ref < 0
-									 && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
-										if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
-											reg |= IR_REG_SPILL_SPECIAL;
+
+									if (use_pos->hint_ref < 0) {
+										if (use_pos->flags & IR_PHI_USE) {
+											IR_ASSERT(use_pos->hint_ref < 0);
+											IR_ASSERT(ctx->vregs[-use_pos->hint_ref]);
+											IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]);
+											if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) {
+												/* Spilled PHI var is passed through memory */
+												reg = IR_REG_NONE;
+											}
 										} else {
-											reg |= IR_REG_SPILL_LOAD;
-										}
-										if (reg != old_reg) {
 											IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
-											ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
-											ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
-											use_pos = use_pos->next;
-											continue;
+											old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num);
+											if ((old_reg != IR_REG_NONE && reg != old_reg) || reg == IR_REG_NONE) {
+												ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
+												ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
+												use_pos = use_pos->next;
+												continue;
+											}
 										}
+										ref = -use_pos->hint_ref;
 									}
-								} else if (use_pos->flags & IR_PHI_USE) {
-									IR_ASSERT(use_pos->hint_ref < 0);
-									IR_ASSERT(ctx->vregs[-use_pos->hint_ref]);
-									IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]);
-									if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) {
-										/* Spilled PHI var is passed through memory */
-										reg = IR_REG_NONE;
-									}
-								} else if (use_pos->hint_ref < 0
-										&& (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
-									if (reg != old_reg) {
-										IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
-										ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
-										ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
-										use_pos = use_pos->next;
-										continue;
-									}
-								} else {
-									/* reuse register without spill load */
-								}
-								if (use_pos->hint_ref < 0) {
-									ref = -use_pos->hint_ref;
 								}
+
 								ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);

 								use_pos = use_pos->next;
 							}
-						} else if (!(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
+						} else {
 							use_pos = ival->use_pos;
 							while (use_pos) {
 								ref = IR_LIVE_POS_TO_REF(use_pos->pos);
-								if (ctx->ir_base[ref].op == IR_SNAPSHOT) {
+								if (ctx->ir_base[ref].op == IR_SNAPSHOT
+								 && !(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
 									IR_ASSERT(use_pos->hint_ref >= 0);
 									/* A reference to a CPU spill slot */
 									reg = IR_REG_SPILL_STORE | IR_REG_STACK_POINTER;
 									ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
+								} else if (use_pos->hint_ref < 0 && !(use_pos->flags & IR_PHI_USE)) {
+									IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
+									ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
+									ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, IR_REG_NONE);
 								}
 								use_pos = use_pos->next;
 							}
diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c
index 51d7f96e518..3f1d943c687 100644
--- a/ext/opcache/jit/ir/ir_save.c
+++ b/ext/opcache/jit/ir/ir_save.c
@@ -18,7 +18,7 @@ void ir_print_proto(const ir_ctx *ctx, ir_ref func_proto, FILE *f)
 	}
 }

-void ir_print_call_conv(uint32_t flags, FILE *f)
+static void ir_print_call_conv(uint32_t flags, FILE *f)
 {
 	switch (flags & IR_CALL_CONV_MASK) {
 		case IR_CC_BUILTIN:
@@ -75,6 +75,38 @@ void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, c
 	}
 }

+void ir_print_func_proto(const ir_ctx *ctx, const char *name, bool prefix, FILE *f)
+{
+	if (ctx->flags & IR_STATIC) {
+		fprintf(f, "static ");
+	}
+	fprintf(f, "func %s%s(",
+		prefix ? "@" : "",
+		name);
+	if (ctx->ir_base[2].op == IR_PARAM) {
+		ir_insn *insn = &ctx->ir_base[2];
+
+		fprintf(f, "%s", ir_type_cname[insn->type]);
+		insn++;
+		while (insn->op == IR_PARAM) {
+			fprintf(f, ", %s", ir_type_cname[insn->type]);
+			insn++;
+		}
+		if (ctx->flags & IR_VARARG_FUNC) {
+			fprintf(f, ", ...");
+		}
+	} else if (ctx->flags & IR_VARARG_FUNC) {
+		fprintf(f, "...");
+	}
+	fprintf(f, "): %s", ir_type_cname[ctx->ret_type != (ir_type)-1 ? ctx->ret_type : IR_VOID]);
+	ir_print_call_conv(ctx->flags, f);
+	if (ctx->flags & IR_CONST_FUNC) {
+		fprintf(f, " __const");
+	} else if (ctx->flags & IR_PURE_FUNC) {
+		fprintf(f, " __pure");
+	}
+}
+
 static void ir_save_dessa_moves(const ir_ctx *ctx, int b, ir_block *bb, FILE *f)
 {
 	uint32_t succ;
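Note: the new ir_print_func_proto() walks the leading IR_PARAM instructions and prints a one-line prototype. For a static function with two 32-bit integer parameters and prefix == true it would emit something along these lines (illustrative only; the exact spellings come from ir_type_cname[]):

	static func @my_func(int32_t, int32_t): int32_t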
diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c
index e2f38a058ae..bfec32b568f 100644
--- a/ext/opcache/jit/ir/ir_sccp.c
+++ b/ext/opcache/jit/ir/ir_sccp.c
@@ -19,7 +19,6 @@
 #define IR_TOP                  IR_UNUSED
 #define IR_BOTTOM               IR_LAST_OP

-#define IR_MAKE_TOP(ref)        do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0)
 #define IR_MAKE_BOTTOM(ref)     do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0)

 #define IR_IS_TOP(ref)          (ref >= 0 && _values[ref].op == IR_TOP)
@@ -27,17 +26,57 @@
 #define IR_IS_REACHABLE(ref)    _ir_is_reachable_ctrl(ctx, _values, ref)
 #define IR_IS_CONST(ref)        (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op))

-IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(ir_ctx *ctx, ir_insn *_values, ir_ref ref)
+typedef struct {
+	union {
+		struct {
+			IR_STRUCT_LOHI(
+				union {
+					IR_STRUCT_LOHI(
+						union {
+							IR_STRUCT_LOHI(
+								uint8_t        op,   /* {IR_TOP - unreachable, IR_BOTTOM - reachable} for control */
+								                     /* {IR_TOP | IR_COPY() | IR_CONST() | IR_BOTTOM} for data */
+								                     /* {IR_TOP | IR_MERGE() | IR_BOTTOM} for IR_MERGE */
+								                     /* {IR_TOP | IR_IF() | IR_BOTTOM} for IR_IF and IR_SWITCH */
+								uint8_t        type
+							);
+							uint16_t           opt;
+						},
+						uint16_t               _space_1
+					);
+					uint32_t                   optx;
+				},
+				union {
+					ir_ref                     copy;              /* identity for IR_COPY */
+					ir_ref                     unfeasible_inputs; /* number of unfeasible inputs for IR_MERGE */
+					ir_ref                     single_output;     /* reachable output for IR_IF */
+					ir_ref                     visited;           /* for IR_TOP */
+				}
+			);
+			union {
+				struct {
+					ir_ref                     next; /* double-linked identities list for IR_COPY */
+					ir_ref                     prev; /* double-linked identities list for IR_COPY */
+				};
+				ir_val                         val;  /* constant value for IR_CONST */
+			};
+		};
+		ir_insn                                insn; /* constant insn for IR_CONST */
+	};
+} ir_sccp_val;
+
+IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref)
 {
 	IR_ASSERT(!IR_IS_CONST_REF(ref));
 	IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL);
 	return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */
 }

-IR_ALWAYS_INLINE void ir_sccp_add_uses(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+IR_ALWAYS_INLINE void ir_sccp_add_uses(const ir_ctx *ctx, const ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
 {
-	ir_use_list *use_list;
-	ir_ref n, *p, use;
+	const ir_use_list *use_list;
+	const ir_ref *p;
+	ir_ref n, use;

 	IR_ASSERT(!IR_IS_CONST_REF(ref));
 	use_list = &ctx->use_lists[ref];
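Note: ir_sccp_val overlays descriptive names on what used to be the raw op1/op2/op3 fields of a scratch ir_insn. A minimal sketch of how a lattice cell is interpreted, assuming the encoding described in the comments above:

	/* Sketch only: the states a _values[] cell can take during analysis. */
	static const char *sccp_state(const ir_sccp_val *v)
	{
		if (v->op == IR_TOP)    return "top: not evaluated yet (v->visited marks queued refs)";
		if (v->op == IR_BOTTOM) return "bottom: varying value / reachable control";
		if (v->op == IR_COPY)   return "identity of v->copy (ring linked via v->next/v->prev)";
		if (v->op == IR_MERGE)  return "merge with v->unfeasible_inputs unfeasible inputs";
		if (v->op == IR_IF)     return "one-way branch taking only v->single_output";
		return "constant: v->insn holds the folded constant";
	}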
@@ -50,23 +89,23 @@ IR_ALWAYS_INLINE void ir_sccp_add_uses(ir_ctx *ctx, ir_insn *_values, ir_bitqueu
 	}
 }

-IR_ALWAYS_INLINE void ir_sccp_add_input(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+IR_ALWAYS_INLINE void ir_sccp_add_input(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
 {
 	IR_ASSERT(!IR_IS_CONST_REF(ref));
 	IR_ASSERT(_values[ref].op == IR_TOP);
 	/* do backward propagation only once */
-	if (!_values[ref].op1) {
-		_values[ref].op1 = 1;
+	if (!_values[ref].visited) {
+		_values[ref].visited = 1;
 		ir_bitqueue_add(worklist, ref);
 	}
 }

 #if IR_COMBO_COPY_PROPAGATION
-IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a)
+IR_ALWAYS_INLINE ir_ref ir_sccp_identity(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a)
 {
 	if (a > 0 && _values[a].op == IR_COPY) {
 		do {
-			a = _values[a].op1;
+			a = _values[a].copy;
 			IR_ASSERT(a > 0);
 		} while (_values[a].op == IR_COPY);
 		IR_ASSERT(_values[a].op == IR_BOTTOM);
@@ -75,7 +114,7 @@ IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a
 }

 #if 0
-static void CHECK_LIST(ir_insn *_values, ir_ref ref)
+static void CHECK_LIST(ir_sccp_val *_values, ir_ref ref)
 {
 	ir_ref member = _values[ref].op2;
 	while (member != ref) {
@@ -88,44 +127,44 @@ static void CHECK_LIST(ir_insn *_values, ir_ref ref)
 # define CHECK_LIST(_values, ref)
 #endif

-static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst)
+static void ir_sccp_add_identity(const ir_ctx *ctx, ir_sccp_val *_values, ir_ref src, ir_ref dst)
 {
 	IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY);
 	IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY)));
 	IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst);

 	_values[dst].optx = IR_COPY;
-	_values[dst].op1 = src;
+	_values[dst].copy = src;

 	if (_values[src].op == IR_BOTTOM) {
 		/* initialize empty double-linked list */
-		if (_values[src].op1 != src) {
-			_values[src].op1 = src;
-			_values[src].op2 = src;
-			_values[src].op3 = src;
+		if (_values[src].copy != src) {
+			_values[src].copy = src;
+			_values[src].next = src;
+			_values[src].prev = src;
 		}
 	} else {
 		src = ir_sccp_identity(ctx, _values, src);
 	}

 	/* insert into circular double-linked list */
-	ir_ref prev = _values[src].op3;
-	_values[dst].op2 = src;
-	_values[dst].op3 = prev;
-	_values[src].op3 = dst;
-	_values[prev].op2 = dst;
+	ir_ref prev = _values[src].prev;
+	_values[dst].next = src;
+	_values[dst].prev = prev;
+	_values[src].prev = dst;
+	_values[prev].next = dst;
 	CHECK_LIST(_values, dst);
 }
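Note: ir_sccp_add_identity() threads every value that is a COPY of the same representative onto one circular doubly-linked list through the next/prev fields; ir_sccp_split_partition() below unlinks members once a representative falls to BOTTOM. A toy consistency walk under those assumptions (essentially what the disabled CHECK_LIST() is meant to verify):

	/* Sketch: walk one identity partition starting at any member `ref`. */
	ir_ref member = _values[ref].next;
	while (member != ref) {
		/* every ring member resolves to the same BOTTOM representative */
		IR_ASSERT(ir_sccp_identity(ctx, _values, member) == ir_sccp_identity(ctx, _values, ref));
		member = _values[member].next;
	}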

-static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+static void ir_sccp_split_partition(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
 {
 	ir_ref member, head, tail, next, prev;

 	CHECK_LIST(_values, ref);
 	IR_MAKE_BOTTOM(ref);
-	_values[ref].op1 = ref;
+	_values[ref].copy = ref;

-	member = _values[ref].op2;
+	member = _values[ref].next;
 	head = tail = IR_UNUSED;
 	while (member != ref) {
 		if (_values[member].op != IR_BOTTOM) {
@@ -133,19 +172,19 @@ static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *
 		}
 		ir_sccp_add_uses(ctx, _values, worklist, member);

-		next = _values[member].op2;
+		next = _values[member].next;
 		if (ir_sccp_identity(ctx, _values, member) == ref) {
 			/* remove "member" from the old circular double-linked list */
-			prev = _values[member].op3;
-			_values[prev].op2 = next;
-			_values[next].op3 = prev;
+			prev = _values[member].prev;
+			_values[prev].next = next;
+			_values[next].prev = prev;

 			/* insert "member" into the new double-linked list */
 			if (!head) {
 				head = tail = member;
 			} else {
-				_values[tail].op2 = member;
-				_values[member].op3 = tail;
+				_values[tail].next = member;
+				_values[member].prev = tail;
 				tail = member;
 			}
 		}
@@ -153,26 +192,26 @@ static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *
 	}

 	/* remove "ref" from the old circular double-linked list */
-	next = _values[ref].op2;
-	prev = _values[ref].op3;
-	_values[prev].op2 = next;
-	_values[next].op3 = prev;
+	next = _values[ref].next;
+	prev = _values[ref].prev;
+	_values[prev].next = next;
+	_values[next].prev = prev;
 	CHECK_LIST(_values, next);

 	/* close the new circle */
 	if (head) {
-		_values[ref].op2 = head;
-		_values[ref].op3 = tail;
-		_values[tail].op2 = ref;
-		_values[head].op3 = ref;
+		_values[ref].next = head;
+		_values[ref].prev = tail;
+		_values[tail].next = ref;
+		_values[head].prev = ref;
 	} else {
-		_values[ref].op2 = ref;
-		_values[ref].op3 = ref;
+		_values[ref].next = ref;
+		_values[ref].prev = ref;
 	}
 	CHECK_LIST(_values, ref);
 }

-IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
 {
 	if (_values[ref].op == IR_COPY) {
 		ir_sccp_split_partition(ctx, _values, worklist, ref);
@@ -187,7 +226,7 @@ IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(ir_ctx *ctx, ir_insn *_values, ir_b
 # define IR_MAKE_BOTTOM_EX(ref) IR_MAKE_BOTTOM(ref)
 #endif

-IR_ALWAYS_INLINE bool ir_sccp_meet_const(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *val_insn)
+IR_ALWAYS_INLINE bool ir_sccp_meet_const(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, const ir_insn *val_insn)
 {
 	IR_ASSERT(IR_IS_CONST_OP(val_insn->op) || IR_IS_SYM_CONST(val_insn->op));

@@ -207,46 +246,51 @@ IR_ALWAYS_INLINE bool ir_sccp_meet_const(ir_ctx *ctx, ir_insn *_values, ir_bitqu
 	return 1;
 }

-IR_ALWAYS_INLINE bool ir_sccp_meet(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
+IR_ALWAYS_INLINE bool ir_sccp_meet_copy(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
+{
+#if IR_COMBO_COPY_PROPAGATION
+	if (_values[ref].op == IR_COPY) {
+		/* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
+		 *   IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL) ? COPY(OLD_VAL) : BOTTOM */
+		if (ir_sccp_identity(ctx, _values, ref) == ir_sccp_identity(ctx, _values, val)) {
+			return 0; /* not changed */
+		}
+		ir_sccp_split_partition(ctx, _values, worklist, ref);
+		return 1;
+	} else {
+		IR_ASSERT(_values[ref].op != IR_BOTTOM);
+		/* TOP       meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
+		/* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
+		ir_sccp_add_identity(ctx, _values, val, ref);
+		return 1;
+	}
+#endif
+	IR_MAKE_BOTTOM(ref);
+	return 1;
+}
+
+#if 0
+IR_ALWAYS_INLINE bool ir_sccp_meet(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
 {
-	ir_ref val_identity = ir_sccp_identity(ctx, _values, val);
-	ir_insn *val_insn;
+	const ir_insn *val_insn;

-	if (IR_IS_CONST_REF(val_identity)) {
-		val_insn = &ctx->ir_base[val_identity];
+	if (IR_IS_CONST_REF(val)) {
+		val_insn = &ctx->ir_base[val];
 	} else {
-		val_insn = &_values[val_identity];
+		val_insn = &_values[val].insn;

 		if (!IR_IS_CONST_OP(val_insn->op) && !IR_IS_SYM_CONST(val_insn->op)) {
-#if IR_COMBO_COPY_PROPAGATION
-			if (_values[ref].op == IR_COPY) {
-				/* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
-				 *   (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL) ? COPY(OLD_VAL) ? BOTTOM */
-				if (ir_sccp_identity(ctx, _values, ref) == val_identity) {
-					return 0; /* not changed */
-				}
-				ir_sccp_split_partition(ctx, _values, worklist, ref);
-				return 1;
-			} else {
-				IR_ASSERT(_values[ref].op != IR_BOTTOM);
-				/* TOP       meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
-				/* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
-				ir_sccp_add_identity(ctx, _values, val, ref);
-				return 1;
-			}
-#endif
-
-			IR_MAKE_BOTTOM(ref);
-			return 1;
+			return ir_sccp_meet_copy(ctx, _values, worklist, ref, val);
 		}
 	}

 	return ir_sccp_meet_const(ctx, _values, worklist, ref, val_insn);
 }
+#endif

-static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *insn)
+static ir_ref ir_sccp_fold(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, const ir_insn *insn)
 {
-	ir_insn *op1_insn, *op2_insn, *op3_insn;
+	const ir_insn *op1_insn, *op2_insn, *op3_insn;
 	ir_ref op1, op2, op3, copy;
 	uint32_t opt = insn->opt;

@@ -255,11 +299,11 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist,
 	op3 = ir_sccp_identity(ctx, _values, insn->op3);

 restart:
-	op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
-	op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2;
-	op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3;
+	op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? &_values[op1].insn : ctx->ir_base + op1;
+	op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? &_values[op2].insn : ctx->ir_base + op2;
+	op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? &_values[op3].insn : ctx->ir_base + op3;

-	switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
+	switch (ir_folding((ir_ctx*)ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
 		case IR_FOLD_DO_RESTART:
 			opt = ctx->fold_insn.optx;
 			op1 = ctx->fold_insn.op1;
@@ -272,19 +316,30 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist,
 			return 1;
 		case IR_FOLD_DO_COPY:
 			copy = ctx->fold_insn.op1;
-			return ir_sccp_meet(ctx, _values, worklist, ref, copy);
+			if (IR_IS_CONST_REF(copy)) {
+				insn = &ctx->ir_base[copy];
+			} else {
+				insn = &_values[copy].insn;
+				if (!IR_IS_CONST_OP(insn->op) && !IR_IS_SYM_CONST(insn->op)) {
+					return ir_sccp_meet_copy(ctx, _values, worklist, ref, copy);
+				}
+			}
+			goto meet_const;
 		case IR_FOLD_DO_CONST:
-			return ir_sccp_meet_const(ctx, _values, worklist, ref, &ctx->fold_insn);
+			insn = &ctx->fold_insn;
+meet_const:
+			return ir_sccp_meet_const(ctx, _values, worklist, ref, insn);
 		default:
 			IR_ASSERT(0);
 			return 0;
 	}
 }

-static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref i, ir_insn *insn)
+static bool ir_sccp_analyze_phi(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref i, const ir_insn *insn)
 {
-	ir_ref j, n, input, *merge_input, *p;
-	ir_insn *v, *new_const = NULL;
+	ir_ref j, n, input;
+	const ir_ref *merge_input, *p;
+	const ir_insn *v, *new_const = NULL;
 #if IR_COMBO_COPY_PROPAGATION
 	ir_ref new_copy = IR_UNUSED;
 	ir_ref new_copy_identity = IR_UNUSED;
@@ -315,7 +370,7 @@ static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *work
 		} else if (input == i) {
 			continue;
 		} else {
-			v = &_values[input];
+			v = &_values[input].insn;
 			if (v->op == IR_TOP) {
 				ir_sccp_add_input(ctx, _values, worklist, input);
 				continue;
@@ -369,7 +424,7 @@ static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *work
 		} else if (input == i) {
 			continue;
 		} else {
-			v = &_values[input];
+			v = &_values[input].insn;
 			if (v->op == IR_TOP) {
 				ir_sccp_add_input(ctx, _values, worklist, input);
 				continue;
@@ -398,7 +453,9 @@ static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *work

 #if IR_COMBO_COPY_PROPAGATION
 	if (new_copy) {
-		return ir_sccp_meet(ctx, _values, worklist, i, new_copy);
+		IR_ASSERT(!IR_IS_CONST_REF(new_copy));
+		IR_ASSERT(!IR_IS_CONST_OP(_values[new_copy].op) && !IR_IS_SYM_CONST(_values[new_copy].op));
+		return ir_sccp_meet_copy(ctx, _values, worklist, i, new_copy);
 	}
 #endif

@@ -409,7 +466,7 @@ static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *work
 	return 1;
 }

-static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn)
+static bool ir_is_dead_load_ex(const ir_ctx *ctx, ir_ref ref, uint32_t flags, const ir_insn *insn)
 {
 	if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
 		return ctx->use_lists[ref].count == 1;
@@ -419,10 +476,10 @@ static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn
 	return 0;
 }

-static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref)
+static bool ir_is_dead_load(const ir_ctx *ctx, ir_ref ref)
 {
 	if (ctx->use_lists[ref].count == 1) {
-		ir_insn *insn = &ctx->ir_base[ref];
+		const ir_insn *insn = &ctx->ir_base[ref];
 		uint32_t flags = ir_op_flags[insn->op];

 		if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
@@ -434,7 +491,7 @@ static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref)
 	return 0;
 }

-static bool ir_is_dead(ir_ctx *ctx, ir_ref ref)
+static bool ir_is_dead(const ir_ctx *ctx, ir_ref ref)
 {
 	if (ctx->use_lists[ref].count == 0) {
 		return IR_IS_FOLDABLE_OP(ctx->ir_base[ref].op);
@@ -444,28 +501,28 @@ static bool ir_is_dead(ir_ctx *ctx, ir_ref ref)
 	return 0;
 }

-static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a)
+static bool ir_sccp_is_true(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a)
 {
-	ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
+	const ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;

 	return ir_const_is_true(v);
 }

-static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b)
+static bool ir_sccp_is_equal(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a, ir_ref b)
 {
-	ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
-	ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
+	const ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
+	const ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b].insn;

 	IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
 	IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
 	return v1->val.u64 == v2->val.u64;
 }

-static bool ir_sccp_in_range(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b, ir_ref c)
+static bool ir_sccp_in_range(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a, ir_ref b, ir_ref c)
 {
-	ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
-	ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
-	ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c];
+	const ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
+	const ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b].insn;
+	const ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c].insn;

 	IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
 	IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
@@ -478,13 +535,13 @@ static bool ir_sccp_in_range(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b,
 }

 #ifdef IR_SCCP_TRACE
-static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i)
+static void ir_sccp_trace_val(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
 {
 	if (IR_IS_BOTTOM(i)) {
 		fprintf(stderr, "BOTTOM");
 	} else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
 		fprintf(stderr, "CONST(");
-		ir_print_const(ctx, &_values[i], stderr, true);
+		ir_print_const(ctx, &_values[i].insn, stderr, true);
 		fprintf(stderr, ")");
 #if IR_COMBO_COPY_PROPAGATION
 	} else if (_values[i].op == IR_COPY) {
@@ -501,13 +558,13 @@ static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i)
 	}
 }

-static void ir_sccp_trace_start(ir_ctx *ctx, ir_insn *_values, ir_ref i)
+static void ir_sccp_trace_start(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
 {
 	fprintf(stderr, "%d. ", i);
 	ir_sccp_trace_val(ctx, _values, i);
 }

-static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i)
+static void ir_sccp_trace_end(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
 {
 	fprintf(stderr, " -> ");
 	ir_sccp_trace_val(ctx, _values, i);
@@ -518,11 +575,12 @@ static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i)
 # define ir_sccp_trace_end(c, v, i)
 #endif

-static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
+static IR_NEVER_INLINE void ir_sccp_analyze(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
 {
-	ir_ref i, j, n, *p, use;
-	ir_use_list *use_list;
-	ir_insn *insn, *use_insn;
+	ir_ref i, j, n, use;
+	const ir_ref *p;
+	const ir_use_list *use_list;
+	const ir_insn *insn, *use_insn;
 	uint32_t flags;

 	/* A bit modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */
@@ -610,7 +668,7 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
 					}
 				}
 				for (p = insn->ops + 1; n > 0; p++, n--) {
-					ir_ref input = *p;
+					const ir_ref input = *p;
 					IR_ASSERT(input > 0);
 					if (!IR_IS_REACHABLE(input)) {
 						unfeasible_inputs++;
@@ -618,9 +676,9 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
 				}
 				if (unfeasible_inputs == 0) {
 					IR_MAKE_BOTTOM(i);
-				} else if (_values[i].op != IR_MERGE || _values[i].op1 != unfeasible_inputs) {
+				} else if (_values[i].op != IR_MERGE || _values[i].unfeasible_inputs != unfeasible_inputs) {
 					_values[i].optx = IR_MERGE;
-					_values[i].op1 = unfeasible_inputs;
+					_values[i].unfeasible_inputs = unfeasible_inputs;
 				} else {
 					continue;
 				}
@@ -674,10 +732,10 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
 					}
 					if (_values[i].op == IR_TOP) {
 						_values[i].optx = IR_IF;
-						_values[i].op1 = use;
+						_values[i].single_output = use;
 						ir_bitqueue_add(worklist, use);
 						continue;
-					} else if (_values[i].op == IR_IF && _values[i].op1 == use) {
+					} else if (_values[i].op == IR_IF && _values[i].single_output == use) {
 						continue;
 					}
 				}
@@ -715,10 +773,10 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
 						use_insn = &ctx->ir_base[use_case];
 						if (_values[i].op == IR_TOP) {
 							_values[i].optx = IR_IF;
-							_values[i].op1 = use_case;
+							_values[i].single_output = use_case;
 							ir_bitqueue_add(worklist, use_case);
 							continue;
-						} else if (_values[i].op == IR_IF || _values[i].op1 == use_case) {
+						} else if (_values[i].op == IR_IF || _values[i].single_output == use_case) {
 							continue;
 						}
 					}
@@ -768,18 +826,20 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
 		for (i = 1; i < ctx->insns_count; i++) {
 			if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
 				fprintf(stderr, "%d. CONST(", i);
-				ir_print_const(ctx, &_values[i], stderr, true);
+				ir_print_const(ctx, &_values[i].insn, stderr, true);
 				fprintf(stderr, ")\n");
 #if IR_COMBO_COPY_PROPAGATION
 			} else if (_values[i].op == IR_COPY) {
-				fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1);
+				fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].copy);
 #endif
 			} else if (IR_IS_TOP(i)) {
-				fprintf(stderr, "%d. TOP\n", i);
+				if (ctx->ir_base[i].op != IR_TOP) {
+					fprintf(stderr, "%d. TOP\n", i);
+				}
 			} else if (_values[i].op == IR_IF) {
-				fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1);
+				fprintf(stderr, "%d. IF(%d)\n", i, _values[i].single_output);
 			} else if (_values[i].op == IR_MERGE) {
-				fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1);
+				fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].unfeasible_inputs);
 			} else if (!IR_IS_BOTTOM(i)) {
 				fprintf(stderr, "%d. %d\n", i, _values[i].op);
 			}
@@ -806,7 +866,7 @@ static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
 	}
 }

-static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist)
+static void ir_sccp_remove_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_bitqueue *worklist)
 {
 	ir_ref j, n, *p;
 	ir_insn *insn;
@@ -829,7 +889,7 @@ static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bi
 	}
 }

-static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
+static void ir_sccp_replace_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
 {
 	ir_ref j, n, *p, use, i;
 	ir_insn *insn;
@@ -907,7 +967,7 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r
 	CLEAR_USES(ref);
 }

-static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst)
+static void ir_sccp_remove_if(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_ref dst)
 {
 	ir_ref next;
 	ir_insn *insn, *next_insn;
@@ -1054,10 +1114,10 @@ static bool ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_ref ref, ir_i
 	return 1;
 }

-static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
+static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, const ir_sccp_val *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
 {
 	ir_ref i, j;
-	ir_insn *value;
+	const ir_sccp_val *value;

 	for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) {
 		if (value->op == IR_BOTTOM) {
@@ -1072,7 +1132,7 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
 			ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
 #if IR_COMBO_COPY_PROPAGATION
 		} else if (value->op == IR_COPY) {
-			ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->op1), iter_worklist);
+			ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->copy), iter_worklist);
 #endif
 		} else if (value->op == IR_TOP) {
 			/* remove unreachable instruction */
@@ -1104,7 +1164,7 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
 			}
 		} else if (value->op == IR_IF) {
 			/* remove one way IF/SWITCH */
-			ir_sccp_remove_if(ctx, _values, i, value->op1);
+			ir_sccp_remove_if(ctx, _values, i, value->single_output);
 		} else if (value->op == IR_MERGE) {
 			/* schedule merge to remove unfeasible MERGE inputs */
 			ir_bitqueue_add(worklist, i);
@@ -1121,6 +1181,16 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
 /* Iterative Optimizations */
 /***************************/

+void ir_iter_add_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
+{
+	ir_use_list *use_list = &ctx->use_lists[ref];
+	ir_ref *p, n = use_list->count;
+
+	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
+		ir_bitqueue_add(worklist, *p);
+	}
+}
+
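Note: ir_iter_add_uses() factors out the "schedule every user of ref" loop that several later hunks now call after an in-place rewrite. A typical call pattern in a worklist-driven pass (sketch; `new_val` is assumed):

	insn->op1 = new_val;                   /* some in-place rewrite of ref   */
	ir_bitqueue_add(worklist, ref);        /* re-fold the rewritten insn     */
	ir_iter_add_uses(ctx, ref, worklist);  /* and revisit everything using it */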
 /* Modification of some instruction may open new optimization opportunities for other
  * instructions that use this one.
  *
@@ -1132,16 +1202,16 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
  *
  * TODO: Think about a more general solution ???
  */
-static void ir_iter_add_related_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
+static void ir_iter_add_related_uses(const ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
 {
-	ir_insn *insn = &ctx->ir_base[ref];
+	const ir_insn *insn = &ctx->ir_base[ref];

 	if (insn->op == IR_ADD || insn->op == IR_SUB) {
-		ir_use_list *use_list = &ctx->use_lists[ref];
+		const ir_use_list *use_list = &ctx->use_lists[ref];

 		if (use_list->count == 1) {
 			ir_ref use = ctx->use_edges[use_list->refs];
-			ir_insn *use_insn = &ctx->ir_base[ref];
+			const ir_insn *use_insn = &ctx->ir_base[ref];

 			if (use_insn->op == IR_ADD || use_insn->op == IR_SUB) {
 				ir_bitqueue_add(worklist, use);
@@ -1266,16 +1336,17 @@ void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir
 	}
 }

-static ir_ref ir_iter_find_cse1(ir_ctx *ctx, uint32_t optx, ir_ref op1)
+static ir_ref ir_iter_find_cse1(const ir_ctx *ctx, uint32_t optx, ir_ref op1)
 {
 	IR_ASSERT(!IR_IS_CONST_REF(op1));

-	ir_use_list *use_list = &ctx->use_lists[op1];
-	ir_ref *p, n = use_list->count;
+	const ir_use_list *use_list = &ctx->use_lists[op1];
+	const ir_ref *p;
+	ir_ref n = use_list->count;

 	for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
 		ir_ref use = *p;
-		ir_insn *use_insn = &ctx->ir_base[use];
+		const ir_insn *use_insn = &ctx->ir_base[use];

 		if (use_insn->optx == optx) {
 			IR_ASSERT(use_insn->op1 == op1);
@@ -1285,12 +1356,13 @@ static ir_ref ir_iter_find_cse1(ir_ctx *ctx, uint32_t optx, ir_ref op1)
 	return IR_UNUSED;
 }

-static ir_ref ir_iter_find_cse(ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist)
+static ir_ref ir_iter_find_cse(const ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist)
 {
 	uint32_t n = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
-	ir_use_list *use_list = NULL;
-	ir_ref *p, use;
-	ir_insn *use_insn;
+	const ir_use_list *use_list = NULL;
+	const ir_ref *p;
+	ir_ref use;
+	const ir_insn *use_insn;

 	if (n == 2) {
 		if (!IR_IS_CONST_REF(op1)) {
@@ -1373,7 +1445,8 @@ static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
 {
 	uint32_t opt;
 	ir_ref op1, op2, op3, copy;
-	ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;
+	const ir_insn *op1_insn, *op2_insn, *op3_insn;
+	ir_insn *insn;

 	insn = &ctx->ir_base[ref];
 	opt = insn->opt;
@@ -1408,9 +1481,6 @@ static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
 			 || insn->op2 != ctx->fold_insn.op2
 			 || insn->op3 != ctx->fold_insn.op3) {

-				ir_use_list *use_list;
-				ir_ref n, j, *p, use;
-
 				insn->optx = ctx->fold_insn.opt;
 				IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK]));
 				insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
@@ -1442,12 +1512,7 @@ static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
 				insn->op2 = ctx->fold_insn.op2;
 				insn->op3 = ctx->fold_insn.op3;

-				use_list = &ctx->use_lists[ref];
-				n = use_list->count;
-				for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
-					use = *p;
-					ir_bitqueue_add(worklist, use);
-				}
+				ir_iter_add_uses(ctx, ref, worklist);
 			}
 			break;
 		case IR_FOLD_DO_COPY:
@@ -1464,9 +1529,9 @@ static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
 	}
 }

-static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref)
+static bool ir_may_promote_d2f(const ir_ctx *ctx, ir_ref ref)
 {
-	ir_insn *insn = &ctx->ir_base[ref];
+	const ir_insn *insn = &ctx->ir_base[ref];

 	IR_ASSERT(insn->type == IR_DOUBLE);
 	if (IR_IS_CONST_REF(ref)) {
@@ -1497,9 +1562,9 @@ static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref)
 	return 0;
 }

-static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref)
+static bool ir_may_promote_f2d(const ir_ctx *ctx, ir_ref ref)
 {
-	ir_insn *insn = &ctx->ir_base[ref];
+	const ir_insn *insn = &ctx->ir_base[ref];

 	IR_ASSERT(insn->type == IR_FLOAT);
 	if (IR_IS_CONST_REF(ref)) {
@@ -1668,10 +1733,11 @@ static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *w
 	return ref;
 }

-static bool ir_may_promote_trunc(ir_ctx *ctx, ir_type type, ir_ref ref)
+static bool ir_may_promote_trunc(const ir_ctx *ctx, ir_type type, ir_ref ref)
 {
-	ir_insn *insn = &ctx->ir_base[ref];
-	ir_ref *p, n, input;
+	const ir_insn *insn = &ctx->ir_base[ref];
+	const ir_ref *p;
+	ir_ref n, input;

 	if (IR_IS_CONST_REF(ref)) {
 		return !IR_IS_SYM_CONST(insn->op);
@@ -1777,6 +1843,7 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
 						}
 					}
 					insn->type = type;
+					ir_iter_add_uses(ctx, ref, worklist);
 					return ref;
 				}

@@ -1857,7 +1924,7 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
 	return ref;
 }

-static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type type)
+static ir_ref ir_ext_const(ir_ctx *ctx, const ir_insn *val_insn, ir_op op, ir_type type)
 {
 	ir_val new_val;

@@ -1921,10 +1988,11 @@ static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op,
 	return ref;
 }

-static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val, ir_ref loop)
+static uint32_t _ir_estimated_control(const ir_ctx *ctx, ir_ref val, ir_ref loop)
 {
-	ir_insn *insn;
-	ir_ref n, *p, input, result, ctrl;
+	const ir_insn *insn;
+	const ir_ref *p;
+	ir_ref n, input, result, ctrl;

 	if (IR_IS_CONST_REF(val)) {
 		return 1; /* IR_START */
@@ -1955,18 +2023,18 @@ static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val, ir_ref loop)
 	return result;
 }

-static bool ir_is_loop_invariant(ir_ctx *ctx, ir_ref ref, ir_ref loop)
+static bool ir_is_loop_invariant(const ir_ctx *ctx, ir_ref ref, ir_ref loop)
 {
 	ref = _ir_estimated_control(ctx, ref, loop);
 	return ref < loop; // TODO: check dominance instead of order
 }

-static bool ir_is_cheaper_ext(ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
+static bool ir_is_cheaper_ext(const ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
 {
 	if (IR_IS_CONST_REF(ref)) {
 		return 1;
 	} else {
-		ir_insn *insn = &ctx->ir_base[ref];
+		const ir_insn *insn = &ctx->ir_base[ref];

 		if (insn->op == IR_LOAD) {
 			if (ir_is_loop_invariant(ctx, ref, loop)) {
@@ -1982,7 +2050,7 @@ static bool ir_is_cheaper_ext(ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_r
 					for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
 						use = *p;
 						if (use != ext_ref) {
-							ir_insn *use_insn = &ctx->ir_base[use];
+							const ir_insn *use_insn = &ctx->ir_base[use];

 							if (use_insn->op != op
 							 && (!(ir_op_flags[use_insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM))
@@ -2018,7 +2086,7 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
 			if (use == op_ref || use == ext_ref) {
 				continue;
 			} else {
-				ir_insn *use_insn = &ctx->ir_base[use];
+				const ir_insn *use_insn = &ctx->ir_base[use];

 				if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
 					if (use_insn->op1 == phi_ref) {
@@ -2057,7 +2125,7 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
 			if (use == phi_ref || use == ext_ref) {
 				continue;
 			} else {
-				ir_insn *use_insn = &ctx->ir_base[use];
+				const ir_insn *use_insn = &ctx->ir_base[use];

 				if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
 					if (use_insn->op1 == phi_ref) {
@@ -2194,7 +2262,7 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
 }

 static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
- {
+{
 	ir_ref ref = insn->op1;

 	/* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
@@ -2445,7 +2513,7 @@ static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn,
 	}
 }

-static bool ir_is_zero(ir_ctx *ctx, ir_ref ref)
+static bool ir_is_zero(const ir_ctx *ctx, ir_ref ref)
 {
 	return IR_IS_CONST_REF(ref)
 		&& !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
@@ -2470,7 +2538,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 			ir_ref root_ref = start1->op1;
 			ir_insn *root = &ctx->ir_base[root_ref];

-			if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2) && ctx->use_lists[root->op2].count == 1) {
+			if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2)) {
 				ir_ref cond_ref = root->op2;
 				ir_insn *cond = &ctx->ir_base[cond_ref];
 				ir_type type = insn->type;
@@ -2550,7 +2618,11 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 						ir_use_list_remove_all(ctx, insn->op2, cond_ref);
 					}

-					MAKE_NOP(cond);   CLEAR_USES(cond_ref);
+					if (ctx->use_lists[cond_ref].count == 1) {
+						MAKE_NOP(cond);   CLEAR_USES(cond_ref);
+					} else {
+						ir_use_list_remove_one(ctx, cond_ref, root_ref);
+					}
 					MAKE_NOP(root);   CLEAR_USES(root_ref);
 					MAKE_NOP(start1); CLEAR_USES(start1_ref);
 					MAKE_NOP(start2); CLEAR_USES(start2_ref);
@@ -2636,7 +2708,11 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 						ir_use_list_remove_all(ctx, insn->op1, cond_ref);
 					}

-					MAKE_NOP(cond);   CLEAR_USES(cond_ref);
+					if (ctx->use_lists[cond_ref].count == 1) {
+						MAKE_NOP(cond);   CLEAR_USES(cond_ref);
+					} else {
+						ir_use_list_remove_one(ctx, cond_ref, root_ref);
+					}
 					MAKE_NOP(root);   CLEAR_USES(root_ref);
 					MAKE_NOP(start1); CLEAR_USES(start1_ref);
 					MAKE_NOP(start2); CLEAR_USES(start2_ref);
@@ -2650,8 +2726,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 					}

 					return 1;
-#if 0
-				} else {
+				} else if (cond->op != IR_OVERFLOW && insn->op2 <= cond_ref && insn->op3 <= cond_ref) {
 					/* COND
 					 *
 					 *    prev                     prev
@@ -2705,12 +2780,12 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 					MAKE_NOP(end2);   CLEAR_USES(end2_ref);
 					MAKE_NOP(merge);  CLEAR_USES(merge_ref);

+					ir_bitqueue_add(worklist, ref);
 					if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
 						ir_bitqueue_add(worklist, next->op1);
 					}

 					return 1;
-#endif
 				}
 			}
 		}
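Note: the branch re-enabled above (previously under `#if 0`) collapses an empty two-way diamond whose PHI merely selects between two already-available values into a single COND, per the ASCII diagram kept in the source. Schematically (assumed IR; guarded by cond->op != IR_OVERFLOW and by op2/op3 being defined no later than the condition):

	before:  r = PHI(MERGE(empty IF_TRUE/IF_FALSE of IF(c)), a, b)
	after:   r = COND(c, a, b)     /* the diamond's control nodes become NOPs */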
@@ -2719,7 +2794,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 	return 0;
 }

-static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2)
+static bool ir_cmp_is_true(ir_op op, const ir_insn *op1, const ir_insn *op2)
 {
 	IR_ASSERT(op1->type == op2->type);
 	if (IR_IS_TYPE_INT(op1->type)) {
@@ -3246,7 +3321,7 @@ static void ir_iter_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge
 	}
 }

-static ir_ref ir_find_ext_use(ir_ctx *ctx, ir_ref ref)
+static ir_ref ir_find_ext_use(const ir_ctx *ctx, ir_ref ref)
 {
 	ir_use_list *use_list = &ctx->use_lists[ref];
 	ir_ref *p, n, use;
@@ -3628,6 +3703,7 @@ void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
 					insn->op1 = val;
 					insn->op2 = IR_UNUSED;
 					ir_bitqueue_add(worklist, i);
+					ir_iter_add_uses(ctx, i, worklist);
 				}
 			}
 		} else if (insn->op == IR_STORE) {
@@ -3677,11 +3753,11 @@ void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
 int ir_sccp(ir_ctx *ctx)
 {
 	ir_bitqueue sccp_worklist, iter_worklist;
-	ir_insn *_values;
+	ir_sccp_val *_values;

 	ir_bitqueue_init(&iter_worklist, ctx->insns_count);
 	ir_bitqueue_init(&sccp_worklist, ctx->insns_count);
-	_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));
+	_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_sccp_val));

 	ctx->flags2 |= IR_OPT_IN_SCCP;
 	ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist);
diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc
index 9072b0dd591..9b369fadbcc 100644
--- a/ext/opcache/jit/ir/ir_x86.dasc
+++ b/ext/opcache/jit/ir/ir_x86.dasc
@@ -1167,6 +1167,7 @@ const ir_call_conv_dsc ir_call_conv_x86_fastcall = {
 	_(CMP_AND_BRANCH_FP)   \
 	_(TEST_AND_BRANCH_INT) \
 	_(JCC_INT)             \
+	_(COND_TEST_INT)       \
 	_(COND_CMP_INT)        \
 	_(COND_CMP_FP)         \
 	_(GUARD_CMP_INT)       \
@@ -1405,6 +1406,7 @@ op2_const:
 			}
 			IR_FALLTHROUGH;
 		case IR_COND_CMP_INT:
+		case IR_COND_TEST_INT:
 			insn = &ctx->ir_base[ref];
 			if (IR_IS_TYPE_INT(insn->type)) {
 				if (IR_IS_CONST_REF(insn->op3) || ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
@@ -2125,6 +2127,34 @@ static uint32_t ir_match_builtin_call(ir_ctx *ctx, const ir_insn *func)
 	return 0;
 }

+static bool all_usages_are_fusable(ir_ctx *ctx, ir_ref ref)
+{
+	ir_insn *insn = &ctx->ir_base[ref];
+
+	if (insn->op >= IR_EQ && insn->op <= IR_UNORDERED) {
+		ir_use_list *use_list = &ctx->use_lists[ref];
+		ir_ref n = use_list->count;
+
+		if (n > 0) {
+			ir_ref *p = ctx->use_edges + use_list->refs;
+
+			do {
+				insn = &ctx->ir_base[*p];
+				if (insn->op != IR_IF
+				 && insn->op != IR_GUARD
+				 && insn->op != IR_GUARD_NOT
+				 && (insn->op != IR_COND || insn->op2 == ref || insn->op3 == ref)) {
+					return 0;
+				}
+				p++;
+				n--;
+			} while (n);
+			return 1;
+		}
+	}
+	return 0;
+}
+
 static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
 {
 	ir_insn *op2_insn;
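Note: all_usages_are_fusable() allows a comparison with several consumers to be fused, provided every consumer can re-materialize the flags itself: IF, GUARD, GUARD_NOT, or a COND that uses it only as the condition. An assumed example of IR this now accepts (not taken from the commit):

	/* c = EQ(x, 0)
	 * IF(ctrl, c)          -- consumer 1: branch on c
	 * r = COND(c, a, b)    -- consumer 2: c is the condition, not op2/op3
	 *
	 * all_usages_are_fusable(ctx, c) returns 1, so each consumer may emit
	 * its own fused CMP instead of keeping a boolean `c` in a register. */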
@@ -2877,7 +2907,7 @@ store_int:
 				return IR_RETURN_FP;
 			}
 		case IR_IF:
-			if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+			if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
 				op2_insn = &ctx->ir_base[insn->op2];
 				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
 					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
@@ -2889,7 +2919,9 @@ store_int:

 							if (op1_insn->op == IR_AND && ctx->use_lists[op2_insn->op1].count == 1) {
 								/* v = AND(_, _); c = CMP(v, 0) ... IF(c) => SKIP_TEST; SKIP ... TEST_AND_BRANCH */
-								ir_match_fuse_load_test_int(ctx, op1_insn, ref);
+								if (ctx->use_lists[insn->op2].count == 1) {
+									ir_match_fuse_load_test_int(ctx, op1_insn, ref);
+								}
 								ctx->rules[op2_insn->op1] = IR_FUSED | IR_TEST_INT;
 								ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_NOP;
 								return IR_TEST_AND_BRANCH_INT;
@@ -2901,10 +2933,14 @@ store_int:
 												op2_insn->op == IR_LT || op2_insn->op == IR_GE)))) {
 								/* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */
 								if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
-									ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
+									if (ctx->use_lists[insn->op2].count == 1) {
+										ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
+									}
 									ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
 								} else {
-									ir_match_fuse_load(ctx, op1_insn->op2, ref);
+									if (ctx->use_lists[insn->op2].count == 1) {
+										ir_match_fuse_load(ctx, op1_insn->op2, ref);
+									}
 									ctx->rules[op2_insn->op1] = IR_BINOP_INT;
 								}
 								ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
@@ -2912,12 +2948,16 @@ store_int:
 							}
 						}
 						/* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */
-						ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
+						if (ctx->use_lists[insn->op2].count == 1) {
+							ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
+						}
 						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
 						return IR_CMP_AND_BRANCH_INT;
 					} else {
 						/* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */
-						ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
+						if (ctx->use_lists[insn->op2].count == 1) {
+							ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
+						}
 						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
 						return IR_CMP_AND_BRANCH_FP;
 					}
@@ -3005,31 +3045,43 @@ store_int:
 				break;
 			}
 		case IR_COND:
-			if (!IR_IS_CONST_REF(insn->op1) && ctx->use_lists[insn->op1].count == 1) {
+			if (!IR_IS_CONST_REF(insn->op1) && (ctx->use_lists[insn->op1].count == 1 || all_usages_are_fusable(ctx, insn->op1))) {
 				ir_insn *op1_insn = &ctx->ir_base[insn->op1];

 				if (op1_insn->op >= IR_EQ && op1_insn->op <= IR_UNORDERED) {
 					if (IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op1].type)) {
-						ir_match_fuse_load_cmp_int(ctx, op1_insn, ref);
+						if (ctx->use_lists[insn->op1].count == 1) {
+							ir_match_fuse_load_cmp_int(ctx, op1_insn, ref);
+						}
 						ctx->rules[insn->op1] = IR_FUSED | IR_CMP_INT;
 						return IR_COND_CMP_INT;
 					} else {
-						ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref);
+						if (ctx->use_lists[insn->op1].count == 1) {
+							ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref);
+						}
 						ctx->rules[insn->op1] = IR_FUSED | IR_CMP_FP;
 						return IR_COND_CMP_FP;
 					}
+				} else if (op1_insn->op == IR_AND) {
+					/* c = AND(_, _) ... IF(c) => SKIP_TEST ... TEST_AND_BRANCH */
+					ir_match_fuse_load_test_int(ctx, op1_insn, ref);
+					ctx->rules[insn->op1] = IR_FUSED | IR_TEST_INT;
+					return IR_COND_TEST_INT;
 				}
 			}
+			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
+				ir_match_fuse_load(ctx, insn->op1, ref);
+			}
 			return IR_COND;
 		case IR_GUARD:
 		case IR_GUARD_NOT:
-			if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+			if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
 				op2_insn = &ctx->ir_base[insn->op2];
-				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED
+				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
 					// TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
-				 && (insn->op2 == ref - 1 ||
-				     (insn->op2 == ctx->prev_ref[ref] - 1
-				   && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
+//???				 && (insn->op2 == ref - 1 ||
+//???				     (insn->op2 == ctx->prev_ref[ref] - 1
+//???				   && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
 					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
 						if (IR_IS_CONST_REF(op2_insn->op2)
 						 && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op)
@@ -3043,10 +3095,14 @@ store_int:
 											(op2_insn->op == IR_EQ || op2_insn->op == IR_NE ||
 												op2_insn->op == IR_LT || op2_insn->op == IR_GE))) {
 									if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
-										ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
+										if (ctx->use_lists[insn->op2].count == 1) {
+											ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
+										}
 										ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
 									} else {
-										ir_match_fuse_load(ctx, op1_insn->op2, ref);
+										if (ctx->use_lists[insn->op2].count == 1) {
+											ir_match_fuse_load(ctx, op1_insn->op2, ref);
+										}
 										ctx->rules[op2_insn->op1] = IR_BINOP_INT;
 									}
 									/* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... GUARD_JCC */
@@ -3054,6 +3110,7 @@ store_int:
 									return IR_GUARD_JCC_INT;
 								}
 							} else if ((ctx->flags & IR_OPT_CODEGEN)
+							 && ctx->use_lists[insn->op2].count == 1
 							 && op2_insn->op1 == insn->op2 - 2 /* before previous instruction */
 							 && ir_in_same_block(ctx, op2_insn->op1)
 							 && ctx->use_lists[op2_insn->op1].count == 2) {
@@ -3101,12 +3158,16 @@ store_int:
 							}
 						}
 						/* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */
-						ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
+						if (ctx->use_lists[insn->op2].count == 1) {
+							ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
+						}
 						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
 						return IR_GUARD_CMP_INT;
 					} else {
 						/* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */
-						ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
+						if (ctx->use_lists[insn->op2].count == 1) {
+							ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
+						}
 						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
 						return IR_GUARD_CMP_FP;
 					}
@@ -6051,8 +6112,15 @@ static void ir_emit_cmp_int_common2(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_ins
 	ir_type type = ctx->ir_base[cmp_insn->op1].type;
 	ir_ref op1 = cmp_insn->op1;
 	ir_ref op2 = cmp_insn->op2;
-	ir_reg op1_reg = ctx->regs[ref][1];
-	ir_reg op2_reg = ctx->regs[ref][2];
+	ir_reg op1_reg, op2_reg;
+
+	if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) {
+		op1_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 1);
+		op2_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2);
+	} else {
+		op1_reg = ctx->regs[ref][1];
+		op2_reg = ctx->regs[ref][2];
+	}

 	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
 		op1_reg = IR_REG_NUM(op1_reg);
@@ -6218,8 +6286,15 @@ static void ir_emit_test_int_common(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_op
 	ir_type type = binop_insn->type;
 	ir_ref op1 = binop_insn->op1;
 	ir_ref op2 = binop_insn->op2;
-	ir_reg op1_reg = ctx->regs[ref][1];
-	ir_reg op2_reg = ctx->regs[ref][2];
+	ir_reg op1_reg, op2_reg;
+
+	if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) {
+		op1_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 1);
+		op2_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2);
+	} else {
+		op1_reg = ctx->regs[ref][1];
+		op2_reg = ctx->regs[ref][2];
+	}

 	IR_ASSERT(binop_insn->op == IR_AND);
 	if (op1_reg != IR_REG_NONE) {
@@ -6329,8 +6404,13 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_

 	op1 = cmp_insn->op1;
 	op2 = cmp_insn->op2;
-	op1_reg = ctx->regs[cmp_ref][1];
-	op2_reg = ctx->regs[cmp_ref][2];
+	if (UNEXPECTED(ctx->rules[cmp_ref] & IR_FUSED_REG)) {
+		op1_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 1);
+		op2_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 2);
+	} else {
+		op1_reg = ctx->regs[cmp_ref][1];
+		op2_reg = ctx->regs[cmp_ref][2];
+	}

 	if (op1_reg == IR_REG_NONE && op2_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) {
 		ir_reg tmp_reg;
@@ -6603,8 +6683,15 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i
 	ir_type type = ctx->ir_base[cmp_insn->op1].type;
 	ir_ref op1 = cmp_insn->op1;
 	ir_ref op2 = cmp_insn->op2;
-	ir_reg op1_reg = ctx->regs[insn->op2][1];
-	ir_reg op2_reg = ctx->regs[insn->op2][2];
+	ir_reg op1_reg, op2_reg;
+
+	if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
+		op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
+		op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
+	} else {
+		op1_reg = ctx->regs[insn->op2][1];
+		op2_reg = ctx->regs[insn->op2][2];
+	}

 	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
 		op1_reg = IR_REG_NUM(op1_reg);
@@ -6735,37 +6822,24 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)

 	IR_ASSERT(def_reg != IR_REG_NONE);

-	if (op2 != op3) {
-		if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, type, op2_reg, op2);
-			if (op1 == op2) {
-				op1_reg = op2_reg;
-			}
-		}
-		if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
-			op3_reg = IR_REG_NUM(op3_reg);
-			ir_emit_load(ctx, type, op3_reg, op3);
-			if (op1 == op2) {
-				op1_reg = op3_reg;
-			}
-		}
-	} else if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
+	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
 		op2_reg = IR_REG_NUM(op2_reg);
 		ir_emit_load(ctx, type, op2_reg, op2);
-		op3_reg = op2_reg;
 		if (op1 == op2) {
 			op1_reg = op2_reg;
 		}
-	} else if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
+		if (op3 == op2) {
+			op3_reg = op2_reg;
+		}
+	}
+	if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
 		op3_reg = IR_REG_NUM(op3_reg);
 		ir_emit_load(ctx, type, op3_reg, op3);
-		op2_reg = op3_reg;
 		if (op1 == op3) {
-			op1_reg = op3_reg;
+			op1_reg = op2_reg;
 		}
 	}
-	if (op1_reg != IR_REG_NONE && op1 != op2 && op1 != op3 && IR_REG_SPILLED(op1_reg)) {
+	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
 		op1_reg = IR_REG_NUM(op1_reg);
 		ir_emit_load(ctx, op1_type, op1_reg, op1);
 	}
@@ -6774,7 +6848,13 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 		if (op1_reg != IR_REG_NONE) {
 			|	ASM_REG_REG_OP test, op1_type, op1_reg, op1_reg
 		} else {
-			ir_mem mem = ir_ref_spill_slot(ctx, op1);
+			ir_mem mem;
+
+			if (ir_rule(ctx, insn->op1) & IR_FUSED) {
+				mem = ir_fuse_load(ctx, def, insn->op1);
+			} else {
+				mem = ir_ref_spill_slot(ctx, insn->op1);
+			}

 			|	ASM_MEM_IMM_OP cmp, op1_type, mem, 0
 		}
@@ -6864,6 +6944,115 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	}
 }

+static void ir_emit_cond_test_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op2 = insn->op2;
+	ir_ref op3 = insn->op3;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op2_reg = ctx->regs[def][2];
+	ir_reg op3_reg = ctx->regs[def][3];
+
+	if (op2 != op3) {
+		if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
+			op2_reg = IR_REG_NUM(op2_reg);
+			ir_emit_load(ctx, type, op2_reg, op2);
+		}
+		if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
+			op3_reg = IR_REG_NUM(op3_reg);
+			ir_emit_load(ctx, type, op3_reg, op3);
+		}
+	} else if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
+		op2_reg = IR_REG_NUM(op2_reg);
+		ir_emit_load(ctx, type, op2_reg, op2);
+		op3_reg = op2_reg;
+	} else if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
+		op3_reg = IR_REG_NUM(op3_reg);
+		ir_emit_load(ctx, type, op3_reg, op3);
+		op2_reg = op3_reg;
+	}
+
+	ir_emit_test_int_common(ctx, def, insn->op1, IR_NE);
+
+	if (IR_IS_TYPE_INT(type)) {
+		bool eq = 0;
+
+		if (op3_reg != IR_REG_NONE) {
+			if (op3_reg == def_reg) {
+				IR_ASSERT(op2_reg != IR_REG_NONE);
+				op3_reg = op2_reg;
+				eq = 1; // reverse
+			} else {
+				if (op2_reg != IR_REG_NONE) {
+					if (def_reg != op2_reg) {
+//						if (IR_IS_TYPE_INT(type)) {
+							ir_emit_mov(ctx, type, def_reg, op2_reg);
+//						} else {
+//							ir_emit_fp_mov(ctx, type, def_reg, op2_reg);
+//						}
+					}
+				} else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)) {
+					/* prevent "xor" and flags clobbering */
+					ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op2].val.i64);
+				} else {
+					ir_emit_load_ex(ctx, type, def_reg, op2, def);
+				}
+			}
+		} else {
+			IR_ASSERT(op2_reg != IR_REG_NONE && op2_reg != def_reg);
+			if (IR_IS_CONST_REF(op3) && !IR_IS_SYM_CONST(ctx->ir_base[op3].op)) {
+				/* prevent "xor" and flags clobbering */
+				ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op3].val.i64);
+			} else {
+				ir_emit_load_ex(ctx, type, def_reg, op3, def);
+			}
+			op3_reg = op2_reg;
+			eq = 1; // reverse
+		}
+
+		if (eq) {
+			|	ASM_REG_REG_OP2 cmovne, type, def_reg, op3_reg
+		} else {
+			|	ASM_REG_REG_OP2 cmove, type, def_reg, op3_reg
+		}
+	} else {
+		|	jne >2
+		|1:
+
+		if (op2_reg != IR_REG_NONE) {
+			if (def_reg != op2_reg) {
+				if (IR_IS_TYPE_INT(type)) {
+					ir_emit_mov(ctx, type, def_reg, op2_reg);
+				} else {
+					ir_emit_fp_mov(ctx, type, def_reg, op2_reg);
+				}
+			}
+		} else {
+			ir_emit_load_ex(ctx, type, def_reg, op2, def);
+		}
+		|	jmp >3
+		|2:
+		if (op3_reg != IR_REG_NONE) {
+			if (def_reg != op3_reg) {
+				if (IR_IS_TYPE_INT(type)) {
+					ir_emit_mov(ctx, type, def_reg, op3_reg);
+				} else {
+					ir_emit_fp_mov(ctx, type, def_reg, op3_reg);
+				}
+			}
+		} else {
+			ir_emit_load_ex(ctx, type, def_reg, op3, def);
+		}
+		|3:
+	}
+
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
 static void ir_emit_cond_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
 	ir_backend_data *data = ctx->data;
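Note: for an integer COND whose condition is an AND, the new IR_COND_TEST_INT rule avoids materializing the boolean: ir_emit_cond_test_int() redoes the test and then selects with a conditional move. Roughly, for the register-resident integer case (illustrative x86-64; actual registers depend on allocation):

	test  rax, rbx        ; fused AND(op1, op2) of the condition (IR_NE)
	mov   rcx, rdx        ; def = op2, the value for "condition non-zero"
	cmove rcx, rsi        ; AND was zero (ZF set) -> take op3 instead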
@@ -10454,9 +10643,16 @@ static bool ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
 	ir_type type = ctx->ir_base[cmp_insn->op1].type;
 	ir_ref op1 = cmp_insn->op1;
 	ir_ref op2 = cmp_insn->op2;
-	ir_reg op1_reg = ctx->regs[insn->op2][1];
-	ir_reg op2_reg = ctx->regs[insn->op2][2];
 	void *addr;
+	ir_reg op1_reg, op2_reg;
+
+	if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
+		op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
+		op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
+	} else {
+		op1_reg = ctx->regs[insn->op2][1];
+		op2_reg = ctx->regs[insn->op2][2];
+	}

 	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
 		op1_reg = IR_REG_NUM(op1_reg);
@@ -11714,6 +11910,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 				case IR_COND:
 					ir_emit_cond(ctx, i, insn);
 					break;
+				case IR_COND_TEST_INT:
+					ir_emit_cond_test_int(ctx, i, insn);
+					break;
 				case IR_COND_CMP_INT:
 					ir_emit_cond_cmp_int(ctx, i, insn);
 					break;
@@ -12180,7 +12379,7 @@ const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_pe
 	return entry;
 }

-bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr)
+bool ir_needs_thunk(const ir_code_buffer *code_buffer, void *addr)
 {
 	return sizeof(void*) == 8 && !IR_MAY_USE_32BIT_ADDR(code_buffer, addr);
 }