Commit e7925111791 for php.net

commit e792511179199a6ec12ddabad5e7bd7e7d785271
Author: Dmitry Stogov <dmitry@php.net>
Date:   Tue Feb 24 18:40:53 2026 +0300

    Update IR (#21288)

    IR commit: ef9341183cdd0489a188a87e74f5b02a359df21b

diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c
index 3476b9bb061..f6058c5abe5 100644
--- a/ext/opcache/jit/ir/ir.c
+++ b/ext/opcache/jit/ir/ir.c
@@ -1420,13 +1420,21 @@ bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref ref)
 		if (old_size < new_size) {
 			/* Reallocate the whole edges buffer (this is inefficient) */
 			ctx->use_edges = ir_mem_realloc(ctx->use_edges, new_size);
+			if (n == ctx->use_edges_count) {
+				ctx->use_edges[n] = ref;
+				use_list->count++;
+				ctx->use_edges_count++;
+				return 1;
+			}
 		} else if (n == ctx->use_edges_count) {
 			ctx->use_edges[n] = ref;
 			use_list->count++;
 			ctx->use_edges_count++;
 			return 0;
 		}
-		memcpy(ctx->use_edges + ctx->use_edges_count, ctx->use_edges + use_list->refs, use_list->count * sizeof(ir_ref));
+		if (use_list->count) {
+			memcpy(ctx->use_edges + ctx->use_edges_count, ctx->use_edges + use_list->refs, use_list->count * sizeof(ir_ref));
+		}
 		use_list->refs = ctx->use_edges_count;
 		ctx->use_edges[use_list->refs + use_list->count] = ref;
 		use_list->count++;
diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c
index bd314dcedb1..357b985e63c 100644
--- a/ext/opcache/jit/ir/ir_cfg.c
+++ b/ext/opcache/jit/ir/ir_cfg.c
@@ -5,6 +5,10 @@
  * Authors: Dmitry Stogov <dmitry@php.net>
  */

+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
 #include "ir.h"
 #include "ir_private.h"

@@ -188,9 +192,9 @@ static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t
 							}
 						}
 					}
-					if (input > 0) {
-						ir_use_list_remove_one(ctx, input, bb->start);
-					}
+					IR_ASSERT(input > 0);
+					IR_ASSERT(ctx->use_lists[input].count == 1 && ctx->use_edges[ctx->use_lists[input].refs] == bb->start);
+					CLEAR_USES(input);
 				}
 			}
 			j--;
@@ -503,7 +507,8 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
 	}

 	ir_mem_free(life_inputs);
-	ir_use_list_remove_all(ctx, from, merge);
+	IR_ASSERT(ctx->use_lists[from].count == 1 && ctx->use_edges[ctx->use_lists[from].refs] == merge);
+	CLEAR_USES(from);
 }

 /* CFG constructed after SCCP pass doesn't have unreachable BBs, otherwise they should be removed */
@@ -975,10 +980,107 @@ static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
 	return b1 == b2;
 }

+/* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs" and
+ * G. Ramalingam "Identifying Loops In Almost Linear Time". */
+
+#define ENTRY_TIME(b) times[(b) * 2]
+#define EXIT_TIME(b)  times[(b) * 2 + 1]
+
+static IR_NEVER_INLINE void ir_collect_irreducible_loops(ir_ctx *ctx, uint32_t *times, ir_worklist *work, ir_list *list)
+{
+	ir_block *blocks = ctx->cfg_blocks;
+	uint32_t *edges = ctx->cfg_edges;
+
+	IR_ASSERT(ir_list_len(list) != 0);
+	if (ir_list_len(list) > 1) {
+		/* Sort list to process irreducible loops in DFS order (insertion sort) */
+		ir_ref *a = list->a.refs;
+		uint32_t n = ir_list_len(list);
+		uint32_t i = 1;
+		while (i < n) {
+			uint32_t j = i;
+			while (j > 0 && ENTRY_TIME(a[j-1]) > ENTRY_TIME(a[j])) {
+				ir_ref tmp = a[j];
+				a[j] = a[j-1];
+				a[j-1] = tmp;
+				j--;
+			}
+			i++;
+		}
+	}
+	while (ir_list_len(list)) {
+		uint32_t hdr = ir_list_pop(list);
+		ir_block *bb = &blocks[hdr];
+
+		IR_ASSERT(bb->flags & IR_BB_IRREDUCIBLE_LOOP);
+		IR_ASSERT(!bb->loop_depth);
+		if (!bb->loop_depth) {
+			/* process irreducible loop */
+
+			bb->flags |= IR_BB_LOOP_HEADER;
+			bb->loop_depth = 1;
+			if (ctx->ir_base[bb->start].op == IR_MERGE) {
+				ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
+			}
+
+			/* find the closing edge(s) of the irreducible loop */
+			IR_ASSERT(bb->predecessors_count > 1);
+			IR_ASSERT(ir_worklist_len(work) == 0);
+			ir_bitset_clear(work->visited, ir_bitset_len(ir_worklist_capasity(work)));
+			ir_bitset_incl(work->visited, hdr);
+
+			uint32_t *p = &edges[bb->predecessors];
+			uint32_t n = bb->predecessors_count;
+			do {
+				uint32_t pred = *p;
+				if (ENTRY_TIME(pred) > ENTRY_TIME(hdr) && EXIT_TIME(pred) < EXIT_TIME(hdr)) {
+					IR_ASSERT(blocks[pred].loop_header == 0);
+					// blocks[pred].loop_header = 0; /* support for merged loops */
+					ir_worklist_push(work, pred);
+				}
+				p++;
+			} while (--n);
+
+			/* collect members of the irreducible loop */
+			while (ir_worklist_len(work)) {
+				uint32_t b = ir_worklist_pop(work);
+
+				bb = &blocks[b];
+				bb->loop_header = hdr;
+
+				uint32_t *p = &edges[bb->predecessors];
+				uint32_t n = bb->predecessors_count;
+
+				for (; n > 0; p++, n--) {
+					uint32_t pred = *p;
+					if (!ir_bitset_in(work->visited, pred)) {
+						if (blocks[pred].loop_header) {
+							if (blocks[pred].loop_header == b) continue;
+							do {
+								pred = blocks[pred].loop_header;
+							} while (blocks[pred].loop_header > 0);
+						}
+						if (ENTRY_TIME(pred) > ENTRY_TIME(hdr) && EXIT_TIME(pred) < EXIT_TIME(hdr)) {
+							/* "pred" is a descendant of "hdr" */
+								ir_worklist_push(work, pred);
+						} else if (bb->predecessors_count > 1) {
+							/* another entry to the irreducible loop */
+							bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
+							if (ctx->ir_base[bb->start].op == IR_MERGE) {
+								ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
 int ir_find_loops(ir_ctx *ctx)
 {
-	uint32_t b, j, n, count;
-	uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1;
+	uint32_t b, j, n;
+	uint32_t *times, *sorted_blocks, time = 1;
 	ir_block *blocks = ctx->cfg_blocks;
 	uint32_t *edges = ctx->cfg_edges;
 	ir_worklist work;
@@ -987,52 +1089,43 @@ int ir_find_loops(ir_ctx *ctx)
 		return 1;
 	}

-	/* We don't materialize the DJ spanning tree explicitly, as we are only interested in ancestor
-	 * queries. These are implemented by checking entry/exit times of the DFS search. */
+	/* Compute entry/exit times for the CFG DFS spanning tree to perform ancestor and back-edge queries. */
 	ir_worklist_init(&work, ctx->cfg_blocks_count + 1);
-	entry_times = ir_mem_malloc((ctx->cfg_blocks_count + 1) * 3 * sizeof(uint32_t));
-	exit_times = entry_times + ctx->cfg_blocks_count + 1;
-	sorted_blocks = exit_times + ctx->cfg_blocks_count + 1;
-
-	memset(entry_times, 0, (ctx->cfg_blocks_count + 1) * sizeof(uint32_t));
+	times = ir_mem_malloc((ctx->cfg_blocks_count + 1) * 3 * sizeof(uint32_t));
+	sorted_blocks = times + (ctx->cfg_blocks_count + 1) * 2;

 	ir_worklist_push(&work, 1);
+	ENTRY_TIME(1) = time++;
+
 	while (ir_worklist_len(&work)) {
 		ir_block *bb;
-		int child;

-next:
 		b = ir_worklist_peek(&work);
-		if (!entry_times[b]) {
-			entry_times[b] = time++;
-		}

-		/* Visit blocks immediately dominated by "b". */
+		/* Visit successors of "b". */
+next:
 		bb = &blocks[b];
-		for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) {
-			if (ir_worklist_push(&work, child)) {
-				goto next;
-			}
-		}
-
-		/* Visit join edges. */
-		if (bb->successors_count) {
+		n = bb->successors_count;
+		if (n) {
 			uint32_t *p = edges + bb->successors;
-			for (j = 0; j < bb->successors_count; j++, p++) {
+
+			for (; n > 0; p++, n--) {
 				uint32_t succ = *p;

-				if (blocks[succ].idom == b) {
-					continue;
-				} else if (ir_worklist_push(&work, succ)) {
+				if (ir_worklist_push(&work, succ)) {
+					b = succ;
+					ENTRY_TIME(b) = time++;
 					goto next;
 				}
 			}
 		}
-		exit_times[b] = time++;
+
+		EXIT_TIME(b) = time++;
 		ir_worklist_pop(&work);
 	}

 	/* Sort blocks by level, which is the opposite order in which we want to process them */
+	/* (Breadth First Search using "sorted_blocks" as a queue) */
 	sorted_blocks[1] = 1;
 	j = 1;
 	n = 2;
@@ -1046,127 +1139,64 @@ int ir_find_loops(ir_ctx *ctx)
 			}
 		}
 	}
-	count = n;
+	IR_ASSERT(n == ctx->cfg_blocks_count + 1);

-	/* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". */
+#if IR_DEBUG
 	uint32_t prev_dom_depth = blocks[sorted_blocks[n - 1]].dom_depth;
-	uint32_t prev_irreducible = 0;
+#endif
+	uint32_t irreducible_depth = 0;
+	ir_list irreducible_list = {0};
+
 	while (n > 1) {
 		b = sorted_blocks[--n];
 		ir_block *bb = &blocks[b];

 		IR_ASSERT(bb->dom_depth <= prev_dom_depth);
-		if (UNEXPECTED(prev_irreducible) && bb->dom_depth != prev_dom_depth) {
-			/* process delyed irreducible loops */
-			do {
-				b = sorted_blocks[prev_irreducible];
-				bb = &blocks[b];
-				if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP) && !bb->loop_depth) {
-					/* process irreducible loop */
-					uint32_t hdr = b;
-
-					bb->loop_depth = 1;
-					if (ctx->ir_base[bb->start].op == IR_MERGE) {
-						ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
-					}

-					/* find the closing edge(s) of the irreucible loop */
-					IR_ASSERT(bb->predecessors_count > 1);
-					uint32_t *p = &edges[bb->predecessors];
-					j = bb->predecessors_count;
-					do {
-						uint32_t pred = *p;
-
-						if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) {
-							if (!ir_worklist_len(&work)) {
-								ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
-							}
-							blocks[pred].loop_header = 0; /* support for merged loops */
-							ir_worklist_push(&work, pred);
-						}
-						p++;
-					} while (--j);
-					if (ir_worklist_len(&work) == 0) continue;
-
-					/* collect members of the irreducible loop */
-					while (ir_worklist_len(&work)) {
-						b = ir_worklist_pop(&work);
-						if (b != hdr) {
-							ir_block *bb = &blocks[b];
-							bb->loop_header = hdr;
-							if (bb->predecessors_count) {
-								uint32_t *p = &edges[bb->predecessors];
-								uint32_t n = bb->predecessors_count;
-								do {
-									uint32_t pred = *p;
-									while (blocks[pred].loop_header > 0) {
-										pred = blocks[pred].loop_header;
-									}
-									if (pred != hdr) {
-										if (entry_times[pred] > entry_times[hdr] && exit_times[pred] < exit_times[hdr]) {
-											/* "pred" is a descendant of "hdr" */
-											ir_worklist_push(&work, pred);
-										} else {
-											/* another entry to the irreducible loop */
-											bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
-											if (ctx->ir_base[bb->start].op == IR_MERGE) {
-												ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
-											}
-										}
-									}
-									p++;
-								} while (--n);
-							}
-						}
-					}
-				}
-			} while (--prev_irreducible != n);
-			prev_irreducible = 0;
-			b = sorted_blocks[n];
-			bb = &blocks[b];
+		if (UNEXPECTED(bb->dom_depth < irreducible_depth)) {
+			ir_collect_irreducible_loops(ctx, times, &work, &irreducible_list);
+			irreducible_depth = 0;
 		}

 		if (bb->predecessors_count > 1) {
 			bool irreducible = 0;
+			uint32_t b_entry_time = ENTRY_TIME(b);
+			uint32_t b_exit_time = EXIT_TIME(b);
 			uint32_t *p = &edges[bb->predecessors];

-			j = bb->predecessors_count;
-			do {
+			for (j = bb->predecessors_count; j > 0; p++, j--) {
 				uint32_t pred = *p;

-				/* A join edge is one for which the predecessor does not
-				   immediately dominate the successor.  */
-				if (bb->idom != pred) {
-					/* In a loop back-edge (back-join edge), the successor dominates
-					   the predecessor.  */
+				/* Check back-edges */
+				if (ENTRY_TIME(pred) >= b_entry_time && EXIT_TIME(pred) <= b_exit_time) {
 					if (ir_dominates(blocks, b, pred)) {
+						/* In a loop back-edge (back-join edge), the successor dominates
+						   the predecessor.  */
 						if (!ir_worklist_len(&work)) {
 							ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
 						}
-						blocks[pred].loop_header = 0; /* support for merged loops */
+						IR_ASSERT(!blocks[pred].loop_header);
+						// blocks[pred].loop_header = 0; /* support for merged loops */
 						ir_worklist_push(&work, pred);
 					} else {
-						/* Otherwise it's a cross-join edge.  See if it's a branch
-						   to an ancestor on the DJ spanning tree.  */
-						if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) {
-							irreducible = 1;
-							break;
-						}
+						/* Otherwise it's a back-edge of irreducible loop. */
+						irreducible = 1;
+						break;
 					}
 				}
-				p++;
-			} while (--j);
+			}

 			if (UNEXPECTED(irreducible)) {
-				bb->flags |= IR_BB_LOOP_HEADER | IR_BB_IRREDUCIBLE_LOOP;
+				bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
 				ctx->flags2 |= IR_CFG_HAS_LOOPS | IR_IRREDUCIBLE_CFG;
-				/* Remember the position of the first irreducible loop to process all the irreducible loops
-				 * after the reducible loops with the same dominator tree depth
+				/* Delay processing of all irreducible loops
+				 * after all reducible loops with the same dominator tree depth
 				 */
-				if (!prev_irreducible) {
-					prev_irreducible = n;
-					prev_dom_depth = bb->dom_depth;
+				irreducible_depth = bb->dom_depth;
+				if (!ir_list_capasity(&irreducible_list)) {
+					ir_list_init(&irreducible_list, 16);
 				}
+				ir_list_push(&irreducible_list, b);
 				ir_list_clear(&work.l);
 			} else if (ir_worklist_len(&work)) {
 				/* collect members of the reducible loop */
@@ -1178,35 +1208,47 @@ int ir_find_loops(ir_ctx *ctx)
 				if (ctx->ir_base[bb->start].op == IR_MERGE) {
 					ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
 				}
+				ir_bitset_incl(work.visited, hdr);
 				while (ir_worklist_len(&work)) {
 					b = ir_worklist_pop(&work);
 					if (b != hdr) {
 						ir_block *bb = &blocks[b];
+
+						IR_ASSERT(!bb->loop_header);
 						bb->loop_header = hdr;
-						if (bb->predecessors_count) {
-							uint32_t *p = &edges[bb->predecessors];
-							uint32_t n = bb->predecessors_count;
-							do {
-								uint32_t pred = *p;
-								while (blocks[pred].loop_header > 0) {
-									pred = blocks[pred].loop_header;
-								}
-								if (pred != hdr) {
+
+						uint32_t *p = &edges[bb->predecessors];
+						uint32_t n = bb->predecessors_count;
+						for (; n > 0; p++, n--) {
+							uint32_t pred = *p;
+							if (!ir_bitset_in(work.visited, pred)) {
+								if (blocks[pred].loop_header) {
+									if (blocks[pred].loop_header == b) continue;
+									do {
+										pred = blocks[pred].loop_header;
+									} while (blocks[pred].loop_header > 0);
 									ir_worklist_push(&work, pred);
+								} else {
+									ir_bitset_incl(work.visited, pred);
+									ir_list_push_unchecked(&work.l, pred);
 								}
-								p++;
-							} while (--n);
+							}
 						}
 					}
 				}
 			}
 		}
 	}
-	IR_ASSERT(!prev_irreducible);
+
+	IR_ASSERT(!irreducible_depth);
+	if (ir_list_capasity(&irreducible_list)) {
+		ir_list_free(&irreducible_list);
+	}

 	if (ctx->flags2 & IR_CFG_HAS_LOOPS) {
-		for (n = 1; n < count; n++) {
-			b = sorted_blocks[n];
+		n = ctx->cfg_blocks_count + 1;
+		for (j = 1; j < n; j++) {
+			b = sorted_blocks[j];
 			ir_block *bb = &blocks[b];
 			if (bb->loop_header > 0) {
 				ir_block *loop = &blocks[bb->loop_header];
@@ -1237,7 +1279,7 @@ int ir_find_loops(ir_ctx *ctx)
 		}
 	}

-	ir_mem_free(entry_times);
+	ir_mem_free(times);
 	ir_worklist_free(&work);

 	return 1;
diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c
index 1cadb099bce..92b66eb0358 100644
--- a/ext/opcache/jit/ir/ir_emit.c
+++ b/ext/opcache/jit/ir/ir_emit.c
@@ -5,6 +5,10 @@
  * Authors: Dmitry Stogov <dmitry@php.net>
  */

+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
 #include "ir.h"

 #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
index c644c188dca..7edb012f617 100644
--- a/ext/opcache/jit/ir/ir_gcm.c
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -842,6 +842,7 @@ static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *
 	xlat = ir_mem_malloc(count * sizeof(uint32_t));
 	ir_worklist_init(&worklist, count);
 	ir_worklist_push(&worklist, 1);
+	/* Schedule blocks bottom-up. Place a block only after all its successors (except back-edges) are placed. */
 	while (ir_worklist_len(&worklist) != 0) {
 next:
 		b = ir_worklist_peek(&worklist);
@@ -849,7 +850,14 @@ static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *
 		n = bb->successors_count;
 		if (n == 1) {
 			succ = ctx->cfg_edges[bb->successors];
-			if (ir_worklist_push(&worklist, succ)) {
+			if (ir_bitset_in(worklist.visited, succ)) {
+				/* already processed */
+			} else if ((ctx->cfg_blocks[succ].flags & IR_BB_IRREDUCIBLE_LOOP)
+					&& ((ctx->cfg_blocks[b].flags & IR_BB_LOOP_HEADER) ?
+						(ctx->cfg_blocks[succ].loop_header != b) :
+						(ctx->cfg_blocks[succ].loop_header != ctx->cfg_blocks[b].loop_header))) {
+				/* "side" entry of irreducible loop (ignore) */
+			} else if (ir_worklist_push(&worklist, succ)) {
 				goto next;
 			}
 		} else if (n > 1) {
@@ -862,12 +870,11 @@ static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *
 				succ = *q;
 				if (ir_bitset_in(worklist.visited, succ)) {
 					/* already processed */
-				} else if ((ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER)
-				  && (succ == b || ctx->cfg_blocks[b].loop_header == succ)) {
-					/* back-edge of reducible loop */
 				} else if ((ctx->cfg_blocks[succ].flags & IR_BB_IRREDUCIBLE_LOOP)
-				  && (ctx->cfg_blocks[succ].loop_header == ctx->cfg_blocks[b].loop_header)) {
-					/* closing edge of irreducible loop */
+						&& ((ctx->cfg_blocks[b].flags & IR_BB_LOOP_HEADER) ?
+							(ctx->cfg_blocks[succ].loop_header != b) :
+							(ctx->cfg_blocks[succ].loop_header != ctx->cfg_blocks[b].loop_header))) {
+					/* "side" entry of irreducible loop (ignore) */
 				} else if (!best) {
 					best = succ;
 					best_loop_depth = ctx->cfg_blocks[best].loop_depth;
@@ -883,6 +890,8 @@ static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *
 				goto next;
 			}
 		}
+
+		/* All successors of "b" are placed. Now we can place "b" itself. */
 		ir_worklist_pop(&worklist);
 		count--;
 		new_blocks[count] = *bb;
diff --git a/ext/opcache/jit/ir/ir_gdb.c b/ext/opcache/jit/ir/ir_gdb.c
index ecaf880301e..8b5fba6b153 100644
--- a/ext/opcache/jit/ir/ir_gdb.c
+++ b/ext/opcache/jit/ir/ir_gdb.c
@@ -7,6 +7,10 @@
  * Based on Mike Pall's implementation of GDB interface for LuaJIT.
  */

+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
 #include <stddef.h>
 #include <stdlib.h>
 #include <unistd.h>
diff --git a/ext/opcache/jit/ir/ir_perf.c b/ext/opcache/jit/ir/ir_perf.c
index dbb689b091f..e5a5e593740 100644
--- a/ext/opcache/jit/ir/ir_perf.c
+++ b/ext/opcache/jit/ir/ir_perf.c
@@ -14,6 +14,10 @@
  * perf report -i perf.data.jitted
  */

+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
 #include <stdio.h>
 #include <unistd.h>
 #include <time.h>
diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c
index bfec32b568f..6478ec69756 100644
--- a/ext/opcache/jit/ir/ir_sccp.c
+++ b/ext/opcache/jit/ir/ir_sccp.c
@@ -880,7 +880,7 @@ static void ir_sccp_remove_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref
 		*p = IR_UNUSED;
 		/* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
 		if (input > 0 && _values[input].op > IR_COPY) {
-			ir_use_list_remove_all(ctx, input, ref);
+			ir_use_list_remove_one(ctx, input, ref);
 			if (ir_is_dead(ctx, input)) {
 				/* schedule DCE */
 				ir_bitqueue_add(worklist, input);
@@ -918,7 +918,7 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref
 		*p = IR_UNUSED;
 		/* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
 		if (input > 0 && _values[input].op > IR_COPY) {
-			ir_use_list_remove_all(ctx, input, ref);
+			ir_use_list_remove_one(ctx, input, ref);
 			if (ir_is_dead(ctx, input)) {
 				/* schedule DCE */
 				ir_bitqueue_add(worklist, input);
@@ -1233,7 +1233,7 @@ static void ir_iter_remove_insn(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
 		ir_ref input = *p;
 		*p = IR_UNUSED;
 		if (input > 0) {
-			ir_use_list_remove_all(ctx, input, ref);
+			ir_use_list_remove_one(ctx, input, ref);
 			if (ir_is_dead(ctx, input)) {
 				/* schedule DCE */
 				ir_bitqueue_add(worklist, input);
@@ -1301,7 +1301,7 @@ static void ir_iter_replace_insn(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bit
 		ir_ref input = *p;
 		*p = IR_UNUSED;
 		if (input > 0) {
-			ir_use_list_remove_all(ctx, input, ref);
+			ir_use_list_remove_one(ctx, input, ref);
 			if (ir_is_dead(ctx, input)) {
 				/* schedule DCE */
 				ir_bitqueue_add(worklist, input);
@@ -2427,7 +2427,7 @@ static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn,
 		next->op1 = root->op1;
 		ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
 		if (!IR_IS_CONST_REF(root->op2)) {
-			ir_use_list_remove_all(ctx, root->op2, root_ref);
+			ir_use_list_remove_one(ctx, root->op2, root_ref);
 			if (ir_is_dead(ctx, root->op2)) {
 				ir_bitqueue_add(worklist, root->op2);
 			}
@@ -2485,7 +2485,7 @@ static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn,
 		ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);

 		if (!IR_IS_CONST_REF(root->op2)) {
-			ir_use_list_remove_all(ctx, root->op2, root_ref);
+			ir_use_list_remove_one(ctx, root->op2, root_ref);
 			if (ir_is_dead(ctx, root->op2)) {
 				ir_bitqueue_add(worklist, root->op2);
 			}
@@ -2612,10 +2612,10 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 					next->op1 = root->op1;
 					ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
 					if (!IR_IS_CONST_REF(insn->op1)) {
-						ir_use_list_remove_all(ctx, insn->op1, cond_ref);
+						ir_use_list_remove_one(ctx, insn->op1, cond_ref);
 					}
 					if (!IR_IS_CONST_REF(insn->op2)) {
-						ir_use_list_remove_all(ctx, insn->op2, cond_ref);
+						ir_use_list_remove_one(ctx, insn->op2, cond_ref);
 					}

 					if (ctx->use_lists[cond_ref].count == 1) {
@@ -2705,7 +2705,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 					ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
 					ir_use_list_remove_one(ctx, insn->op1, neg_ref);
 					if (!IR_IS_CONST_REF(insn->op1)) {
-						ir_use_list_remove_all(ctx, insn->op1, cond_ref);
+						ir_use_list_remove_one(ctx, insn->op1, cond_ref);
 					}

 					if (ctx->use_lists[cond_ref].count == 1) {
@@ -2771,7 +2771,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 					next->op1 = root->op1;
 					ir_use_list_replace_one(ctx, cond_ref, root_ref, ref);
 					ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
-					ir_use_list_remove_all(ctx, root->op2, root_ref);
+					ir_use_list_remove_one(ctx, root->op2, root_ref);

 					MAKE_NOP(root);   CLEAR_USES(root_ref);
 					MAKE_NOP(start1); CLEAR_USES(start1_ref);
@@ -3035,8 +3035,8 @@ static bool ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue
 				 *    IF_FALSE  |              MERGE
 				 *    |                        |
 				 */
-				ir_use_list_remove_all(ctx, merge_ref, cond_ref);
-				ir_use_list_remove_all(ctx, ref, if_true_ref);
+				ir_use_list_remove_one(ctx, merge_ref, cond_ref);
+				ir_use_list_remove_one(ctx, ref, if_true_ref);
 				if (!IR_IS_CONST_REF(cond->op3)) {
 					ir_use_list_replace_one(ctx, cond->op3, cond_ref, end2_ref);
 				}
@@ -3230,8 +3230,8 @@ static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqu
 						 *    |                        |
 						 */

-						ir_use_list_remove_all(ctx, merge_ref, phi_ref);
-						ir_use_list_remove_all(ctx, ref, if_true_ref);
+						ir_use_list_remove_one(ctx, merge_ref, phi_ref);
+						ir_use_list_remove_one(ctx, ref, if_true_ref);
 						if (!IR_IS_CONST_REF(phi->op3)) {
 							ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2);
 						}
diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc
index 9b369fadbcc..049c341cc8f 100644
--- a/ext/opcache/jit/ir/ir_x86.dasc
+++ b/ext/opcache/jit/ir/ir_x86.dasc
@@ -12306,7 +12306,7 @@ next_block:;

 		do {
 			/* _cldemote(p); */
-			asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p));
+			__asm__ volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p));
 			p += 64;
 		} while (p < start + size);
 	}