Commit 098b1f89bd7 for php.net

commit 098b1f89bd76e4bd657243c77029fe395834ed93
Author: Dmitry Stogov <dmitry@php.net>
Date:   Mon Jan 12 21:23:38 2026 +0300

    Update IR (#20916)

    IR commit: 40cd6ad28c376cf006c360f39d8aeff6d6e7bf78

diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h
index a9665059705..a274ceb5b16 100644
--- a/ext/opcache/jit/ir/ir.h
+++ b/ext/opcache/jit/ir/ir.h
@@ -539,38 +539,38 @@ void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func);
 void ir_strtab_free(ir_strtab *strtab);

 /* IR Context Flags */
-#define IR_FUNCTION            (1<<0) /* Generate a function. */
-#define IR_FASTCALL_FUNC       (1<<1) /* Generate a function with fastcall calling convention, x86 32-bit only. */
-#define IR_VARARG_FUNC         (1<<2)
-#define IR_BUILTIN_FUNC        (1<<3)
-#define IR_STATIC              (1<<4)
-#define IR_EXTERN              (1<<5)
-#define IR_CONST               (1<<6)
-
-#define IR_CONST_FUNC          (1<<6)
-#define IR_PURE_FUNC           (1<<7)
-
-#define IR_INITIALIZED         (1<<7) /* sym data flag: constant or an initialized variable */
-#define IR_CONST_STRING        (1<<8) /* sym data flag: constant string */
-
-#define IR_SKIP_PROLOGUE       (1<<8) /* Don't generate function prologue. */
-#define IR_USE_FRAME_POINTER   (1<<9)
-#define IR_PREALLOCATED_STACK  (1<<10)
-#define IR_NO_STACK_COMBINE    (1<<11)
-#define IR_START_BR_TARGET     (1<<12)
-#define IR_ENTRY_BR_TARGET     (1<<13)
-#define IR_GEN_ENDBR           (1<<14)
-#define IR_MERGE_EMPTY_ENTRIES (1<<15)
-
-#define IR_OPT_INLINE          (1<<16)
-#define IR_OPT_FOLDING         (1<<17)
-#define IR_OPT_CFG             (1<<18) /* merge BBs, by remove END->BEGIN nodes during CFG construction */
-#define IR_OPT_MEM2SSA         (1<<19)
-#define IR_OPT_CODEGEN         (1<<20)
-#define IR_GEN_NATIVE          (1<<21)
-#define IR_GEN_CODE            (1<<22) /* C or LLVM */
-
-#define IR_GEN_CACHE_DEMOTE    (1<<23) /* Demote the generated code from closest CPU caches */
+#define IR_PROTO_MASK          0xff
+#define IR_CALL_CONV_MASK      0x0f
+
+#define IR_VARARG_FUNC         (1<<4)
+#define IR_CONST_FUNC          (1<<5)
+#define IR_PURE_FUNC           (1<<6)
+
+#define IR_CONST               (1<<5)
+#define IR_INITIALIZED         (1<<6) /* sym data flag: constant or an initialized variable */
+#define IR_CONST_STRING        (1<<7) /* sym data flag: constant string */
+
+#define IR_FUNCTION            (1<<8) /* Generate a function. */
+#define IR_STATIC              (1<<9)
+#define IR_EXTERN              (1<<10)
+
+#define IR_USE_FRAME_POINTER   (1<<11)
+#define IR_NO_STACK_COMBINE    (1<<12)
+#define IR_GEN_ENDBR           (1<<13)
+#define IR_GEN_CACHE_DEMOTE    (1<<14) /* Demote the generated code from the closest CPU caches */
+
+#define IR_SKIP_PROLOGUE       (1<<15) /* Don't generate function prologue. */
+#define IR_START_BR_TARGET     (1<<16)
+#define IR_ENTRY_BR_TARGET     (1<<17)
+#define IR_MERGE_EMPTY_ENTRIES (1<<18)
+
+#define IR_OPT_INLINE          (1<<19)
+#define IR_OPT_FOLDING         (1<<20)
+#define IR_OPT_CFG             (1<<21) /* merge BBs by removing END->BEGIN nodes during CFG construction */
+#define IR_OPT_MEM2SSA         (1<<22)
+#define IR_OPT_CODEGEN         (1<<23)
+#define IR_GEN_NATIVE          (1<<24)
+#define IR_GEN_CODE            (1<<25)

 /* debug related */
 #ifdef IR_DEBUG
@@ -582,6 +582,24 @@ void ir_strtab_free(ir_strtab *strtab);
 # define IR_DEBUG_BB_SCHEDULE  (1U<<31)
 #endif

+/* Calling Conventions */
+#define IR_CC_DEFAULT          0x00
+#define IR_CC_BUILTIN          0x01
+#define IR_CC_FASTCALL         0x02
+#define IR_CC_PRESERVE_NONE    0x03
+
+#if defined(IR_TARGET_X64)
+# define IR_CC_X86_64_SYSV     0x08
+# define IR_CC_X86_64_MS       0x09
+#elif defined(IR_TARGET_AARCH64)
+# define IR_CC_AARCH64_SYSV    0x08
+# define IR_CC_AARCH64_DARWIN  0x09
+#endif
+
+/* Deprecated constants */
+#define IR_BUILTIN_FUNC        IR_CC_BUILTIN
+#define IR_FASTCALL_FUNC       IR_CC_FASTCALL
+
 typedef struct _ir_ctx           ir_ctx;
 typedef struct _ir_use_list      ir_use_list;
 typedef struct _ir_block         ir_block;
@@ -728,7 +746,7 @@ const char *ir_get_strl(const ir_ctx *ctx, ir_ref idx, size_t *len);
 #define IR_MAX_PROTO_PARAMS 255

 typedef struct _ir_proto_t {
-	uint8_t flags;
+	uint8_t flags; /* first 8 bits of ir_ctx.flags */
 	uint8_t ret_type;
 	uint8_t params_count;
 	uint8_t param_types[5];
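
The ir.h hunks above repack ir_ctx.flags: the low byte now doubles as ir_proto_t.flags, and the low four bits (IR_CALL_CONV_MASK) carry a calling-convention id, which is why the old IR_FASTCALL_FUNC/IR_BUILTIN_FUNC flags survive only as deprecated aliases of IR_CC_* values. A minimal standalone sketch of the arithmetic (the IR_* constants are copied from the hunk; the program around them is illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    #define IR_CALL_CONV_MASK 0x0f   /* low 4 bits: IR_CC_* id */
    #define IR_PROTO_MASK     0xff   /* low 8 bits: shared with ir_proto_t.flags */
    #define IR_CC_FASTCALL    0x02
    #define IR_VARARG_FUNC    (1<<4)
    #define IR_FUNCTION       (1<<8)

    int main(void)
    {
        uint32_t flags = IR_FUNCTION | IR_VARARG_FUNC | IR_CC_FASTCALL;

        /* the calling-convention id lives in the low 4 bits */
        printf("cc id       = %u\n", (unsigned)(flags & IR_CALL_CONV_MASK));  /* 2 */
        /* the whole prototype-relevant part fits in one byte */
        printf("proto flags = 0x%02x\n", (unsigned)(flags & IR_PROTO_MASK));  /* 0x12 */
        return 0;
    }
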
diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
index b553243309f..88996cb6f98 100644
--- a/ext/opcache/jit/ir/ir_aarch64.dasc
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -213,14 +213,21 @@ static bool aarch64_may_encode_addr_offset(int64_t offset, uint32_t type_size)
 |.endmacro

 typedef struct _ir_backend_data {
-    ir_reg_alloc_data  ra_data;
-	uint32_t           dessa_from_block;
+	ir_reg_alloc_data  ra_data;
 	dasm_State        *dasm_state;
 	ir_bitset          emit_constants;
 	int                rodata_label, jmp_table_label;
 	bool               resolved_label_syms;
 } ir_backend_data;

+typedef struct _ir_aarch64_sysv_va_list {
+	void    *stack;
+	void    *gr_top;
+	void    *vr_top;
+	int32_t  gr_offset;
+	int32_t  vr_offset;
+} ir_aarch64_sysv_va_list;
+
 #define IR_GP_REG_NAME(code, name64, name32) \
 	#name64,
 #define IR_GP_REG_NAME32(code, name64, name32) \
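
The new ir_aarch64_sysv_va_list mirrors the AAPCS64 va_list layout that ir_emit_va_start()/ir_emit_va_arg() further down address via offsetof(). A standalone check of the expected field offsets (the struct is copied from the hunk; assuming an LP64 host):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct _ir_aarch64_sysv_va_list {
        void    *stack;
        void    *gr_top;
        void    *vr_top;
        int32_t  gr_offset;
        int32_t  vr_offset;
    } ir_aarch64_sysv_va_list;

    int main(void)
    {
        printf("stack     @ %zu\n", offsetof(ir_aarch64_sysv_va_list, stack));     /* 0  */
        printf("gr_top    @ %zu\n", offsetof(ir_aarch64_sysv_va_list, gr_top));    /* 8  */
        printf("vr_top    @ %zu\n", offsetof(ir_aarch64_sysv_va_list, vr_top));    /* 16 */
        printf("gr_offset @ %zu\n", offsetof(ir_aarch64_sysv_va_list, gr_offset)); /* 24 */
        printf("vr_offset @ %zu\n", offsetof(ir_aarch64_sysv_va_list, vr_offset)); /* 28 */
        printf("sizeof    = %zu\n", sizeof(ir_aarch64_sysv_va_list));              /* 32 */
        return 0;
    }
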
@@ -230,9 +237,11 @@ typedef struct _ir_backend_data {
 #define IR_FP_REG_NAME32(code, name64, name32, name16, name8) \
 	#name32,

-static const char *_ir_reg_name[IR_REG_NUM] = {
+static const char *_ir_reg_name[] = {
 	IR_GP_REGS(IR_GP_REG_NAME)
 	IR_FP_REGS(IR_FP_REG_NAME)
+	"ALL",
+	"SCRATCH",
 };

 static const char *_ir_reg_name32[IR_REG_NUM] = {
@@ -240,38 +249,11 @@ static const char *_ir_reg_name32[IR_REG_NUM] = {
 	IR_FP_REGS(IR_FP_REG_NAME32)
 };

-/* Calling Convention */
-static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
-	IR_REG_INT_ARG1,
-	IR_REG_INT_ARG2,
-	IR_REG_INT_ARG3,
-	IR_REG_INT_ARG4,
-	IR_REG_INT_ARG5,
-	IR_REG_INT_ARG6,
-	IR_REG_INT_ARG7,
-	IR_REG_INT_ARG8,
-};
-
-static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
-	IR_REG_FP_ARG1,
-	IR_REG_FP_ARG2,
-	IR_REG_FP_ARG3,
-	IR_REG_FP_ARG4,
-	IR_REG_FP_ARG5,
-	IR_REG_FP_ARG6,
-	IR_REG_FP_ARG7,
-	IR_REG_FP_ARG8,
-};
-
 const char *ir_reg_name(int8_t reg, ir_type type)
 {
 	if (reg >= IR_REG_NUM) {
-		if (reg == IR_REG_SCRATCH) {
-			return "SCRATCH";
-		} else {
-			IR_ASSERT(reg == IR_REG_ALL);
-			return "ALL";
-		}
+		IR_ASSERT((uint8_t)reg < sizeof(_ir_reg_name) / sizeof(_ir_reg_name[0]));
+		return _ir_reg_name[reg];
 	}
 	IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
 	if (type == IR_VOID) {
@@ -284,6 +266,82 @@ const char *ir_reg_name(int8_t reg, ir_type type)
 	}
 }

+/* Calling Conventions */
+#define IR_REG_SCRATCH_AARCH64         IR_REG_SET_1
+
+#define IR_REGSET_SCRATCH_AARCH64 \
+	(IR_REGSET_INTERVAL(IR_REG_X0, IR_REG_X18) | \
+	 IR_REGSET_INTERVAL(IR_REG_V0, IR_REG_V7) | \
+	 IR_REGSET_INTERVAL(IR_REG_V16, IR_REG_V31))
+
+const ir_regset ir_scratch_regset[] = {
+	IR_REGSET_GP | IR_REGSET_FP,
+	IR_REGSET_SCRATCH_AARCH64,
+};
+
+const ir_call_conv_dsc ir_call_conv_aarch64_sysv = {
+	0,           /* cleanup_stack_by_callee */
+	0,           /* pass_struct_by_val      */
+	1,           /* sysv_varargs            */
+	0,           /* shadow_param_regs       */
+	0,           /* shadow_store_size       */
+	8,           /* int_param_regs_count    */
+	8,           /* fp_param_regs_count     */
+	IR_REG_X0,   /* int_ret_reg             */
+	IR_REG_V0,   /* fp_ret_reg              */
+	IR_REG_NONE, /* fp_varargs_reg          */
+	IR_REG_SCRATCH_AARCH64,
+	(const int8_t[8]){IR_REG_X0, IR_REG_X1, IR_REG_X2, IR_REG_X3, IR_REG_X4, IR_REG_X5, IR_REG_X6, IR_REG_X7},
+	(const int8_t[8]){IR_REG_V0, IR_REG_V1, IR_REG_V2, IR_REG_V3, IR_REG_V4, IR_REG_V5, IR_REG_V6, IR_REG_V7},
+	IR_REGSET_INTERVAL(IR_REG_X19, IR_REG_X30) | IR_REGSET_INTERVAL(IR_REG_V8, IR_REG_V15),
+
+};
+
+const ir_call_conv_dsc ir_call_conv_aarch64_darwin = {
+	0,           /* cleanup_stack_by_callee */
+	0,           /* pass_struct_by_val      */
+	0,           /* sysv_varargs            */
+	0,           /* shadow_param_regs       */
+	0,           /* shadow_store_size       */
+	8,           /* int_param_regs_count    */
+	8,           /* fp_param_regs_count     */
+	IR_REG_X0,   /* int_ret_reg             */
+	IR_REG_V0,   /* fp_ret_reg              */
+	IR_REG_NONE, /* fp_varargs_reg          */
+	IR_REG_SCRATCH_AARCH64,
+	(const int8_t[8]){IR_REG_X0, IR_REG_X1, IR_REG_X2, IR_REG_X3, IR_REG_X4, IR_REG_X5, IR_REG_X6, IR_REG_X7},
+	(const int8_t[8]){IR_REG_V0, IR_REG_V1, IR_REG_V2, IR_REG_V3, IR_REG_V4, IR_REG_V5, IR_REG_V6, IR_REG_V7},
+	IR_REGSET_INTERVAL(IR_REG_X19, IR_REG_X30) | IR_REGSET_INTERVAL(IR_REG_V8, IR_REG_V15),
+
+};
+
+const ir_call_conv_dsc ir_call_conv_aarch64_preserve_none = {
+	0,           /* cleanup_stack_by_callee */
+	0,           /* pass_struct_by_val      */
+	1,           /* sysv_varargs            */
+	0,           /* shadow_param_regs       */
+	0,           /* shadow_store_size       */
+	23,          /* int_param_regs_count    */
+	8,           /* fp_param_regs_count     */
+	IR_REG_X0,   /* int_ret_reg             */
+	IR_REG_V0,   /* fp_ret_reg              */
+	IR_REG_NONE, /* fp_varargs_reg          */
+	IR_REG_ALL,
+	(const int8_t[23]){IR_REG_X20, IR_REG_X21, IR_REG_X22, IR_REG_X23, IR_REG_X24, IR_REG_X25, IR_REG_X26, IR_REG_X27,
+	                   IR_REG_X28,
+	                   IR_REG_X0, IR_REG_X1, IR_REG_X2, IR_REG_X3, IR_REG_X4, IR_REG_X5, IR_REG_X6, IR_REG_X7,
+	                   IR_REG_X10, IR_REG_X11, IR_REG_X12, IR_REG_X13, IR_REG_X14, IR_REG_X9},
+	(const int8_t[8]){IR_REG_V0, IR_REG_V1, IR_REG_V2, IR_REG_V3, IR_REG_V4, IR_REG_V5, IR_REG_V6, IR_REG_V7},
+	IR_REGSET_EMPTY,
+
+};
+
+#ifdef __APPLE__
+# define ir_call_conv_default ir_call_conv_aarch64_darwin
+#else
+# define ir_call_conv_default ir_call_conv_aarch64_sysv
+#endif
+
 #define IR_RULES(_)        \
 	_(CMP_INT)             \
 	_(CMP_FP)              \
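
Each ir_call_conv_dsc above bundles everything the emitters consult: parameter-register tables and their counts, the return registers, the scratch set, and the preserved set. A standalone toy (the simplified field names here are this sketch's own, not IR's API) of how such a descriptor drives argument-register assignment, in the spirit of ir_emit_arguments() below:

    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint8_t       int_regs_count, fp_regs_count;
        const int8_t *int_regs, *fp_regs;
    } toy_cc;

    int main(void)
    {
        static const int8_t xr[8] = {0, 1, 2, 3, 4, 5, 6, 7};  /* X0..X7 */
        static const int8_t vr[8] = {0, 1, 2, 3, 4, 5, 6, 7};  /* V0..V7 */
        const toy_cc cc = {8, 8, xr, vr};
        const char *sig = "iifiiiiiiif";  /* 'i' = integer arg, 'f' = fp arg */
        int ip = 0, fp = 0;

        for (int j = 0; sig[j]; j++) {
            if (sig[j] == 'i') {
                /* integer args take X registers until the table runs out */
                if (ip < cc.int_regs_count) printf("arg %d -> X%d\n", j, cc.int_regs[ip]);
                else                        printf("arg %d -> stack\n", j);
                ip++;
            } else {
                /* fp args take V registers independently of the int count */
                if (fp < cc.fp_regs_count)  printf("arg %d -> V%d\n", j, cc.fp_regs[fp]);
                else                        printf("arg %d -> stack\n", j);
                fp++;
            }
        }
        return 0;
    }
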
@@ -342,6 +400,8 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 	const ir_insn *insn;
 	int n = 0;
 	int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
+	const ir_proto_t *proto;
+	const ir_call_conv_dsc *cc;

 	constraints->def_reg = IR_REG_NONE;
 	constraints->hints_count = 0;
@@ -584,20 +644,33 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 			n++;
 			break;
 		case IR_ARGVAL:
-			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_DEF_SUB_REF - IR_SUB_REFS_COUNT, IR_USE_SUB_REF);
+			/* memcpy() clobbers all scratch registers */
+			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH_AARCH64, IR_DEF_SUB_REF - IR_SUB_REFS_COUNT, IR_USE_SUB_REF);
 			n = 1;
 			break;
 		case IR_CALL:
 			insn = &ctx->ir_base[ref];
-			constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? IR_REG_INT_RET1 : IR_REG_FP_RET1;
-			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
+			proto = ir_call_proto(ctx, insn);
+			cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
+			if (insn->type != IR_VOID) {
+				constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ?
+					cc->int_ret_reg : cc->fp_ret_reg;
+			}
+			constraints->tmp_regs[0] = IR_SCRATCH_REG(cc->scratch_reg, IR_USE_SUB_REF, IR_DEF_SUB_REF);
 			n = 1;
-			IR_FALLTHROUGH;
+			if (insn->inputs_count > 2) {
+				goto get_arg_hints;
+			}
+			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
+			break;
 		case IR_TAILCALL:
 			insn = &ctx->ir_base[ref];
 			if (insn->inputs_count > 2) {
+				proto = ir_call_proto(ctx, insn);
+				cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
+get_arg_hints:
 				constraints->hints[2] = IR_REG_NONE;
-				constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints);
+				constraints->hints_count = ir_get_args_regs(ctx, insn, cc, constraints->hints);
 				if (!IR_IS_CONST_REF(insn->op2)) {
 					constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF);
 					n++;
@@ -658,19 +731,22 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 			flags = IR_USE_SHOULD_BE_IN_REG;
 			break;
 		case IR_EXITCALL:
-			constraints->def_reg = IR_REG_INT_RET1;
+			cc = ir_get_call_conv_dsc(ctx->flags);
+			constraints->def_reg = cc->int_ret_reg;
 			break;
 		case IR_RSTORE:
 			flags = IR_OP3_SHOULD_BE_IN_REG;
 			break;
 		case IR_RETURN_INT:
+			cc = ir_get_call_conv_dsc(ctx->flags);
 			flags = IR_OP2_SHOULD_BE_IN_REG;
-			constraints->hints[2] = IR_REG_INT_RET1;
+			constraints->hints[2] = cc->int_ret_reg;
 			constraints->hints_count = 3;
 			break;
 		case IR_RETURN_FP:
+			cc = ir_get_call_conv_dsc(ctx->flags);
 			flags = IR_OP2_SHOULD_BE_IN_REG;
-			constraints->hints[2] = IR_REG_FP_RET1;
+			constraints->hints[2] = cc->fp_ret_reg;
 			constraints->hints_count = 3;
 			break;
 		case IR_SNAPSHOT:
@@ -1798,72 +1874,73 @@ static void ir_emit_prologue(ir_ctx *ctx)
 			}
 		}
 	}
+
 	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
-#ifndef __APPLE__
-		const int8_t *int_reg_params = _ir_int_reg_params;
-		const int8_t *fp_reg_params = _ir_fp_reg_params;
-		ir_reg fp;
-		int offset;
-		int i;
+		const ir_call_conv_dsc *cc = data->ra_data.cc;

-		if (ctx->flags & IR_USE_FRAME_POINTER) {
-			fp = IR_REG_FRAME_POINTER;
+		if (cc->sysv_varargs) {
+			ir_reg fp;
+			int offset;
+			int i;

-			offset = ctx->locals_area_size + sizeof(void*) * 2;
-		} else {
-			fp = IR_REG_STACK_POINTER;
-			offset = ctx->locals_area_size + ctx->call_stack_size;
-		}
+			if (ctx->flags & IR_USE_FRAME_POINTER) {
+				fp = IR_REG_FRAME_POINTER;

-		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
-			ir_reg prev = IR_REG_NONE;
+				offset = ctx->locals_area_size + sizeof(void*) * 2;
+			} else {
+				fp = IR_REG_STACK_POINTER;
+				offset = ctx->locals_area_size + ctx->call_stack_size;
+			}
+
+			if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) {
+				ir_reg prev = IR_REG_NONE;

-			/* skip named args */
-			offset += sizeof(void*) * ctx->gp_reg_params;
-			for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) {
+				/* skip named args */
+				offset += sizeof(void*) * ctx->gp_reg_params;
+				for (i = ctx->gp_reg_params; i < cc->int_param_regs_count; i++) {
+					if (prev != IR_REG_NONE) {
+						if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
+							|	stp Rx(prev), Rx(cc->int_param_regs[i]), [Rx(fp), #offset]
+						} else if (aarch64_may_encode_addr_offset(offset + 8, 8)) {
+							|	str Rx(prev), [Rx(fp), #offset]
+							|	str Rx(cc->int_param_regs[i]), [Rx(fp), #(offset+8)]
+						} else {
+							ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+							|	str Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)]
+							|	add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #8
+							|	str Rx(cc->int_param_regs[i]), [Rx(fp), Rx(IR_REG_INT_TMP)]
+						}
+						prev = IR_REG_NONE;
+						offset += sizeof(void*) * 2;
+					} else {
+						prev = cc->int_param_regs[i];
+					}
+				}
 				if (prev != IR_REG_NONE) {
-					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
-						|	stp Rx(prev), Rx(int_reg_params[i]), [Rx(fp), #offset]
-					} else if (aarch64_may_encode_addr_offset(offset + 8, 8)) {
+					if (aarch64_may_encode_addr_offset(offset + 8, 8)) {
 						|	str Rx(prev), [Rx(fp), #offset]
-						|	str Rx(int_reg_params[i]), [Rx(fp), #(offset+8)]
 					} else {
 						ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
 						|	str Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)]
-						|	add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #8
-						|	str Rx(int_reg_params[i]), [Rx(fp), Rx(IR_REG_INT_TMP)]
 					}
-					prev = IR_REG_NONE;
-					offset += sizeof(void*) * 2;
-				} else {
-					prev = int_reg_params[i];
+					offset += sizeof(void*);
 				}
 			}
-			if (prev != IR_REG_NONE) {
-				if (aarch64_may_encode_addr_offset(offset + 8, 8)) {
-					|	str Rx(prev), [Rx(fp), #offset]
-				} else {
-					ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
-					|	str Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)]
-				}
-				offset += sizeof(void*);
-			}
-		}
-		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
-			/* skip named args */
-			offset += 16 * ctx->fp_reg_params;
-			for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) {
-				// TODO: Rd->Rq stur->str ???
-				if (aarch64_may_encode_addr_offset(offset, 8)) {
-					|	str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset]
-				} else {
-					ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
-					|	str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)]
+			if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < cc->fp_param_regs_count) {
+				/* skip named args */
+				offset += 16 * ctx->fp_reg_params;
+				for (i = ctx->fp_reg_params; i < cc->fp_param_regs_count; i++) {
+					// TODO: Rd->Rq stur->str ???
+					if (aarch64_may_encode_addr_offset(offset, 8)) {
+						|	str Rd(cc->fp_param_regs[i]-IR_REG_FP_FIRST), [Rx(fp), #offset]
+					} else {
+						ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
+						|	str Rd(cc->fp_param_regs[i]-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)]
+					}
+					offset += 16;
 				}
-				offset += 16;
 			}
 		}
-#endif
 	}
 }

@@ -3257,10 +3334,6 @@ static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint
 				break;
 			case IR_UNORDERED:
 				|	bvs =>true_block
-//			case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
-//			case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
-//			case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
-//			case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break;
 		}
 	}
 	if (false_block) {
@@ -3434,15 +3507,17 @@ static void ir_emit_return_void(ir_ctx *ctx)

 static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
 {
+	ir_backend_data *data = ctx->data;
+	ir_reg ret_reg = data->ra_data.cc->int_ret_reg;
 	ir_reg op2_reg = ctx->regs[ref][2];

-	if (op2_reg != IR_REG_INT_RET1) {
+	if (op2_reg != ret_reg) {
 		ir_type type = ctx->ir_base[insn->op2].type;

 		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
-			ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg);
+			ir_emit_mov(ctx, type, ret_reg, op2_reg);
 		} else {
-			ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2);
+			ir_emit_load(ctx, type, ret_reg, insn->op2);
 		}
 	}
 	ir_emit_return_void(ctx);
@@ -3450,14 +3525,16 @@ static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)

 static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
 {
+	ir_backend_data *data = ctx->data;
+	ir_reg ret_reg = data->ra_data.cc->fp_ret_reg;
 	ir_reg op2_reg = ctx->regs[ref][2];
 	ir_type type = ctx->ir_base[insn->op2].type;

-	if (op2_reg != IR_REG_FP_RET1) {
+	if (op2_reg != ret_reg) {
 		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
-			ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg);
+			ir_emit_fp_mov(ctx, type, ret_reg, op2_reg);
 		} else {
-			ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2);
+			ir_emit_load(ctx, type, ret_reg, insn->op2);
 		}
 	}
 	ir_emit_return_void(ctx);
@@ -4461,281 +4538,281 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def)

 static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
-#ifdef __APPLE__
 	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
 	dasm_State **Dst = &data->dasm_state;
-	ir_reg fp;
-	int arg_area_offset;
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg tmp_reg = ctx->regs[def][3];
-	int32_t offset;

-	IR_ASSERT(tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+	if (!cc->sysv_varargs) {
+		ir_reg fp;
+		int arg_area_offset;
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg tmp_reg = ctx->regs[def][3];
+		int32_t offset;
+
+		IR_ASSERT(tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 		}
-		offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
-	}

-	if (ctx->flags & IR_USE_FRAME_POINTER) {
-		fp = IR_REG_FRAME_POINTER;
-		arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size;
+		if (ctx->flags & IR_USE_FRAME_POINTER) {
+			fp = IR_REG_FRAME_POINTER;
+			arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size;
+		} else {
+			fp = IR_REG_STACK_POINTER;
+			arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size;
+		}
+		|	add Rx(tmp_reg), Rx(fp), #arg_area_offset
+		|	str Rx(tmp_reg), [Rx(op2_reg), #offset]
 	} else {
-		fp = IR_REG_STACK_POINTER;
-		arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size;
-	}
-	|	add Rx(tmp_reg), Rx(fp), #arg_area_offset
-	|	str Rx(tmp_reg), [Rx(op2_reg), #offset]
-#else
-	ir_backend_data *data = ctx->data;
-	dasm_State **Dst = &data->dasm_state;
-	ir_reg fp;
-	int reg_save_area_offset;
-	int overflow_arg_area_offset;
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg tmp_reg = ctx->regs[def][3];
-	int32_t offset;
+		ir_reg fp;
+		int reg_save_area_offset;
+		int overflow_arg_area_offset;
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg tmp_reg = ctx->regs[def][3];
+		int32_t offset;

-	IR_ASSERT(tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+		IR_ASSERT(tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 		}
-		offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
-	}

-	if (ctx->flags & IR_USE_FRAME_POINTER) {
-		fp = IR_REG_FRAME_POINTER;
-		reg_save_area_offset = ctx->locals_area_size + sizeof(void*) * 2;
-		overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size;
-	} else {
-		fp = IR_REG_STACK_POINTER;
-		reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size;
-		overflow_arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size;
-	}
-
-	/* Set va_list.stack */
-	|	add Rx(tmp_reg), Rx(fp), #overflow_arg_area_offset
-	|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
-	if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
-		reg_save_area_offset += sizeof(void*) * IR_REG_INT_ARGS;
-		/* Set va_list.gr_top */
-		if (overflow_arg_area_offset != reg_save_area_offset) {
-			|	add Rx(tmp_reg), Rx(fp), #reg_save_area_offset
-		}
-		|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_top))]
-		/* Set va_list.gr_offset */
-		|	movn Rw(tmp_reg), #~(0 - (sizeof(void*) * (IR_REG_INT_ARGS - ctx->gp_reg_params)))
-		|	str Rw(tmp_reg),  [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))]
-	} else {
-		/* Set va_list.gr_offset */
-		|	str wzr,  [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))]
-	}
-	if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
-		reg_save_area_offset += 16 * IR_REG_FP_ARGS;
-		/* Set va_list.vr_top */
-		if (overflow_arg_area_offset != reg_save_area_offset || ctx->gp_reg_params < IR_REG_INT_ARGS) {
-			|	add Rx(tmp_reg), Rx(fp), #reg_save_area_offset
-		}
-		|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))]
-		/* Set va_list.vr_offset */
-		|	movn Rw(tmp_reg), #~(0 - (16 * (IR_REG_FP_ARGS - ctx->fp_reg_params)))
-		|	str Rw(tmp_reg),  [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))]
-	} else {
-		/* Set va_list.vr_offset */
-		|	str wzr,  [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))]
+		if (ctx->flags & IR_USE_FRAME_POINTER) {
+			fp = IR_REG_FRAME_POINTER;
+			reg_save_area_offset = ctx->locals_area_size + sizeof(void*) * 2;
+			overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size;
+		} else {
+			fp = IR_REG_STACK_POINTER;
+			reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size;
+			overflow_arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size;
+		}
+
+		/* Set va_list.stack */
+		|	add Rx(tmp_reg), Rx(fp), #overflow_arg_area_offset
+		|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, stack))]
+		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) {
+			reg_save_area_offset += sizeof(void*) * cc->int_param_regs_count;
+			/* Set va_list.gr_top */
+			if (overflow_arg_area_offset != reg_save_area_offset) {
+				|	add Rx(tmp_reg), Rx(fp), #reg_save_area_offset
+			}
+			|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_top))]
+			/* Set va_list.gr_offset */
+			|	movn Rw(tmp_reg), #~(0 - (sizeof(void*) * (cc->int_param_regs_count - ctx->gp_reg_params)))
+			|	str Rw(tmp_reg),  [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_offset))]
+		} else {
+			/* Set va_list.gr_offset */
+			|	str wzr,  [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_offset))]
+		}
+		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < cc->fp_param_regs_count) {
+			reg_save_area_offset += 16 * cc->fp_param_regs_count;
+			/* Set va_list.vr_top */
+			if (overflow_arg_area_offset != reg_save_area_offset || ctx->gp_reg_params < cc->int_param_regs_count) {
+				|	add Rx(tmp_reg), Rx(fp), #reg_save_area_offset
+			}
+			|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_top))]
+			/* Set va_list.vr_offset */
+			|	movn Rw(tmp_reg), #~(0 - (16 * (cc->fp_param_regs_count - ctx->fp_reg_params)))
+			|	str Rw(tmp_reg),  [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_offset))]
+		} else {
+			/* Set va_list.vr_offset */
+			|	str wzr,  [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_offset))]
+		}
 	}
-#endif
 }

 static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
-#ifdef __APPLE__
 	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
 	dasm_State **Dst = &data->dasm_state;
-	ir_reg tmp_reg = ctx->regs[def][1];
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg op3_reg = ctx->regs[def][3];
-	int32_t op2_offset, op3_offset;

-	IR_ASSERT(tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+	if (!cc->sysv_varargs) {
+		ir_reg tmp_reg = ctx->regs[def][1];
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg op3_reg = ctx->regs[def][3];
+		int32_t op2_offset, op3_offset;
+
+		IR_ASSERT(tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			op2_offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 		}
-		op2_offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
-	}
-	if (op3_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op3_reg)) {
-			op3_reg = IR_REG_NUM(op3_reg);
-			ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
+		if (op3_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op3_reg)) {
+				op3_reg = IR_REG_NUM(op3_reg);
+				ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
+			}
+			op3_offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
+			op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
 		}
-		op3_offset = 0;
+		|	ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset]
+		|	str Rx(tmp_reg), [Rx(op2_reg), #op2_offset]
 	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
-		op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
-	}
-	|	ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset]
-	|	str Rx(tmp_reg), [Rx(op2_reg), #op2_offset]
-#else
-	ir_backend_data *data = ctx->data;
-	dasm_State **Dst = &data->dasm_state;
-	ir_reg tmp_reg = ctx->regs[def][1];
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg op3_reg = ctx->regs[def][3];
-	int32_t op2_offset, op3_offset;
+		ir_reg tmp_reg = ctx->regs[def][1];
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg op3_reg = ctx->regs[def][3];
+		int32_t op2_offset, op3_offset;

-	IR_ASSERT(tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+		IR_ASSERT(tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			op2_offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 		}
-		op2_offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
+		if (op3_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op3_reg)) {
+				op3_reg = IR_REG_NUM(op3_reg);
+				ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
+			}
+			op3_offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
+			op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
+		}
+		|	ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset]
+		|	str Rx(tmp_reg), [Rx(op2_reg), #op2_offset]
+		|	ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+8)]
+		|	str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+8)]
+		|	ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+16)]
+		|	str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+16)]
+		|	ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+24)]
+		|	str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+24)]
 	}
-	if (op3_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op3_reg)) {
-			op3_reg = IR_REG_NUM(op3_reg);
-			ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
-		}
-		op3_offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
-		op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
-	}
-	|	ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset]
-	|	str Rx(tmp_reg), [Rx(op2_reg), #op2_offset]
-	|	ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+8)]
-	|	str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+8)]
-	|	ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+16)]
-	|	str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+16)]
-	|	ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+24)]
-	|	str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+24)]
-#endif
 }

 static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
-#ifdef __APPLE__
 	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
 	dasm_State **Dst = &data->dasm_state;
-	ir_type type = insn->type;
-	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg tmp_reg = ctx->regs[def][3];
-	int32_t offset;

-	IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
-		}
-		offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
-	}
-	|	ldr Rx(tmp_reg), [Rx(op2_reg), #offset]
-	if (def_reg  != IR_REG_NONE) {
-		ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0));
-	}
-	|	add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*))
-	|	str Rx(tmp_reg), [Rx(op2_reg), #offset]
-	if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
-		ir_emit_store(ctx, type, def, def_reg);
-	}
-#else
-	ir_backend_data *data = ctx->data;
-	dasm_State **Dst = &data->dasm_state;
-	ir_type type = insn->type;
-	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg tmp_reg = ctx->regs[def][3];
-	int32_t offset;
+	if (!cc->sysv_varargs) {
+		ir_type type = insn->type;
+		ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg tmp_reg = ctx->regs[def][3];
+		int32_t offset;

-	IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+		IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 		}
-		offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
-	}
-	if (IR_IS_TYPE_INT(type)) {
-		|	ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))]
-		|	cmp Rw(tmp_reg), wzr
-		|	bge >1
-		|	ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_top))]
-		|	sxtw Rx(tmp_reg), Rw(tmp_reg)
-		|	add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
+		|	ldr Rx(tmp_reg), [Rx(op2_reg), #offset]
 		if (def_reg  != IR_REG_NONE) {
-			|	ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)]
+			ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0));
 		}
-		|	add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*)
-		|	str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))]
-		|	b >2
-		|1:
-		|	ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
-		if (def_reg  != IR_REG_NONE) {
-			|	ldr Rx(def_reg), [Rx(tmp_reg)]
+		|	add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*))
+		|	str Rx(tmp_reg), [Rx(op2_reg), #offset]
+		if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
+			ir_emit_store(ctx, type, def, def_reg);
 		}
-		|	add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
-		|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
-		|2:
 	} else {
-		|	ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))]
-		|	cmp Rw(tmp_reg), wzr
-		|	bge >1
-		|	ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))]
-		|	sxtw Rx(tmp_reg), Rw(tmp_reg)
-		|	add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
-		if (def_reg  != IR_REG_NONE) {
-			|	ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)]
+		ir_type type = insn->type;
+		ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg tmp_reg = ctx->regs[def][3];
+		int32_t offset;
+
+		IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 		}
-		|	add Rw(tmp_reg), Rw(tmp_reg), #16
-		|	str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))]
-		|	b >2
-		|1:
-		|	ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
-		if (def_reg  != IR_REG_NONE) {
-			|	ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)]
+		if (IR_IS_TYPE_INT(type)) {
+			|	ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_offset))]
+			|	cmp Rw(tmp_reg), wzr
+			|	bge >1
+			|	ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_top))]
+			|	sxtw Rx(tmp_reg), Rw(tmp_reg)
+			|	add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
+			if (def_reg  != IR_REG_NONE) {
+				|	ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)]
+			}
+			|	add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*)
+			|	str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_offset))]
+			|	b >2
+			|1:
+			|	ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, stack))]
+			if (def_reg  != IR_REG_NONE) {
+				|	ldr Rx(def_reg), [Rx(tmp_reg)]
+			}
+			|	add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
+			|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, stack))]
+			|2:
+		} else {
+			|	ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_offset))]
+			|	cmp Rw(tmp_reg), wzr
+			|	bge >1
+			|	ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_top))]
+			|	sxtw Rx(tmp_reg), Rw(tmp_reg)
+			|	add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
+			if (def_reg  != IR_REG_NONE) {
+				|	ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)]
+			}
+			|	add Rw(tmp_reg), Rw(tmp_reg), #16
+			|	str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_offset))]
+			|	b >2
+			|1:
+			|	ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, stack))]
+			if (def_reg  != IR_REG_NONE) {
+				|	ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)]
+			}
+			|	add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
+			|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, stack))]
+			|2:
+		}
+		if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
+			ir_emit_store(ctx, type, def, def_reg);
 		}
-		|	add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
-		|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
-		|2:
 	}
-	if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
-		ir_emit_store(ctx, type, def, def_reg);
-	}
-#endif
 }

 static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
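
The negative gr_offset/vr_offset values stored by ir_emit_va_start() above follow AAPCS64: they count up toward zero as register-passed varargs are consumed, and a non-negative value means the next argument comes from va_list.stack. A worked example of the arithmetic behind the movn lines (the named-argument counts are illustrative):

    #include <stdio.h>

    int main(void)
    {
        int int_param_regs_count = 8, fp_param_regs_count = 8; /* from the SysV descriptor */
        int named_gp = 2, named_fp = 1;                        /* example prototype */

        int gr_offset = -(int)(sizeof(void*) * (int_param_regs_count - named_gp));
        int vr_offset = -(16 * (fp_param_regs_count - named_fp));

        printf("gr_offset = %d\n", gr_offset); /* -48: six 8-byte X slots remain */
        printf("vr_offset = %d\n", vr_offset); /* -112: seven 16-byte V slots remain */
        return 0;
    }
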
@@ -4958,19 +5035,23 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
 	}
 }

-static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int32_t *copy_stack_ptr)
+static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, const ir_call_conv_dsc *cc, int32_t *copy_stack_ptr)
 {
 	int j, n;
 	ir_type type;
 	int int_param = 0;
 	int fp_param = 0;
-	int int_reg_params_count = IR_REG_INT_ARGS;
-	int fp_reg_params_count = IR_REG_FP_ARGS;
 	int32_t used_stack = 0, copy_stack = 0;
-#ifdef __APPLE__
-	const ir_proto_t *proto = ir_call_proto(ctx, insn);
-	int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count;
-#endif
+
+	/* On APPLE, "unnamed" arguments are always passed on the stack */
+	int last_named_input;
+
+	if (!cc->sysv_varargs) {
+		const ir_proto_t *proto = ir_call_proto(ctx, insn);
+		last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count;
+	} else {
+		last_named_input = insn->inputs_count;
+	}

 	n = insn->inputs_count;
 	for (j = 3; j <= n; j++) {
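
As the hunk above shows, Darwin's convention sends all "unnamed" (variadic) arguments to the stack, so last_named_input is derived from the prototype: call arguments start at input 3 (op3), hence the + 2. A small standalone illustration of that cutoff (the counts are made up):

    #include <stdio.h>

    int main(void)
    {
        int params_count = 2;   /* named parameters in the vararg prototype */
        int inputs_count = 7;   /* control + callee + 5 argument inputs */
        int last_named_input = params_count + 2;

        for (int j = 3; j <= inputs_count; j++) {
            printf("input %d: %s\n", j,
                   j > last_named_input ? "unnamed -> stack" : "named");
        }
        return 0;
    }
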
@@ -4984,19 +5065,16 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int32_t *copy_stac
 			copy_stack = IR_ALIGNED_SIZE(copy_stack, align);
 			type = IR_ADDR;
 		}
-#ifdef __APPLE__
 		if (j > last_named_input) {
 			used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
-		} else
-#endif
-		if (IR_IS_TYPE_INT(type)) {
-			if (int_param >= int_reg_params_count) {
+		} else if (IR_IS_TYPE_INT(type)) {
+			if (int_param >= cc->int_param_regs_count) {
 				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
 			}
 			int_param++;
 		} else {
 			IR_ASSERT(IR_IS_TYPE_FP(type));
-			if (fp_param >= fp_reg_params_count) {
+			if (fp_param >= cc->fp_param_regs_count) {
 				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
 			}
 			fp_param++;
@@ -5008,7 +5086,7 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int32_t *copy_stac
 	return used_stack + copy_stack;
 }

-static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg)
+static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, const ir_call_conv_dsc *cc, ir_reg tmp_reg)
 {
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
@@ -5020,10 +5098,6 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 	int int_param = 0;
 	int fp_param = 0;
 	int count = 0;
-	int int_reg_params_count = IR_REG_INT_ARGS;
-	int fp_reg_params_count = IR_REG_FP_ARGS;
-	const int8_t *int_reg_params = _ir_int_reg_params;
-	const int8_t *fp_reg_params = _ir_fp_reg_params;
 	int32_t used_stack, copy_stack = 0, stack_offset = 0, copy_stack_offset = 0;
 	ir_copy *copies;
 	bool do_pass3 = 0;
@@ -5043,7 +5117,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 		// TODO: support for preallocated stack
 		used_stack = 0;
 	} else {
-		used_stack = ir_call_used_stack(ctx, insn, &copy_stack);
+		used_stack = ir_call_used_stack(ctx, insn, cc, &copy_stack);
 		/* Stack must be 16 byte aligned */
 		used_stack = IR_ALIGNED_SIZE(used_stack, 16);
 		if (ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size) {
@@ -5061,10 +5135,15 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 		}
 	}

-#ifdef __APPLE__
-	const ir_proto_t *proto = ir_call_proto(ctx, insn);
-	int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count;
-#endif
+	/* On APPLE, "unnamed" arguments are always passed on the stack */
+	int last_named_input;
+
+	if (!cc->sysv_varargs) {
+		const ir_proto_t *proto = ir_call_proto(ctx, insn);
+		last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count;
+	} else {
+		last_named_input = insn->inputs_count;
+	}

 	if (copy_stack) {
 		/* Copy struct arguments */
@@ -5085,17 +5164,17 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 				copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align);
 				src_reg = ctx->regs[arg][1];

-				|	add Rx(IR_REG_INT_ARG1), sp, #(used_stack - copy_stack_offset)
+				|	add Rx(ir_call_conv_default.int_param_regs[0]), sp, #(used_stack - copy_stack_offset)
 				if (src_reg != IR_REG_NONE) {
 					if (IR_REG_SPILLED(src_reg)) {
 						src_reg = IR_REG_NUM(src_reg);
 						ir_emit_load(ctx, IR_ADDR, src_reg, arg_insn->op1);
 					}
-					|	mov Rx(IR_REG_INT_ARG2), Rx(src_reg)
+					|	mov Rx(ir_call_conv_default.int_param_regs[1]), Rx(src_reg)
 				} else {
-					ir_emit_load(ctx, IR_ADDR, IR_REG_INT_ARG2, arg_insn->op1);
+					ir_emit_load(ctx, IR_ADDR, ir_call_conv_default.int_param_regs[1], arg_insn->op1);
 				}
-				ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_ARG3, size);
+				ir_emit_load_imm_int(ctx, IR_ADDR, ir_call_conv_default.int_param_regs[2], size);

 				if (aarch64_may_use_b(ctx->code_buffer, addr)) {
 					|	bl &addr
@@ -5117,18 +5196,15 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 		arg_insn = &ctx->ir_base[arg];
 		type = arg_insn->type;

-#ifdef __APPLE__
 		if (j > last_named_input) {
 			if (arg_insn->op == IR_ARGVAL) {
 				do_pass3 = 1;
 				continue;
 			}
 			dst_reg = IR_REG_NONE; /* pass argument through stack */
-		} else
-#endif
-		if (IR_IS_TYPE_INT(type)) {
-			if (int_param < int_reg_params_count) {
-				dst_reg = int_reg_params[int_param];
+		} else if (IR_IS_TYPE_INT(type)) {
+			if (int_param < cc->int_param_regs_count) {
+				dst_reg = cc->int_param_regs[int_param];
 			} else {
 				dst_reg = IR_REG_NONE; /* pass argument through stack */
 			}
@@ -5139,8 +5215,8 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 			}
 		} else {
 			IR_ASSERT(IR_IS_TYPE_FP(type));
-			if (fp_param < fp_reg_params_count) {
-				dst_reg = fp_reg_params[fp_param];
+			if (fp_param < cc->fp_param_regs_count) {
+				dst_reg = cc->fp_param_regs[fp_param];
 			} else {
 				dst_reg = IR_REG_NONE; /* pass argument through stack */
 			}
@@ -5149,7 +5225,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 		if (dst_reg != IR_REG_NONE) {
 			if (IR_IS_CONST_REF(arg) ||
 			    src_reg == IR_REG_NONE ||
-			    (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(IR_REGSET_PRESERVED, IR_REG_NUM(src_reg)))) {
+			    (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(cc->preserved_regs, IR_REG_NUM(src_reg)))) {
 				/* delay CONST->REG and MEM->REG moves to third pass */
 				do_pass3 = 1;
 			} else {
@@ -5201,14 +5277,11 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 				copy_stack_offset += size;
 				align = IR_MAX((int)sizeof(void*), align);
 				copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align);
-#ifdef __APPLE__
 				if (j > last_named_input) {
 					|	add Rx(tmp_reg), sp, #(used_stack - copy_stack_offset)
 					ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg);
-				} else
-#endif
-				if (int_param < int_reg_params_count) {
-					dst_reg = int_reg_params[int_param];
+				} else if (int_param < cc->int_param_regs_count) {
+					dst_reg = cc->int_param_regs[int_param];
 					|	add Rx(dst_reg), sp, #(used_stack - copy_stack_offset)
 				} else {
 					|	add Rx(tmp_reg), sp, #(used_stack - copy_stack_offset)
@@ -5218,22 +5291,19 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 				int_param++;
 				continue;
 			}
-#ifdef __APPLE__
 			if (j > last_named_input) {
 				dst_reg = IR_REG_NONE; /* pass argument through stack */
-			} else
-#endif
-			if (IR_IS_TYPE_INT(type)) {
-				if (int_param < int_reg_params_count) {
-					dst_reg = int_reg_params[int_param];
+			} else if (IR_IS_TYPE_INT(type)) {
+				if (int_param < cc->int_param_regs_count) {
+					dst_reg = cc->int_param_regs[int_param];
 				} else {
 					dst_reg = IR_REG_NONE; /* argument already passed through stack */
 				}
 				int_param++;
 			} else {
 				IR_ASSERT(IR_IS_TYPE_FP(type));
-				if (fp_param < fp_reg_params_count) {
-					dst_reg = fp_reg_params[fp_param];
+				if (fp_param < cc->fp_param_regs_count) {
+					dst_reg = cc->fp_param_regs[fp_param];
 				} else {
 					dst_reg = IR_REG_NONE; /* argument already passed through stack */
 				}
@@ -5242,7 +5312,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 			if (dst_reg != IR_REG_NONE) {
 				if (IR_IS_CONST_REF(arg) ||
 				    src_reg == IR_REG_NONE ||
-				    (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(IR_REGSET_PRESERVED, IR_REG_NUM(src_reg)))) {
+				    (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(cc->preserved_regs, IR_REG_NUM(src_reg)))) {
 					if (IR_IS_CONST_REF(arg) && IR_IS_TYPE_INT(type)) {
 						if (ir_type_size[type] == 1) {
 							type = IR_ADDR;
@@ -5282,7 +5352,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 	return used_stack;
 }

-static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack)
+static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, const ir_call_conv_dsc *cc, int32_t used_stack)
 {
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
@@ -5317,27 +5387,27 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used
 		if (IR_IS_TYPE_INT(insn->type)) {
 			def_reg = IR_REG_NUM(ctx->regs[def][0]);
 			if (def_reg != IR_REG_NONE) {
-				if (def_reg != IR_REG_INT_RET1) {
-					ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
+				if (def_reg != cc->int_ret_reg) {
+					ir_emit_mov(ctx, insn->type, def_reg, cc->int_ret_reg);
 				}
 				if (IR_REG_SPILLED(ctx->regs[def][0])) {
 					ir_emit_store(ctx, insn->type, def, def_reg);
 				}
 			} else if (ctx->use_lists[def].count > 1) {
-				ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1);
+				ir_emit_store(ctx, insn->type, def, cc->int_ret_reg);
 			}
 		} else {
 			IR_ASSERT(IR_IS_TYPE_FP(insn->type));
 			def_reg = IR_REG_NUM(ctx->regs[def][0]);
 			if (def_reg != IR_REG_NONE) {
-				if (def_reg != IR_REG_FP_RET1) {
-					ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1);
+				if (def_reg != cc->fp_ret_reg) {
+					ir_emit_fp_mov(ctx, insn->type, def_reg, cc->fp_ret_reg);
 				}
 				if (IR_REG_SPILLED(ctx->regs[def][0])) {
 					ir_emit_store(ctx, insn->type, def, def_reg);
 				}
 			} else if (ctx->use_lists[def].count > 1) {
-				ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1);
+				ir_emit_store(ctx, insn->type, def, cc->fp_ret_reg);
 			}
 		}
 	}
@@ -5345,18 +5415,22 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used

 static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
-	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
-	ir_emit_call_ex(ctx, def, insn, used_stack);
+	const ir_proto_t *proto = ir_call_proto(ctx, insn);
+	const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
+	int32_t used_stack = ir_emit_arguments(ctx, def, insn, cc, ctx->regs[def][1]);
+	ir_emit_call_ex(ctx, def, insn, cc, used_stack);
 }

 static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
-	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
+	const ir_proto_t *proto = ir_call_proto(ctx, insn);
+	const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
+	int32_t used_stack = ir_emit_arguments(ctx, def, insn, cc, ctx->regs[def][1]);

 	if (used_stack != 0) {
-		ir_emit_call_ex(ctx, def, insn, used_stack);
+		ir_emit_call_ex(ctx, def, insn, cc, used_stack);
 		ir_emit_return_void(ctx);
 		return;
 	}
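
Both ir_emit_call() and ir_emit_tailcall() now resolve the descriptor from the callee prototype's flag byte, falling back to IR_CC_DEFAULT when no prototype is attached. The real ir_get_call_conv_dsc() is outside this diff; a hypothetical standalone model of the id-to-descriptor dispatch (the switch body and cc_name() are this sketch's own assumptions):

    #include <stdint.h>
    #include <stdio.h>

    #define IR_CALL_CONV_MASK    0x0f
    #define IR_CC_DEFAULT        0x00
    #define IR_CC_PRESERVE_NONE  0x03
    #define IR_CC_AARCH64_SYSV   0x08
    #define IR_CC_AARCH64_DARWIN 0x09

    static const char *cc_name(uint32_t flags)
    {
        switch (flags & IR_CALL_CONV_MASK) {
            case IR_CC_AARCH64_SYSV:   return "ir_call_conv_aarch64_sysv";
            case IR_CC_AARCH64_DARWIN: return "ir_call_conv_aarch64_darwin";
            case IR_CC_PRESERVE_NONE:  return "ir_call_conv_aarch64_preserve_none";
            default:                   return "ir_call_conv_default (per-platform)";
        }
    }

    int main(void)
    {
        printf("%s\n", cc_name(IR_CC_AARCH64_DARWIN));
        printf("%s\n", cc_name(IR_CC_DEFAULT));
        return 0;
    }
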
@@ -5578,15 +5652,23 @@ static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp)
 			case IR_GT:
 				|	bgt &addr
 				break;
+			case IR_ULT:
+				|	blt &addr
+				break;
+			case IR_UGE:
+				|	bhs &addr
+				break;
+			case IR_ULE:
+				|	ble &addr
+				break;
+			case IR_UGT:
+				|	bhi &addr
+				break;
 			case IR_ORDERED:
 				|	bvc &addr
 				break;
 			case IR_UNORDERED:
 				|	bvs &addr
-//			case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
-//			case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
-//			case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
-//			case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break;
 		}
 	}
 }
@@ -5660,7 +5742,11 @@ static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *i
 	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);

 	if (insn->op == IR_GUARD) {
-		op ^= 1; // reverse
+		if (op == IR_EQ || op == IR_NE || op == IR_ORDERED || op == IR_UNORDERED) {
+			op ^= 1; // reverse
+		} else {
+			op ^= 5; // reverse
+		}
 	}
 	ir_emit_guard_jcc(ctx, op, addr, 0);
 }
@@ -5746,6 +5832,7 @@ static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
 	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = &ir_call_conv_default;
 	dasm_State **Dst = &data->dasm_state;
 	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);

@@ -5785,10 +5872,10 @@ static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	|	stp x2, x3, [sp, #-16]!
 	|	stp x0, x1, [sp, #-16]!

-	|	mov Rx(IR_REG_INT_ARG2), sp
-	|	add Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_ARG2), #(32*8+32*8)
-	|	str Rx(IR_REG_INT_ARG1), [sp, #(31*8)]
-	|	mov Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_TMP)
+	|	mov Rx(cc->int_param_regs[1]), sp
+	|	add Rx(cc->int_param_regs[0]), Rx(cc->int_param_regs[1]), #(32*8+32*8)
+	|	str Rx(cc->int_param_regs[0]), [sp, #(31*8)]
+	|	mov Rx(cc->int_param_regs[0]), Rx(IR_REG_INT_TMP)

 	if (IR_IS_CONST_REF(insn->op2)) {
 		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);
@@ -5805,8 +5892,8 @@ static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)

 	|	add sp, sp, #(32*8+32*8)

-	if (def_reg != IR_REG_INT_RET1) {
-		ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
+	if (def_reg != cc->int_ret_reg) {
+		ir_emit_mov(ctx, insn->type, def_reg, cc->int_ret_reg);
 	}
 	if (IR_REG_SPILLED(ctx->regs[def][0])) {
 		ir_emit_store(ctx, insn->type, def, def_reg);
@@ -5852,11 +5939,8 @@ static void ir_emit_load_params(ir_ctx *ctx)
 	int fp_param_num = 0;
 	ir_reg src_reg;
 	ir_reg dst_reg;
-	// TODO: Calling convention specific
-	int int_reg_params_count = IR_REG_INT_ARGS;
-	int fp_reg_params_count = IR_REG_FP_ARGS;
-	const int8_t *int_reg_params = _ir_int_reg_params;
-	const int8_t *fp_reg_params = _ir_fp_reg_params;
+	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
 	int32_t stack_offset = 0;
 	int32_t stack_start = ctx->stack_frame_size;

@@ -5866,15 +5950,15 @@ static void ir_emit_load_params(ir_ctx *ctx)
 		insn = &ctx->ir_base[use];
 		if (insn->op == IR_PARAM) {
 			if (IR_IS_TYPE_INT(insn->type)) {
-				if (int_param_num < int_reg_params_count) {
-					src_reg = int_reg_params[int_param_num];
+				if (int_param_num < cc->int_param_regs_count) {
+					src_reg = cc->int_param_regs[int_param_num];
 				} else {
 					src_reg = IR_REG_NONE;
 				}
 				int_param_num++;
 			} else {
-				if (fp_param_num < fp_reg_params_count) {
-					src_reg = fp_reg_params[fp_param_num];
+				if (fp_param_num < cc->fp_param_regs_count) {
+					src_reg = cc->fp_param_regs[fp_param_num];
 				} else {
 					src_reg = IR_REG_NONE;
 				}
@@ -5914,10 +5998,9 @@ static ir_reg ir_get_free_reg(ir_type type, ir_regset available)
 	return IR_REGSET_FIRST(available);
 }

-static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
+static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to, void *dessa_from_block)
 {
-	ir_backend_data *data = ctx->data;
-	ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end;
+	ir_ref ref = ctx->cfg_blocks[(intptr_t)dessa_from_block].end;

 	if (to == 0) {
 		if (IR_IS_TYPE_INT(type)) {
@@ -5953,11 +6036,8 @@ static void ir_fix_param_spills(ir_ctx *ctx)
 	int int_param_num = 0;
 	int fp_param_num = 0;
 	ir_reg src_reg;
-	// TODO: Calling convention specific
-	int int_reg_params_count = IR_REG_INT_ARGS;
-	int fp_reg_params_count = IR_REG_FP_ARGS;
-	const int8_t *int_reg_params = _ir_int_reg_params;
-	const int8_t *fp_reg_params = _ir_fp_reg_params;
+	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
 	int32_t stack_offset = 0;
 	int32_t stack_start = ctx->stack_frame_size;

@@ -5967,15 +6047,15 @@ static void ir_fix_param_spills(ir_ctx *ctx)
 		insn = &ctx->ir_base[use];
 		if (insn->op == IR_PARAM) {
 			if (IR_IS_TYPE_INT(insn->type)) {
-				if (int_param_num < int_reg_params_count) {
-					src_reg = int_reg_params[int_param_num];
+				if (int_param_num < cc->int_param_regs_count) {
+					src_reg = cc->int_param_regs[int_param_num];
 				} else {
 					src_reg = IR_REG_NONE;
 				}
 				int_param_num++;
 			} else {
-				if (fp_param_num < fp_reg_params_count) {
-					src_reg = fp_reg_params[fp_param_num];
+				if (fp_param_num < cc->fp_param_regs_count) {
+					src_reg = cc->fp_param_regs[fp_param_num];
 				} else {
 					src_reg = IR_REG_NONE;
 				}
@@ -5999,8 +6079,8 @@ static void ir_fix_param_spills(ir_ctx *ctx)
 		}
 	}

-	ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count);
-	ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count);
+	ctx->gp_reg_params = IR_MIN(int_param_num, cc->int_param_regs_count);
+	ctx->fp_reg_params = IR_MIN(fp_param_num, cc->fp_param_regs_count);
 	ctx->param_stack_size = stack_offset;
 }

@@ -6011,11 +6091,13 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 	ir_insn *insn;
 	ir_ref i, n, j, *p;
 	uint32_t *rule, insn_flags;
-	ir_backend_data *data = ctx->data;
 	ir_regset available = 0;
 	ir_target_constraints constraints;
 	uint32_t def_flags;
 	ir_reg reg;
+	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
+	ir_regset scratch = ir_scratch_regset[cc->scratch_reg - IR_REG_NUM];

 	ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
 	memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
@@ -6051,7 +6133,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 					 && *rule != IR_CMP_AND_BRANCH_FP
 					 && *rule != IR_GUARD_CMP_INT
 					 && *rule != IR_GUARD_CMP_FP) {
-						available = IR_REGSET_SCRATCH;
+						available = scratch;
 					}
 					if (ctx->vregs[i]) {
 						reg = constraints.def_reg;
@@ -6081,7 +6163,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 							if (insn->op == IR_PARAM && reg == IR_REG_NONE) {
 								ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
 							} else {
-								ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data);
+								ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type);
 							}
 						} else if (insn->op == IR_PARAM) {
 							IR_ASSERT(0 && "unexpected PARAM");
@@ -6092,7 +6174,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 						ir_ref n = use_list->count;

 						if (n > 0) {
-							int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data);
+							int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type);
 							ir_ref i, *p, use;
 							ir_insn *use_insn;

@@ -6147,10 +6229,13 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 									}
 								}
 								ctx->regs[i][constraints.tmp_regs[n].num] = reg;
-							} else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
-								available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
 							} else {
-								IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg);
+								reg = constraints.tmp_regs[n].reg;
+								if (reg >= IR_REG_NUM) {
+									available = IR_REGSET_DIFFERENCE(available, ir_scratch_regset[reg - IR_REG_NUM]);
+								} else {
+									IR_REGSET_EXCL(available, reg);
+								}
 							}
 						} while (n);
 					}
@@ -6186,8 +6271,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 			rule += n;
 		}
 		if (bb->flags & IR_BB_DESSA_MOVES) {
-			data->dessa_from_block = b;
-			ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps);
+			ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps, (void*)(intptr_t)b);
 		}
 	}

@@ -6204,8 +6288,11 @@ static void ir_preallocate_call_stack(ir_ctx *ctx)

 	for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) {
 		if (insn->op == IR_CALL) {
+			const ir_proto_t *proto = ir_call_proto(ctx, insn);
+			const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
 			int32_t copy_stack;
-			call_stack_size = ir_call_used_stack(ctx, insn, &copy_stack);
+
+			call_stack_size = ir_call_used_stack(ctx, insn, cc, &copy_stack);
 			if (call_stack_size > peak_call_stack_size) {
 				peak_call_stack_size = call_stack_size;
 			}
@@ -6237,11 +6324,14 @@ void ir_fix_stack_frame(ir_ctx *ctx)
 	}

 	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
-		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
-			additional_size += sizeof(void*) * IR_REG_INT_ARGS;
+		ir_backend_data *data = ctx->data;
+		const ir_call_conv_dsc *cc = data->ra_data.cc;
+
+		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) {
+			additional_size += sizeof(void*) * cc->int_param_regs_count;
 		}
-		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
-			additional_size += 16 * IR_REG_FP_ARGS;
+		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < cc->fp_param_regs_count) {
+			additional_size += 16 * cc->fp_param_regs_count;
 		}
 	}

@@ -6308,6 +6398,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 	ir_ref igoto_dup_ref = IR_UNUSED;
 	uint32_t igoto_dup_block = 0;

+	data.ra_data.cc = ir_get_call_conv_dsc(ctx->flags);
 	data.ra_data.unused_slot_4 = 0;
 	data.ra_data.unused_slot_2 = 0;
 	data.ra_data.unused_slot_1 = 0;
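
Note: the two aarch64 loops above (ir_emit_load_params and ir_fix_param_spills) now derive
parameter locations from the convention descriptor instead of the retired
IR_REG_INT_ARGS/IR_REG_FP_ARGS macros. A minimal sketch of the shared rule, assuming only
the ir_call_conv_dsc fields used in this commit (param_src_reg is a hypothetical helper,
not part of the patch):

    /* Return the register holding the next parameter, or IR_REG_NONE when
     * the convention has run out of registers of that class. */
    static ir_reg param_src_reg(const ir_call_conv_dsc *cc, bool is_int,
                                int *int_param_num, int *fp_param_num)
    {
        if (is_int) {
            int n = (*int_param_num)++;
            return n < cc->int_param_regs_count ? cc->int_param_regs[n] : IR_REG_NONE;
        } else {
            int n = (*fp_param_num)++;
            return n < cc->fp_param_regs_count ? cc->fp_param_regs[n] : IR_REG_NONE;
        }
    }

Parameters that come back as IR_REG_NONE are loaded from their stack slots.
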
diff --git a/ext/opcache/jit/ir/ir_aarch64.h b/ext/opcache/jit/ir/ir_aarch64.h
index 9da64b9249f..e0817f9b330 100644
--- a/ext/opcache/jit/ir/ir_aarch64.h
+++ b/ext/opcache/jit/ir/ir_aarch64.h
@@ -87,14 +87,15 @@ enum _ir_reg {
 	IR_GP_REGS(IR_GP_REG_ENUM)
 	IR_FP_REGS(IR_FP_REG_ENUM)
 	IR_REG_NUM,
+	IR_REG_ALL = IR_REG_NUM, /* special name for regset */
+	IR_REG_SET_1,            /* special name for regset */
+	IR_REG_SET_NUM,
 };

 #define IR_REG_GP_FIRST IR_REG_X0
 #define IR_REG_FP_FIRST IR_REG_V0
 #define IR_REG_GP_LAST  (IR_REG_FP_FIRST - 1)
 #define IR_REG_FP_LAST  (IR_REG_NUM - 1)
-#define IR_REG_SCRATCH  (IR_REG_NUM)        /* special name for regset */
-#define IR_REG_ALL      (IR_REG_NUM + 1)    /* special name for regset */

 #define IR_REGSET_64BIT 1

@@ -125,65 +126,4 @@ enum _ir_reg {
 #define IR_REG_LR  IR_REG_X30
 #define IR_REG_ZR  IR_REG_X31

-/* Calling Convention */
-#define IR_REG_INT_RET1 IR_REG_X0
-#define IR_REG_FP_RET1  IR_REG_V0
-#define IR_REG_INT_ARGS 8
-#define IR_REG_FP_ARGS  8
-#define IR_REG_INT_ARG1 IR_REG_X0
-#define IR_REG_INT_ARG2 IR_REG_X1
-#define IR_REG_INT_ARG3 IR_REG_X2
-#define IR_REG_INT_ARG4 IR_REG_X3
-#define IR_REG_INT_ARG5 IR_REG_X4
-#define IR_REG_INT_ARG6 IR_REG_X5
-#define IR_REG_INT_ARG7 IR_REG_X6
-#define IR_REG_INT_ARG8 IR_REG_X7
-#define IR_REG_FP_ARG1  IR_REG_V0
-#define IR_REG_FP_ARG2  IR_REG_V1
-#define IR_REG_FP_ARG3  IR_REG_V2
-#define IR_REG_FP_ARG4  IR_REG_V3
-#define IR_REG_FP_ARG5  IR_REG_V4
-#define IR_REG_FP_ARG6  IR_REG_V5
-#define IR_REG_FP_ARG7  IR_REG_V6
-#define IR_REG_FP_ARG8  IR_REG_V7
-#define IR_MAX_REG_ARGS 16
-#define IR_SHADOW_ARGS  0
-
-# define IR_REGSET_SCRATCH \
-	(IR_REGSET_INTERVAL(IR_REG_X0, IR_REG_X18) \
-	| IR_REGSET_INTERVAL(IR_REG_V0, IR_REG_V7) \
-	| IR_REGSET_INTERVAL(IR_REG_V16, IR_REG_V31))
-
-# define IR_REGSET_PRESERVED \
-	(IR_REGSET_INTERVAL(IR_REG_X19, IR_REG_X30) \
-	| IR_REGSET_INTERVAL(IR_REG_V8, IR_REG_V15))
-
-#ifndef __APPLE__
-typedef struct _ir_va_list {
-	void    *stack;
-	void    *gr_top;
-	void    *vr_top;
-	int32_t  gr_offset;
-	int32_t  vr_offset;
-} ir_va_list;
-#endif
-
-typedef struct _ir_tmp_reg {
-	union {
-		uint8_t num;
-		int8_t  reg;
-	};
-	uint8_t     type;
-	int8_t      start;
-	int8_t      end;
-} ir_tmp_reg;
-
-struct _ir_target_constraints {
-	int8_t      def_reg;
-	uint8_t     tmps_count;
-	uint8_t     hints_count;
-	ir_tmp_reg  tmp_regs[3];
-	int8_t      hints[IR_MAX_REG_ARGS + 3];
-};
-
 #endif /* IR_AARCH64_H */
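
Note: with IR_REG_SCRATCH removed, any register number at or above IR_REG_NUM now names a
register set rather than a single CPU register. A sketch of the lookup the allocator
performs (ir_pseudo_regset is hypothetical; ir_scratch_regset is the table this commit
declares in ir_private.h):

    /* IR_REG_ALL (== IR_REG_NUM) maps to index 0, IR_REG_SET_1 to index 1,
     * and so on; each entry is the regset clobbered by the corresponding
     * calling convention. */
    static ir_regset ir_pseudo_regset(int8_t reg)
    {
        IR_ASSERT(reg >= IR_REG_NUM && reg < IR_REG_SET_NUM);
        return ir_scratch_regset[reg - IR_REG_NUM];
    }
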
diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c
index 5cc732927d4..92962313d99 100644
--- a/ext/opcache/jit/ir/ir_dump.c
+++ b/ext/opcache/jit/ir/ir_dump.c
@@ -8,6 +8,14 @@
 #include "ir.h"
 #include "ir_private.h"

+#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
+# include "ir_x86.h"
+#elif defined(IR_TARGET_AARCH64)
+# include "ir_aarch64.h"
+#else
+# error "Unknown IR target"
+#endif
+
 void ir_dump(const ir_ctx *ctx, FILE *f)
 {
 	ir_ref i, j, n, ref, *p;
@@ -456,8 +464,8 @@ void ir_dump_live_ranges(const ir_ctx *ctx, FILE *f)
 		}
 	}
 #if 1
-	n = ctx->vregs_count + ir_regs_number() + 2;
-	for (i = ctx->vregs_count + 1; i <= n; i++) {
+	n = ctx->vregs_count + 1 + IR_REG_SET_NUM;
+	for (i = ctx->vregs_count + 1; i < n; i++) {
 		ir_live_interval *ival = ctx->live_intervals[i];

 		if (ival) {
diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c
index 847ca375b5b..a6dfde77f57 100644
--- a/ext/opcache/jit/ir/ir_emit.c
+++ b/ext/opcache/jit/ir/ir_emit.c
@@ -63,18 +63,7 @@ typedef struct _ir_dessa_copy {
 	int32_t to;   /* [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg  */
 } ir_dessa_copy;

-#if IR_REG_INT_ARGS
-static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS];
-#else
-static const int8_t *_ir_int_reg_params;
-#endif
-#if IR_REG_FP_ARGS
-static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS];
-#else
-static const int8_t *_ir_fp_reg_params;
-#endif
-
-static const ir_proto_t *ir_call_proto(const ir_ctx *ctx, ir_insn *insn)
+const ir_proto_t *ir_call_proto(const ir_ctx *ctx, const ir_insn *insn)
 {
 	if (IR_IS_CONST_REF(insn->op2)) {
 		const ir_insn *func = &ctx->ir_base[insn->op2];
@@ -90,49 +79,6 @@ static const ir_proto_t *ir_call_proto(const ir_ctx *ctx, ir_insn *insn)
 	return NULL;
 }

-#ifdef IR_HAVE_FASTCALL
-static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS];
-static const int8_t *_ir_fp_fc_reg_params;
-
-bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
-{
-	if (sizeof(void*) == 4) {
-		if (IR_IS_CONST_REF(insn->op2)) {
-			const ir_insn *func = &ctx->ir_base[insn->op2];
-
-			if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
-				if (func->proto) {
-					const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func->proto);
-
-					return (proto->flags & IR_FASTCALL_FUNC) != 0;
-				}
-			}
-		} else if (ctx->ir_base[insn->op2].op == IR_PROTO) {
-			const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2);
-
-			return (proto->flags & IR_FASTCALL_FUNC) != 0;
-		}
-		return 0;
-	}
-	return 0;
-}
-#else
-bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
-{
-	return 0;
-}
-#endif
-
-bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn)
-{
-	const ir_proto_t *proto = ir_call_proto(ctx, insn);
-
-	if (proto) {
-		return (proto->flags & IR_VARARG_FUNC) != 0;
-	}
-	return 0;
-}
-
 IR_ALWAYS_INLINE uint32_t ir_rule(const ir_ctx *ctx, ir_ref ref)
 {
 	IR_ASSERT(!IR_IS_CONST_REF(ref));
@@ -153,19 +99,7 @@ static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref)
 	ir_insn *insn;
 	int int_param = 0;
 	int fp_param = 0;
-	int int_reg_params_count = IR_REG_INT_ARGS;
-	int fp_reg_params_count = IR_REG_FP_ARGS;
-	const int8_t *int_reg_params = _ir_int_reg_params;
-	const int8_t *fp_reg_params = _ir_fp_reg_params;
-
-#ifdef IR_HAVE_FASTCALL
-	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
-		int_reg_params_count = IR_REG_INT_FCARGS;
-		fp_reg_params_count = IR_REG_FP_FCARGS;
-		int_reg_params = _ir_int_fc_reg_params;
-		fp_reg_params = _ir_fp_fc_reg_params;
-	}
-#endif
+	const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(ctx->flags);

 	for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
 		use = *p;
@@ -173,70 +107,48 @@ static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref)
 		if (insn->op == IR_PARAM) {
 			if (IR_IS_TYPE_INT(insn->type)) {
 				if (use == ref) {
-#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86)
-					if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
+					if (ctx->value_params && ctx->value_params[insn->op3 - 1].align && cc->pass_struct_by_val) {
 						/* struct passed by value on stack */
 						return IR_REG_NONE;
-					} else
-#endif
-					if (int_param < int_reg_params_count) {
-						return int_reg_params[int_param];
+					} else if (int_param < cc->int_param_regs_count) {
+						return cc->int_param_regs[int_param];
 					} else {
 						return IR_REG_NONE;
 					}
-#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86)
-				} else {
-					if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
-						/* struct passed by value on stack */
-						continue;
-					}
-#endif
+				} else if (ctx->value_params && ctx->value_params[insn->op3 - 1].align && cc->pass_struct_by_val) {
+					/* struct passed by value on stack */
+					continue;
 				}
 				int_param++;
-#ifdef _WIN64
-				/* WIN64 calling convention use common couter for int and fp registers */
-				fp_param++;
-#endif
+				if (cc->shadow_param_regs) {
+					fp_param++;
+				}
 			} else {
 				IR_ASSERT(IR_IS_TYPE_FP(insn->type));
 				if (use == ref) {
-					if (fp_param < fp_reg_params_count) {
-						return fp_reg_params[fp_param];
+					if (fp_param < cc->fp_param_regs_count) {
+						return cc->fp_param_regs[fp_param];
 					} else {
 						return IR_REG_NONE;
 					}
 				}
 				fp_param++;
-#ifdef _WIN64
-				/* WIN64 calling convention use common couter for int and fp registers */
-				int_param++;
-#endif
+				if (cc->shadow_param_regs) {
+					int_param++;
+				}
 			}
 		}
 	}
 	return IR_REG_NONE;
 }

-static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs)
+static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, const ir_call_conv_dsc *cc, int8_t *regs)
 {
 	int j, n;
 	ir_type type;
 	int int_param = 0;
 	int fp_param = 0;
 	int count = 0;
-	int int_reg_params_count = IR_REG_INT_ARGS;
-	int fp_reg_params_count = IR_REG_FP_ARGS;
-	const int8_t *int_reg_params = _ir_int_reg_params;
-	const int8_t *fp_reg_params = _ir_fp_reg_params;
-
-#ifdef IR_HAVE_FASTCALL
-	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
-		int_reg_params_count = IR_REG_INT_FCARGS;
-		fp_reg_params_count = IR_REG_FP_FCARGS;
-		int_reg_params = _ir_int_fc_reg_params;
-		fp_reg_params = _ir_fp_fc_reg_params;
-	}
-#endif

 	n = insn->inputs_count;
 	n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
@@ -244,27 +156,25 @@ static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs
 		ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)];
 		type = arg->type;
 		if (IR_IS_TYPE_INT(type)) {
-			if (int_param < int_reg_params_count && arg->op != IR_ARGVAL) {
-				regs[j] = int_reg_params[int_param];
+			if (int_param < cc->int_param_regs_count && arg->op != IR_ARGVAL) {
+				regs[j] = cc->int_param_regs[int_param];
 				count = j + 1;
 				int_param++;
-#ifdef _WIN64
-				/* WIN64 calling convention use common couter for int and fp registers */
-				fp_param++;
-#endif
+				if (cc->shadow_param_regs) {
+					fp_param++;
+				}
 			} else {
 				regs[j] = IR_REG_NONE;
 			}
 		} else {
 			IR_ASSERT(IR_IS_TYPE_FP(type));
-			if (fp_param < fp_reg_params_count) {
-				regs[j] = fp_reg_params[fp_param];
+			if (fp_param < cc->fp_param_regs_count) {
+				regs[j] = cc->fp_param_regs[fp_param];
 				count = j + 1;
 				fp_param++;
-#ifdef _WIN64
-				/* WIN64 calling convention use common couter for int and fp registers */
-				int_param++;
-#endif
+				if (cc->shadow_param_regs) {
+					int_param++;
+				}
 			} else {
 				regs[j] = IR_REG_NONE;
 			}
@@ -419,7 +329,6 @@ static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb);

 typedef struct _ir_common_backend_data {
     ir_reg_alloc_data  ra_data;
-	uint32_t           dessa_from_block;
 	dasm_State        *dasm_state;
 	ir_bitset          emit_constants;
 } ir_common_backend_data;
@@ -1071,3 +980,32 @@ int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref)
 	IR_ASSERT(offset != -1);
 	return IR_SPILL_POS_TO_OFFSET(offset);
 }
+
+const ir_call_conv_dsc *ir_get_call_conv_dsc(uint32_t flags)
+{
+#ifdef IR_TARGET_X86
+	if ((flags & IR_CALL_CONV_MASK) == IR_CC_FASTCALL) {
+		return &ir_call_conv_x86_fastcall;
+	}
+#elif defined(IR_TARGET_X64)
+	switch (flags & IR_CALL_CONV_MASK) {
+		case IR_CC_DEFAULT:              return &ir_call_conv_default;
+		case IR_CC_FASTCALL:             return &ir_call_conv_default;
+		case IR_CC_PRESERVE_NONE:        return &ir_call_conv_x86_64_preserve_none;
+		case IR_CC_X86_64_SYSV:          return &ir_call_conv_x86_64_sysv;
+		case IR_CC_X86_64_MS:            return &ir_call_conv_x86_64_ms;
+		default: break;
+	}
+#elif defined(IR_TARGET_AARCH64)
+	switch (flags & IR_CALL_CONV_MASK) {
+		case IR_CC_DEFAULT:              return &ir_call_conv_default;
+		case IR_CC_FASTCALL:             return &ir_call_conv_default;
+		case IR_CC_PRESERVE_NONE:        return &ir_call_conv_aarch64_preserve_none;
+		case IR_CC_AARCH64_SYSV:         return &ir_call_conv_aarch64_sysv;
+		case IR_CC_AARCH64_DARWIN:       return &ir_call_conv_aarch64_darwin;
+		default: break;
+	}
+#endif
+	IR_ASSERT((flags & IR_CALL_CONV_MASK) == IR_CC_DEFAULT || (flags & IR_CALL_CONV_MASK) == IR_CC_BUILTIN);
+	return &ir_call_conv_default;
+}
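
Note: a call site now resolves its descriptor from the prototype flags rather than from
target #ifdefs. A usage sketch built from the helpers in this file (nothing here beyond
what the commit introduces):

    /* Pick per-argument registers for a call under the callee's own ABI. */
    const ir_proto_t *proto = ir_call_proto(ctx, insn);
    const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
    int8_t regs[IR_MAX_REG_ARGS + 3];
    regs[2] = IR_REG_NONE;  /* op2 is the call target, no argument hint */
    int count = ir_get_args_regs(ctx, insn, cc, regs);

On conventions with shadow_param_regs set (WIN64), every argument consumes a position in
both the integer and the FP sequence, which is why the counters in ir_get_args_regs
advance in pairs.
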
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
index e6486ba64a1..67c97611eaa 100644
--- a/ext/opcache/jit/ir/ir_gcm.c
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -361,20 +361,20 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
 				while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
 					j = ctx->cfg_blocks[j].idom;
 				}
-				clone = ir_hashtab_find(&hash, j);
-				if (clone == IR_INVALID_VAL) {
-					clone = clones_count++;
-					ir_hashtab_add(&hash, j, clone);
-					clones[clone].block = j;
-					clones[clone].use_count = 0;
-					clones[clone].use = -1;
-				}
-				uses[uses_count].ref = use;
-				uses[uses_count].block = i;
-				uses[uses_count].next = clones[clone].use;
-				clones[clone].use_count++;
-				clones[clone].use = uses_count++;
 			}
+			clone = ir_hashtab_find(&hash, j);
+			if (clone == IR_INVALID_VAL) {
+				clone = clones_count++;
+				ir_hashtab_add(&hash, j, clone);
+				clones[clone].block = j;
+				clones[clone].use_count = 0;
+				clones[clone].use = -1;
+			}
+			uses[uses_count].ref = use;
+			uses[uses_count].block = i;
+			uses[uses_count].next = clones[clone].use;
+			clones[clone].use_count++;
+			clones[clone].use = uses_count++;
 		}
 	}

@@ -413,7 +413,8 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
 	n = ctx->use_lists[ref].refs;
 	for (i = 0; i < clones_count; i++) {
 		clone = clones[i].ref;
-		if (clones[i].use_count == 1
+		if (clones[i].block
+		 && clones[i].use_count == 1
 		 && ctx->cfg_blocks[clones[i].block].loop_depth >= ctx->cfg_blocks[uses[clones[i].use].block].loop_depth) {
 			/* TOTALLY_USEFUL block may be a head of a diamond above the real usage.
 			 * Sink it down to the real usage block.
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h
index dbacc3967d0..acd7e41a3e9 100644
--- a/ext/opcache/jit/ir/ir_private.h
+++ b/ext/opcache/jit/ir/ir_private.h
@@ -1015,6 +1015,8 @@ IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn)
 #define IR_HAS_FP_RET_SLOT     (1<<10)
 #define IR_16B_FRAME_ALIGNMENT (1<<11)
 #define IR_HAS_BLOCK_ADDR      (1<<12)
+#define IR_PREALLOCATED_STACK  (1<<13)
+

 /* Temporary: MEM2SSA -> SCCP */
 #define IR_MEM2SSA_VARS        (1<<25)
@@ -1275,9 +1277,9 @@ struct _ir_live_interval {
 	ir_live_interval *list_next; /* linked list of active, inactive or unhandled intervals */
 };

-typedef int (*emit_copy_t)(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to);
+typedef int (*emit_copy_t)(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to, void *data);

-int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy);
+int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy, void *data);

 #if defined(IR_REGSET_64BIT)

@@ -1363,16 +1365,44 @@ IR_ALWAYS_INLINE ir_reg ir_regset_pop_first(ir_regset *set)

 #endif /* defined(IR_REGSET_64BIT) */

+/*** Calling Conventions ***/
+#if defined(IR_REGSET_64BIT)
+struct _ir_call_conv_dsc {
+	bool          cleanup_stack_by_callee: 1; /* use "retn $size" to return */
+	bool          pass_struct_by_val: 1;      /* pass aggregates by value; otherwise their copies are passed by ref */
+	bool          sysv_varargs: 1;            /* Use SysV varargs ABI */
+	bool          shadow_param_regs: 1;       /* registers for INT and FP parameters shadow each other */
+	                                          /* (WIN64: the 1st arg is passed in %rcx/%xmm0, the 2nd in %rdx/%xmm1) */
+	uint8_t       shadow_store_size;          /* reserved stack space for arguments passed in registers (WIN64) */
+	uint8_t       int_param_regs_count;       /* number of registers for INT parameters */
+	uint8_t       fp_param_regs_count;        /* number of registers for FP parameters */
+	int8_t        int_ret_reg;                /* register to return INT value */
+	int8_t        fp_ret_reg;                 /* register to return FP value */
+	int8_t        fp_varargs_reg;             /* register used to pass the number of FP register arguments to a vararg function */
+	int8_t        scratch_reg;                /* pseudo-register referring to the scratch regset (clobbered by a call) */
+	const int8_t *int_param_regs;             /* registers for INT parameters */
+	const int8_t *fp_param_regs;              /* registers for FP parameters */
+	ir_regset     preserved_regs;             /* preserved or callee-saved registers */
+};
+
+extern const ir_regset ir_scratch_regset[];
+#endif
+
+typedef struct _ir_call_conv_dsc ir_call_conv_dsc;
+
+const ir_call_conv_dsc *ir_get_call_conv_dsc(uint32_t flags);
+
 /*** IR Register Allocation ***/
 /* Flags for ctx->regs[][] (low bits are used for register number itself) */
 typedef struct _ir_reg_alloc_data {
+	const ir_call_conv_dsc *cc;
 	int32_t unused_slot_4;
 	int32_t unused_slot_2;
 	int32_t unused_slot_1;
 	ir_live_interval **handled;
 } ir_reg_alloc_data;

-int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data);
+int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type);

 IR_ALWAYS_INLINE void ir_set_alocated_reg(ir_ctx *ctx, ir_ref ref, int op_num, int8_t reg)
 {
@@ -1406,9 +1436,27 @@ IR_ALWAYS_INLINE int8_t ir_get_alocated_reg(const ir_ctx *ctx, ir_ref ref, int o

 #define IR_RULE_MASK 0xff

+#define IR_MAX_REG_ARGS 64
+
 extern const char *ir_rule_name[];

-typedef struct _ir_target_constraints ir_target_constraints;
+typedef struct _ir_tmp_reg {
+	union {
+		uint8_t num;
+		int8_t  reg;
+	};
+	uint8_t     type;
+	int8_t      start;
+	int8_t      end;
+} ir_tmp_reg;
+
+typedef struct {
+	int8_t      def_reg;
+	uint8_t     tmps_count;
+	uint8_t     hints_count;
+	ir_tmp_reg  tmp_regs[3];
+	int8_t      hints[IR_MAX_REG_ARGS + 3];
+} ir_target_constraints;

 #define IR_TMP_REG(_num, _type, _start, _end) \
 	(ir_tmp_reg){.num=(_num), .type=(_type), .start=(_start), .end=(_end)}
@@ -1421,8 +1469,8 @@ void ir_fix_stack_frame(ir_ctx *ctx);

 /* Utility */
 ir_type ir_get_return_type(ir_ctx *ctx);
-bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn);
-bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn);
+const ir_proto_t *ir_call_proto(const ir_ctx *ctx, const ir_insn *insn);
+void ir_print_call_conv(uint32_t flags, FILE *f);

 //#define IR_BITSET_LIVENESS

diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c
index 2e8a8e3f34f..23f44482cb8 100644
--- a/ext/opcache/jit/ir/ir_ra.c
+++ b/ext/opcache/jit/ir/ir_ra.c
@@ -610,8 +610,8 @@ int ir_compute_live_ranges(ir_ctx *ctx)
 	len = ir_bitset_len(ctx->vregs_count + 1);
 	bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t));

-	/* vregs + tmp + fixed + SRATCH + ALL */
-	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
+	/* vregs + tmp + fixed + ALL + SCRATCH_N */
+	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_SET_NUM, sizeof(ir_live_interval*));

 #ifdef IR_DEBUG
 	visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
@@ -1265,8 +1265,8 @@ int ir_compute_live_ranges(ir_ctx *ctx)
 	/* Compute Live Ranges */
 	ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;

-	/* vregs + tmp + fixed + SRATCH + ALL */
-	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
+	/* vregs + tmp + fixed + ALL + SCRATCH_N */
+	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_SET_NUM, sizeof(ir_live_interval*));

     if (!ctx->arena) {
 		ctx->arena = ir_arena_create(16 * 1024);
@@ -2037,8 +2037,8 @@ int ir_coalesce(ir_ctx *ctx)
 		n--;
 		if (n != ctx->vregs_count) {
 			j = ctx->vregs_count - n;
-			/* vregs + tmp + fixed + SRATCH + ALL */
-			for (i = n + 1; i <= n + IR_REG_NUM + 2; i++) {
+			/* vregs + tmp + fixed + ALL + SCRATCH_N */
+			for (i = n + 1; i <= n + IR_REG_SET_NUM; i++) {
 				ctx->live_intervals[i] = ctx->live_intervals[i + j];
 				if (ctx->live_intervals[i]) {
 					ctx->live_intervals[i]->vreg = i;
@@ -2105,7 +2105,7 @@ int ir_compute_dessa_moves(ir_ctx *ctx)
  * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
  * pp. 114-125, doi: 10.1109/CGO.2009.19.
  */
-int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
+int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy, void *data)
 {
 	uint32_t succ, k, n = 0;
 	ir_block *bb, *succ_bb;
@@ -2180,7 +2180,7 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
 			while ((b = ir_bitset_pop_first(ready, len)) >= 0) {
 				a = pred[b];
 				c = loc[a];
-				emit_copy(ctx, ctx->ir_base[dst[b]].type, src[c], dst[b]);
+				emit_copy(ctx, ctx->ir_base[dst[b]].type, src[c], dst[b], data);
 				ir_bitset_excl(todo, b);
 				loc[a] = b;
 				src[b] = dst[b];
@@ -2193,7 +2193,7 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
 				break;
 			}
 			IR_ASSERT(b != loc[pred[b]]);
-			emit_copy(ctx, ctx->ir_base[src[b]].type, src[b], 0);
+			emit_copy(ctx, ctx->ir_base[src[b]].type, src[b], 0, data);
 			loc[b] = 0;
 			ir_bitset_incl(ready, b);
 		}
@@ -2211,7 +2211,7 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
 			if (insn->op == IR_PHI) {
 				input = ir_insn_op(insn, k);
 				if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) {
-					emit_copy(ctx, insn->type, input, ref);
+					emit_copy(ctx, insn->type, input, ref, data);
 				}
 			}
 		}
@@ -2501,8 +2501,9 @@ static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, ir_live_interval *iva
 	return child;
 }

-static int32_t ir_allocate_small_spill_slot(ir_ctx *ctx, size_t size, ir_reg_alloc_data *data)
+static int32_t ir_allocate_small_spill_slot(ir_ctx *ctx, size_t size)
 {
+	ir_reg_alloc_data *data = ctx->data;
 	int32_t ret;

 	IR_ASSERT(size == 0 || size == 1 || size == 2 || size == 4 || size == 8);
@@ -2601,12 +2602,12 @@ static int32_t ir_allocate_small_spill_slot(ir_ctx *ctx, size_t size, ir_reg_all
 	return ret;
 }

-int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data)
+int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type)
 {
-	return ir_allocate_small_spill_slot(ctx, ir_type_size[type], data);
+	return ir_allocate_small_spill_slot(ctx, ir_type_size[type]);
 }

-static int32_t ir_allocate_big_spill_slot(ir_ctx *ctx, int32_t size, ir_reg_alloc_data *data)
+static int32_t ir_allocate_big_spill_slot(ir_ctx *ctx, int32_t size)
 {
 	int32_t ret;

@@ -2616,7 +2617,7 @@ static int32_t ir_allocate_big_spill_slot(ir_ctx *ctx, int32_t size, ir_reg_allo
 		} else if (size > 4 && size < 8) {
 			size = 8;
 		}
-		return ir_allocate_small_spill_slot(ctx, size, data);
+		return ir_allocate_small_spill_slot(ctx, size);
 	}

 	/* Align stack allocated data to 16 byte */
@@ -2836,13 +2837,8 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l
 		/* freeUntilPos[it.reg] = 0 */
 		reg = other->reg;
 		IR_ASSERT(reg >= 0);
-		if (reg >= IR_REG_SCRATCH) {
-			if (reg == IR_REG_SCRATCH) {
-				available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
-			} else {
-				IR_ASSERT(reg == IR_REG_ALL);
-				available = IR_REGSET_EMPTY;
-			}
+		if (reg >= IR_REG_NUM) {
+			available = IR_REGSET_DIFFERENCE(available, ir_scratch_regset[reg - IR_REG_NUM]);
 		} else {
 			IR_REGSET_EXCL(available, reg);
 		}
@@ -2864,15 +2860,8 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l
 			if (next) {
 				reg = other->reg;
 				IR_ASSERT(reg >= 0);
-				if (reg >= IR_REG_SCRATCH) {
-					ir_regset regset;
-
-					if (reg == IR_REG_SCRATCH) {
-						regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
-					} else {
-						IR_ASSERT(reg == IR_REG_ALL);
-						regset = available;
-					}
+				if (reg >= IR_REG_NUM) {
+					ir_regset regset = IR_REGSET_INTERSECTION(available, ir_scratch_regset[reg - IR_REG_NUM]);
 					overlapped = IR_REGSET_UNION(overlapped, regset);
 					IR_REGSET_FOREACH(regset, reg) {
 						if (next < freeUntilPos[reg]) {
@@ -2922,7 +2911,8 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l
 		}

 		/* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
-		scratch = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
+		scratch = IR_REGSET_INTERSECTION(available,
+			ir_scratch_regset[((ir_reg_alloc_data*)(ctx->data))->cc->scratch_reg - IR_REG_NUM]);
 		if (scratch != IR_REGSET_EMPTY) {
 			/* prefer registers that don't conflict with the hints for the following unhandled intervals */
 			if (1) {
@@ -2970,8 +2960,8 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l
 			pos = freeUntilPos[i];
 			reg = i;
 		} else if (freeUntilPos[i] == pos
-				&& !IR_REGSET_IN(IR_REGSET_SCRATCH, reg)
-				&& IR_REGSET_IN(IR_REGSET_SCRATCH, i)) {
+				&& !IR_REGSET_IN(ir_scratch_regset[((ir_reg_alloc_data*)(ctx->data))->cc->scratch_reg - IR_REG_NUM], reg)
+				&& IR_REGSET_IN(ir_scratch_regset[((ir_reg_alloc_data*)(ctx->data))->cc->scratch_reg - IR_REG_NUM], i)) {
 			/* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
 			pos = freeUntilPos[i];
 			reg = i;
@@ -3077,15 +3067,8 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 		/* nextUsePos[it.reg] = next use of it after start of current */
 		reg = other->reg;
 		IR_ASSERT(reg >= 0);
-		if (reg >= IR_REG_SCRATCH) {
-			ir_regset regset;
-
-			if (reg == IR_REG_SCRATCH) {
-				regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
-			} else {
-				IR_ASSERT(reg == IR_REG_ALL);
-				regset = available;
-			}
+		if (reg >= IR_REG_NUM) {
+			ir_regset regset = IR_REGSET_INTERSECTION(available, ir_scratch_regset[reg - IR_REG_NUM]);
 			IR_REGSET_FOREACH(regset, reg) {
 				blockPos[reg] = nextUsePos[reg] = 0;
 			} IR_REGSET_FOREACH_END();
@@ -3109,18 +3092,11 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 		/* freeUntilPos[it.reg] = next intersection of it with current */
 		reg = other->reg;
 		IR_ASSERT(reg >= 0);
-		if (reg >= IR_REG_SCRATCH) {
+		if (reg >= IR_REG_NUM) {
 			ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);

 			if (overlap) {
-				ir_regset regset;
-
-				if (reg == IR_REG_SCRATCH) {
-					regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
-				} else {
-					IR_ASSERT(reg == IR_REG_ALL);
-					regset = available;
-				}
+				ir_regset regset = IR_REGSET_INTERSECTION(available, ir_scratch_regset[reg - IR_REG_NUM]);
 				IR_REGSET_FOREACH(regset, reg) {
 					if (overlap < nextUsePos[reg]) {
 						nextUsePos[reg] = overlap;
@@ -3325,9 +3301,9 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 	return reg;
 }

-static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
+static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to, void *data)
 {
-	ir_block *bb = ctx->data;
+	ir_block *bb = data;
 	ir_tmp_reg tmp_reg;

 	if (to == 0) {
@@ -3365,7 +3341,7 @@ static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
 	return 1;
 }

-static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_reg_alloc_data *data)
+static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival)
 {
 	ir_use_pos *use_pos = ival->use_pos;

@@ -3417,7 +3393,7 @@ static void ir_assign_bound_spill_slots(ir_ctx *ctx)
 	}
 }

-static int ir_linear_scan(ir_ctx *ctx)
+static int ir_linear_scan(ir_ctx *ctx, ir_ref vars)
 {
 	uint32_t b;
 	ir_block *bb;
@@ -3428,8 +3404,6 @@ static int ir_linear_scan(ir_ctx *ctx)
 	int j;
 	ir_live_pos position;
 	ir_reg reg;
-	ir_reg_alloc_data data;
-	ir_ref vars = ctx->vars;

 	if (!ctx->live_intervals) {
 		return 0;
@@ -3440,19 +3414,11 @@ static int ir_linear_scan(ir_ctx *ctx)
 		for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
 			IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
 			if (bb->flags & IR_BB_DESSA_MOVES) {
-				ctx->data = bb;
-				ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps);
+				ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps, bb);
 			}
 		}
 	}

-	ctx->data = &data;
-	ctx->stack_frame_size = 0;
-	data.unused_slot_4 = 0;
-	data.unused_slot_2 = 0;
-	data.unused_slot_1 = 0;
-	data.handled = NULL;
-
 	while (vars) {
 		ir_ref var = vars;
 		ir_insn *insn = &ctx->ir_base[var];
@@ -3461,7 +3427,7 @@ static int ir_linear_scan(ir_ctx *ctx)
 		vars = insn->op3; /* list next */

 		if (insn->op == IR_VAR) {
-			ir_ref slot = ir_allocate_spill_slot(ctx, insn->type, &data);;
+			ir_ref slot = ir_allocate_spill_slot(ctx, insn->type);
 			ir_use_list *use_list;
 			ir_ref n, *p;

@@ -3484,7 +3450,7 @@ static int ir_linear_scan(ir_ctx *ctx)
 			IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0);
 			IR_ASSERT(val->val.i64 < 0x7fffffff);

-			insn->op3 = ir_allocate_big_spill_slot(ctx, val->val.i32, &data);
+			insn->op3 = ir_allocate_big_spill_slot(ctx, val->val.i32);
 		}
 	}

@@ -3492,7 +3458,7 @@ static int ir_linear_scan(ir_ctx *ctx)
 		ival = ctx->live_intervals[j];
 		if (ival) {
 			if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
-					|| !ir_ival_spill_for_fuse_load(ctx, ival, &data)) {
+					|| !ir_ival_spill_for_fuse_load(ctx, ival)) {
 				ir_add_to_unhandled(&unhandled, ival);
 			}
 		}
@@ -3503,8 +3469,8 @@ static int ir_linear_scan(ir_ctx *ctx)
 		ir_merge_to_unhandled(&unhandled, ival);
 	}

-	/* vregs + tmp + fixed + SRATCH + ALL */
-	for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 2; j++) {
+	/* vregs + tmp + fixed + ALL + SCRATCH_N */
+	for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_SET_NUM; j++) {
 		ival = ctx->live_intervals[j];
 		if (ival) {
 			ival->current_range = &ival->range;
@@ -3663,7 +3629,7 @@ static int ir_linear_scan(ir_ctx *ctx)
 			ir_live_interval *handled[9] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
 			ir_live_interval *old;

-			data.handled = handled;
+			((ir_reg_alloc_data*)(ctx->data))->handled = handled;
 			active = NULL;
 			while (unhandled) {
 				ival = unhandled;
@@ -3701,7 +3667,7 @@ static int ir_linear_scan(ir_ctx *ctx)
 					other = prev ? prev->list_next : active;
 				}

-				ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data);
+				ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type);
 				if (unhandled && ival->end > unhandled->range.start) {
 					ival->list_next = active;
 					active = ival;
@@ -3721,15 +3687,16 @@ static int ir_linear_scan(ir_ctx *ctx)
 					}
 				}
 			}
-			data.handled = NULL;
+			((ir_reg_alloc_data*)(ctx->data))->handled = NULL;
 		}
 	}

 #ifdef IR_TARGET_X86
 	if (ctx->flags2 & IR_HAS_FP_RET_SLOT) {
-		ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data);
-	} else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
-		ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data);
+		ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE);
+	} else if ((ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE)
+			&& ((ir_reg_alloc_data*)(ctx->data))->cc->fp_ret_reg == IR_REG_NONE) {
+		ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type);
 	} else {
 		ctx->ret_slot = -1;
 	}
@@ -4033,17 +4000,18 @@ static void assign_regs(ir_ctx *ctx)
 		} while (ival);
 	}

+	const ir_call_conv_dsc *cc = ((ir_reg_alloc_data*)(ctx->data))->cc;
 	if (ctx->fixed_stack_frame_size != -1) {
 		ctx->used_preserved_regs = (ir_regset)ctx->fixed_save_regset;
-		if (IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED),
+		if (IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, cc->preserved_regs),
 			ctx->used_preserved_regs)) {
 			// TODO: Preserved reg and fixed frame conflict ???
 			// IR_ASSERT(0 && "Preserved reg and fixed frame conflict");
 		}
 	} else {
 		ctx->used_preserved_regs = IR_REGSET_UNION((ir_regset)ctx->fixed_save_regset,
-			IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED),
-				(ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : IR_REGSET_PRESERVED));
+			IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, cc->preserved_regs),
+				(ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : cc->preserved_regs));
 	}

 	ir_fix_stack_frame(ctx);
@@ -4051,9 +4019,24 @@ static void assign_regs(ir_ctx *ctx)

 int ir_reg_alloc(ir_ctx *ctx)
 {
-	if (ir_linear_scan(ctx)) {
+	ir_reg_alloc_data data;
+	ir_ref vars = ctx->vars;
+
+	data.cc = ir_get_call_conv_dsc(ctx->flags);
+	data.unused_slot_4 = 0;
+	data.unused_slot_2 = 0;
+	data.unused_slot_1 = 0;
+	data.handled = NULL;
+
+	ctx->data = &data;
+	ctx->stack_frame_size = 0;
+
+	if (ir_linear_scan(ctx, vars)) {
 		assign_regs(ctx);
+		ctx->data = NULL;
 		return 1;
 	}
+
+	ctx->data = NULL;
 	return 0;
 }
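
Note: the widened emit_copy_t signature lets every caller thread its own state into the
callback instead of parking it in ctx->data. An illustrative callback (count_copies is
hypothetical, not part of the commit):

    static int count_copies(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to, void *data)
    {
        (void)ctx; (void)type; (void)from; (void)to;
        (*(int*)data)++;          /* tally the copies emitted for this edge */
        return 1;
    }

    int n = 0;
    ir_gen_dessa_moves(ctx, b, count_copies, &n);

The same mechanism is what lets the backends pass a block number as (void*)(intptr_t)b
and drop the old dessa_from_block field.
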
diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c
index dd955172950..51d7f96e518 100644
--- a/ext/opcache/jit/ir/ir_save.c
+++ b/ext/opcache/jit/ir/ir_save.c
@@ -18,6 +18,38 @@ void ir_print_proto(const ir_ctx *ctx, ir_ref func_proto, FILE *f)
 	}
 }

+void ir_print_call_conv(uint32_t flags, FILE *f)
+{
+	switch (flags & IR_CALL_CONV_MASK) {
+		case IR_CC_BUILTIN:
+			fprintf(f, " __builtin");
+			break;
+		case IR_CC_FASTCALL:
+			fprintf(f, " __fastcall");
+			break;
+		case IR_CC_PRESERVE_NONE:
+			fprintf(f, " __preserve_none");
+			break;
+#if defined(IR_TARGET_X64)
+		case IR_CC_X86_64_SYSV:
+			fprintf(f, " __sysv");
+			break;
+		case IR_CC_X86_64_MS:
+			fprintf(f, " __win64");
+			break;
+#elif defined(IR_TARGET_AARCH64)
+		case IR_CC_AARCH64_SYSV:
+			fprintf(f, " __sysv");
+			break;
+		case IR_CC_AARCH64_DARWIN:
+			fprintf(f, " __darwin");
+			break;
+#endif
+		default:
+			IR_ASSERT((flags & IR_CALL_CONV_MASK) == IR_CC_DEFAULT);
+	}
+}
+
 void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f)
 {
 	uint32_t j;
@@ -35,11 +67,7 @@ void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, c
 		fprintf(f, "...");
 	}
 	fprintf(f, "): %s", ir_type_cname[ret_type]);
-	if (flags & IR_FASTCALL_FUNC) {
-		fprintf(f, " __fastcall");
-	} else if (flags & IR_BUILTIN_FUNC) {
-		fprintf(f, " __builtin");
-	}
+	ir_print_call_conv(flags, f);
 	if (flags & IR_CONST_FUNC) {
 		fprintf(f, " __const");
 	} else if (flags & IR_PURE_FUNC) {
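
Note: illustrative output only (exact syntax per ir_print_proto_ex above): a vararg
prototype flagged IR_CC_X86_64_MS would now be saved with a convention suffix after its
return type, roughly

    (int32_t, ...): int32_t __win64

while IR_CC_DEFAULT prints no suffix, so existing dumps stay unchanged.
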
diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc
index 7f714dd11d2..9072b0dd591 100644
--- a/ext/opcache/jit/ir/ir_x86.dasc
+++ b/ext/opcache/jit/ir/ir_x86.dasc
@@ -882,8 +882,7 @@ IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_
 |.endmacro

 typedef struct _ir_backend_data {
-    ir_reg_alloc_data  ra_data;
-	uint32_t           dessa_from_block;
+	ir_reg_alloc_data  ra_data;
 	dasm_State        *dasm_state;
 	ir_bitset          emit_constants;
 	int                rodata_label, jmp_table_label;
@@ -897,6 +896,13 @@ typedef struct _ir_backend_data {
 	bool               resolved_label_syms;
 } ir_backend_data;

+typedef struct _ir_x86_64_sysv_va_list {
+	uint32_t  gp_offset;
+	uint32_t  fp_offset;
+	void     *overflow_arg_area;
+	void     *reg_save_area;
+} ir_x86_64_sysv_va_list;
+
 #define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \
 	#name64,
 #define IR_GP_REG_NAME32(code, name64, name32, name16, name8, name8h) \
@@ -908,9 +914,19 @@ typedef struct _ir_backend_data {
 #define IR_FP_REG_NAME(code, name) \
 	#name,

-static const char *_ir_reg_name[IR_REG_NUM] = {
+static const char *_ir_reg_name[] = {
 	IR_GP_REGS(IR_GP_REG_NAME)
 	IR_FP_REGS(IR_FP_REG_NAME)
+	"ALL",
+	"SCRATCH",
+#ifdef IR_TARGET_X64
+# ifdef _WIN64
+	"SCRATCH_SYSV",
+# else
+	"SCRATCH_MS",
+# endif
+	"SCRATCH_PN",  /* preserve none */
+#endif
 };

 static const char *_ir_reg_name32[IR_REG_NUM] = {
@@ -925,66 +941,11 @@ static const char *_ir_reg_name8[IR_REG_NUM] = {
 	IR_GP_REGS(IR_GP_REG_NAME8)
 };

-/* Calling Convention */
-#ifdef _WIN64
-
-static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
-	IR_REG_INT_ARG1,
-	IR_REG_INT_ARG2,
-	IR_REG_INT_ARG3,
-	IR_REG_INT_ARG4,
-};
-
-static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
-	IR_REG_FP_ARG1,
-	IR_REG_FP_ARG2,
-	IR_REG_FP_ARG3,
-	IR_REG_FP_ARG4,
-};
-
-#elif defined(IR_TARGET_X64)
-
-static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
-	IR_REG_INT_ARG1,
-	IR_REG_INT_ARG2,
-	IR_REG_INT_ARG3,
-	IR_REG_INT_ARG4,
-	IR_REG_INT_ARG5,
-	IR_REG_INT_ARG6,
-};
-
-static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
-	IR_REG_FP_ARG1,
-	IR_REG_FP_ARG2,
-	IR_REG_FP_ARG3,
-	IR_REG_FP_ARG4,
-	IR_REG_FP_ARG5,
-	IR_REG_FP_ARG6,
-	IR_REG_FP_ARG7,
-	IR_REG_FP_ARG8,
-};
-
-#else
-
-static const int8_t *_ir_int_reg_params = NULL;
-static const int8_t *_ir_fp_reg_params = NULL;
-static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS] = {
-	IR_REG_INT_FCARG1,
-	IR_REG_INT_FCARG2,
-};
-static const int8_t *_ir_fp_fc_reg_params = NULL;
-
-#endif
-
 const char *ir_reg_name(int8_t reg, ir_type type)
 {
 	if (reg >= IR_REG_NUM) {
-		if (reg == IR_REG_SCRATCH) {
-			return "SCRATCH";
-		} else {
-			IR_ASSERT(reg == IR_REG_ALL);
-			return "ALL";
-		}
+		IR_ASSERT((uint8_t)reg < sizeof(_ir_reg_name) / sizeof(_ir_reg_name[0]));
+		return _ir_reg_name[reg];
 	}
 	IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
 	if (type == IR_VOID) {
@@ -1002,6 +963,159 @@ const char *ir_reg_name(int8_t reg, ir_type type)
 	}
 }

+/* Calling Conventions */
+#ifdef IR_TARGET_X64
+
+# ifdef _WIN64
+#  define IR_REG_SCRATH_X86_64_MS      IR_REG_SET_1
+#  define IR_REG_SCRATH_X86_64_SYSV    IR_REG_SET_2
+#  define IR_REG_SCRATH_X86_64_PN      IR_REG_SET_3
+# else
+#  define IR_REG_SCRATH_X86_64_SYSV    IR_REG_SET_1
+#  define IR_REG_SCRATH_X86_64_MS      IR_REG_SET_2
+#  define IR_REG_SCRATH_X86_64_PN      IR_REG_SET_3
+# endif
+
+# define IR_REGSET_SCRATCH_X86_64_SYSV \
+	(IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) | \
+	 IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI) | \
+	 IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) | \
+	 IR_REGSET_FP)
+
+# define IR_REGSET_SCRATCH_X86_64_WIN \
+	(IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) | \
+	 IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) | \
+	 IR_REGSET_INTERVAL(IR_REG_XMM0, IR_REG_XMM5))
+
+# define IR_REGSET_SCRATCH_X86_64_PN \
+	(IR_REGSET_DIFFERENCE(IR_REGSET_GP, IR_REGSET(IR_REG_RBP)) | IR_REGSET_FP)
+
+const ir_regset ir_scratch_regset[] = {
+	IR_REGSET_GP | IR_REGSET_FP,
+# ifdef _WIN64
+	IR_REGSET_SCRATCH_X86_64_WIN,
+	IR_REGSET_SCRATCH_X86_64_SYSV,
+# else
+	IR_REGSET_SCRATCH_X86_64_SYSV,
+	IR_REGSET_SCRATCH_X86_64_WIN,
+# endif
+	IR_REGSET_SCRATCH_X86_64_PN,
+};
+
+const ir_call_conv_dsc ir_call_conv_x86_64_ms = {
+	0,           /* cleanup_stack_by_callee */
+	0,           /* pass_struct_by_val      */
+	0,           /* sysv_varargs            */
+	1,           /* shadow_param_regs       */
+	32,          /* shadow_store_size       */
+	4,           /* int_param_regs_count    */
+	4,           /* fp_param_regs_count     */
+	IR_REG_RAX,  /* int_ret_reg             */
+	IR_REG_XMM0, /* fp_ret_reg              */
+	IR_REG_NONE, /* fp_varargs_reg          */
+	IR_REG_SCRATH_X86_64_MS,
+	(const int8_t[4]){IR_REG_RCX, IR_REG_RDX, IR_REG_R8, IR_REG_R9},
+	(const int8_t[4]){IR_REG_XMM0, IR_REG_XMM1, IR_REG_XMM2, IR_REG_XMM3},
+	IR_REGSET(IR_REG_RBX) | IR_REGSET(IR_REG_RBP) | IR_REGSET(IR_REG_RSI) | IR_REGSET(IR_REG_RDI) |
+		IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15) | IR_REGSET_INTERVAL(IR_REG_XMM6, IR_REG_XMM15),
+};
+
+const ir_call_conv_dsc ir_call_conv_x86_64_sysv = {
+	0,           /* cleanup_stack_by_callee */
+	1,           /* pass_struct_by_val      */
+	1,           /* sysv_varargs            */
+	0,           /* shadow_param_regs       */
+	0,           /* shadow_store_size       */
+	6,           /* int_param_regs_count    */
+	8,           /* fp_param_regs_count     */
+	IR_REG_RAX,  /* int_ret_reg             */
+	IR_REG_XMM0, /* fp_ret_reg              */
+	IR_REG_RAX,  /* fp_varargs_reg          */
+	IR_REG_SCRATH_X86_64_SYSV,
+	(const int8_t[6]){IR_REG_RDI, IR_REG_RSI, IR_REG_RDX, IR_REG_RCX, IR_REG_R8, IR_REG_R9},
+	(const int8_t[8]){IR_REG_XMM0, IR_REG_XMM1, IR_REG_XMM2, IR_REG_XMM3,
+	                  IR_REG_XMM4, IR_REG_XMM5, IR_REG_XMM6, IR_REG_XMM7},
+	IR_REGSET(IR_REG_RBX) | IR_REGSET(IR_REG_RBP) | IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15),
+
+};
+
+const ir_call_conv_dsc ir_call_conv_x86_64_preserve_none = {
+	0,           /* cleanup_stack_by_callee */
+	1,           /* pass_struct_by_val      */
+	1,           /* sysv_varargs            */
+	0,           /* shadow_param_regs       */
+	0,           /* shadow_store_size       */
+	12,          /* int_param_regs_count    */
+	8,           /* fp_param_regs_count     */
+	IR_REG_RAX,  /* int_ret_reg             */
+	IR_REG_XMM0, /* fp_ret_reg              */
+	IR_REG_RAX,  /* fp_varargs_reg          */
+	IR_REG_SCRATH_X86_64_PN,
+	(const int8_t[12]){IR_REG_R12, IR_REG_R13, IR_REG_R14, IR_REG_R15,
+	                   IR_REG_RDI, IR_REG_RSI, IR_REG_RDX, IR_REG_RCX, IR_REG_R8, IR_REG_R9,
+	                   IR_REG_R11, IR_REG_RAX},
+	(const int8_t[8]){IR_REG_XMM0, IR_REG_XMM1, IR_REG_XMM2, IR_REG_XMM3,
+	                  IR_REG_XMM4, IR_REG_XMM5, IR_REG_XMM6, IR_REG_XMM7},
+	IR_REGSET(IR_REG_RBP),
+
+};
+
+# ifdef _WIN64
+#  define ir_call_conv_default ir_call_conv_x86_64_ms
+# else
+#  define ir_call_conv_default ir_call_conv_x86_64_sysv
+# endif
+
+#else
+
+# define IR_REG_SCRATCH_X86             IR_REG_SET_1
+
+# define IR_REGSET_SCRATCH_X86 \
+	(IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) | IR_REGSET_FP)
+
+const ir_regset ir_scratch_regset[] = {
+	IR_REGSET_INTERVAL(IR_REG_GP_FIRST, IR_REG_FP_LAST),
+	IR_REGSET_SCRATCH_X86,
+};
+
+const ir_call_conv_dsc ir_call_conv_x86_cdecl = {
+	0,           /* cleanup_stack_by_callee */
+	1,           /* pass_struct_by_val      */
+	0,           /* sysv_varargs            */
+	0,           /* shadow_param_regs       */
+	0,           /* shadow_store_size       */
+	0,           /* int_param_regs_count    */
+	0,           /* fp_param_regs_count     */
+	IR_REG_RAX,  /* int_ret_reg             */
+	IR_REG_NONE, /* fp_ret_reg              */
+	IR_REG_NONE, /* fp_varargs_reg          */
+	IR_REG_SCRATCH_X86,
+	NULL,
+	NULL,
+	IR_REGSET(IR_REG_RBX) | IR_REGSET(IR_REG_RBP) | IR_REGSET(IR_REG_RSI) | IR_REGSET(IR_REG_RDI),
+};
+
+const ir_call_conv_dsc ir_call_conv_x86_fastcall = {
+	1,           /* cleanup_stack_by_callee */
+	1,           /* pass_struct_by_val      */
+	0,           /* sysv_varargs            */
+	0,           /* shadow_param_regs       */
+	0,           /* shadow_store_size       */
+	2,           /* int_param_regs_count    */
+	0,           /* fp_param_regs_count     */
+	IR_REG_RAX,  /* int_ret_reg             */
+	IR_REG_NONE, /* fp_ret_reg              */
+	IR_REG_NONE, /* fp_varargs_reg          */
+	IR_REG_SCRATCH_X86,
+	(const int8_t[4]){IR_REG_RCX, IR_REG_RDX},
+	NULL,
+	IR_REGSET(IR_REG_RBX) | IR_REGSET(IR_REG_RBP) | IR_REGSET(IR_REG_RSI) | IR_REGSET(IR_REG_RDI),
+};
+
+# define ir_call_conv_default ir_call_conv_x86_cdecl
+
+#endif
+
 #define IR_RULES(_)        \
 	_(CMP_INT)             \
 	_(CMP_FP)              \
@@ -1156,6 +1270,8 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 	const ir_insn *insn;
 	int n = 0;
 	int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
+	const ir_proto_t *proto;
+	const ir_call_conv_dsc *cc;

 	constraints->def_reg = IR_REG_NONE;
 	constraints->hints_count = 0;
@@ -1391,21 +1507,48 @@ op2_const:
 			break;
 		case IR_CALL:
 			insn = &ctx->ir_base[ref];
-			if (IR_IS_TYPE_INT(insn->type)) {
-				constraints->def_reg = IR_REG_INT_RET1;
-#ifdef IR_REG_FP_RET1
-			} else {
-				constraints->def_reg = IR_REG_FP_RET1;
+			proto = ir_call_proto(ctx, insn);
+			cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
+			if (insn->type != IR_VOID) {
+				if (IR_IS_TYPE_INT(insn->type)) {
+					constraints->def_reg = cc->int_ret_reg;
+				} else {
+					IR_ASSERT(IR_IS_TYPE_FP(insn->type));
+#ifdef IR_TARGET_X86
+					if (cc->fp_ret_reg == IR_REG_NONE) {
+						ctx->flags2 |= IR_HAS_FP_RET_SLOT;
+					} else
 #endif
+					{
+						constraints->def_reg = cc->fp_ret_reg;
+					}
+				}
 			}
-			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
+			constraints->tmp_regs[0] = IR_SCRATCH_REG(cc->scratch_reg, IR_USE_SUB_REF, IR_DEF_SUB_REF);
 			n = 1;
-			IR_FALLTHROUGH;
+			if (!IR_IS_CONST_REF(insn->op2)
+			 && proto && (proto->flags & IR_VARARG_FUNC) && cc->fp_varargs_reg != IR_REG_NONE) {
+				constraints->tmp_regs[n] = IR_SCRATCH_REG(cc->fp_varargs_reg, IR_LOAD_SUB_REF, IR_USE_SUB_REF);
+				n++;
+			}
+			if (insn->inputs_count > 2) {
+				goto get_arg_hints;
+			}
+			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
+			break;
 		case IR_TAILCALL:
 			insn = &ctx->ir_base[ref];
+			proto = ir_call_proto(ctx, insn);
+			cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
+			if (!IR_IS_CONST_REF(insn->op2)
+			 && proto && (proto->flags & IR_VARARG_FUNC) && cc->fp_varargs_reg != IR_REG_NONE) {
+				constraints->tmp_regs[n] = IR_SCRATCH_REG(cc->fp_varargs_reg, IR_LOAD_SUB_REF, IR_USE_SUB_REF);
+				n++;
+			}
 			if (insn->inputs_count > 2) {
+get_arg_hints:
 				constraints->hints[2] = IR_REG_NONE;
-				constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints);
+				constraints->hints_count = ir_get_args_regs(ctx, insn, cc, constraints->hints);
 				if (!IR_IS_CONST_REF(insn->op2)) {
 					constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF);
 					n++;
@@ -1533,7 +1676,8 @@ op2_const:
 			break;
 		case IR_EXITCALL:
 			flags = IR_USE_MUST_BE_IN_REG;
-			constraints->def_reg = IR_REG_INT_RET1;
+			cc = ir_get_call_conv_dsc(ctx->flags);
+			constraints->def_reg = cc->int_ret_reg;
 			break;
 		case IR_IF_INT:
 		case IR_GUARD:
@@ -1548,16 +1692,21 @@ op2_const:
 			flags = IR_OP3_SHOULD_BE_IN_REG;
 			break;
 		case IR_RETURN_INT:
+			cc = ir_get_call_conv_dsc(ctx->flags);
 			flags = IR_OP2_SHOULD_BE_IN_REG;
-			constraints->hints[2] = IR_REG_INT_RET1;
+			constraints->hints[2] = cc->int_ret_reg;
 			constraints->hints_count = 3;
 			break;
 		case IR_RETURN_FP:
-#ifdef IR_REG_FP_RET1
-			flags = IR_OP2_SHOULD_BE_IN_REG;
-			constraints->hints[2] = IR_REG_FP_RET1;
-			constraints->hints_count = 3;
+			cc = ir_get_call_conv_dsc(ctx->flags);
+#ifdef IR_TARGET_X86
+			if (cc->fp_ret_reg != IR_REG_NONE)
 #endif
+			{
+				flags = IR_OP2_SHOULD_BE_IN_REG;
+				constraints->hints[2] = cc->fp_ret_reg;
+				constraints->hints_count = 3;
+			}
 			break;
 		case IR_SNAPSHOT:
 			flags = 0;
@@ -1888,20 +2037,12 @@ static void ir_match_fuse_load_cmp_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root)
 	}
 }

-static void ir_match_fuse_load_cmp_fp_br(ir_ctx *ctx, ir_insn *insn, ir_ref root, bool direct)
+static void ir_match_fuse_load_cmp_fp_br(ir_ctx *ctx, ir_insn *insn, ir_ref root)
 {
-	if (direct) {
-		if (insn->op == IR_LT || insn->op == IR_LE) {
-			/* swap operands to avoid P flag check */
-			ir_swap_ops(insn);
-			insn->op ^= 3;
-		}
-	} else {
-		if (insn->op == IR_GT || insn->op == IR_GE) {
-			/* swap operands to avoid P flag check */
-			ir_swap_ops(insn);
-			insn->op ^= 3;
-		}
+	if (insn->op == IR_LT || insn->op == IR_LE || insn->op == IR_UGT || insn->op == IR_UGE) {
+		/* swap operands to avoid P flag check */
+		ir_swap_ops(insn);
+		insn->op ^= 3;
 	}
 	if (IR_IS_CONST_REF(insn->op2) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op2])) {
 		/* pass */
@@ -1926,7 +2067,7 @@ static uint32_t ir_match_builtin_call(ir_ctx *ctx, const ir_insn *func)
 {
 	const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func->proto);

-	if (proto->flags & IR_BUILTIN_FUNC) {
+	if ((proto->flags & IR_CALL_CONV_MASK) == IR_CC_BUILTIN) {
 		size_t name_len;
 		const char *name = ir_get_strl(ctx, func->val.name, &name_len);

@@ -2452,15 +2593,23 @@ binop_fp:
 				}
 			}
 			ctx->flags2 |= IR_HAS_CALLS | IR_16B_FRAME_ALIGNMENT;
-#ifndef IR_REG_FP_RET1
-			if (IR_IS_TYPE_FP(insn->type)) {
-				ctx->flags2 |= IR_HAS_FP_RET_SLOT;
-			}
-#endif
 			IR_FALLTHROUGH;
 		case IR_TAILCALL:
 		case IR_IJMP:
-			ir_match_fuse_load(ctx, insn->op2, ref);
+			if (!IR_IS_CONST_REF(insn->op2)) {
+				if (ctx->ir_base[insn->op2].op == IR_PROTO) {
+					if (IR_IS_CONST_REF(ctx->ir_base[insn->op2].op1)) {
+						ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_PROTO;
+					} else {
+						ir_match_fuse_load(ctx, ctx->ir_base[insn->op2].op1, ref);
+						if (ctx->rules[ctx->ir_base[insn->op2].op1] & IR_FUSED) {
+							ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_PROTO;
+						}
+					}
+				} else {
+					ir_match_fuse_load(ctx, insn->op2, ref);
+				}
+			}
 			return insn->op;
 		case IR_IGOTO:
 			if (ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) {
@@ -2478,11 +2627,12 @@ binop_fp:
 		case IR_VAR:
 			return IR_STATIC_ALLOCA;
 		case IR_PARAM:
-#ifndef _WIN64
 			if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
-				return IR_STATIC_ALLOCA;
+				const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(ctx->flags);
+				if (cc->pass_struct_by_val) {
+					return IR_STATIC_ALLOCA;
+				}
 			}
-#endif
 			return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM;
 		case IR_ALLOCA:
 			/* alloca() may be used only in functions */
@@ -2767,7 +2917,7 @@ store_int:
 						return IR_CMP_AND_BRANCH_INT;
 					} else {
 						/* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */
-						ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, 1);
+						ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
 						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
 						return IR_CMP_AND_BRANCH_FP;
 					}
@@ -2864,7 +3014,7 @@ store_int:
 						ctx->rules[insn->op1] = IR_FUSED | IR_CMP_INT;
 						return IR_COND_CMP_INT;
 					} else {
-						ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref, 1);
+						ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref);
 						ctx->rules[insn->op1] = IR_FUSED | IR_CMP_FP;
 						return IR_COND_CMP_FP;
 					}
@@ -2956,7 +3106,7 @@ store_int:
 						return IR_GUARD_CMP_INT;
 					} else {
 						/* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */
-						ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, insn->op == IR_GUARD_NOT);
+						ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
 						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
 						return IR_GUARD_CMP_FP;
 					}
@@ -3907,59 +4057,68 @@ static void ir_emit_prologue(ir_ctx *ctx)
 		}
 	}
 	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
-#if defined(_WIN64)
-		ir_reg fp;
-		int offset;
+		const ir_call_conv_dsc *cc = data->ra_data.cc;

-		if (ctx->flags & IR_USE_FRAME_POINTER) {
-			fp = IR_REG_FRAME_POINTER;
-			offset = sizeof(void*) * 2;
-		} else {
-			fp = IR_REG_STACK_POINTER;
-			offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*);
+		if (cc->shadow_store_size) {
+			ir_reg fp;
+			int shadow_store;
+			int offset = 0;
+			int n = 0;
+
+			if (ctx->flags & IR_USE_FRAME_POINTER) {
+				fp = IR_REG_FRAME_POINTER;
+				shadow_store = sizeof(void*) * 2;
+			} else {
+				fp = IR_REG_STACK_POINTER;
+				shadow_store = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*);
+			}
+
+			while (offset < cc->shadow_store_size && n < cc->int_param_regs_count) {
+				|	mov [Ra(fp)+shadow_store+offset], Ra(cc->int_param_regs[n])
+				n++;
+				offset += sizeof(void*);
+			}
 		}
-		|	mov [Ra(fp)+offset], Ra(IR_REG_INT_ARG1)
-		|	mov [Ra(fp)+offset+8], Ra(IR_REG_INT_ARG2)
-		|	mov [Ra(fp)+offset+16], Ra(IR_REG_INT_ARG3)
-		|	mov [Ra(fp)+offset+24], Ra(IR_REG_INT_ARG4)
-#elif defined(IR_TARGET_X64)
+
+		if (cc->sysv_varargs) {
+			IR_ASSERT(sizeof(void*) == 8);
+#ifdef IR_TARGET_X64
 |.if X64
-		const int8_t *int_reg_params = _ir_int_reg_params;
-		const int8_t *fp_reg_params = _ir_fp_reg_params;
-		uint32_t i;
-		ir_reg fp;
-		int offset;
+			int32_t i;
+			ir_reg fp;
+			int offset;

-		if (ctx->flags & IR_USE_FRAME_POINTER) {
-			fp = IR_REG_FRAME_POINTER;
+			if (ctx->flags & IR_USE_FRAME_POINTER) {
+				fp = IR_REG_FRAME_POINTER;

-			offset = -(ctx->stack_frame_size - ctx->locals_area_size);
-		} else {
-			fp = IR_REG_STACK_POINTER;
-			offset = ctx->locals_area_size + ctx->call_stack_size;
-		}
+				offset = -(ctx->stack_frame_size - ctx->locals_area_size);
+			} else {
+				fp = IR_REG_STACK_POINTER;
+				offset = ctx->locals_area_size + ctx->call_stack_size;
+			}

-		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
-			/* skip named args */
-			offset += sizeof(void*) * ctx->gp_reg_params;
-			for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) {
-				|	mov qword [Ra(fp)+offset], Rq(int_reg_params[i])
-				offset += sizeof(void*);
+			if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) {
+				/* skip named args */
+				offset += sizeof(void*) * ctx->gp_reg_params;
+				for (i = ctx->gp_reg_params; i < cc->int_param_regs_count; i++) {
+					|	mov qword [Ra(fp)+offset], Rq(cc->int_param_regs[i])
+					offset += sizeof(void*);
+				}
 			}
-		}
-		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
-			|	test al, al
-			|	je	>1
-			/* skip named args */
-			offset += 16 * ctx->fp_reg_params;
-			for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) {
-				|	movaps [Ra(fp)+offset], xmm(fp_reg_params[i]-IR_REG_FP_FIRST)
-				offset += 16;
+			if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < cc->fp_param_regs_count) {
+				|	test al, al
+				|	je	>1
+				/* skip named args */
+				offset += 16 * ctx->fp_reg_params;
+				for (i = ctx->fp_reg_params; i < cc->fp_param_regs_count; i++) {
+					|	movaps [Ra(fp)+offset], xmm(cc->fp_param_regs[i]-IR_REG_FP_FIRST)
+					offset += 16;
+				}
+				|1:
 			}
-			|1:
-		}
 |.endif
 #endif
+		}
 	}
 }

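Note: with the assumed Win64 descriptor values (shadow_store_size = 32,
int_param_regs = {RCX, RDX, R8, R9}) the generic spill loop above expands to
exactly the removed hard-coded IR_REG_INT_ARG1..4 sequence:

    /* mov [fp+shadow_store+ 0], rcx
     * mov [fp+shadow_store+ 8], rdx
     * mov [fp+shadow_store+16], r8
     * mov [fp+shadow_store+24], r9 */
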
@@ -6995,27 +7154,26 @@ static void ir_emit_return_void(ir_ctx *ctx)

 	ir_emit_epilogue(ctx);

-#ifdef IR_TARGET_X86
-	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC) && ctx->param_stack_size) {
+	if (data->ra_data.cc->cleanup_stack_by_callee && ctx->param_stack_size) {
 		|	ret ctx->param_stack_size
-		return;
+	} else {
+		|	ret
 	}
-#endif
-
-	|	ret
 }
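
Note: cleanup_stack_by_callee generalizes the old 32-bit fastcall special
case. Callee-pops conventions (stdcall/fastcall) return with `ret imm16`,
which discards the stack parameters together with the return address:

    /* cleanup_stack_by_callee, param_stack_size == 12:
     *     ret 12      ; pop return address, then discard 12 bytes of args
     * caller-cleanup conventions (cdecl, SysV, Win64):
     *     ret */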

 static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
 {
+	ir_backend_data *data = ctx->data;
+	ir_reg ret_reg = data->ra_data.cc->int_ret_reg;
 	ir_reg op2_reg = ctx->regs[ref][2];

-	if (op2_reg != IR_REG_INT_RET1) {
+	if (op2_reg != ret_reg) {
 		ir_type type = ctx->ir_base[insn->op2].type;

 		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
-			ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg);
+			ir_emit_mov(ctx, type, ret_reg, op2_reg);
 		} else {
-			ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2);
+			ir_emit_load(ctx, type, ret_reg, insn->op2);
 		}
 	}
 	ir_emit_return_void(ctx);
@@ -7023,64 +7181,68 @@ static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)

 static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
 {
+	ir_backend_data *data = ctx->data;
 	ir_reg op2_reg = ctx->regs[ref][2];
 	ir_type type = ctx->ir_base[insn->op2].type;
+	ir_reg ret_reg = data->ra_data.cc->fp_ret_reg;

-#ifdef IR_REG_FP_RET1
-	if (op2_reg != IR_REG_FP_RET1) {
+	if (op2_reg != ret_reg && ret_reg != IR_REG_NONE) {
 		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
-			ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg);
+			ir_emit_fp_mov(ctx, type, ret_reg, op2_reg);
 		} else {
-			ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2);
+			ir_emit_load(ctx, type, ret_reg, insn->op2);
 		}
 	}
-#else
-	ir_backend_data *data = ctx->data;
-	dasm_State **Dst = &data->dasm_state;

-	if (IR_IS_CONST_REF(insn->op2)) {
-		ir_insn *value = &ctx->ir_base[insn->op2];
+#ifdef IR_TARGET_X86
+	if (ret_reg == IR_REG_NONE) {
+		dasm_State **Dst = &data->dasm_state;

-		if ((type == IR_FLOAT && value->val.f == 0.0) || (type == IR_DOUBLE && value->val.d == 0.0)) {
-			|	fldz
-		} else if ((type == IR_FLOAT && value->val.f == 1.0) || (type == IR_DOUBLE && value->val.d == 1.0)) {
-			|	fld1
-		} else {
-			int label = ir_get_const_label(ctx, insn->op2);
+		if (IR_IS_CONST_REF(insn->op2)) {
+			ir_insn *value = &ctx->ir_base[insn->op2];
+
+			if ((type == IR_FLOAT && value->val.f == 0.0) || (type == IR_DOUBLE && value->val.d == 0.0)) {
+				|	fldz
+			} else if ((type == IR_FLOAT && value->val.f == 1.0) || (type == IR_DOUBLE && value->val.d == 1.0)) {
+				|	fld1
+			} else {
+				int label = ir_get_const_label(ctx, insn->op2);
+
+				if (type == IR_DOUBLE) {
+					|	fld qword [=>label]
+				} else {
+					IR_ASSERT(type == IR_FLOAT);
+					|	fld dword [=>label]
+				}
+			}
+		} else if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) {
+			ir_reg fp;
+			int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp);

 			if (type == IR_DOUBLE) {
-				|	fld qword [=>label]
+				|	fld qword [Ra(fp)+offset]
 			} else {
 				IR_ASSERT(type == IR_FLOAT);
-				|	fld dword [=>label]
+				|	fld dword [Ra(fp)+offset]
 			}
-		}
-	} else if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) {
-		ir_reg fp;
-		int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp);
-
-		if (type == IR_DOUBLE) {
-			|	fld qword [Ra(fp)+offset]
 		} else {
-			IR_ASSERT(type == IR_FLOAT);
-			|	fld dword [Ra(fp)+offset]
-		}
-	} else {
-		int32_t offset = ctx->ret_slot;
-		ir_reg fp;
+			int32_t offset = ctx->ret_slot;
+			ir_reg fp;

-		IR_ASSERT(offset != -1);
-		offset = IR_SPILL_POS_TO_OFFSET(offset);
-		fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(fp, offset), op2_reg);
-		if (type == IR_DOUBLE) {
-			|	fld qword [Ra(fp)+offset]
-		} else {
-			IR_ASSERT(type == IR_FLOAT);
-			|	fld dword [Ra(fp)+offset]
+			IR_ASSERT(offset != -1);
+			offset = IR_SPILL_POS_TO_OFFSET(offset);
+			fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(fp, offset), op2_reg);
+			if (type == IR_DOUBLE) {
+				|	fld qword [Ra(fp)+offset]
+			} else {
+				IR_ASSERT(type == IR_FLOAT);
+				|	fld dword [Ra(fp)+offset]
+			}
 		}
 	}
 #endif
+
 	ir_emit_return_void(ctx);
 }
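
Note: fp_ret_reg == IR_REG_NONE models the 32-bit x86 conventions that
return floating-point values on the x87 stack, which is why the IR_REG_NONE
paths above emit fld to push the result into st(0). A caller retrieving such
a result would do, roughly:

    /* call func            ; result left in st(0)
     * fstp qword [dst]     ; pop it into a memory slot */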

@@ -8555,327 +8717,323 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def)

 static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
-#if defined(_WIN64) || defined(IR_TARGET_X86)
 	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
 	dasm_State **Dst = &data->dasm_state;
-	ir_reg fp;
-	int arg_area_offset;
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg tmp_reg = ctx->regs[def][3];
-	int32_t offset;

-	IR_ASSERT(tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+	if (!cc->sysv_varargs) {
+		ir_reg fp;
+		int arg_area_offset;
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg tmp_reg = ctx->regs[def][3];
+		int32_t offset;
+
+		IR_ASSERT(tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 		}
-		offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
-	}

-	if (ctx->flags & IR_USE_FRAME_POINTER) {
-		fp = IR_REG_FRAME_POINTER;
-		arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size;
+		if (ctx->flags & IR_USE_FRAME_POINTER) {
+			fp = IR_REG_FRAME_POINTER;
+			arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size;
+		} else {
+			fp = IR_REG_STACK_POINTER;
+			arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size;
+		}
+		|	lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset]
+		|	mov aword [Ra(op2_reg)+offset], Ra(tmp_reg)
 	} else {
-		fp = IR_REG_STACK_POINTER;
-		arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size;
-	}
-	|	lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset]
-	|	mov aword [Ra(op2_reg)+offset], Ra(tmp_reg)
-#elif defined(IR_TARGET_X64)
+		IR_ASSERT(sizeof(void*) == 8);
+#ifdef IR_TARGET_X64
 |.if X64
-	ir_backend_data *data = ctx->data;
-	dasm_State **Dst = &data->dasm_state;
-	ir_reg fp;
-	int reg_save_area_offset;
-	int overflow_arg_area_offset;
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg tmp_reg = ctx->regs[def][3];
-	bool have_reg_save_area = 0;
-	int32_t offset;
+		ir_reg fp;
+		int reg_save_area_offset;
+		int overflow_arg_area_offset;
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg tmp_reg = ctx->regs[def][3];
+		bool have_reg_save_area = 0;
+		int32_t offset;

-	IR_ASSERT(tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+		IR_ASSERT(tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 		}
-		offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
-	}

-	if (ctx->flags & IR_USE_FRAME_POINTER) {
-		fp = IR_REG_FRAME_POINTER;
-		reg_save_area_offset = -(ctx->stack_frame_size - ctx->locals_area_size);
-		overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size;
-	} else {
-		fp = IR_REG_STACK_POINTER;
-		reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size;
-		overflow_arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size;
-	}
+		if (ctx->flags & IR_USE_FRAME_POINTER) {
+			fp = IR_REG_FRAME_POINTER;
+			reg_save_area_offset = -(ctx->stack_frame_size - ctx->locals_area_size);
+			overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size;
+		} else {
+			fp = IR_REG_STACK_POINTER;
+			reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size;
+			overflow_arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size;
+		}

-	if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
-		|	lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset]
-		have_reg_save_area = 1;
-		/* Set va_list.gp_offset */
-		|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], sizeof(void*) * ctx->gp_reg_params
-	} else {
-		reg_save_area_offset -= sizeof(void*) * IR_REG_INT_ARGS;
-		/* Set va_list.gp_offset */
-		|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], sizeof(void*) * IR_REG_INT_ARGS
-	}
-	if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
-		if (!have_reg_save_area) {
+		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) {
 			|	lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset]
 			have_reg_save_area = 1;
+			/* Set va_list.gp_offset */
+			|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))], sizeof(void*) * ctx->gp_reg_params
+		} else {
+			reg_save_area_offset -= sizeof(void*) * cc->int_param_regs_count;
+			/* Set va_list.gp_offset */
+			|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))], sizeof(void*) * cc->int_param_regs_count
 		}
-		/* Set va_list.fp_offset */
-		|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], sizeof(void*) * IR_REG_INT_ARGS + 16 * ctx->fp_reg_params
-	} else {
-		/* Set va_list.fp_offset */
-		|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS
-	}
-	if (have_reg_save_area) {
-		/* Set va_list.reg_save_area */
-		|	mov qword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))], Ra(tmp_reg)
-	}
-	|	lea Ra(tmp_reg), aword [Ra(fp)+overflow_arg_area_offset]
-	/* Set va_list.overflow_arg_area */
-	|	mov qword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg)
+		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < cc->fp_param_regs_count) {
+			if (!have_reg_save_area) {
+				|	lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset]
+				have_reg_save_area = 1;
+			}
+			/* Set va_list.fp_offset */
+			|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))], sizeof(void*) * cc->int_param_regs_count + 16 * ctx->fp_reg_params
+		} else {
+			/* Set va_list.fp_offset */
+			|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))], sizeof(void*) * cc->int_param_regs_count + 16 * cc->fp_param_regs_count
+		}
+		if (have_reg_save_area) {
+			/* Set va_list.reg_save_area */
+			|	mov qword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, reg_save_area))], Ra(tmp_reg)
+		}
+		|	lea Ra(tmp_reg), aword [Ra(fp)+overflow_arg_area_offset]
+		/* Set va_list.overflow_arg_area */
+		|	mov qword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))], Ra(tmp_reg)
 |.endif
-#else
-	IR_ASSERT(0 && "NIY va_start");
 #endif
+	}
 }
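
Note: the offsetof() computations above assume the SysV AMD64 ABI va_list
layout; a sketch of the ir_x86_64_sysv_va_list definition implied by them:

    typedef struct {
        uint32_t gp_offset;         /* next GP slot in reg_save_area (8-byte slots) */
        uint32_t fp_offset;         /* next FP slot (16-byte XMM slots) */
        void    *overflow_arg_area; /* next argument passed on the stack */
        void    *reg_save_area;     /* register args spilled by the prologue */
    } ir_x86_64_sysv_va_list;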

 static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
-#if defined(_WIN64) || defined(IR_TARGET_X86)
 	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
 	dasm_State **Dst = &data->dasm_state;
-	ir_reg tmp_reg = ctx->regs[def][1];
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg op3_reg = ctx->regs[def][3];
-	int32_t op2_offset, op3_offset;

-	IR_ASSERT(tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+	if (!cc->sysv_varargs) {
+		ir_reg tmp_reg = ctx->regs[def][1];
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg op3_reg = ctx->regs[def][3];
+		int32_t op2_offset, op3_offset;
+
+		IR_ASSERT(tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			op2_offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 		}
-		op2_offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
-	}
-	if (op3_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op3_reg)) {
-			op3_reg = IR_REG_NUM(op3_reg);
-			ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
+		if (op3_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op3_reg)) {
+				op3_reg = IR_REG_NUM(op3_reg);
+				ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
+			}
+			op3_offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
+			op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
 		}
-		op3_offset = 0;
+		|	mov Ra(tmp_reg), aword [Ra(op3_reg)+op3_offset]
+		|	mov aword [Ra(op2_reg)+op2_offset], Ra(tmp_reg)
 	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
-		op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
-	}
-	|	mov Ra(tmp_reg), aword [Ra(op3_reg)+op3_offset]
-	|	mov aword [Ra(op2_reg)+op2_offset], Ra(tmp_reg)
-#elif defined(IR_TARGET_X64)
+		IR_ASSERT(sizeof(void*) == 8);
+#ifdef IR_TARGET_X64
 |.if X64
-	ir_backend_data *data = ctx->data;
-	dasm_State **Dst = &data->dasm_state;
-	ir_reg tmp_reg = ctx->regs[def][1];
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg op3_reg = ctx->regs[def][3];
-	int32_t op2_offset, op3_offset;
+		ir_reg tmp_reg = ctx->regs[def][1];
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg op3_reg = ctx->regs[def][3];
+		int32_t op2_offset, op3_offset;

-	IR_ASSERT(tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
-		}
-		op2_offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
-	}
-	if (op3_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op3_reg)) {
-			op3_reg = IR_REG_NUM(op3_reg);
-			ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
+		IR_ASSERT(tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			op2_offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 		}
-		op3_offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
-		op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
-	}
-	|	mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, gp_offset))]
-	|	mov dword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg)
-	|	mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, fp_offset))]
-	|	mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, fp_offset))], Ra(tmp_reg)
-	|	mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, overflow_arg_area))]
-	|	mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg)
-	|	mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, reg_save_area))]
-	|	mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, reg_save_area))], Ra(tmp_reg)
+		if (op3_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op3_reg)) {
+				op3_reg = IR_REG_NUM(op3_reg);
+				ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
+			}
+			op3_offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
+			op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
+		}
+		|	mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))]
+		|	mov dword [Ra(op2_reg)+(op2_offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))], Rd(tmp_reg)
+		|	mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))]
+		|	mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))], Ra(tmp_reg)
+		|	mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))]
+		|	mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))], Ra(tmp_reg)
+		|	mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_x86_64_sysv_va_list, reg_save_area))]
+		|	mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_x86_64_sysv_va_list, reg_save_area))], Ra(tmp_reg)
 |.endif
-#else
-	IR_ASSERT(0 && "NIY va_copy");
 #endif
+	}
 }
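
Note: both va_copy strategies above amount to a plain copy in C. Non-SysV
va_lists are a single pointer; the SysV x86-64 va_list needs all four fields
duplicated (sketch, using the layout assumed above):

    static void va_copy_sketch(ir_x86_64_sysv_va_list *dst,
                               const ir_x86_64_sysv_va_list *src)
    {
        /* copies gp_offset, fp_offset, overflow_arg_area, reg_save_area */
        *dst = *src;
    }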

 static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
-#if defined(_WIN64) || defined(IR_TARGET_X86)
 	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
 	dasm_State **Dst = &data->dasm_state;
-	ir_type type = insn->type;
-	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg tmp_reg = ctx->regs[def][3];
-	int32_t offset;

-	IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+	if (!cc->sysv_varargs) {
+		ir_type type = insn->type;
+		ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg tmp_reg = ctx->regs[def][3];
+		int32_t offset;
+
+		IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 		}
-		offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
-	}
-	|	mov Ra(tmp_reg), aword [Ra(op2_reg)+offset]
-#ifdef _WIN64
-	if (def_reg != IR_REG_NONE) {
-		ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg));
-	}
-	|	add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*))
-#else
-	if (!insn->op3) {
-		if (def_reg != IR_REG_NONE) {
-			ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg));
+		|	mov Ra(tmp_reg), aword [Ra(op2_reg)+offset]
+		if (!cc->pass_struct_by_val || !insn->op3) {
+			if (def_reg != IR_REG_NONE) {
+				ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg));
+			}
+			|	add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*))
+		} else {
+			int size = IR_VA_ARG_SIZE(insn->op3);
+
+			if (def_reg != IR_REG_NONE) {
+				IR_ASSERT(type == IR_ADDR);
+				int align = IR_VA_ARG_ALIGN(insn->op3);
+
+				if (align > (int)sizeof(void*)) {
+					|	add Ra(tmp_reg), (align-1)
+					|	and Ra(tmp_reg), ~(align-1)
+				}
+				|	mov Ra(def_reg), Ra(tmp_reg)
+			}
+			|	add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*))
+		}
+		|	mov aword [Ra(op2_reg)+offset], Ra(tmp_reg)
+		if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
+			ir_emit_store(ctx, type, def, def_reg);
 		}
-		|	add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*))
 	} else {
-		int size = IR_VA_ARG_SIZE(insn->op3);
+		IR_ASSERT(sizeof(void*) == 8);
+#ifdef IR_TARGET_X64
+|.if X64
+		ir_type type = insn->type;
+		ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+		ir_reg op2_reg = ctx->regs[def][2];
+		ir_reg tmp_reg = ctx->regs[def][3];
+		int32_t offset;

-		if (def_reg != IR_REG_NONE) {
+		IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE);
+		if (op2_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
+			offset = 0;
+		} else {
+			IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
+			op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
+		}
+		if (insn->op3) {
+			/* large struct argument */
 			IR_ASSERT(type == IR_ADDR);
 			int align = IR_VA_ARG_ALIGN(insn->op3);
+			int size = IR_VA_ARG_SIZE(insn->op3);

+			|	mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))]
 			if (align > (int)sizeof(void*)) {
 				|	add Ra(tmp_reg), (align-1)
 				|	and Ra(tmp_reg), ~(align-1)
 			}
-			|	mov Ra(def_reg), Ra(tmp_reg)
-		}
-		|	add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*))
-	}
-#endif
-	|	mov aword [Ra(op2_reg)+offset], Ra(tmp_reg)
-	if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
-		ir_emit_store(ctx, type, def, def_reg);
-	}
-#elif defined(IR_TARGET_X64)
-|.if X64
-	ir_backend_data *data = ctx->data;
-	dasm_State **Dst = &data->dasm_state;
-	ir_type type = insn->type;
-	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
-	ir_reg op2_reg = ctx->regs[def][2];
-	ir_reg tmp_reg = ctx->regs[def][3];
-	int32_t offset;
-
-	IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE);
-	if (op2_reg != IR_REG_NONE) {
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_NUM(op2_reg);
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
-		}
-		offset = 0;
-	} else {
-		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
-		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
-	}
-	if (insn->op3) {
-		/* long struct arguemnt */
-		IR_ASSERT(type == IR_ADDR);
-		int align = IR_VA_ARG_ALIGN(insn->op3);
-		int size = IR_VA_ARG_SIZE(insn->op3);
-
-		|	mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))]
-		if (align > (int)sizeof(void*)) {
-			|	add Ra(tmp_reg), (align-1)
-			|	and Ra(tmp_reg), ~(align-1)
-		}
-		if (def_reg != IR_REG_NONE) {
-			|	mov Ra(def_reg), Ra(tmp_reg)
-		}
-		|	add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*))
-		|	mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg)
-	} else if (IR_IS_TYPE_INT(type)) {
-		|	mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))]
-		|	cmp Rd(tmp_reg), sizeof(void*)*IR_REG_INT_ARGS
-		|	jge >1
-		|	add Rd(tmp_reg), sizeof(void*)
-		|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg)
-		|	add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))]
-		|	jmp >2
-		|1:
-		|	mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))]
-		|	add Ra(tmp_reg), sizeof(void*)
-		|	mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg)
-		|2:
-		if (def_reg != IR_REG_NONE) {
-			if (ir_type_size[type] == 8) {
-				|	mov Rq(def_reg), qword [Ra(tmp_reg)-sizeof(void*)]
-			} else {
-				|	mov Rd(def_reg), dword [Ra(tmp_reg)-sizeof(void*)]
+			if (def_reg != IR_REG_NONE) {
+				|	mov Ra(def_reg), Ra(tmp_reg)
+			}
+			|	add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*))
+			|	mov aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))], Ra(tmp_reg)
+		} else if (IR_IS_TYPE_INT(type)) {
+			|	mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))]
+			|	cmp Rd(tmp_reg), sizeof(void*) * cc->int_param_regs_count
+			|	jge >1
+			|	add Rd(tmp_reg), sizeof(void*)
+			|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))], Rd(tmp_reg)
+			|	add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, reg_save_area))]
+			|	jmp >2
+			|1:
+			|	mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))]
+			|	add Ra(tmp_reg), sizeof(void*)
+			|	mov aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))], Ra(tmp_reg)
+			|2:
+			if (def_reg != IR_REG_NONE) {
+				if (ir_type_size[type] == 8) {
+					|	mov Rq(def_reg), qword [Ra(tmp_reg)-sizeof(void*)]
+				} else {
+					|	mov Rd(def_reg), dword [Ra(tmp_reg)-sizeof(void*)]
+				}
 			}
+		} else {
+			|	mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))]
+			|	cmp Rd(tmp_reg), sizeof(void*) * cc->int_param_regs_count + 16 * cc->fp_param_regs_count
+			|	jge >1
+			|	add Rd(tmp_reg), 16
+			|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))], Rd(tmp_reg)
+			|	add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, reg_save_area))]
+			if (def_reg != IR_REG_NONE) {
+				ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, -16));
+			}
+			|	jmp >2
+			|1:
+			|	mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))]
+			if (def_reg != IR_REG_NONE) {
+				ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0));
+			}
+			|	add Ra(tmp_reg), 8
+			|	mov aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))], Ra(tmp_reg)
+			|2:
 		}
-	} else {
-		|	mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))]
-		|	cmp Rd(tmp_reg), sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS
-		|	jge >1
-		|	add Rd(tmp_reg), 16
-		|	mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], Rd(tmp_reg)
-		|	add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))]
-		if (def_reg != IR_REG_NONE) {
-			ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, -16));
-		}
-		|	jmp >2
-		|1:
-		|	mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))]
-		if (def_reg != IR_REG_NONE) {
-			ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0));
+		if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
+			ir_emit_store(ctx, type, def, def_reg);
 		}
-		|	add Ra(tmp_reg), 8
-		|	mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg)
-		|2:
-	}
-	if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
-		ir_emit_store(ctx, type, def, def_reg);
-	}
 |.endif
-#else
-	IR_ASSERT(0 && "NIY va_arg");
 #endif
+	}
 }
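
Note: the emitted cmp/jge sequence for the integer SysV case computes the
textbook va_arg algorithm. A C model of the same logic (sketch; types are
illustrative):

    static long va_arg_i64_sketch(ir_x86_64_sysv_va_list *ap, int gp_regs_count)
    {
        long v;

        if (ap->gp_offset < sizeof(void*) * (size_t)gp_regs_count) {
            /* argument was passed in a GP register and spilled to the save area */
            v = *(long*)((char*)ap->reg_save_area + ap->gp_offset);
            ap->gp_offset += sizeof(void*);
        } else {
            /* argument was passed on the stack */
            v = *(long*)ap->overflow_arg_area;
            ap->overflow_arg_area = (char*)ap->overflow_arg_area + sizeof(void*);
        }
        return v;
    }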

 static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
@@ -9104,7 +9262,9 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
 				val = &ctx->ir_base[use_insn->op2];
 				IR_ASSERT(!IR_IS_SYM_CONST(val->op));
 				label = ir_skip_empty_target_blocks(ctx, use_block);
-				if (IR_IS_32BIT(type, val->val)) {
+				if (val->val.u64 == 0) {
+					|	ASM_REG_REG_OP test, type, op2_reg, op2_reg
+				} else if (IR_IS_32BIT(type, val->val)) {
 					|	ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32
 				} else {
 					IR_ASSERT(sizeof(void*) == 8);
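
Note: a small peephole for switch dispatch: a comparison against the
constant 0 now uses `test reg, reg`, which sets the same flags as
`cmp reg, 0` with a shorter encoding (no immediate):

    /* val == 0:   test eax, eax    ; 2 bytes
     * otherwise:  cmp  eax, imm32  ; up to 6 bytes */
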
@@ -9158,25 +9318,14 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
 	}
 }

-static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int *copy_stack_ptr)
+static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, const ir_call_conv_dsc *cc, int *copy_stack_ptr)
 {
 	int j, n;
 	ir_type type;
 	int int_param = 0;
 	int fp_param = 0;
-	int int_reg_params_count = IR_REG_INT_ARGS;
-	int fp_reg_params_count = IR_REG_FP_ARGS;
 	int32_t used_stack = 0;
-#ifdef _WIN64
 	int32_t copy_stack = 0;
-#endif
-
-#ifdef IR_HAVE_FASTCALL
-	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
-		int_reg_params_count = IR_REG_INT_FCARGS;
-		fp_reg_params_count = IR_REG_FP_FCARGS;
-	}
-#endif

 	n = insn->inputs_count;
 	for (j = 3; j <= n; j++) {
@@ -9187,55 +9336,49 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int *copy_stack_pt
 				int size = arg->op2;
 				int align = arg->op3;

-#ifdef _WIN64
-				copy_stack += size;
-				align = IR_MAX((int)sizeof(void*), align);
-				copy_stack = IR_ALIGNED_SIZE(copy_stack, align);
-				type = IR_ADDR;
-#else
-				align = IR_MAX((int)sizeof(void*), align);
-				used_stack = IR_ALIGNED_SIZE(used_stack, align);
-				used_stack += size;
-				used_stack = IR_ALIGNED_SIZE(used_stack, sizeof(void*));
-				continue;
-#endif
+				if (!cc->pass_struct_by_val) {
+					copy_stack += size;
+					align = IR_MAX((int)sizeof(void*), align);
+					copy_stack = IR_ALIGNED_SIZE(copy_stack, align);
+					type = IR_ADDR;
+				} else {
+					align = IR_MAX((int)sizeof(void*), align);
+					used_stack = IR_ALIGNED_SIZE(used_stack, align);
+					used_stack += size;
+					used_stack = IR_ALIGNED_SIZE(used_stack, sizeof(void*));
+					continue;
+				}
 			}
-			if (int_param >= int_reg_params_count) {
+			if (int_param >= cc->int_param_regs_count) {
 				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
 			}
 			int_param++;
-#ifdef _WIN64
-			/* WIN64 calling convention use common couter for int and fp registers */
-			fp_param++;
-#endif
+			if (cc->shadow_param_regs) {
+				fp_param++;
+			}
 		} else {
 			IR_ASSERT(IR_IS_TYPE_FP(type));
-			if (fp_param >= fp_reg_params_count) {
+			if (fp_param >= cc->fp_param_regs_count) {
 				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
 			}
 			fp_param++;
-#ifdef _WIN64
-			/* WIN64 calling convention use common couter for int and fp registers */
-			int_param++;
-#endif
+			if (cc->shadow_param_regs) {
+				int_param++;
+			}
 		}
 	}

 	/* Reserved "home space" or "shadow store" for register arguments (used in Windows64 ABI) */
-	used_stack += IR_SHADOW_ARGS;
+	used_stack += cc->shadow_store_size;

-#ifdef _WIN64
 	copy_stack = IR_ALIGNED_SIZE(copy_stack, 16);
 	used_stack += copy_stack;
 	*copy_stack_ptr = copy_stack;
-#else
-	*copy_stack_ptr = 0;
-#endif

 	return used_stack;
 }
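
Note: a worked example of the accounting above, assuming the Win64
descriptor (4 GP argument registers, shadow_store_size = 32). A call with
six pointer arguments passes two of them on the stack:

    /* used_stack = 2 * sizeof(void*)   stack-passed args (16)
     *            + shadow_store_size   home space        (32)
     *            = 48 bytes, rounded up to 16-byte alignment by the caller */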

-static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg)
+static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, const ir_proto_t *proto, const ir_call_conv_dsc *cc, ir_reg tmp_reg)
 {
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
@@ -9247,11 +9390,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 	int int_param = 0;
 	int fp_param = 0;
 	int count = 0;
-	int int_reg_params_count = IR_REG_INT_ARGS;
-	int fp_reg_params_count = IR_REG_FP_ARGS;
-	const int8_t *int_reg_params = _ir_int_reg_params;
-	const int8_t *fp_reg_params = _ir_fp_reg_params;
-	int32_t used_stack, copy_stack = 0, stack_offset = IR_SHADOW_ARGS;
+	int32_t used_stack, copy_stack = 0, stack_offset = cc->shadow_store_size;
 	ir_copy *copies;
 	bool do_pass3 = 0;
 	/* For temporaries we may use any scratch registers except for registers used for parameters */
@@ -9266,40 +9405,24 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 		tmp_reg = IR_REG_RAX;
 	}

-#ifdef IR_HAVE_FASTCALL
-	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
-		int_reg_params_count = IR_REG_INT_FCARGS;
-		fp_reg_params_count = IR_REG_FP_FCARGS;
-		int_reg_params = _ir_int_fc_reg_params;
-		fp_reg_params = _ir_fp_fc_reg_params;
-	}
-#endif
-
 	if (insn->op == IR_CALL
-	 && (ctx->flags & IR_PREALLOCATED_STACK)
-#ifdef IR_HAVE_FASTCALL
-	 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */
-#endif
-	) {
-		// TODO: support for preallocated stack
-#ifdef _WIN64
-		used_stack = ir_call_used_stack(ctx, insn, &copy_stack);
-#else
-		used_stack = 0;
-#endif
+	 && (ctx->flags2 & IR_PREALLOCATED_STACK)
+	 && !cc->cleanup_stack_by_callee) {
+		if (!cc->pass_struct_by_val) {
+			used_stack = ir_call_used_stack(ctx, insn, cc, &copy_stack);
+		} else {
+			used_stack = 0;
+		}
 	} else {
-		used_stack = ir_call_used_stack(ctx, insn, &copy_stack);
-		if (IR_SHADOW_ARGS
+		used_stack = ir_call_used_stack(ctx, insn, cc, &copy_stack);
+		if (cc->shadow_store_size
 		 && insn->op == IR_TAILCALL
-		 && used_stack == IR_SHADOW_ARGS) {
+		 && used_stack == cc->shadow_store_size) {
 			used_stack = 0;
 		}
 		if (ctx->fixed_call_stack_size
 		 && used_stack <= ctx->fixed_call_stack_size
-#ifdef IR_HAVE_FASTCALL
-		 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */
-#endif
-		) {
+		 && !cc->cleanup_stack_by_callee) {
 			used_stack = 0;
 		} else {
 			/* Stack must be 16 byte aligned */
@@ -9311,10 +9434,10 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 		}
 	}

-#ifdef _WIN64
-|.if X64
 	if (copy_stack) {
 		/* Copy struct arguments */
+		IR_ASSERT(sizeof(void*) == 8);
+|.if X64
 		int copy_stack_offset = 0;

 		for (j = 3; j <= n; j++) {
@@ -9347,9 +9470,8 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 				|	rep; movsb
 			}
 		}
-	}
 |.endif
-#endif
+	}

 	/* 1. move all register arguments that should be passed through stack
 	 *    and collect arguments that should be passed through registers */
@@ -9360,8 +9482,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 		arg_insn = &ctx->ir_base[arg];
 		type = arg_insn->type;
 		if (IR_IS_TYPE_INT(type)) {
-#ifndef _WIN64
-			if (arg_insn->op == IR_ARGVAL) {
+			if (arg_insn->op == IR_ARGVAL && cc->pass_struct_by_val) {
 				int size = arg_insn->op2;
 				int align = arg_insn->op3;
 				align = IR_MAX((int)sizeof(void*), align);
@@ -9408,38 +9529,35 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 				stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
 				continue;
 			}
-#endif
-			if (int_param < int_reg_params_count) {
-				dst_reg = int_reg_params[int_param];
+			if (int_param < cc->int_param_regs_count) {
+				dst_reg = cc->int_param_regs[int_param];
 			} else {
 				dst_reg = IR_REG_NONE; /* pass argument through stack */
 			}
 			int_param++;
-#ifdef _WIN64
-			/* WIN64 calling convention use common couter for int and fp registers */
-			fp_param++;
-			if (arg_insn->op == IR_ARGVAL) {
+			if (cc->shadow_param_regs) {
+				fp_param++;
+			}
+			if (arg_insn->op == IR_ARGVAL && !cc->pass_struct_by_val) {
 				do_pass3 = 3;
 				continue;
 			}
-#endif
 		} else {
 			IR_ASSERT(IR_IS_TYPE_FP(type));
-			if (fp_param < fp_reg_params_count) {
-				dst_reg = fp_reg_params[fp_param];
+			if (fp_param < cc->fp_param_regs_count) {
+				dst_reg = cc->fp_param_regs[fp_param];
 			} else {
 				dst_reg = IR_REG_NONE; /* pass argument through stack */
 			}
 			fp_param++;
-#ifdef _WIN64
-			/* WIN64 calling convention use common couter for int and fp registers */
-			int_param++;
-#endif
+			if (cc->shadow_param_regs) {
+				int_param++;
+			}
 		}
 		if (dst_reg != IR_REG_NONE) {
 			if (IR_IS_CONST_REF(arg) ||
 			    src_reg == IR_REG_NONE ||
-			    (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(IR_REGSET_PRESERVED, IR_REG_NUM(src_reg)))) {
+			    (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(cc->preserved_regs, IR_REG_NUM(src_reg)))) {
 				/* delay CONST->REG and MEM->REG moves to third pass */
 				do_pass3 = 1;
 			} else {
@@ -9474,11 +9592,9 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg

 	/* 3. move the remaining memory and immediate values */
 	if (do_pass3) {
-#ifdef _WIN64
 		int copy_stack_offset = 0;
-#endif

-		stack_offset = IR_SHADOW_ARGS;
+		stack_offset = cc->shadow_store_size;
 		int_param = 0;
 		fp_param = 0;
 		for (j = 3; j <= n; j++) {
@@ -9491,60 +9607,57 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 					int size = arg_insn->op2;
 					int align = arg_insn->op3;

-#ifndef _WIN64
-					align = IR_MAX((int)sizeof(void*), align);
-					stack_offset = IR_ALIGNED_SIZE(stack_offset, align);
-					stack_offset += size;
-					stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
-					continue;
-#else
-|.if X64
-					/* pass pointer to the copy on stack */
-					copy_stack_offset += size;
-					align = IR_MAX((int)sizeof(void*), align);
-					copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align);
-					if (int_param < int_reg_params_count) {
-						dst_reg = int_reg_params[int_param];
-						|	lea Ra(dst_reg), [rsp + (used_stack - copy_stack_offset)]
+					if (cc->pass_struct_by_val) {
+						align = IR_MAX((int)sizeof(void*), align);
+						stack_offset = IR_ALIGNED_SIZE(stack_offset, align);
+						stack_offset += size;
+						stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
+						continue;
 					} else {
-						|	lea Ra(tmp_reg), [rsp + (used_stack - copy_stack_offset)]
-						ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg);
-						stack_offset += sizeof(void*);
+						/* pass pointer to the copy on stack */
+						copy_stack_offset += size;
+						align = IR_MAX((int)sizeof(void*), align);
+						copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align);
+						if (int_param < cc->int_param_regs_count) {
+							dst_reg = cc->int_param_regs[int_param];
+							|	lea Ra(dst_reg), [r4 + (used_stack - copy_stack_offset)]
+						} else {
+							|	lea Ra(tmp_reg), [r4 + (used_stack - copy_stack_offset)]
+							ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg);
+							stack_offset += sizeof(void*);
+						}
+						int_param++;
+						if (cc->shadow_param_regs) {
+							fp_param++;
+						}
+						continue;
 					}
-					int_param++;
-					/* WIN64 calling convention use common couter for int and fp registers */
-					fp_param++;
-					continue;
-|.endif
-#endif
 				}
-				if (int_param < int_reg_params_count) {
-					dst_reg = int_reg_params[int_param];
+				if (int_param < cc->int_param_regs_count) {
+					dst_reg = cc->int_param_regs[int_param];
 				} else {
 					dst_reg = IR_REG_NONE; /* argument already passed through stack */
 				}
 				int_param++;
-#ifdef _WIN64
-				/* WIN64 calling convention use common couter for int and fp registers */
-				fp_param++;
-#endif
+				if (cc->shadow_param_regs) {
+					fp_param++;
+				}
 			} else {
 				IR_ASSERT(IR_IS_TYPE_FP(type));
-				if (fp_param < fp_reg_params_count) {
-					dst_reg = fp_reg_params[fp_param];
+				if (fp_param < cc->fp_param_regs_count) {
+					dst_reg = cc->fp_param_regs[fp_param];
 				} else {
 					dst_reg = IR_REG_NONE; /* argument already passed through stack */
 				}
 				fp_param++;
-#ifdef _WIN64
-				/* WIN64 calling convention use common couter for int and fp registers */
-				int_param++;
-#endif
+				if (cc->shadow_param_regs) {
+					int_param++;
+				}
 			}
 			if (dst_reg != IR_REG_NONE) {
 				if (IR_IS_CONST_REF(arg) ||
 				    src_reg == IR_REG_NONE ||
-				    (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(IR_REGSET_PRESERVED, IR_REG_NUM(src_reg)))) {
+				    (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(cc->preserved_regs, IR_REG_NUM(src_reg)))) {
 					if (IR_IS_TYPE_INT(type)) {
 						if (IR_IS_CONST_REF(arg)) {
 							if (type == IR_I8 || type == IR_I16) {
@@ -9612,17 +9725,16 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 		}
 	}

-#ifdef _WIN64
 	/* WIN64 calling convention requires duplication of parameters passed in FP registers into GP ones */
-	if (ir_is_vararg(ctx, insn)) {
-		n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
+	if (proto && (proto->flags & IR_VARARG_FUNC) && cc->shadow_param_regs) {
+		n = IR_MIN(n, IR_MIN(cc->int_param_regs_count, cc->fp_param_regs_count) + 2);
 		for (j = 3; j <= n; j++) {
 			arg = ir_insn_op(insn, j);
 			arg_insn = &ctx->ir_base[arg];
 			type = arg_insn->type;
 			if (IR_IS_TYPE_FP(type)) {
-				src_reg = fp_reg_params[j-3];
-				dst_reg = int_reg_params[j-3];
+				src_reg = cc->fp_param_regs[j-3];
+				dst_reg = cc->int_param_regs[j-3];
 |.if X64
 				if (ctx->mflags & IR_X86_AVX) {
 					|	vmovd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST)
@@ -9633,41 +9745,46 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 			}
 		}
 	}
-	if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) {
+
+	if (insn->op == IR_CALL && (ctx->flags2 & IR_PREALLOCATED_STACK)) {
 		used_stack = 0;
 	}
-#endif
-#ifdef IR_REG_VARARG_FP_REGS
-	/* set hidden argument to specify the number of vector registers used */
-	if (ir_is_vararg(ctx, insn)) {
-		fp_param = IR_MIN(fp_param, fp_reg_params_count);
-		|	mov Rd(IR_REG_VARARG_FP_REGS), fp_param
+
+	if (proto && (proto->flags & IR_VARARG_FUNC) && cc->fp_varargs_reg != IR_REG_NONE) {
+		/* set hidden argument to specify the number of vector registers used */
+		fp_param = IR_MIN(fp_param, cc->fp_param_regs_count);
+		if (fp_param) {
+			|	mov Rd(cc->fp_varargs_reg), fp_param
+		} else {
+			|	xor Rd(cc->fp_varargs_reg), Rd(cc->fp_varargs_reg)
+		}
 	}
-#endif

 	return used_stack;
 }
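
Note: for vararg calls, cc->fp_varargs_reg carries the SysV x86-64 hidden
argument (AL in practice) with the number of vector registers used; the
prologue's `test al, al / je` above consumes it. The zero case now prefers
`xor` as the shorter, dependency-breaking encoding:

    /* fp_param == 0:  xor eax, eax    ; 2 bytes
     * fp_param  > 0:  mov eax, imm32  ; 5 bytes */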

-static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack)
+static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, const ir_proto_t *proto, const ir_call_conv_dsc *cc, int32_t used_stack)
 {
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
 	ir_reg def_reg;
+	ir_ref func = insn->op2;

-	if (IR_IS_CONST_REF(insn->op2)) {
-		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);
+	if (!IR_IS_CONST_REF(func) && ctx->rules[func] == (IR_FUSED | IR_SIMPLE | IR_PROTO)) {
+		func = ctx->ir_base[func].op1;
+	}
+	if (IR_IS_CONST_REF(func)) {
+		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[func]);

 		if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) {
 			|	call aword &addr
 		} else {
 |.if X64
-||			ir_reg tmp_reg = IR_REG_RAX;
-
-#ifdef IR_REG_VARARG_FP_REGS
-||			if (ir_is_vararg(ctx, insn)) {
-||				tmp_reg = IR_REG_R11;
+||			ir_reg tmp_reg = cc->int_ret_reg;
+||
+||			if (proto && (proto->flags & IR_VARARG_FUNC) && tmp_reg == cc->fp_varargs_reg) {
+||				tmp_reg = IR_REG_R11; // TODO: avoid using a hardcoded temporary register?
 ||			}
-#endif
 ||			if (IR_IS_SIGNED_32BIT(addr)) {
 				|	mov Rq(tmp_reg), ((ptrdiff_t)addr)    // 0x48 0xc7 0xc0 <imm-32-bit>
 ||			} else {
@@ -9682,16 +9799,16 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used
 		if (op2_reg != IR_REG_NONE) {
 			if (IR_REG_SPILLED(op2_reg)) {
 				op2_reg = IR_REG_NUM(op2_reg);
-				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, func);
 			}
 			|	call Ra(op2_reg)
 		} else {
 			ir_mem mem;

-			if (ir_rule(ctx, insn->op2) & IR_FUSED) {
-				mem = ir_fuse_load(ctx, def, insn->op2);
+			if (ir_rule(ctx, func) & IR_FUSED) {
+				mem = ir_fuse_load(ctx, def, func);
 			} else {
-				mem = ir_ref_spill_slot(ctx, insn->op2);
+				mem = ir_ref_spill_slot(ctx, func);
 			}

 			|	ASM_TMEM_OP call, aword, mem
@@ -9702,7 +9819,7 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used
 		int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16);

 		ctx->call_stack_size -= aligned_stack;
-		if (ir_is_fastcall(ctx, insn)) {
+		if (cc->cleanup_stack_by_callee) {
 			aligned_stack -= used_stack;
 			if (aligned_stack) {
 				|	add Ra(IR_REG_RSP), aligned_stack
@@ -9716,31 +9833,32 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used
 		if (IR_IS_TYPE_INT(insn->type)) {
 			def_reg = IR_REG_NUM(ctx->regs[def][0]);
 			if (def_reg != IR_REG_NONE) {
-				if (def_reg != IR_REG_INT_RET1) {
-					ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
+				if (def_reg != cc->int_ret_reg) {
+					ir_emit_mov(ctx, insn->type, def_reg, cc->int_ret_reg);
 				}
 				if (IR_REG_SPILLED(ctx->regs[def][0])) {
 					ir_emit_store(ctx, insn->type, def, def_reg);
 				}
 			} else if (ctx->use_lists[def].count > 1) {
-				ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1);
+				ir_emit_store(ctx, insn->type, def, cc->int_ret_reg);
 			}
 		} else {
 			IR_ASSERT(IR_IS_TYPE_FP(insn->type));
 			def_reg = IR_REG_NUM(ctx->regs[def][0]);
-#ifdef IR_REG_FP_RET1
-			if (def_reg != IR_REG_NONE) {
-				if (def_reg != IR_REG_FP_RET1) {
-					ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1);
-				}
-				if (IR_REG_SPILLED(ctx->regs[def][0])) {
-					ir_emit_store(ctx, insn->type, def, def_reg);
+			if (cc->fp_ret_reg != IR_REG_NONE) {
+				if (def_reg != IR_REG_NONE) {
+					if (def_reg != cc->fp_ret_reg) {
+						ir_emit_fp_mov(ctx, insn->type, def_reg, cc->fp_ret_reg);
+					}
+					if (IR_REG_SPILLED(ctx->regs[def][0])) {
+						ir_emit_store(ctx, insn->type, def, def_reg);
+					}
+				} else if (ctx->use_lists[def].count > 1) {
+					ir_emit_store(ctx, insn->type, def, cc->fp_ret_reg);
 				}
-			} else if (ctx->use_lists[def].count > 1) {
-				ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1);
 			}
-#else
-			if (ctx->use_lists[def].count > 1) {
+#ifdef IR_TARGET_X86
+			if (ctx->use_lists[def].count > 1 && cc->fp_ret_reg == IR_REG_NONE) {
 				int32_t offset;
 				ir_reg fp;

@@ -9776,18 +9894,23 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used

 static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
-	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
-	ir_emit_call_ex(ctx, def, insn, used_stack);
+	const ir_proto_t *proto = ir_call_proto(ctx, insn);
+	const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
+	int32_t used_stack = ir_emit_arguments(ctx, def, insn, proto, cc, ctx->regs[def][1]);
+	ir_emit_call_ex(ctx, def, insn, proto, cc, used_stack);
 }

 static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
-	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
+	const ir_proto_t *proto = ir_call_proto(ctx, insn);
+	const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
+	int32_t used_stack = ir_emit_arguments(ctx, def, insn, proto, cc, ctx->regs[def][1]);
+	ir_ref func = insn->op2;

 	if (used_stack != 0) {
-		ir_emit_call_ex(ctx, def, insn, used_stack);
+		ir_emit_call_ex(ctx, def, insn, proto, cc, used_stack);
 		ir_emit_return_void(ctx);
 		return;
 	}
@@ -9797,7 +9920,10 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)

 	ir_reg op2_reg = IR_REG_NONE;
 	ir_mem mem = IR_MEM_B(IR_REG_NONE);
-	if (!IR_IS_CONST_REF(insn->op2)) {
+	if (!IR_IS_CONST_REF(func) && ctx->rules[func] == (IR_FUSED | IR_SIMPLE | IR_PROTO)) {
+		func = ctx->ir_base[func].op1;
+	}
+	if (!IR_IS_CONST_REF(func)) {
 		op2_reg = ctx->regs[def][2];

 		ir_regset preserved_regs = (ir_regset)ctx->used_preserved_regs | IR_REGSET(IR_REG_STACK_POINTER);
@@ -9807,7 +9933,7 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)

 		bool is_spill_slot = op2_reg != IR_REG_NONE
 			&& IR_REG_SPILLED(op2_reg)
-			&& ctx->vregs[insn->op2];
+			&& ctx->vregs[func];

 		if (op2_reg != IR_REG_NONE && !is_spill_slot) {
 			if (IR_REGSET_IN(preserved_regs, IR_REG_NUM(op2_reg))) {
@@ -9815,20 +9941,20 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 				op2_reg = IR_REG_RAX;

 				if (IR_REG_SPILLED(orig_op2_reg)) {
-					ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+					ir_emit_load(ctx, IR_ADDR, op2_reg, func);
 				} else {
-					ir_type type = ctx->ir_base[insn->op2].type;
+					ir_type type = ctx->ir_base[func].type;
 					| ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg)
 				}
 			} else {
 				op2_reg = IR_REG_NUM(op2_reg);
 			}
 		} else {
-			if (ir_rule(ctx, insn->op2) & IR_FUSED) {
+			if (ir_rule(ctx, func) & IR_FUSED) {
 				IR_ASSERT(op2_reg == IR_REG_NONE);
-				mem = ir_fuse_load(ctx, def, insn->op2);
+				mem = ir_fuse_load(ctx, def, func);
 			} else {
-				mem = ir_ref_spill_slot(ctx, insn->op2);
+				mem = ir_ref_spill_slot(ctx, func);
 			}
 			ir_reg base = IR_MEM_BASE(mem);
 			ir_reg index = IR_MEM_INDEX(mem);
@@ -9836,7 +9962,7 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 					(index != IR_REG_NONE && IR_REGSET_IN(preserved_regs, index))) {
 				op2_reg = IR_REG_RAX;

-				ir_type type = ctx->ir_base[insn->op2].type;
+				ir_type type = ctx->ir_base[func].type;
 				ir_emit_load_mem_int(ctx, type, op2_reg, mem);
 			} else {
 				op2_reg = IR_REG_NONE;
@@ -9846,20 +9972,18 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)

 	ir_emit_epilogue(ctx);

-	if (IR_IS_CONST_REF(insn->op2)) {
-		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);
+	if (IR_IS_CONST_REF(func)) {
+		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[func]);

 		if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) {
 			|	jmp aword &addr
 		} else {
 |.if X64
-||			ir_reg tmp_reg = IR_REG_RAX;
-
-#ifdef IR_REG_VARARG_FP_REGS
-||			if (ir_is_vararg(ctx, insn)) {
-||				tmp_reg = IR_REG_R11;
+||			ir_reg tmp_reg = cc->int_ret_reg;
+||
+||			if (proto && (proto->flags & IR_VARARG_FUNC) && tmp_reg == cc->fp_varargs_reg) {
+||				tmp_reg = IR_REG_R11; // TODO: avoid using a hardcoded temporary register?
 ||			}
-#endif
 ||			if (IR_IS_SIGNED_32BIT(addr)) {
 				|	mov Rq(tmp_reg), ((ptrdiff_t)addr)    // 0x48 0xc7 0xc0 <imm-32-bit>
 ||			} else {
@@ -10020,6 +10144,20 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next
 							|	jp &addr
 							|	jbe =>target
 							break;
+						case IR_ULT:
+							|	jp =>target
+							|	jae =>target
+							break;
+						case IR_UGE:
+							|	jb =>target
+							break;
+						case IR_ULE:
+							|	jp =>target
+							|	ja =>target
+							break;
+						case IR_UGT:
+							|	jbe =>target
+							break;
 						case IR_ORDERED:
 							|	jnp =>target
 							break;
@@ -10105,6 +10243,20 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next
 						|	jp &addr
 						|	jbe &target_addr
 						break;
+					case IR_ULT:
+						|	jp &target_addr
+						|	jae &target_addr
+						break;
+					case IR_UGE:
+						|	jb &target_addr
+						break;
+					case IR_ULE:
+						|	jp &target_addr
+						|	ja &target_addr
+						break;
+					case IR_UGT:
+						|	jbe &target_addr
+						break;
 					case IR_ORDERED:
 						|	jnp &target_addr
 						break;
@@ -10190,16 +10342,26 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next
 			case IR_GT:
 				|	ja &addr
 				break;
+			case IR_ULT:
+				|	jb &addr
+				break;
+			case IR_UGE:
+				|	jp &addr
+				|	jae &addr
+				break;
+			case IR_ULE:
+				|	jbe &addr
+				break;
+			case IR_UGT:
+				|	jp &addr
+				|	ja &addr
+				break;
 			case IR_ORDERED:
 				|	jp &addr
 				break;
 			case IR_UNORDERED:
 				|	jnp &addr
 				break;
-//			case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
-//			case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
-//			case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
-//			case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break;
 		}
 	}
 	return 0;
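
Note on the new unordered cases above: after ucomiss/ucomisd an unordered
operand pair sets ZF = PF = CF = 1, so `jb`/`jbe` alone already cover the
unordered-or-less predicates, while the unordered-or-greater ones need an
extra `jp` to catch the NaN case that `jae`/`ja` would miss:

    /* ucomisd a, b:  a <  b     => CF=1
     *                a == b     => ZF=1
     *                a >  b     => ZF=CF=PF=0
     *                unordered  => ZF=PF=CF=1 */
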
@@ -10348,7 +10510,11 @@ static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *i
 	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);

 	if (insn->op == IR_GUARD) {
-		op ^= 1; // reverse
+		if (op == IR_EQ || op == IR_NE || op == IR_ORDERED || op == IR_UNORDERED) {
+			op ^= 1; // reverse
+		} else {
+			op ^= 5; // reverse
+		}
 	}
 	return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 0, 0);
 }
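
Note: the guard reversal can no longer be a blanket `op ^= 1`. EQ/NE and
ORDERED/UNORDERED still negate to the adjacent opcode, but negating an
ordered FP comparison must yield its unordered complement (the guard has to
fire on NaN as well), and vice versa. Assuming the opcode numbering places
the ordered and unordered comparison groups five apart, `op ^= 5` realizes:

    /* LT <-> UGE,  GE <-> ULT,  LE <-> UGT,  GT <-> ULE
     * e.g. !(a < b) is "a >= b or unordered(a, b)" under IEEE 754 */
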
@@ -10565,6 +10731,11 @@ static void ir_emit_sse_round(ir_ctx *ctx, ir_ref def, ir_insn *insn, int round_
 static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
 	ir_backend_data *data = ctx->data;
+#ifdef IR_TARGET_X86
+	const ir_call_conv_dsc *cc = &ir_call_conv_x86_fastcall;
+#else
+	const ir_call_conv_dsc *cc = &ir_call_conv_default;
+#endif
 	dasm_State **Dst = &data->dasm_state;
 	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);

@@ -10604,13 +10775,13 @@ static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	|	movsd qword [rsp+16*8+14*8], xmm14
 	|	movsd qword [rsp+16*8+15*8], xmm15
 	|
-	|	mov Ra(IR_REG_INT_ARG2), rsp
-	|	lea Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+16]
-	|	mov aword [rsp+4*8], Ra(IR_REG_INT_ARG1)
-	|	mov Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+8]
-	|.if X64WIN
-	|	sub rsp, 32 /* shadow space */
-	|.endif
+	|	mov Ra(cc->int_param_regs[1]), rsp
+	|	lea Ra(cc->int_param_regs[0]), [rsp+16*8+16*8+16]
+	|	mov aword [rsp+4*8], Ra(cc->int_param_regs[0])
+	|	mov Ra(cc->int_param_regs[0]), [rsp+16*8+16*8+8]
+	||	if (cc->shadow_store_size) {
+	|		sub rsp, cc->shadow_store_size /* shadow space */
+	||	}
 	|.else
 	|	sub esp, 8*4+8*8+12 /* CPU regs + SSE regs */
 	|	mov aword [esp+0*4], eax
@@ -10629,10 +10800,10 @@ static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	|	movsd qword [esp+8*4+6*8], xmm6
 	|	movsd qword [esp+8*4+7*8], xmm7
 	|
-	|	mov Ra(IR_REG_INT_FCARG2), esp
-	|	lea Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+16]
-	|	mov aword [esp+4*4], Ra(IR_REG_INT_FCARG1)
-	|	mov Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+12]
+	|	mov Ra(cc->int_param_regs[1]), esp
+	|	lea Ra(cc->int_param_regs[0]), [esp+8*4+8*8+16]
+	|	mov aword [esp+4*4], Ra(cc->int_param_regs[0])
+	|	mov Ra(cc->int_param_regs[0]), [esp+8*4+8*8+12]
 	|.endif

 	if (IR_IS_CONST_REF(insn->op2)) {
@@ -10655,16 +10826,14 @@ static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	}

 	//  restore SP
-	|.if X64WIN
-	|	add rsp, 32+16*8+16*8+16 /* shadow space + CPU regs + SSE regs */
-	|.elif X64
-	|	add rsp, 16*8+16*8+16 /* CPU regs + SSE regs */
+	|.if X64
+	|	add rsp, cc->shadow_store_size+16*8+16*8+16 /* shadow space + CPU regs + SSE regs */
 	|.else
 	|	add esp, 8*4+8*8+16 /* CPU regs + SSE regs */
 	|.endif

-	if (def_reg != IR_REG_INT_RET1) {
-		ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
+	if (def_reg != cc->int_ret_reg) {
+		ir_emit_mov(ctx, insn->type, def_reg, cc->int_ret_reg);
 	}
 	if (IR_REG_SPILLED(ctx->regs[def][0])) {
 		ir_emit_store(ctx, insn->type, def, def_reg);
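In ir_emit_exitcall() the exit thunk stops open-coding the three conventions: on 32-bit x86 it pins the fastcall descriptor (the thunk really does pass its two arguments in ECX/EDX, as the removed IR_REG_INT_FCARG1/2 lines show), elsewhere it takes the default descriptor, and the WIN64 shadow space becomes the data-driven `cc->shadow_store_size`. The stack restore stays arithmetically identical to the deleted `.if X64WIN` branches; with IR_SHADOW_ARGS == 32 from the deleted header:

    add rsp, cc->shadow_store_size + 16*8 + 16*8 + 16
        ; = 32 + 128 + 128 + 16 = 304 bytes on WIN64
        ; =  0 + 128 + 128 + 16 = 272 bytes on SysV x86-64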
@@ -10710,23 +10879,11 @@ static void ir_emit_load_params(ir_ctx *ctx)
 	int fp_param_num = 0;
 	ir_reg src_reg;
 	ir_reg dst_reg;
-	// TODO: Calling convention specific
-	int int_reg_params_count = IR_REG_INT_ARGS;
-	int fp_reg_params_count = IR_REG_FP_ARGS;
-	const int8_t *int_reg_params = _ir_int_reg_params;
-	const int8_t *fp_reg_params = _ir_fp_reg_params;
+	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
 	int32_t stack_offset = 0;
 	int32_t stack_start = 0;

-#ifdef IR_TARGET_X86
-	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
-		int_reg_params_count = IR_REG_INT_FCARGS;
-		fp_reg_params_count = IR_REG_FP_FCARGS;
-		int_reg_params = _ir_int_fc_reg_params;
-		fp_reg_params = _ir_fp_fc_reg_params;
-	}
-#endif
-
 	if (ctx->flags & IR_USE_FRAME_POINTER) {
 		/* skip old frame pointer and return address */
 		stack_start = sizeof(void*) * 2 + ctx->stack_frame_size;
@@ -10749,27 +10906,25 @@ static void ir_emit_load_params(ir_ctx *ctx)
 					stack_offset += ctx->value_params[insn->op3 - 1].size;
 					stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
 					continue;
-				} else if (int_param_num < int_reg_params_count) {
-					src_reg = int_reg_params[int_param_num];
+				} else if (int_param_num < cc->int_param_regs_count) {
+					src_reg = cc->int_param_regs[int_param_num];
 				} else {
 					src_reg = IR_REG_NONE;
 				}
 				int_param_num++;
-#ifdef _WIN64
-				/* WIN64 calling convention use common couter for int and fp registers */
-				fp_param_num++;
-#endif
+				if (cc->shadow_param_regs) {
+					fp_param_num++;
+				}
 			} else {
-				if (fp_param_num < fp_reg_params_count) {
-					src_reg = fp_reg_params[fp_param_num];
+				if (fp_param_num < cc->fp_param_regs_count) {
+					src_reg = cc->fp_param_regs[fp_param_num];
 				} else {
 					src_reg = IR_REG_NONE;
 				}
 				fp_param_num++;
-#ifdef _WIN64
-				/* WIN64 calling convention use common couter for int and fp registers */
-				int_param_num++;
-#endif
+				if (cc->shadow_param_regs) {
+					int_param_num++;
+				}
 			}
 			if (ctx->vregs[use]) {
 				dst_reg = IR_REG_NUM(ctx->regs[use][0]);
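`cc->shadow_param_regs` replaces the `#ifdef _WIN64` blocks: Windows x64 assigns each positional argument a single slot shared between the GP and FP register files, so consuming either kind advances both counters. A worked example of that standard WIN64 rule (not taken from this diff):

    /* void f(int a, double b, int c);   WIN64 assignment:
     *   a -> RCX   (slot 0, GP side)
     *   b -> XMM1  (slot 1, FP side; XMM0 stays unused)
     *   c -> R8    (slot 2, GP side; RDX stays unused)
     * SysV x86-64 would instead use RDI, XMM0, RSI. */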
@@ -10805,10 +10960,9 @@ static ir_reg ir_get_free_reg(ir_type type, ir_regset available)
 	return IR_REGSET_FIRST(available);
 }

-static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
+static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to, void *dessa_from_block)
 {
-	ir_backend_data *data = ctx->data;
-	ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end;
+	ir_ref ref = ctx->cfg_blocks[(intptr_t)dessa_from_block].end;

 	if (to == 0) {
 		if (IR_IS_TYPE_INT(type)) {
@@ -10844,23 +10998,11 @@ static void ir_fix_param_spills(ir_ctx *ctx)
 	int int_param_num = 0;
 	int fp_param_num = 0;
 	ir_reg src_reg;
-	// TODO: Calling convention specific
-	int int_reg_params_count = IR_REG_INT_ARGS;
-	int fp_reg_params_count = IR_REG_FP_ARGS;
-	const int8_t *int_reg_params = _ir_int_reg_params;
-	const int8_t *fp_reg_params = _ir_fp_reg_params;
+	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
 	int32_t stack_start = 0;
 	int32_t stack_offset = 0;

-#ifdef IR_TARGET_X86
-	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
-		int_reg_params_count = IR_REG_INT_FCARGS;
-		fp_reg_params_count = IR_REG_FP_FCARGS;
-		int_reg_params = _ir_int_fc_reg_params;
-		fp_reg_params = _ir_fp_fc_reg_params;
-	}
-#endif
-
 	if (ctx->flags & IR_USE_FRAME_POINTER) {
 		/* skip old frame pointer and return address */
 		stack_start = sizeof(void*) * 2 + ctx->stack_frame_size;
@@ -10874,8 +11016,7 @@ static void ir_fix_param_spills(ir_ctx *ctx)
 		insn = &ctx->ir_base[use];
 		if (insn->op == IR_PARAM) {
 			if (IR_IS_TYPE_INT(insn->type)) {
-#ifndef _WIN64
-				if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
+				if (ctx->value_params && ctx->value_params[insn->op3 - 1].align && cc->pass_struct_by_val) {
 					/* struct passed by value on stack */
 					size_t align = ctx->value_params[insn->op3 - 1].align;

@@ -10886,28 +11027,25 @@ static void ir_fix_param_spills(ir_ctx *ctx)
 					stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
 					continue;
 				}
-#endif
-				if (int_param_num < int_reg_params_count) {
-					src_reg = int_reg_params[int_param_num];
+				if (int_param_num < cc->int_param_regs_count) {
+					src_reg = cc->int_param_regs[int_param_num];
 				} else {
 					src_reg = IR_REG_NONE;
 				}
 				int_param_num++;
-#ifdef _WIN64
-				/* WIN64 calling convention use common couter for int and fp registers */
-				fp_param_num++;
-#endif
+				if (cc->shadow_param_regs) {
+					fp_param_num++;
+				}
 			} else {
-				if (fp_param_num < fp_reg_params_count) {
-					src_reg = fp_reg_params[fp_param_num];
+				if (fp_param_num < cc->fp_param_regs_count) {
+					src_reg = cc->fp_param_regs[fp_param_num];
 				} else {
 					src_reg = IR_REG_NONE;
 				}
 				fp_param_num++;
-#ifdef _WIN64
-				/* WIN64 calling convention use common couter for int and fp registers */
-				int_param_num++;
-#endif
+				if (cc->shadow_param_regs) {
+					int_param_num++;
+				}
 			}
 			if (src_reg == IR_REG_NONE) {
 				if (ctx->vregs[use]) {
@@ -10927,12 +11065,13 @@ static void ir_fix_param_spills(ir_ctx *ctx)
 		}
 	}

-#ifdef _WIN64
-	/* WIN64 uses shsow area for registers */
-	stack_offset += IR_MIN(int_param_num, int_reg_params_count) * sizeof(void*);
-#endif
-	ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count);
-	ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count);
+	if (cc->shadow_store_size) {
+		/* WIN64 uses shadow area for registers */
+		stack_offset += IR_MIN(int_param_num, cc->int_param_regs_count) * sizeof(void*);
+	}
+
+	ctx->gp_reg_params = IR_MIN(int_param_num, cc->int_param_regs_count);
+	ctx->fp_reg_params = IR_MIN(fp_param_num, cc->fp_param_regs_count);
 	ctx->param_stack_size = stack_offset;
 }

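The trailing fixup in ir_fix_param_spills() is data-driven as well: when the convention reserves shadow ("home") space, the parameter stack area grows by one pointer slot per register argument actually present. For instance, three integer parameters under WIN64 (`int_param_regs_count == 4` per the deleted header):

    stack_offset += IR_MIN(3, 4) * sizeof(void*);  /* 3 * 8 = 24 bytes */
    /* folded into ctx->param_stack_size as home space for RCX, RDX, R8 */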
@@ -10943,17 +11082,20 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 	ir_insn *insn;
 	ir_ref i, n, j, *p;
 	uint32_t *rule, insn_flags;
-	ir_backend_data *data = ctx->data;
 	ir_regset available = 0;
 	ir_target_constraints constraints;
 	uint32_t def_flags;
 	ir_reg reg;
+	ir_backend_data *data = ctx->data;
+	const ir_call_conv_dsc *cc = data->ra_data.cc;
+	ir_regset scratch = ir_scratch_regset[cc->scratch_reg - IR_REG_NUM];

-#ifndef IR_REG_FP_RET1
+#ifdef IR_TARGET_X86
 	if (ctx->flags2 & IR_HAS_FP_RET_SLOT) {
-		ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data);
-	} else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
-		ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data->ra_data);
+		ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE);
+	} else if ((ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE)
+			&& cc->fp_ret_reg == IR_REG_NONE) {
+		ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type);
 	} else {
 		ctx->ret_slot = -1;
 	}
@@ -10986,10 +11128,16 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 				case IR_LOOP_END:
 				case IR_IGOTO_DUP:
 					break;
-#ifndef IR_REG_FP_RET1
+#ifdef IR_TARGET_X86
 				case IR_CALL:
-					if (ctx->ret_slot == -1 && (insn->type == IR_FLOAT || insn->type == IR_DOUBLE)) {
-						ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data);
+					if (ctx->ret_slot == -1
+					 && (insn->type == IR_FLOAT || insn->type == IR_DOUBLE)) {
+						const ir_proto_t *proto = ir_call_proto(ctx, insn);
+						const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
+
+						if (cc->fp_ret_reg == IR_REG_NONE) {
+							ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE);
+						}
 					}
 #endif
 					IR_FALLTHROUGH;
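With IR_REG_FP_RET1 gone, the old `#ifndef` trick becomes an explicit target check plus a runtime one: only 32-bit x86 may need a return slot, and only when the active convention reports `cc->fp_ret_reg == IR_REG_NONE`, i.e. floats come back in x87 st(0) and a memory slot must shuttle them into an SSE register. Each call may carry its own convention through its prototype, hence the per-call `ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT)` lookup; ir_allocate_spill_slot() also drops its explicit `ra_data` argument, presumably reading it from `ctx` now. The shuttle, conceptually (illustrative assembly, not the emitter's exact output):

    call  func             ; x86 cdecl: double result lands in st(0)
    fstp  qword [ret_slot] ; pop the x87 value into the return slot
    movsd xmm0, [ret_slot] ; reload it into an SSE register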
@@ -11001,7 +11149,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 					 && *rule != IR_TEST_AND_BRANCH_INT
 					 && *rule != IR_GUARD_CMP_INT
 					 && *rule != IR_GUARD_CMP_FP) {
-						available = IR_REGSET_SCRATCH;
+						available = scratch;
 					}
 					if (ctx->vregs[i]) {
 						reg = constraints.def_reg;
@@ -11031,7 +11179,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 							if (insn->op == IR_PARAM && reg == IR_REG_NONE) {
 								ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
 							} else {
-								ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data);
+								ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type);
 							}
 						} else if (insn->op == IR_PARAM) {
 							IR_ASSERT(0 && "unexpected PARAM");
@@ -11042,7 +11190,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 						ir_ref n = use_list->count;

 						if (n > 0) {
-							int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data);
+							int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type);
 							ir_ref i, *p, use;
 							ir_insn *use_insn;

@@ -11097,10 +11245,14 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 									}
 								}
 								ctx->regs[i][constraints.tmp_regs[n].num] = reg;
-							} else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
-								available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
 							} else {
-								IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg);
+								ir_reg reg = constraints.tmp_regs[n].reg;
+
+								if (reg > IR_REG_NUM) {
+									available = IR_REGSET_DIFFERENCE(available, ir_scratch_regset[reg - IR_REG_NUM]);
+								} else {
+									IR_REGSET_EXCL(available, reg);
+								}
 							}
 						} while (n);
 					}
@@ -11136,8 +11288,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 			rule += n;
 		}
 		if (bb->flags & IR_BB_DESSA_MOVES) {
-			data->dessa_from_block = b;
-			ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps);
+			ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps, (void*)(intptr_t)b);
 		}
 	}

@@ -11154,12 +11305,12 @@ static void ir_preallocate_call_stack(ir_ctx *ctx)

 	for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) {
 		if (insn->op == IR_CALL) {
-			call_stack_size = ir_call_used_stack(ctx, insn, &copy_stack);
+			const ir_proto_t *proto = ir_call_proto(ctx, insn);
+			const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT);
+
+			call_stack_size = ir_call_used_stack(ctx, insn, cc, &copy_stack);
 			if (call_stack_size > peak_call_stack_size
-#ifdef IR_HAVE_FASTCALL
-			 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */
-#endif
-			) {
+			 && !cc->cleanup_stack_by_callee) {
 				peak_call_stack_size = call_stack_size;
 			}
 		}
@@ -11169,7 +11320,7 @@ static void ir_preallocate_call_stack(ir_ctx *ctx)
 	}
 	if (peak_call_stack_size) {
 		ctx->call_stack_size = peak_call_stack_size;
-		ctx->flags |= IR_PREALLOCATED_STACK;
+		ctx->flags2 |= IR_PREALLOCATED_STACK;
 	}
 }

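The fastcall special case in ir_preallocate_call_stack() generalizes to `cc->cleanup_stack_by_callee`: when the callee pops its own stack arguments (`ret imm16`, as x86 fastcall does), the caller cannot keep a preallocated argument area live across the call, so such calls are excluded from the peak. Note also that IR_PREALLOCATED_STACK is now an internal `ctx->flags2` bit rather than a public `ctx->flags` bit. The peak computation, on hypothetical numbers:

    /* call A needs 24 bytes (caller pops)  -> peak = 24
     * call B needs 40 bytes (caller pops)  -> peak = 40
     * call C needs 64 bytes (callee pops)  -> skipped
     * => ctx->call_stack_size = 40 */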
@@ -11179,19 +11330,22 @@ void ir_fix_stack_frame(ir_ctx *ctx)

 	ctx->locals_area_size = ctx->stack_frame_size;

-#if defined(IR_TARGET_X64) && !defined(_WIN64)
 	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
-		ctx->flags2 |= IR_16B_FRAME_ALIGNMENT;
-		ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16);
-		ctx->locals_area_size = ctx->stack_frame_size;
-		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
-			additional_size += sizeof(void*) * IR_REG_INT_ARGS;
-		}
-		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
-			additional_size += 16 * IR_REG_FP_ARGS;
+		ir_backend_data *data = ctx->data;
+		const ir_call_conv_dsc *cc = data->ra_data.cc;
+
+		if (cc->sysv_varargs) {
+			ctx->flags2 |= IR_16B_FRAME_ALIGNMENT;
+			ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16);
+			ctx->locals_area_size = ctx->stack_frame_size;
+			if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) {
+				additional_size += sizeof(void*) * cc->int_param_regs_count;
+			}
+			if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < cc->fp_param_regs_count) {
+				additional_size += 16 * cc->fp_param_regs_count;
+			}
 		}
 	}
-#endif

 	if (ctx->used_preserved_regs) {
 		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
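The `#if defined(IR_TARGET_X64) && !defined(_WIN64)` guard around the varargs frame logic becomes the `cc->sysv_varargs` descriptor bit. The reserved sizes still add up to the System V AMD64 register save area; with the counts from the deleted header (6 GP, 8 SSE argument registers), when both kinds are needed:

    additional_size += sizeof(void*) * 6;  /*  48 bytes for the GP registers  */
    additional_size += 16 * 8;             /* 128 bytes for the XMM registers */
    /* 176 bytes total == the reg_save_area of the SysV AMD64 va_list */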
@@ -11259,6 +11413,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 	ir_ref igoto_dup_ref = IR_UNUSED;
 	uint32_t igoto_dup_block = 0;

+	data.ra_data.cc = ir_get_call_conv_dsc(ctx->flags);
 	data.ra_data.unused_slot_4 = 0;
 	data.ra_data.unused_slot_2 = 0;
 	data.ra_data.unused_slot_1 = 0;
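The backend now resolves the descriptor once per function from `ctx->flags` and caches it in `ra_data`; per-call overrides come from each call's prototype flags, as seen in the hunks above. A hypothetical sketch of the selector, consistent with its call sites in this diff (the real routine is defined elsewhere in the IR sources):

    const ir_call_conv_dsc *ir_get_call_conv_dsc(uint32_t flags)
    {
        switch (flags & IR_CALL_CONV_MASK) {  /* low bits name the convention */
    #ifdef IR_TARGET_X86
        /* e.g. a case returning &ir_call_conv_x86_fastcall would go here */
    #endif
        case IR_CC_DEFAULT:
        default:
            return &ir_call_conv_default;
        }
    }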
diff --git a/ext/opcache/jit/ir/ir_x86.h b/ext/opcache/jit/ir/ir_x86.h
index 06bfa951cf2..6399ca107fd 100644
--- a/ext/opcache/jit/ir/ir_x86.h
+++ b/ext/opcache/jit/ir/ir_x86.h
@@ -82,14 +82,17 @@ enum _ir_reg {
 	IR_GP_REGS(IR_GP_REG_ENUM)
 	IR_FP_REGS(IR_FP_REG_ENUM)
 	IR_REG_NUM,
+	IR_REG_ALL = IR_REG_NUM, /* special name for regset */
+	IR_REG_SET_1,            /* special name for regset */
+	IR_REG_SET_2,            /* special name for regset */
+	IR_REG_SET_3,            /* special name for regset */
+	IR_REG_SET_NUM,
 };

 #define IR_REG_GP_FIRST IR_REG_R0
 #define IR_REG_FP_FIRST IR_REG_XMM0
 #define IR_REG_GP_LAST  (IR_REG_FP_FIRST - 1)
 #define IR_REG_FP_LAST  (IR_REG_NUM - 1)
-#define IR_REG_SCRATCH  (IR_REG_NUM)        /* special name for regset */
-#define IR_REG_ALL      (IR_REG_NUM + 1)    /* special name for regset */

 #define IR_REGSET_64BIT 0

@@ -113,121 +116,4 @@ enum _ir_reg {
 #define IR_REG_RSI IR_REG_R6
 #define IR_REG_RDI IR_REG_R7

-/* Calling Convention */
-#ifdef _WIN64
-
-# define IR_REG_INT_RET1 IR_REG_RAX
-# define IR_REG_FP_RET1  IR_REG_XMM0
-# define IR_REG_INT_ARGS 4
-# define IR_REG_FP_ARGS  4
-# define IR_REG_INT_ARG1 IR_REG_RCX
-# define IR_REG_INT_ARG2 IR_REG_RDX
-# define IR_REG_INT_ARG3 IR_REG_R8
-# define IR_REG_INT_ARG4 IR_REG_R9
-# define IR_REG_FP_ARG1  IR_REG_XMM0
-# define IR_REG_FP_ARG2  IR_REG_XMM1
-# define IR_REG_FP_ARG3  IR_REG_XMM2
-# define IR_REG_FP_ARG4  IR_REG_XMM3
-# define IR_MAX_REG_ARGS 4
-# define IR_SHADOW_ARGS  32 /* Reserved space in bytes - "home space" or "shadow store" for register arguments */
-
-# define IR_REGSET_SCRATCH \
-	(IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) \
-	| IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) \
-	| IR_REGSET_INTERVAL(IR_REG_XMM0, IR_REG_XMM5))
-
-# define IR_REGSET_PRESERVED \
-	(IR_REGSET(IR_REG_RBX) \
-	| IR_REGSET_INTERVAL(IR_REG_RBP, IR_REG_RDI) \
-	| IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15) \
-	| IR_REGSET_INTERVAL(IR_REG_XMM6, IR_REG_XMM15))
-
-#elif defined(IR_TARGET_X64)
-
-# define IR_REG_INT_RET1 IR_REG_RAX
-# define IR_REG_FP_RET1  IR_REG_XMM0
-# define IR_REG_INT_ARGS 6
-# define IR_REG_FP_ARGS  8
-# define IR_REG_INT_ARG1 IR_REG_RDI
-# define IR_REG_INT_ARG2 IR_REG_RSI
-# define IR_REG_INT_ARG3 IR_REG_RDX
-# define IR_REG_INT_ARG4 IR_REG_RCX
-# define IR_REG_INT_ARG5 IR_REG_R8
-# define IR_REG_INT_ARG6 IR_REG_R9
-# define IR_REG_FP_ARG1  IR_REG_XMM0
-# define IR_REG_FP_ARG2  IR_REG_XMM1
-# define IR_REG_FP_ARG3  IR_REG_XMM2
-# define IR_REG_FP_ARG4  IR_REG_XMM3
-# define IR_REG_FP_ARG5  IR_REG_XMM4
-# define IR_REG_FP_ARG6  IR_REG_XMM5
-# define IR_REG_FP_ARG7  IR_REG_XMM6
-# define IR_REG_FP_ARG8  IR_REG_XMM7
-# define IR_MAX_REG_ARGS 14
-# define IR_SHADOW_ARGS  0
-
-# define IR_REG_VARARG_FP_REGS IR_REG_RAX /* hidden argument to specify the number of vector registers used */
-
-# define IR_REGSET_SCRATCH \
-	(IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) \
-	| IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI) \
-	| IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) \
-	| IR_REGSET_FP)
-
-# define IR_REGSET_PRESERVED \
-	(IR_REGSET(IR_REG_RBX) \
-	| IR_REGSET(IR_REG_RBP) \
-	| IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15))
-
-typedef struct _ir_va_list {
-	uint32_t  gp_offset;
-	uint32_t  fp_offset;
-	void     *overflow_arg_area;
-	void     *reg_save_area;
-} ir_va_list;
-
-#elif defined(IR_TARGET_X86)
-
-# define IR_REG_INT_RET1   IR_REG_RAX
-# define IR_REG_INT_RET2   IR_REG_RDX
-# define IR_REG_INT_ARGS   0
-# define IR_REG_FP_ARGS    0
-
-# define IR_HAVE_FASTCALL  1
-# define IR_REG_INT_FCARGS 2
-# define IR_REG_FP_FCARGS  0
-# define IR_REG_INT_FCARG1 IR_REG_RCX
-# define IR_REG_INT_FCARG2 IR_REG_RDX
-# define IR_MAX_REG_ARGS   2
-# define IR_SHADOW_ARGS    0
-
-# define IR_REGSET_SCRATCH \
-	(IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) | IR_REGSET_FP)
-
-# define IR_REGSET_PRESERVED \
-	(IR_REGSET(IR_REG_RBX) \
-	| IR_REGSET(IR_REG_RBP) \
-	| IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI))
-
-#else
-# error "Unsupported target architecture"
-#endif
-
-typedef struct _ir_tmp_reg {
-	union {
-		uint8_t num;
-		int8_t  reg;
-	};
-	uint8_t     type;
-	int8_t      start;
-	int8_t      end;
-} ir_tmp_reg;
-
-struct _ir_target_constraints {
-	int8_t      def_reg;
-	uint8_t     tmps_count;
-	uint8_t     hints_count;
-	ir_tmp_reg  tmp_regs[3];
-	int8_t      hints[IR_MAX_REG_ARGS + 3];
-};
-
 #endif /* IR_X86_H */
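With the descriptor in place, the header sheds its entire compile-time calling-convention block, from IR_REG_INT_RET1 down to ir_va_list, along with ir_tmp_reg and _ir_target_constraints (the .dasc code above still uses both, so they presumably moved to an internal header). For reference, the three conventions the deleted macros encoded:

    /* convention    int args                fp args    shadow  callee pops */
    /* WIN64         RCX RDX R8 R9           XMM0-XMM3    32        no      */
    /* SysV x86-64   RDI RSI RDX RCX R8 R9   XMM0-XMM7     0        no      */
    /* x86 fastcall  ECX EDX                 (none)        0        yes     */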