Commit 91ff28ae6d05 for kernel

commit 91ff28ae6d050e0ca01ac13eb8ba31d744cf672f
Author: Eric Dumazet <edumazet@google.com>
Date:   Fri Dec 19 11:20:07 2025 +0000

    x86/irqflags: Use ASM_OUTPUT_RM in native_save_fl()

    clang is generating very inefficient code for native_save_fl() which is
    used for local_irq_save() in critical spots.

    Allowing the "pop %0" to use memory:

     1) forces the compiler to add annoying stack canaries when
        CONFIG_STACKPROTECTOR_STRONG=y in many places.

     2) is almost always followed by an immediate "move memory,register".

    One good example is _raw_spin_lock_irqsave, with 8 extra instructions

      ffffffff82067a30 <_raw_spin_lock_irqsave>:
      ffffffff82067a30:             ...
      ffffffff82067a39:             53                                              push   %rbx

      // Three instructions to adjust the stack, read the per-cpu canary
      // and copy it to 8(%rsp)
      ffffffff82067a3a:             48 83 ec 10                     sub    $0x10,%rsp
      ffffffff82067a3e:             65 48 8b 05 da 15 45 02 mov    %gs:0x24515da(%rip),%rax            # <__stack_chk_guard>
      ffffffff82067a46:             48 89 44 24 08                  mov    %rax,0x8(%rsp)

      ffffffff82067a4b:             9c                                              pushf

      // instead of pop %rbx, compiler uses 2 instructions.
      ffffffff82067a4c:             8f 04 24                                pop    (%rsp)
      ffffffff82067a4f:             48 8b 1c 24                     mov    (%rsp),%rbx

      ffffffff82067a53:             fa                                              cli
      ffffffff82067a54:             b9 01 00 00 00                  mov    $0x1,%ecx
      ffffffff82067a59:             31 c0                                   xor    %eax,%eax
      ffffffff82067a5b:             f0 0f b1 0f                     lock cmpxchg %ecx,(%rdi)
      ffffffff82067a5f:             75 1d                                   jne    ffffffff82067a7e <_raw_spin_lock_irqsave+0x4e>

      // three instructions to check the stack canary
      ffffffff82067a61:             65 48 8b 05 b7 15 45 02 mov    %gs:0x24515b7(%rip),%rax            # <__stack_chk_guard>
      ffffffff82067a69:             48 3b 44 24 08                  cmp    0x8(%rsp),%rax
      ffffffff82067a6e:             75 17                                   jne    ffffffff82067a87

      ...

      // One extra instruction to adjust the stack.
      ffffffff82067a73:             48 83 c4 10                     add    $0x10,%rsp
      ...

      // One more instruction in case the stack was mangled.
      ffffffff82067a87:             e8 a4 35 ff ff                  call   ffffffff8205b030 <__stack_chk_fail>

    This patch changes nothing for gcc, but for clang saves ~20000 bytes of text
    even though more functions are inlined.

      $ size vmlinux.gcc.before vmlinux.gcc.after vmlinux.clang.before vmlinux.clang.after
         text          data         bss             dec             hex     filename
      45565821      25005462        4704800 75276083        47c9f33 vmlinux.gcc.before
      45565821      25005462        4704800 75276083        47c9f33 vmlinux.gcc.after
      45121072      24638617        5533040 75292729        47ce039 vmlinux.clang.before
      45093887      24638633        5536808 75269328        47c84d0 vmlinux.clang.after

      $ scripts/bloat-o-meter -t vmlinux.clang.before vmlinux.clang.after
      add/remove: 1/2 grow/shrink: 21/533 up/down: 2250/-22112 (-19862)

    Signed-off-by: Eric Dumazet <edumazet@google.com>
    Cc: Uros Bizjak <ubizjak@gmail.com>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index b30e5474c18e..a1193e9d65f2 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -25,7 +25,7 @@ extern __always_inline unsigned long native_save_fl(void)
 	 */
 	asm volatile("# __raw_save_flags\n\t"
 		     "pushf ; pop %0"
-		     : "=rm" (flags)
+		     : ASM_OUTPUT_RM (flags)
 		     : /* no input */
 		     : "memory");