Commit 79715662 for xz

commit 7971566247914ec1854b125ff99c2a617f5c1e3a
Author: Lasse Collin <lasse.collin@tukaani.org>
Date:   Tue Dec 9 12:13:36 2025 +0200

    Autotools: Autodetect unaligned access support on LoongArch

    According to [1] sections 7.4, 8.1, and 8.2, desktop and server
    processors support fast unaligned access, but embedded systems likely
    don't.

    It's important that TUKLIB_FAST_UNALIGNED_ACCESS isn't defined when
    -mstrict-align is in use because it will result in slower binaries
    even if running on a processor that supports fast unaligned access.
    It's because compilers will translate multibyte memcpy() to multiple
    byte-by-byte instructions instead of wider loads and stores. The
    compression times from [2] show this well:

        Unaligned access    CFLAGS                     Compression time
            enabled         -O2 -mno-strict-align          66.1 s
            disabled        -O2 -mno-strict-align          79.5 s
            disabled        -O2 -mstrict-align             79.9 s
            enabled         -O2 -mstrict-align            129.1 s

    There currently (GCC 15.2) is no preprocessor macro on LoongArch
    to detect if -mstrict-align or -mno-strict-align is in effect (the
    default is -mno-strict-align). Use heuristics to detect which of the
    flags is in effect.

    [1] https://github.com/loongson/la-softdev-convention/blob/v0.2/la-softdev-convention.adoc
    [2] https://github.com/tukaani-project/xz/pull/186#issuecomment-3494570304

    Thanks-to: Li Chenggang <lichenggang@deepin.org>
    Thanks-to: Xi Ruoyao
    See: https://github.com/tukaani-project/xz/pull/186

diff --git a/m4/tuklib_integer.m4 b/m4/tuklib_integer.m4
index 906ecf1e..a3128a20 100644
--- a/m4/tuklib_integer.m4
+++ b/m4/tuklib_integer.m4
@@ -62,6 +62,58 @@ main(void)
 	fi
 ])

+# On archs where we use tuklib_integer_strict_align() (see below), we need
+# objdump to detect support for unaligned access. (Libtool needs objdump
+# too, so Libtool does this same tool check as well.)
+AC_CHECK_TOOL([OBJDUMP], [objdump], [false])
+
+# An internal helper that attempts to detect if -mstrict-align or
+# -mno-strict-align is in effect. This sets enable_unaligned_access=yes
+# if compilation succeeds, the objdump output contains the check function,
+# and the regex passed as the argument does *not* match that output.
+# Otherwise this sets enable_unaligned_access=no.
+tuklib_integer_strict_align ()
+{
+	# First guess no.
+	enable_unaligned_access=no
+
+	# Force -O2 so that the memcpy() gets optimized out. Force -fno-lto
+	# because slim LTO objects contain no machine code to disassemble.
+	tuklib_integer_saved_CFLAGS=$CFLAGS
+	CFLAGS="$CFLAGS -O2 -fno-lto"
+	AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+			#include <string.h>
+			unsigned int check_strict_align(const void *p)
+			{
+				unsigned int i;
+				memcpy(&i, p, sizeof(i));
+				return i;
+			}
+		]])], [
+			# Disassemble the test function. An empty disassembly
+			# proves nothing, so require the label of the function.
+			if $OBJDUMP -d conftest.$ac_objext > conftest.s \
+					&& grep check_strict_align \
+						conftest.s > /dev/null ; then
+				# The argument is a regex that matches
+				# unsigned byte loads, which indicate
+				# that -mstrict-align is in effect.
+				#
+				# NOTE: Use braces to avoid M4 parameter
+				# expansion.
+				if grep -- "${1}" conftest.s > /dev/null ; then
+					:
+				else
+					# No single-byte unsigned loads, so
+					# -mno-strict-align seems to be in
+					# effect. Override the earlier guess.
+					enable_unaligned_access=yes
+				fi
+			fi
+		])
+	CFLAGS=$tuklib_integer_saved_CFLAGS
+}
+
 AC_MSG_CHECKING([if unaligned memory access should be used])
 AC_ARG_ENABLE([unaligned-access], AS_HELP_STRING([--enable-unaligned-access],
 		[Enable if the system supports *fast* unaligned memory access
@@ -107,6 +159,22 @@ compile error
 int main(void) { return 0; }
 ])], [enable_unaligned_access=yes], [enable_unaligned_access=no])
 			;;
+		loongarch*)
+			# See sections 7.4, 8.1, and 8.2:
+			# https://github.com/loongson/la-softdev-convention/blob/v0.2/la-softdev-convention.adoc
+			#
+			# That is, desktop and server processors likely support
+			# unaligned access in hardware but embedded processors
+			# might not. GCC defaults to -mno-strict-align and so
+			# do the majority of GNU/Linux distributions. As of
+			# GCC 15.2, there is no predefined macro to detect
+			# if -mstrict-align or -mno-strict-align is in effect.
+			# Use heuristics based on compiler output.
+			[
+				tuklib_integer_strict_align \
+						'[[:blank:]]ld\.bu[[:blank:]]'
+			]
+			;;
 		*)
 			enable_unaligned_access=no
 			;;