Commit 79715662 for xz
commit 7971566247914ec1854b125ff99c2a617f5c1e3a
Author: Lasse Collin <lasse.collin@tukaani.org>
Date: Tue Dec 9 12:13:36 2025 +0200
Autotools: Autodetect unaligned access support on LoongArch
According to [1] sections 7.4, 8.1, and 8.2, desktop and server
processors support fast unaligned access, but embedded systems likely
don't.
It's important that TUKLIB_FAST_UNALIGNED_ACCESS isn't defined when
-mstrict-align is in use because defining it results in slower binaries
even if running on a processor that supports fast unaligned access.
This is because compilers will translate multibyte memcpy() to multiple
byte-by-byte instructions instead of wider loads and stores. The
compression times from [2] show this well:
Unaligned access    CFLAGS                   Compression time
enabled             -O2 -mno-strict-align    66.1 s
disabled            -O2 -mno-strict-align    79.5 s
disabled            -O2 -mstrict-align       79.9 s
enabled             -O2 -mstrict-align       129.1 s
As of GCC 15.2, there is no preprocessor macro on LoongArch to detect
whether -mstrict-align or -mno-strict-align is in effect (the
default is -mno-strict-align). Use heuristics to detect which of the
flags is in effect.
[1] https://github.com/loongson/la-softdev-convention/blob/v0.2/la-softdev-convention.adoc
[2] https://github.com/tukaani-project/xz/pull/186#issuecomment-3494570304
Thanks-to: Li Chenggang <lichenggang@deepin.org>
Thanks-to: Xi Ruoyao
See: https://github.com/tukaani-project/xz/pull/186
diff --git a/m4/tuklib_integer.m4 b/m4/tuklib_integer.m4
index 906ecf1e..a3128a20 100644
--- a/m4/tuklib_integer.m4
+++ b/m4/tuklib_integer.m4
@@ -62,6 +62,58 @@ main(void)
fi
])
+# On archs where we use tuklib_integer_strict_align() (see below), we need
+# objdump to detect support for unaligned access. (Libtool needs objdump
+# too, so Libtool does this same tool check as well.)
+AC_CHECK_TOOL([OBJDUMP], [objdump], [false])
+
+# Internal helper: guess whether -mstrict-align or -mno-strict-align is
+# in effect. The first argument is a regex for grep. This sets
+# enable_unaligned_access=yes only if the check program compiles, objdump
+# can disassemble it, and the regex does *not* match the disassembly.
+# In every other case this sets enable_unaligned_access=no.
+tuklib_integer_strict_align ()
+{
+ # Conservative first guess; overridden only if all checks below pass.
+ enable_unaligned_access=no
+
+ # Force -O2 because without optimizations the memcpy()
+ # won't be optimized out. The saved CFLAGS are restored at the end.
+ tuklib_integer_saved_CFLAGS=$CFLAGS
+ CFLAGS="$CFLAGS -O2"
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+ #include <string.h>
+ unsigned int check_strict_align(const void *p)
+ {
+ unsigned int i;
+ memcpy(&i, p, sizeof(i));
+ return i;
+ }
+ ]])], [
+ # Disassemble the object file. If objdump fails, the guess stays no.
+ if $OBJDUMP -d conftest.$ac_objext > conftest.s ; then
+ # The caller passes a regex that matches
+ # instructions that load single unsigned bytes.
+ # A match means memcpy() became byte loads,
+ # which indicates that -mstrict-align is in effect.
+ #
+ # NOTE: The braces in the shell parameter keep M4
+ # from treating it as a macro argument reference.
+ if grep -- "${1}" conftest.s > /dev/null ; then
+ :
+ else
+ # The regex did not match: no single-byte
+ # unsigned loads were emitted for the
+ # memcpy(), so -mno-strict-align seems to
+ # be in effect. Override the initial guess
+ # and enable unaligned access.
+ enable_unaligned_access=yes
+ fi
+ fi
+ ])
+ CFLAGS=$tuklib_integer_saved_CFLAGS
+}
+
AC_MSG_CHECKING([if unaligned memory access should be used])
AC_ARG_ENABLE([unaligned-access], AS_HELP_STRING([--enable-unaligned-access],
[Enable if the system supports *fast* unaligned memory access
@@ -107,6 +159,22 @@ compile error
int main(void) { return 0; }
])], [enable_unaligned_access=yes], [enable_unaligned_access=no])
;;
+ loongarch*)
+ # See sections 7.4, 8.1, and 8.2:
+ # https://github.com/loongson/la-softdev-convention/blob/v0.2/la-softdev-convention.adoc
+ #
+ # That is, desktop and server processors likely support
+ # unaligned access in hardware but embedded processors
+ # might not. GCC defaults to -mno-strict-align and so
+ # do the majority of GNU/Linux distributions. As of
+ # GCC 15.2, there is no predefined macro to detect
+ # if -mstrict-align or -mno-strict-align is in effect.
+ # Use heuristics based on compiler output: ld.bu byte loads.
+ [
+ tuklib_integer_strict_align \
+ '[[:blank:]]ld\.bu[[:blank:]]'
+ ]
+ ;;
*)
enable_unaligned_access=no
;;