Commit 225c46e9 for xz
commit 225c46e99c3a3e6e0996649ee8d858b6e07111b3
Author: Lasse Collin <lasse.collin@tukaani.org>
Date: Thu Jun 4 23:33:37 2026 +0300
xzdiff: Use LC_ALL=C with sed and expr
This is required to handle strings that may contain invalid multibyte
sequences. For example, on GNU/Linux and FreeBSD 14 with a UTF-8 locale:
$ printf "\201'A\n" | sed s/./x/ | hexdump -C
00000000 81 78 41 0a |.xA.|
That is, the first byte remains untouched and the ' is replaced with x.
This kind of string would result in imbalanced quotes in xzdiff. In the
C locale it works safely:
$ printf "\201'A\n" | LC_ALL=C sed s/./x/ | hexdump -C
00000000 78 27 41 0a |x'A.|
Similar changes were made in xzgrep in 2022 (see the commits 69d1b3fc2967
and 6a4a4a7d2667). Somehow xzdiff was forgotten even though this issue
was fixed in zdiff in gzip in 2022.
Co-authored-by: rootvector2 <dxbnaveed.k@gmail.com>
Fixes: https://github.com/tukaani-project/xz/pull/228
diff --git a/src/scripts/xzdiff.in b/src/scripts/xzdiff.in
index 8e1e0a22..74c1d332 100644
--- a/src/scripts/xzdiff.in
+++ b/src/scripts/xzdiff.in
@@ -56,7 +56,7 @@ while :; do
--h*) printf '%s\n' "$usage" || exit 2; exit;;
--v*) printf '%s\n' "$version" || exit 2; exit;;
--) shift; break;;
- -*\'*) cmp="$cmp '"`printf '%sX\n' "$1" | sed "$escape"`;;
+ -*\'*) cmp="$cmp '"`printf '%sX\n' "$1" | LC_ALL=C sed "$escape"`;;
-?*) cmp="$cmp '$1'";;
*) break;;
esac
@@ -96,15 +96,15 @@ if test $# -eq 1; then
# shellcheck disable=SC2034
case $1 in
*[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *[-.]lzo | *[-.]zst | *[-.]lz4)
- FILE=`expr "X$1" : 'X\(.*\)[-.][abglmostxzZ24]*$'`;;
+ FILE=`LC_ALL=C expr "X$1" : 'X\(.*\)[-.][abglmostxzZ24]*$'`;;
*.t[abglx]z)
- FILE=`expr "X$1" : 'X\(.*[-.]t\)[abglx]z$'`ar;;
+ FILE=`LC_ALL=C expr "X$1" : 'X\(.*[-.]t\)[abglx]z$'`ar;;
*.tbz2)
- FILE=`expr "X$1" : 'X\(.*[-.]t\)bz2$'`ar;;
+ FILE=`LC_ALL=C expr "X$1" : 'X\(.*[-.]t\)bz2$'`ar;;
*.tzo)
- FILE=`expr "X$1" : 'X\(.*[-.]t\)zo$'`ar;;
+ FILE=`LC_ALL=C expr "X$1" : 'X\(.*[-.]t\)zo$'`ar;;
*.tzst)
- FILE=`expr "X$1" : 'X\(.*[-.]t\)zst$'`ar;;
+ FILE=`LC_ALL=C expr "X$1" : 'X\(.*[-.]t\)zst$'`ar;;
esac
xz_status=$(
exec 4>&1
@@ -145,7 +145,7 @@ elif test $# -eq 2; then
eval "$cmp" /dev/fd/5 - >&3) 5<&0
)
else
- F=`expr "/$2" : '.*/\(.*\)[-.][ablmotxz2]*$'` || F=$prog
+ F=`LC_ALL=C expr "/$2" : '.*/\(.*\)[-.][ablmotxz2]*$'` || F=$prog
tmp=
trap '
test -n "$tmp" && rm -rf "$tmp"