Commit 225c46e9 for xz

commit 225c46e99c3a3e6e0996649ee8d858b6e07111b3
Author: Lasse Collin <lasse.collin@tukaani.org>
Date:   Thu Jun 4 23:33:37 2026 +0300

    xzdiff: Use LC_ALL=C with sed and expr

    This is required to handle strings that may contain invalid multibyte
    sequences. For example, on GNU/Linux and FreeBSD 14 with UTF-8 locale:

        $ printf "\201'A\n" | sed s/./x/ | hexdump -C
        00000000  81 78 41 0a                                       |.xA.|

    That is, the first byte remains untouched and the ' is replaced with x.
    This kind of string would result in imbalanced quotes in xzdiff. In the
    C locale it works safely:

        $ printf "\201'A\n" | LC_ALL=C sed s/./x/ | hexdump -C
        00000000  78 27 41 0a                                       |x'A.|

    Similar changes were made in xzgrep in 2022 (see the commits 69d1b3fc2967
    and 6a4a4a7d2667). Somehow xzdiff was forgotten even though this issue
    was fixed in zdiff in gzip in 2022.

    Co-authored-by: rootvector2 <dxbnaveed.k@gmail.com>
    Fixes: https://github.com/tukaani-project/xz/pull/228

diff --git a/src/scripts/xzdiff.in b/src/scripts/xzdiff.in
index 8e1e0a22..74c1d332 100644
--- a/src/scripts/xzdiff.in
+++ b/src/scripts/xzdiff.in
@@ -56,7 +56,7 @@ while :; do
     --h*) printf '%s\n' "$usage" || exit 2; exit;;
     --v*) printf '%s\n' "$version" || exit 2; exit;;
     --) shift; break;;
-    -*\'*) cmp="$cmp '"`printf '%sX\n' "$1" | sed "$escape"`;;
+    -*\'*) cmp="$cmp '"`printf '%sX\n' "$1" | LC_ALL=C sed "$escape"`;;
     -?*) cmp="$cmp '$1'";;
     *) break;;
   esac
@@ -96,15 +96,15 @@ if test $# -eq 1; then
   # shellcheck disable=SC2034
   case $1 in
     *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *[-.]lzo | *[-.]zst | *[-.]lz4)
-      FILE=`expr "X$1" : 'X\(.*\)[-.][abglmostxzZ24]*$'`;;
+      FILE=`LC_ALL=C expr "X$1" : 'X\(.*\)[-.][abglmostxzZ24]*$'`;;
     *.t[abglx]z)
-      FILE=`expr "X$1" : 'X\(.*[-.]t\)[abglx]z$'`ar;;
+      FILE=`LC_ALL=C expr "X$1" : 'X\(.*[-.]t\)[abglx]z$'`ar;;
     *.tbz2)
-      FILE=`expr "X$1" : 'X\(.*[-.]t\)bz2$'`ar;;
+      FILE=`LC_ALL=C expr "X$1" : 'X\(.*[-.]t\)bz2$'`ar;;
     *.tzo)
-      FILE=`expr "X$1" : 'X\(.*[-.]t\)zo$'`ar;;
+      FILE=`LC_ALL=C expr "X$1" : 'X\(.*[-.]t\)zo$'`ar;;
     *.tzst)
-      FILE=`expr "X$1" : 'X\(.*[-.]t\)zst$'`ar;;
+      FILE=`LC_ALL=C expr "X$1" : 'X\(.*[-.]t\)zst$'`ar;;
   esac
   xz_status=$(
     exec 4>&1
@@ -145,7 +145,7 @@ elif test $# -eq 2; then
                 eval "$cmp" /dev/fd/5 - >&3) 5<&0
             )
           else
-            F=`expr "/$2" : '.*/\(.*\)[-.][ablmotxz2]*$'` || F=$prog
+            F=`LC_ALL=C expr "/$2" : '.*/\(.*\)[-.][ablmotxz2]*$'` || F=$prog
             tmp=
             trap '
               test -n "$tmp" && rm -rf "$tmp"