Commit 8958c48f for libheif

commit 8958c48f5106686f0ce65168a3cdf3143b3fe68f
Author: Dirk Farin <dirk.farin@gmail.com>
Date:   Mon Feb 9 19:01:55 2026 +0100

    unci: add faster component-interleave decoder when component sizes are multiples of whole bytes

diff --git a/libheif/CMakeLists.txt b/libheif/CMakeLists.txt
index fa00815c..efa15282 100644
--- a/libheif/CMakeLists.txt
+++ b/libheif/CMakeLists.txt
@@ -303,6 +303,8 @@ if (WITH_UNCOMPRESSED_CODEC)
             codecs/uncompressed/unc_decoder_pixel_interleave.cc
             codecs/uncompressed/unc_decoder_block_pixel_interleave.h
             codecs/uncompressed/unc_decoder_block_pixel_interleave.cc
+            codecs/uncompressed/unc_decoder_bytealign_component_interleave.h
+            codecs/uncompressed/unc_decoder_bytealign_component_interleave.cc
             codecs/uncompressed/unc_decoder_block_component_interleave.h
             codecs/uncompressed/unc_decoder_block_component_interleave.cc
             codecs/uncompressed/unc_decoder_mixed_interleave.h
diff --git a/libheif/codecs/uncompressed/unc_decoder.cc b/libheif/codecs/uncompressed/unc_decoder.cc
index c77ba4a3..7d74d59a 100644
--- a/libheif/codecs/uncompressed/unc_decoder.cc
+++ b/libheif/codecs/uncompressed/unc_decoder.cc
@@ -28,6 +28,7 @@
 #include "unc_decoder_mixed_interleave.h"
 #include "unc_decoder_row_interleave.h"
 #include "unc_decoder_block_pixel_interleave.h"
+#include "unc_decoder_bytealign_component_interleave.h"
 #include "unc_decoder_block_component_interleave.h"
 #include "unc_codec.h"
 #include "unc_boxes.h"
@@ -338,6 +339,7 @@ Result<std::unique_ptr<unc_decoder>> unc_decoder_factory::get_unc_decoder(
     const std::shared_ptr<const Box_uncC>& uncC)
 {
   static unc_decoder_factory_component_interleave dec_component;
+  static unc_decoder_factory_bytealign_component_interleave dec_bytealign_component;
   static unc_decoder_factory_block_component_interleave dec_block_component;
   static unc_decoder_factory_pixel_interleave dec_pixel;
   static unc_decoder_factory_block_pixel_interleave dec_block_pixel;
@@ -345,7 +347,7 @@ Result<std::unique_ptr<unc_decoder>> unc_decoder_factory::get_unc_decoder(
   static unc_decoder_factory_row_interleave dec_row;

   static const unc_decoder_factory* decoders[]{
-    &dec_block_component, &dec_component, &dec_pixel, &dec_block_pixel, &dec_mixed, &dec_row
+    &dec_bytealign_component, &dec_block_component, &dec_component, &dec_pixel, &dec_block_pixel, &dec_mixed, &dec_row
   };

   for (const unc_decoder_factory* dec : decoders) {
diff --git a/libheif/codecs/uncompressed/unc_decoder_bytealign_component_interleave.cc b/libheif/codecs/uncompressed/unc_decoder_bytealign_component_interleave.cc
new file mode 100644
index 00000000..5189031c
--- /dev/null
+++ b/libheif/codecs/uncompressed/unc_decoder_bytealign_component_interleave.cc
@@ -0,0 +1,234 @@
+/*
+ * HEIF codec.
+ * Copyright (c) 2026 Dirk Farin <dirk.farin@gmail.com>
+ *
+ * This file is part of libheif.
+ *
+ * libheif is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * libheif is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with libheif.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "unc_decoder_bytealign_component_interleave.h"
+#include "unc_decoder_legacybase.h" // for skip_to_alignment
+#include "unc_codec.h"
+#include "unc_types.h"
+#include "error.h"
+
+#include <bit>
+#include <cassert>
+#include <cstring>
+#include <vector>
+
+
+// Forwards width, height and the cmpd/uncC boxes unchanged to the unc_decoder base class.
+unc_decoder_bytealign_component_interleave::unc_decoder_bytealign_component_interleave(
+    uint32_t width, uint32_t height,
+    std::shared_ptr<const Box_cmpd> cmpd, std::shared_ptr<const Box_uncC> uncC)
+    : unc_decoder(width, height, cmpd, uncC)
+{
+}
+
+
+// Returns a one-element vector with the byte size of a tile: per component, the row size
+// (aligned sample size * tile width, row-aligned) * tile height, summed and tile-aligned.
+std::vector<uint64_t> unc_decoder_bytealign_component_interleave::get_tile_data_sizes() const
+{
+  uint64_t tile_bytes = 0;
+  for (const auto& component : m_uncC->get_components()) {
+    uint32_t sample_bytes = (component.component_bit_depth + 7) / 8;
+    if (component.component_align_size > 0) {
+      skip_to_alignment(sample_bytes, component.component_align_size);
+    }
+    // Widen before multiplying: the 32-bit product can wrap for very wide tiles.
+    uint64_t row_bytes = static_cast<uint64_t>(sample_bytes) * m_tile_width;
+    if (m_uncC->get_row_align_size() != 0) {
+      skip_to_alignment(row_bytes, m_uncC->get_row_align_size());
+    }
+    tile_bytes += row_bytes * m_tile_height;
+  }
+  if (m_uncC->get_tile_align_size() != 0) {
+    skip_to_alignment(tile_bytes, m_uncC->get_tile_align_size());
+  }
+  return {tile_bytes};
+}
+
+
+// Decodes one tile of byte-aligned, component-interleaved data into `img` at (out_x0, out_y0).
+// The tile holds every row of the first component, then every row of the next, and so on.
+// Each sample is assembled from the byte order signalled in uncC into a native integer and
+// stored with memcpy. Components that map_uncompressed_component_to_channel() does not map
+// are bounds-checked but not stored. Returns a heif_error_Invalid_input error when
+// `tile_data` is too short for a row's samples, and Error::Ok otherwise.
+Error unc_decoder_bytealign_component_interleave::decode_tile(const std::vector<uint8_t>& tile_data,
+                                                               std::shared_ptr<HeifPixelImage>& img,
+                                                               uint32_t out_x0, uint32_t out_y0)
+{
+  const bool little_endian = m_uncC->is_components_little_endian();
+
+  struct ComponentInfo {
+    uint32_t bytes_per_sample;
+    bool use;
+    uint8_t* dst_plane;
+    size_t dst_plane_stride;
+  };
+
+  const auto& components = m_uncC->get_components();
+  const uint32_t num_components = static_cast<uint32_t>(components.size());
+  std::vector<ComponentInfo> comp(num_components);
+
+  for (uint32_t i = 0; i < num_components; i++) {
+    const auto& c = components[i];
+    comp[i].bytes_per_sample = (c.component_bit_depth + 7) / 8;
+    heif_channel channel;
+    comp[i].use = map_uncompressed_component_to_channel(m_cmpd, c, &channel);
+    if (comp[i].use) {
+      comp[i].dst_plane = img->get_plane(channel, &comp[i].dst_plane_stride);
+    }
+    else {
+      comp[i].dst_plane = nullptr;
+      comp[i].dst_plane_stride = 0;
+    }
+  }
+
+  // Rows are addressed by byte offset rather than by pointer, so stepping over row padding
+  // that is missing at the end of the buffer never forms an out-of-range pointer.
+  const uint8_t* const data = tile_data.data();
+  const uint64_t data_size = tile_data.size();
+  uint64_t row_offset = 0;
+
+  for (uint32_t c = 0; c < num_components; c++) {
+    const uint32_t sample_bytes = comp[c].bytes_per_sample;
+    uint32_t sample_pitch = sample_bytes;
+    if (components[c].component_align_size > 0) {
+      skip_to_alignment(sample_pitch, components[c].component_align_size);
+    }
+
+    // Computed in 64 bits: pitch * width can exceed 32 bits for very wide tiles.
+    const uint64_t row_payload = static_cast<uint64_t>(sample_pitch) * m_tile_width;
+    uint64_t row_pitch = row_payload;
+    if (m_uncC->get_row_align_size() != 0) {
+      skip_to_alignment(row_pitch, m_uncC->get_row_align_size());
+    }
+    for (uint32_t tile_y = 0; tile_y < m_tile_height; tile_y++, row_offset += row_pitch) {
+      // One check per row covers all of its samples; trailing row padding is not required.
+      if (row_offset > data_size || data_size - row_offset < row_payload) {
+        return {heif_error_Invalid_input, heif_suberror_Unspecified,
+                "Bytealign-component interleave: insufficient data"};
+      }
+      if (!comp[c].use) {
+        continue;
+      }
+
+      const uint8_t* src = data + row_offset;
+      uint8_t* dst = comp[c].dst_plane
+                     + static_cast<uint64_t>(out_y0 + tile_y) * comp[c].dst_plane_stride
+                     + static_cast<uint64_t>(out_x0) * sample_bytes;
+
+      // NOTE(review): a padded sample is read from the first bytes of its slot for both byte
+      // orders - confirm where ISO/IEC 23001-17 places the padding for big-endian data.
+      for (uint32_t tile_x = 0; tile_x < m_tile_width; tile_x++, src += sample_pitch, dst += sample_bytes) {
+        if (sample_bytes == 1) {
+          *dst = src[0];
+        }
+        else if (sample_bytes == 2) {
+          uint16_t value;
+          if (little_endian) {
+            value = static_cast<uint16_t>(src[0] | (src[1] << 8));
+          }
+          else {
+            value = static_cast<uint16_t>((src[0] << 8) | src[1]);
+          }
+          std::memcpy(dst, &value, 2);
+        }
+        else if (sample_bytes == 4) {
+          uint32_t value;
+          if (little_endian) {
+            value = static_cast<uint32_t>(src[0])
+                    | (static_cast<uint32_t>(src[1]) << 8)
+                    | (static_cast<uint32_t>(src[2]) << 16)
+                    | (static_cast<uint32_t>(src[3]) << 24);
+          }
+          else {
+            value = (static_cast<uint32_t>(src[0]) << 24)
+                    | (static_cast<uint32_t>(src[1]) << 16)
+                    | (static_cast<uint32_t>(src[2]) << 8)
+                    | static_cast<uint32_t>(src[3]);
+          }
+          std::memcpy(dst, &value, 4);
+        }
+        else {
+          // 8-byte sample: the factory only admits 8-, 16-, 32- and 64-bit components.
+          uint64_t value;
+          if (little_endian) {
+            value = static_cast<uint64_t>(src[0]) | (static_cast<uint64_t>(src[1]) << 8)
+                    | (static_cast<uint64_t>(src[2]) << 16) | (static_cast<uint64_t>(src[3]) << 24)
+                    | (static_cast<uint64_t>(src[4]) << 32) | (static_cast<uint64_t>(src[5]) << 40)
+                    | (static_cast<uint64_t>(src[6]) << 48) | (static_cast<uint64_t>(src[7]) << 56);
+          }
+          else {
+            value = (static_cast<uint64_t>(src[0]) << 56) | (static_cast<uint64_t>(src[1]) << 48)
+                    | (static_cast<uint64_t>(src[2]) << 40) | (static_cast<uint64_t>(src[3]) << 32)
+                    | (static_cast<uint64_t>(src[4]) << 24) | (static_cast<uint64_t>(src[5]) << 16)
+                    | (static_cast<uint64_t>(src[6]) << 8) | static_cast<uint64_t>(src[7]);
+          }
+          std::memcpy(dst, &value, 8);
+        }
+      }
+    }
+  }
+
+  return Error::Ok;
+}
+
+
+// --- Factory ---
+
+// Accepts component-interleaved, non-subsampled layouts without block or pixel-size packing
+// in which every component is an unsigned integer of exactly 8, 16, 32 or 64 bits.
+bool unc_decoder_factory_bytealign_component_interleave::can_decode(const std::shared_ptr<const Box_uncC>& uncC) const
+{
+  const bool layout_ok = uncC->get_interleave_type() == interleave_mode_component
+                         && uncC->get_block_size() == 0
+                         && uncC->get_pixel_size() == 0
+                         && uncC->get_sampling_type() == sampling_mode_no_subsampling;
+  if (!layout_ok) {
+    return false;
+  }
+
+  for (const auto& component : uncC->get_components()) {
+    switch (component.component_bit_depth) {
+      case 8:
+      case 16:
+      case 32:
+      case 64:
+        break;
+      default:
+        return false;
+    }
+
+    if (component.component_format != component_format_unsigned) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+
+// Builds the decoder, passing width, height, cmpd and uncC straight to its constructor.
+std::unique_ptr<unc_decoder> unc_decoder_factory_bytealign_component_interleave::create(
+    uint32_t width, uint32_t height,
+    const std::shared_ptr<const Box_cmpd>& cmpd, const std::shared_ptr<const Box_uncC>& uncC) const
+{
+  return std::make_unique<unc_decoder_bytealign_component_interleave>(width, height, cmpd, uncC);
+}
diff --git a/libheif/codecs/uncompressed/unc_decoder_bytealign_component_interleave.h b/libheif/codecs/uncompressed/unc_decoder_bytealign_component_interleave.h
new file mode 100644
index 00000000..4a2326ab
--- /dev/null
+++ b/libheif/codecs/uncompressed/unc_decoder_bytealign_component_interleave.h
@@ -0,0 +1,55 @@
+/*
+ * HEIF codec.
+ * Copyright (c) 2026 Dirk Farin <dirk.farin@gmail.com>
+ *
+ * This file is part of libheif.
+ *
+ * libheif is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * libheif is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with libheif.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef LIBHEIF_UNC_DECODER_BYTEALIGN_COMPONENT_INTERLEAVE_H
+#define LIBHEIF_UNC_DECODER_BYTEALIGN_COMPONENT_INTERLEAVE_H
+
+#include "unc_decoder.h"
+#include <memory>
+#include <vector>
+
+
+// unci decoder for component-interleaved data whose samples occupy whole bytes.
+class unc_decoder_bytealign_component_interleave : public unc_decoder
+{
+public:
+  unc_decoder_bytealign_component_interleave(uint32_t width, uint32_t height,
+                                              std::shared_ptr<const Box_cmpd> cmpd,
+                                              std::shared_ptr<const Box_uncC> uncC);
+  // Byte size of one tile, returned as a single-element vector.
+  std::vector<uint64_t> get_tile_data_sizes() const override;
+  // Decodes `tile_data` into `img`, placing the tile's first sample at (out_x0, out_y0).
+  Error decode_tile(const std::vector<uint8_t>& tile_data, std::shared_ptr<HeifPixelImage>& img,
+                    uint32_t out_x0, uint32_t out_y0) override;
+};
+
+
+// Factory for unc_decoder_bytealign_component_interleave.
+class unc_decoder_factory_bytealign_component_interleave : public unc_decoder_factory
+{
+private:
+  bool can_decode(const std::shared_ptr<const Box_uncC>& uncC) const override;  // layout supported?
+  // Constructs the decoder from the given width, height and cmpd/uncC boxes.
+  std::unique_ptr<unc_decoder> create(uint32_t width, uint32_t height,
+                                      const std::shared_ptr<const Box_cmpd>& cmpd,
+                                      const std::shared_ptr<const Box_uncC>& uncC) const override;
+};
+
+#endif // LIBHEIF_UNC_DECODER_BYTEALIGN_COMPONENT_INTERLEAVE_H