summaryrefslogtreecommitdiff
path: root/subprojects/pixpat/pixpat-native/src/io
diff options
context:
space:
mode:
authorTomi Valkeinen <tomi.valkeinen@ideasonboard.com>2026-05-08 17:22:58 +0300
committerTomi Valkeinen <tomi.valkeinen@ideasonboard.com>2026-05-08 17:22:58 +0300
commit4e2b291a4acdc2cbd39f005c88bda363bc06bd34 (patch)
treee90048d5973ad1164b109d575cf577af7daf50be /subprojects/pixpat/pixpat-native/src/io
parent8f94b39040e79eccd9312ed1e467fe8ebfab8860 (diff)
parente0b7d30fd437292c88141fb08d60681870b86c6e (diff)
Merge commit 'e0b7d30fd437292c88141fb08d60681870b86c6e' as 'subprojects/pixpat'
Diffstat (limited to 'subprojects/pixpat/pixpat-native/src/io')
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/bayer.h318
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/csi2.h80
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/detail.h62
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/gray.h153
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/gray_packed.h78
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/mono_rgb.h72
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/packed.h106
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/packed_yuv.h89
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/planar.h257
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/semiplanar.h242
10 files changed, 1457 insertions, 0 deletions
diff --git a/subprojects/pixpat/pixpat-native/src/io/bayer.h b/subprojects/pixpat/pixpat-native/src/io/bayer.h
new file mode 100644
index 0000000..6b30c0e
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/bayer.h
@@ -0,0 +1,318 @@
+#pragma once
+
+// Bayer raw read/write support.
+//
+// Write side: each pixel carries one of R/G/B selected by (x mod 2,
+// y mod 2) and a fixed BayerOrder. Two missing channels per pixel are
+// dropped on encode.
+//
+// Read side: bilinear demosaic over a 3x3 window. The pixel's own
+// channel comes from self; missing channels are averaged from the
+// same-channel neighbours that the Bayer phase guarantees to exist:
+//
+// * At an R or B pixel, all four cardinal (N, E, S, W) neighbours
+// carry G and all four diagonal (NE, NW, SE, SW) neighbours carry
+// the other colour, so each missing channel averages four samples.
+// * At a G pixel, one missing colour sits in the row neighbours
+// (W, E) and the other in the column neighbours (N, S), so each
+// missing channel averages two samples.
+//
+// Sampled coordinates are clamped to the image bounds.
+//
+// The Layout shape is the same as a Y-only single-plane format
+// (storage carries one component plus optional X padding); the
+// BayerOrder is a separate template parameter on the Source / Sink.
+
+#include <array>
+#include <cstdint>
+
+#include "../layout.h"
+#include "csi2.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+// Bayer colour-filter phase. Each name lists the four samples of the
+// 2x2 tile in raster order: (even x, even y), (odd x, even y),
+// (even x, odd y), (odd x, odd y). detail::bayer_pick implements the
+// mapping.
+enum class BayerOrder { RGGB, BGGR, GRBG, GBRG };
+
+namespace detail
+{
+// Colour carried by a pixel of a Bayer mosaic.
+//
+// o:      tile order of the mosaic.
+// x_even: true when the pixel's column index is even.
+// y_even: true when the pixel's row index is even.
+//
+// Returns C::R, C::G or C::B. A BayerOrder value outside the four
+// enumerators yields C::G.
+constexpr C bayer_pick(BayerOrder o, bool x_even, bool y_even) noexcept
+{
+	// Raster index inside the 2x2 tile:
+	//   0 = (even x, even y)   1 = (odd x, even y)
+	//   2 = (even x, odd y)    3 = (odd x, odd y)
+	const unsigned phase = (y_even ? 0u : 2u) + (x_even ? 0u : 1u);
+
+	switch (o) {
+	case BayerOrder::RGGB: {
+		constexpr C tile[4] = { C::R, C::G, C::G, C::B };
+		return tile[phase];
+	}
+	case BayerOrder::BGGR: {
+		constexpr C tile[4] = { C::B, C::G, C::G, C::R };
+		return tile[phase];
+	}
+	case BayerOrder::GRBG: {
+		constexpr C tile[4] = { C::G, C::R, C::B, C::G };
+		return tile[phase];
+	}
+	case BayerOrder::GBRG: {
+		constexpr C tile[4] = { C::G, C::B, C::R, C::G };
+		return tile[phase];
+	}
+	}
+	return C::G;
+}
+
+// Clamp a signed coordinate into the index range [0, max_excl - 1].
+//
+// v:        coordinate to clamp; may be negative (e.g. x - 1 at the
+//           left edge).
+// max_excl: exclusive upper bound (image width or height).
+//
+// Returns 0 for negative v and max_excl - 1 for v >= max_excl. An empty
+// range (max_excl == 0) yields 0 rather than letting `max_excl - 1`
+// wrap around to SIZE_MAX.
+constexpr size_t clamp_coord(int v, size_t max_excl) noexcept
+{
+	if (v < 0 || max_excl == 0)
+		return 0;
+
+	const size_t u = size_t(v);
+	return u < max_excl ? u : max_excl - 1;
+}
+} // namespace detail
+
+// Reads an unpacked Bayer plane and reconstructs a full RGB16 pixel by
+// bilinear interpolation over the 3x3 neighbourhood. L describes the
+// storage (one stored sample tagged C::Y); Order fixes the colour of
+// each (x mod 2, y mod 2) phase.
+template <typename L, BayerOrder Order>
+struct BayerSource {
+	using Layout = L;
+	using Pixel = RGB16;
+
+	static_assert(L::kind == ColorKind::RGB);
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+	// Slot of the stored sample inside the plane's component list.
+	static constexpr size_t y_idx = P::template find_pos<C::Y>();
+	static_assert(y_idx < P::num_comps);
+
+	// Load the raw sample stored at (x, y) and widen it to the 16-bit
+	// normalized range. No bounds checking is done here.
+	static uint16_t read_sample(const Buffer<1>& buf, size_t x, size_t y) noexcept
+	{
+		const uint8_t* row = buf.data[0] + y * buf.stride[0];
+		const auto fields = P::unpack(
+			detail::load_word<P>(row + x * P::bytes_per_pixel));
+		return detail::decode_norm(P::comps[y_idx].bits, fields[y_idx]);
+	}
+
+	// Demosaic the pixel at (x, y) of a W x H image. Neighbour
+	// coordinates are clamped to the image, so edge pixels reuse their
+	// own row / column. Alpha is returned as 0.
+	static RGB16 read(const Buffer<1>& buf, size_t x, size_t y,
+			  size_t W, size_t H) noexcept
+	{
+		const bool even_col = (x % 2) == 0;
+		const bool even_row = (y % 2) == 0;
+
+		const size_t west = detail::clamp_coord(int(x) - 1, W);
+		const size_t east = detail::clamp_coord(int(x) + 1, W);
+		const size_t north = detail::clamp_coord(int(y) - 1, H);
+		const size_t south = detail::clamp_coord(int(y) + 1, H);
+
+		const auto at = [&buf](size_t sx, size_t sy) noexcept {
+			return read_sample(buf, sx, sy);
+		};
+		// Round-to-nearest means, accumulated in 32 bits.
+		const auto mean2 = [](uint16_t a, uint16_t b) noexcept {
+			return uint16_t((uint32_t(a) + b + 1u) >> 1);
+		};
+		const auto mean4 = [](uint16_t a, uint16_t b,
+				      uint16_t c, uint16_t d) noexcept {
+			return uint16_t((uint32_t(a) + b + c + d + 2u) >> 2);
+		};
+
+		const C own = detail::bayer_pick(Order, even_col, even_row);
+		const uint16_t centre = at(x, y);
+
+		if (own == C::G) {
+			// One of R/B lives in this row, the other in this column.
+			const uint16_t row_mean = mean2(at(west, y), at(east, y));
+			const uint16_t col_mean = mean2(at(x, north), at(x, south));
+			const bool red_in_row =
+				detail::bayer_pick(Order, !even_col, even_row) == C::R;
+
+			if (red_in_row)
+				return RGB16{ row_mean, centre, col_mean, uint16_t(0) };
+			return RGB16{ col_mean, centre, row_mean, uint16_t(0) };
+		}
+
+		// R or B pixel: green on the four edges, the opposite colour
+		// on the four corners.
+		const uint16_t green = mean4(at(x, north), at(x, south),
+					     at(west, y), at(east, y));
+		const uint16_t opposite = mean4(at(west, north), at(east, north),
+						at(west, south), at(east, south));
+
+		if (own == C::R)
+			return RGB16{ centre, green, opposite, uint16_t(0) };
+		return RGB16{ opposite, green, centre, uint16_t(0) };
+	}
+};
+
+// Writes an unpacked Bayer plane: for every pixel only the channel
+// selected by its Bayer phase is stored, the other two are dropped.
+template <typename L, BayerOrder Order>
+struct BayerSink {
+	using Layout = L;
+	using Pixel = RGB16;
+
+	static_assert(L::kind == ColorKind::RGB);
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+	static constexpr size_t y_idx = P::template find_pos<C::Y>();
+	static constexpr size_t x_idx = P::template find_pos<C::X>();
+	static constexpr bool has_x = (x_idx < P::num_comps);
+	static_assert(y_idx < P::num_comps);
+
+	static constexpr size_t block_h = 1;
+	static constexpr size_t block_w = 1;
+
+	// Store the single pixel block[0][0] at position (bx, by).
+	static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+				const RGB16 (&block)[1][1]) noexcept
+	{
+		const RGB16& px = block[0][0];
+
+		uint16_t sample = 0;
+		switch (detail::bayer_pick(Order, (bx % 2) == 0, (by % 2) == 0)) {
+		case C::R:
+			sample = px.r;
+			break;
+		case C::G:
+			sample = px.g;
+			break;
+		default:
+			sample = px.b;
+			break;
+		}
+
+		std::array<uint16_t, P::num_comps> fields{};
+		if constexpr (has_x)
+			fields[x_idx] = 0;	// padding bits are written as zero
+		fields[y_idx] = detail::encode_norm(P::comps[y_idx].bits, sample);
+
+		uint8_t* row = buf.data[0] + by * buf.stride[0];
+		detail::store_word<P>(row + bx * P::bytes_per_pixel, P::pack(fields));
+	}
+};
+
+// One alias per BayerOrder for the unpacked source and sink. Each binds
+// the Order argument and leaves a template with the single parameter L.
+// Aliases so X-macro can register without nested template-template params.
+template <typename L> using BayerSource_RGGB = BayerSource<L, BayerOrder::RGGB>;
+template <typename L> using BayerSource_BGGR = BayerSource<L, BayerOrder::BGGR>;
+template <typename L> using BayerSource_GRBG = BayerSource<L, BayerOrder::GRBG>;
+template <typename L> using BayerSource_GBRG = BayerSource<L, BayerOrder::GBRG>;
+
+template <typename L> using BayerSink_RGGB = BayerSink<L, BayerOrder::RGGB>;
+template <typename L> using BayerSink_BGGR = BayerSink<L, BayerOrder::BGGR>;
+template <typename L> using BayerSink_GRBG = BayerSink<L, BayerOrder::GRBG>;
+template <typename L> using BayerSink_GBRG = BayerSink<L, BayerOrder::GBRG>;
+
+// MIPI CSI-2 packed Bayer. The bit layout doesn't fit
+// `Plane<Storage, Comp...>` because each pixel's bits span two
+// non-contiguous bytes, so we use the shared CSI-2 helper (io/csi2.h)
+// to (un)pack samples.
+//
+// The Layout slot is a placeholder (matches the unpacked Bayer of the
+// same bit-depth so the user-facing API can pick the right buffer
+// shape); bytes_per_pixel from the Plane is unused.
+// Reads CSI-2 packed Bayer (10 or 12 bits per sample) and reconstructs
+// a full RGB16 pixel by bilinear interpolation over the 3x3
+// neighbourhood. Samples are addressed in groups of `ppg` pixels that
+// occupy `bpg` bytes.
+template <typename L, BayerOrder Order, size_t BitDepth>
+struct BayerPackedSource {
+	using Layout = L;
+	using Pixel = RGB16;
+
+	static_assert(L::kind == ColorKind::RGB);
+	static_assert(L::num_planes == 1);
+	static_assert(BitDepth == 10 || BitDepth == 12);
+
+	using Traits = detail::csi2::packed_traits<BitDepth>;
+	static constexpr size_t ppg = Traits::ppg;
+	static constexpr size_t bpg = Traits::bpg;
+
+	// Number of low bits gained when a BitDepth-bit sample is widened
+	// to 16 bits. read_sample() fills them by bit replication (see
+	// detail::decode_norm) instead of leaving them zero.
+	static constexpr unsigned shift = 16 - BitDepth;
+
+	// Load the sample at (x, y) and widen it to the 16-bit normalized
+	// range. Uses the same bit-replicating decode as the unpacked Bayer
+	// source, so a full-scale stored value maps to 0xFFFF for both the
+	// packed and the unpacked form. No bounds checking is done here.
+	static uint16_t read_sample(const Buffer<1>& buf, size_t x, size_t y) noexcept
+	{
+		const uint8_t* src = buf.data[0] + y * buf.stride[0]
+				     + (x / ppg) * bpg;
+		const uint16_t val = detail::csi2::unpack_sample<BitDepth>(src, x % ppg);
+		return detail::decode_norm(unsigned(BitDepth), val);
+	}
+
+	// Demosaic the pixel at (x, y) of a W x H image. Neighbour
+	// coordinates are clamped to the image. Alpha is returned as 0.
+	static RGB16 read(const Buffer<1>& buf, size_t x, size_t y,
+			  size_t W, size_t H) noexcept
+	{
+		const bool x_even = (x & 1) == 0;
+		const bool y_even = (y & 1) == 0;
+		const C self = detail::bayer_pick(Order, x_even, y_even);
+
+		const size_t xL = detail::clamp_coord(int(x) - 1, W);
+		const size_t xR = detail::clamp_coord(int(x) + 1, W);
+		const size_t yT = detail::clamp_coord(int(y) - 1, H);
+		const size_t yB = detail::clamp_coord(int(y) + 1, H);
+
+		const uint16_t s = read_sample(buf, x, y);
+
+		uint16_t r = 0, g = 0, b = 0;
+
+		if (self == C::G) {
+			// At a G pixel one of R/B sits in the row neighbours
+			// and the other in the column neighbours.
+			const C h_color = detail::bayer_pick(Order, !x_even, y_even);
+			const uint16_t h_avg = uint16_t(
+				(uint32_t(read_sample(buf, xL, y))
+				 + read_sample(buf, xR, y) + 1u) >> 1);
+			const uint16_t v_avg = uint16_t(
+				(uint32_t(read_sample(buf, x, yT))
+				 + read_sample(buf, x, yB) + 1u) >> 1);
+			g = s;
+			if (h_color == C::R) { r = h_avg; b = v_avg; }
+			else { b = h_avg; r = v_avg; }
+		} else {
+			// At an R or B pixel the cardinal neighbours are G and
+			// the diagonal neighbours carry the opposite colour.
+			const uint16_t g_avg = uint16_t(
+				(uint32_t(read_sample(buf, x, yT))
+				 + read_sample(buf, x, yB)
+				 + read_sample(buf, xL, y)
+				 + read_sample(buf, xR, y) + 2u) >> 2);
+			const uint16_t o_avg = uint16_t(
+				(uint32_t(read_sample(buf, xL, yT))
+				 + read_sample(buf, xR, yT)
+				 + read_sample(buf, xL, yB)
+				 + read_sample(buf, xR, yB) + 2u) >> 2);
+			g = g_avg;
+			if (self == C::R) { r = s; b = o_avg; }
+			else { b = s; r = o_avg; }
+		}
+
+		return RGB16{ r, g, b, uint16_t(0) };
+	}
+};
+
+// Writes CSI-2 packed Bayer (10 or 12 bits per sample). One block is
+// one packed group: `ppg` horizontally adjacent pixels stored in `bpg`
+// bytes.
+template <typename L, BayerOrder Order, size_t BitDepth>
+struct BayerPackedSink {
+	using Layout = L;
+	using Pixel = RGB16;
+
+	static_assert(L::kind == ColorKind::RGB);
+	static_assert(L::num_planes == 1);
+	static_assert(BitDepth == 10 || BitDepth == 12);
+
+	using Traits = detail::csi2::packed_traits<BitDepth>;
+	static constexpr size_t ppg = Traits::ppg;
+	static constexpr size_t bpg = Traits::bpg;
+
+	static constexpr size_t block_h = 1;
+	static constexpr size_t block_w = ppg;
+
+	// Store the ppg pixels of block[0] as the group that starts at
+	// pixel column bx of row by.
+	static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+				const RGB16 (&block)[1][ppg]) noexcept
+	{
+		const bool even_row = (by % 2) == 0;
+
+		std::array<uint16_t, ppg> group{};
+		size_t i = 0;
+		for (const RGB16& px : block[0]) {
+			const bool even_col = ((bx + i) % 2) == 0;
+
+			uint16_t norm = 0;
+			switch (detail::bayer_pick(Order, even_col, even_row)) {
+			case C::R:
+				norm = px.r;
+				break;
+			case C::G:
+				norm = px.g;
+				break;
+			default:
+				norm = px.b;
+				break;
+			}
+
+			// Keep the top BitDepth bits of the normalized value.
+			group[i] = uint16_t(norm >> (16 - BitDepth));
+			++i;
+		}
+
+		uint8_t* row = buf.data[0] + by * buf.stride[0];
+		detail::csi2::pack_group<BitDepth>(row + (bx / ppg) * bpg, group);
+	}
+};
+
+// One alias per (BayerOrder, bit depth) pair for the packed source and
+// sink. Each binds Order and BitDepth and leaves a template with the
+// single parameter L.
+template <typename L> using BayerPackedSource_RGGB10 = BayerPackedSource<L, BayerOrder::RGGB, 10>;
+template <typename L> using BayerPackedSource_BGGR10 = BayerPackedSource<L, BayerOrder::BGGR, 10>;
+template <typename L> using BayerPackedSource_GRBG10 = BayerPackedSource<L, BayerOrder::GRBG, 10>;
+template <typename L> using BayerPackedSource_GBRG10 = BayerPackedSource<L, BayerOrder::GBRG, 10>;
+template <typename L> using BayerPackedSource_RGGB12 = BayerPackedSource<L, BayerOrder::RGGB, 12>;
+template <typename L> using BayerPackedSource_BGGR12 = BayerPackedSource<L, BayerOrder::BGGR, 12>;
+template <typename L> using BayerPackedSource_GRBG12 = BayerPackedSource<L, BayerOrder::GRBG, 12>;
+template <typename L> using BayerPackedSource_GBRG12 = BayerPackedSource<L, BayerOrder::GBRG, 12>;
+
+template <typename L> using BayerPackedSink_RGGB10 = BayerPackedSink<L, BayerOrder::RGGB, 10>;
+template <typename L> using BayerPackedSink_BGGR10 = BayerPackedSink<L, BayerOrder::BGGR, 10>;
+template <typename L> using BayerPackedSink_GRBG10 = BayerPackedSink<L, BayerOrder::GRBG, 10>;
+template <typename L> using BayerPackedSink_GBRG10 = BayerPackedSink<L, BayerOrder::GBRG, 10>;
+template <typename L> using BayerPackedSink_RGGB12 = BayerPackedSink<L, BayerOrder::RGGB, 12>;
+template <typename L> using BayerPackedSink_BGGR12 = BayerPackedSink<L, BayerOrder::BGGR, 12>;
+template <typename L> using BayerPackedSink_GRBG12 = BayerPackedSink<L, BayerOrder::GRBG, 12>;
+template <typename L> using BayerPackedSink_GBRG12 = BayerPackedSink<L, BayerOrder::GBRG, 12>;
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/csi2.h b/subprojects/pixpat/pixpat-native/src/io/csi2.h
new file mode 100644
index 0000000..59a8f8d
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/csi2.h
@@ -0,0 +1,80 @@
+#pragma once
+
+// Shared MIPI CSI-2 byte (un)packing for the 10P / 12P forms used by
+// Bayer raw and Y-only grayscale.
+//
+// 10P: 4 samples in 5 bytes — bytes 0..3 hold the high 8 bits of
+//      samples 0..3; byte 4 holds 4 x 2 LSBs in the same order,
+//      counted from the least significant bit (sample 0 in bits 0..1,
+//      sample 1 in bits 2..3, sample 2 in bits 4..5, sample 3 in
+//      bits 6..7).
+// 12P: 2 samples in 3 bytes — bytes 0..1 hold the high 8 bits of
+//      samples 0..1; byte 2 holds 2 x 4 LSBs (sample 0 in bits 0..3,
+//      sample 1 in bits 4..7).
+//
+// This is the byte layout of the MIPI CSI-2 RAW10 / RAW12 data types
+// and of the V4L2 "10P" / "12P" pixel formats.
+//
+// Helpers deal in the stored integer (low BitDepth bits set);
+// normalization to/from the 16-bit pivot stays in the caller.
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+
+namespace pixpat::detail::csi2
+{
+
+// Group geometry per bit depth: `ppg` samples are stored in `bpg`
+// bytes. Only 10 and 12 bits are defined.
+template <size_t BitDepth>
+struct packed_traits;
+
+template <>
+struct packed_traits<10> {
+	static constexpr size_t ppg = 4;
+	static constexpr size_t bpg = 5;
+};
+
+template <>
+struct packed_traits<12> {
+	static constexpr size_t ppg = 2;
+	static constexpr size_t bpg = 3;
+};
+
+// Extract one BitDepth-bit sample from a packed group.
+//
+// src: first byte of the group (`bpg` readable bytes).
+// i:   in-group sample index (0..ppg-1).
+//
+// Returns the sample in the low BitDepth bits.
+template <size_t BitDepth>
+inline uint16_t unpack_sample(const uint8_t* src, size_t i) noexcept
+{
+	if constexpr (BitDepth == 10) {
+		const uint8_t hi = src[i];
+		// Sample i owns bits (2i .. 2i+1) of the LSB byte.
+		const uint8_t lsb = (src[4] >> (i * 2)) & 0x03;
+		return uint16_t((hi << 2) | lsb);
+	} else { // 12
+		const uint8_t hi = src[i];
+		// Sample 0 owns the low nibble, sample 1 the high nibble.
+		const uint8_t lsb = (i == 0) ? (src[2] & 0x0F)
+					     : ((src[2] >> 4) & 0x0F);
+		return uint16_t((hi << 4) | lsb);
+	}
+}
+
+// Write `ppg` BitDepth-bit samples (low BitDepth bits significant) into
+// a packed group of `bpg` bytes starting at `dst`. Bits above BitDepth
+// in the input values are ignored.
+template <size_t BitDepth>
+inline void pack_group(
+	uint8_t* dst,
+	const std::array<uint16_t, packed_traits<BitDepth>::ppg>& vals) noexcept
+{
+	if constexpr (BitDepth == 10) {
+		dst[0] = (vals[0] >> 2) & 0xFF;
+		dst[1] = (vals[1] >> 2) & 0xFF;
+		dst[2] = (vals[2] >> 2) & 0xFF;
+		dst[3] = (vals[3] >> 2) & 0xFF;
+		dst[4] = ((vals[0] & 0x03) << 0)
+			 | ((vals[1] & 0x03) << 2)
+			 | ((vals[2] & 0x03) << 4)
+			 | ((vals[3] & 0x03) << 6);
+	} else { // 12
+		dst[0] = (vals[0] >> 4) & 0xFF;
+		dst[1] = (vals[1] >> 4) & 0xFF;
+		dst[2] = ((vals[0] & 0x0F) << 0)
+			 | ((vals[1] & 0x0F) << 4);
+	}
+}
+
+} // namespace pixpat::detail::csi2
diff --git a/subprojects/pixpat/pixpat-native/src/io/detail.h b/subprojects/pixpat/pixpat-native/src/io/detail.h
new file mode 100644
index 0000000..cb2b9fb
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/detail.h
@@ -0,0 +1,62 @@
+#pragma once
+
+// Per-component encode/decode against the descriptor + memcpy-based
+// load/store_word helpers. Shared by every Source / Sink template.
+
+#include <cstdint>
+#include <cstring>
+
+#include "../layout.h"
+
+namespace pixpat::detail
+{
+
+// Decode an N-bit stored value into the 16-bit normalized space and
+// encode it back. Decode bit-replicates the stored value across the 16
+// bits so that N-bit max maps to normalized max (e.g. 8-bit 0xFF →
+// 0xFFFF, not 0xFF00). Encode is a plain truncating right-shift: the
+// replicated bits land in the low (16-N) bits and get dropped, so
+// stored→norm→stored is exact for any N in [1, 16].
+//
+// `bits` is taken at runtime; in every call site it traces back to a
+// constexpr Plane::comps[I].bits read, which the optimizer constant-
+// folds after inlining.
+
+// Expand a `bits`-wide stored value to the 16-bit normalized range by
+// tiling its bit pattern from the most significant bit downwards.
+//
+// bits:   width of the stored component, normally in [1, 16].
+// stored: value held in the low `bits` bits.
+//
+// Returns the normalized value; a full-scale input maps to 0xFFFF.
+// A zero-width component (bits == 0) decodes to 0.
+constexpr uint16_t decode_norm(unsigned bits, uint16_t stored) noexcept
+{
+	// Without this guard the loop step below would be 0 and the loop
+	// would never terminate.
+	if (bits == 0)
+		return 0;
+
+	const int N = int(bits);
+	// Loop, not a single OR: one replication only covers 2N bits, so
+	// N < 8 (RGB565, RGBA4444, 1-bit alpha, ...) needs multiple tiles.
+	uint32_t result = 0;
+	for (int s = 16 - N; s > -N; s -= N) {
+		if (s >= 0)
+			result |= uint32_t(stored) << s;
+		else
+			result |= uint32_t(stored) >> -s;	// last, partial tile
+	}
+	return uint16_t(result);
+}
+
+// Reduce a 16-bit normalized value to a `bits`-wide stored value by
+// keeping its top `bits` bits (truncating, no rounding).
+constexpr uint16_t encode_norm(unsigned bits, uint16_t norm) noexcept
+{
+	const unsigned dropped = 16u - bits;
+	return static_cast<uint16_t>(norm >> dropped);
+}
+
+// Fetch one storage word from `p`. Exactly Plane::bytes_per_pixel bytes
+// are copied into a zero-initialized Plane::storage_t, so layouts whose
+// pixel is narrower than the storage type (e.g. 24-bit BGR888) leave
+// the remaining bytes zero. memcpy keeps the access alignment-agnostic.
+template <typename Plane>
+inline typename Plane::storage_t load_word(const uint8_t* p) noexcept
+{
+	using Word = typename Plane::storage_t;
+
+	Word loaded = Word{};
+	std::memcpy(&loaded, p, Plane::bytes_per_pixel);
+	return loaded;
+}
+
+// Write one storage word to `p`. Only the first Plane::bytes_per_pixel
+// bytes of `word`'s object representation are copied; bytes after that
+// in the destination are left untouched.
+template <typename Plane>
+inline void store_word(uint8_t* p, typename Plane::storage_t word) noexcept
+{
+	const void* from = &word;
+	std::memcpy(p, from, Plane::bytes_per_pixel);
+}
+
+} // namespace pixpat::detail
diff --git a/subprojects/pixpat/pixpat-native/src/io/gray.h b/subprojects/pixpat/pixpat-native/src/io/gray.h
new file mode 100644
index 0000000..d175b68
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/gray.h
@@ -0,0 +1,153 @@
+#pragma once
+
+// Grayscale (Y8 / Y10 / Y12 / Y16) and multi-pixel-per-word grayscale
+// (XYYY2101010: 3 Y components in one uint32_t). Modeled as a YUV format
+// with neutral chroma synthesized on read so cross-color-kind ColorXfm
+// produces R=G=B=Y'. The sink encodes Y from YUV16 and ignores U/V.
+// Y10/Y12 carry an X padding bitfield which we zero out on write.
+// Neutral chroma in normalized-16 is 0x8000 (the midpoint of [0, 0xFFFF]).
+
+#include <array>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+// Reads one-sample-per-word grayscale. The stored Y is widened to the
+// 16-bit normalized range; U and V are synthesized as the neutral value
+// 0x8000 and alpha as 0.
+template <typename L>
+struct GraySource {
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+	static constexpr size_t y_idx = P::template find_pos<C::Y>();
+	static_assert(y_idx < P::num_comps);
+
+	// Read the pixel at (x, y). W and H are accepted for signature
+	// parity with the other sources and are not used.
+	static YUV16 read(const Buffer<1>& buf, size_t x, size_t y,
+			  [[maybe_unused]] size_t W,
+			  [[maybe_unused]] size_t H) noexcept
+	{
+		const uint8_t* row = buf.data[0] + y * buf.stride[0];
+		const auto fields = P::unpack(
+			detail::load_word<P>(row + x * P::bytes_per_pixel));
+		const uint16_t luma =
+			detail::decode_norm(P::comps[y_idx].bits, fields[y_idx]);
+
+		return YUV16{ luma, 0x8000, 0x8000, uint16_t(0) };
+	}
+};
+
+// Writes one-sample-per-word grayscale: Y is encoded from the YUV16
+// pixel, U/V/alpha are ignored and any X padding field is zeroed.
+template <typename L>
+struct GraySink {
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+	static constexpr size_t y_idx = P::template find_pos<C::Y>();
+	static constexpr size_t x_idx = P::template find_pos<C::X>();
+	static constexpr bool has_x = (x_idx < P::num_comps);
+	static_assert(y_idx < P::num_comps);
+
+	static constexpr size_t block_h = 1;
+	static constexpr size_t block_w = 1;
+
+	// Store the single pixel block[0][0] at position (bx, by).
+	static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+				const YUV16 (&block)[1][1]) noexcept
+	{
+		const YUV16& px = block[0][0];
+
+		std::array<uint16_t, P::num_comps> fields{};
+		if constexpr (has_x)
+			fields[x_idx] = 0;
+		fields[y_idx] = detail::encode_norm(P::comps[y_idx].bits, px.y);
+
+		uint8_t* row = buf.data[0] + by * buf.stride[0];
+		detail::store_word<P>(row + bx * P::bytes_per_pixel, P::pack(fields));
+	}
+};
+
+// Multi-pixel-per-word grayscale. The Layout carries one C::Y entry per
+// pixel in the group; pixels_per_word is derived from how many C::Y
+// entries the layout has. All Y components must share the same bit width
+// (so the encode/decode shift is shared). block_w = ppw so the sink
+// writes one storage word per block.
+// Reads grayscale stored as several Y samples per storage word. The
+// number of pixels per word equals the number of C::Y entries in the
+// plane; U and V are synthesized as 0x8000 and alpha as 0.
+template <typename L>
+struct MultiPixelGraySource {
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+	static constexpr size_t ppw = P::template component_count<C::Y>();
+	static_assert(ppw >= 1);
+
+	// Width of the first Y entry, used for every Y slot.
+	static constexpr unsigned y_bits = P::comps[P::template find_pos<C::Y>(0)].bits;
+
+	// Read the pixel at (x, y). W and H are not used.
+	static YUV16 read(const Buffer<1>& buf, size_t x, size_t y,
+			  [[maybe_unused]] size_t W,
+			  [[maybe_unused]] size_t H) noexcept
+	{
+		const size_t word_index = x / ppw;	// which storage word in the row
+		const size_t in_word = x % ppw;		// which Y inside that word
+
+		const uint8_t* row = buf.data[0] + y * buf.stride[0];
+		const auto fields = P::unpack(
+			detail::load_word<P>(row + word_index * P::bytes_per_pixel));
+
+		// Position of the in_word-th C::Y entry, looked up at runtime.
+		const size_t slot = P::template find_pos<C::Y>(in_word);
+		const uint16_t luma = detail::decode_norm(y_bits, fields[slot]);
+
+		return YUV16{ luma, 0x8000, 0x8000, uint16_t(0) };
+	}
+};
+
+// Writes grayscale stored as several Y samples per storage word. One
+// block is one word: `ppw` horizontally adjacent pixels. U/V/alpha are
+// ignored and any X padding field is zeroed.
+template <typename L>
+struct MultiPixelGraySink {
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+	static constexpr size_t ppw = P::template component_count<C::Y>();
+	static constexpr size_t x_idx = P::template find_pos<C::X>();
+	static constexpr bool has_x = (x_idx < P::num_comps);
+	static_assert(ppw >= 1);
+
+	static constexpr size_t block_h = 1;
+	static constexpr size_t block_w = ppw;
+
+	// Store the ppw pixels of block[0] as the word that starts at
+	// pixel column bx of row by.
+	static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+				const YUV16 (&block)[1][ppw]) noexcept
+	{
+		// Width of the first Y entry, used for every Y slot.
+		constexpr unsigned y_bits = P::comps[P::template find_pos<C::Y>(0)].bits;
+
+		std::array<uint16_t, P::num_comps> fields{};
+		if constexpr (has_x)
+			fields[x_idx] = 0;
+
+		size_t n = 0;
+		for (const YUV16& px : block[0]) {
+			const size_t slot = P::template find_pos<C::Y>(n);
+			fields[slot] = detail::encode_norm(y_bits, px.y);
+			++n;
+		}
+
+		uint8_t* row = buf.data[0] + by * buf.stride[0];
+		detail::store_word<P>(row + (bx / ppw) * P::bytes_per_pixel,
+				      P::pack(fields));
+	}
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/gray_packed.h b/subprojects/pixpat/pixpat-native/src/io/gray_packed.h
new file mode 100644
index 0000000..dc1fa68
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/gray_packed.h
@@ -0,0 +1,78 @@
+#pragma once
+
+// MIPI CSI-2 packed grayscale (Y10P / Y12P). Same byte packing as
+// Bayer10P/Bayer12P (see io/csi2.h) but every sample is Y; the source
+// emits neutral chroma to keep cross-color-kind ColorXfm consistent
+// with GraySource.
+//
+// The Layout slot is a placeholder (matches the unpacked Y8 storage
+// shape so dispatch plumbing is uniform); bytes_per_pixel from the
+// Plane is unused.
+
+#include <array>
+#include <cstdint>
+
+#include "../layout.h"
+#include "csi2.h"
+
+namespace pixpat
+{
+
+// Reads CSI-2 packed grayscale (10 or 12 bits per sample). Samples are
+// addressed in groups of `ppg` pixels that occupy `bpg` bytes. U and V
+// are synthesized as the neutral value 0x8000 and alpha as 0.
+template <typename L, size_t BitDepth>
+struct GrayPackedSource {
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 1);
+	static_assert(BitDepth == 10 || BitDepth == 12);
+
+	using Traits = detail::csi2::packed_traits<BitDepth>;
+	static constexpr size_t ppg = Traits::ppg;
+	static constexpr size_t bpg = Traits::bpg;
+	// Number of low bits gained when widening a sample to 16 bits.
+	static constexpr unsigned shift = 16 - BitDepth;
+
+	// Read the pixel at (x, y). W and H are not used.
+	static YUV16 read(const Buffer<1>& buf, size_t x, size_t y,
+			  [[maybe_unused]] size_t W,
+			  [[maybe_unused]] size_t H) noexcept
+	{
+		const uint8_t* src = buf.data[0] + y * buf.stride[0]
+				     + (x / ppg) * bpg;
+		const uint16_t val = detail::csi2::unpack_sample<BitDepth>(src, x % ppg);
+
+		// Widen by bit replication: the `shift` vacated low bits are
+		// filled with the sample's top bits, so a full-scale stored
+		// value maps to 0xFFFF, as it does for unpacked grayscale.
+		// One extra copy is enough because BitDepth >= 8 here.
+		const uint16_t luma = uint16_t((val << shift)
+					       | (val >> (BitDepth - shift)));
+
+		return YUV16{
+			luma,
+			0x8000, 0x8000, uint16_t(0),
+		};
+	}
+};
+
+// Writes CSI-2 packed grayscale (10 or 12 bits per sample). One block
+// is one packed group: `ppg` horizontally adjacent pixels stored in
+// `bpg` bytes. Only Y is used.
+template <typename L, size_t BitDepth>
+struct GrayPackedSink {
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 1);
+	static_assert(BitDepth == 10 || BitDepth == 12);
+
+	using Traits = detail::csi2::packed_traits<BitDepth>;
+	static constexpr size_t ppg = Traits::ppg;
+	static constexpr size_t bpg = Traits::bpg;
+
+	static constexpr size_t block_h = 1;
+	static constexpr size_t block_w = ppg;
+
+	// Store the ppg pixels of block[0] as the group that starts at
+	// pixel column bx of row by.
+	static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+				const YUV16 (&block)[1][ppg]) noexcept
+	{
+		std::array<uint16_t, ppg> group{};
+		size_t n = 0;
+		for (const YUV16& px : block[0]) {
+			// Keep the top BitDepth bits of the normalized luma.
+			group[n] = uint16_t(px.y >> (16 - BitDepth));
+			++n;
+		}
+
+		uint8_t* row = buf.data[0] + by * buf.stride[0];
+		detail::csi2::pack_group<BitDepth>(row + (bx / ppg) * bpg, group);
+	}
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h b/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h
new file mode 100644
index 0000000..f2f8206
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h
@@ -0,0 +1,72 @@
+#pragma once
+
+// Single-channel RGB formats (R8). Storage carries one R component;
+// MonoRGBSource synthesizes G=B=R on read so cross-color-kind ColorXfm
+// produces sensible Y from R alone. MonoRGBSink encodes R and ignores
+// G/B/A (and zeroes any X padding). Symmetric to GraySource/GraySink
+// (io/gray.h) but for ColorKind::RGB on C::R.
+
+#include <array>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+// Reads a single-channel RGB format: the stored R is widened to the
+// 16-bit normalized range and copied to G and B; alpha is 0.
+template <typename L>
+struct MonoRGBSource {
+	using Layout = L;
+	using Pixel = RGB16;
+
+	static_assert(L::kind == ColorKind::RGB);
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+	static constexpr size_t r_idx = P::template find_pos<C::R>();
+	static_assert(r_idx < P::num_comps);
+
+	// Read the pixel at (x, y). W and H are not used.
+	static RGB16 read(const Buffer<1>& buf, size_t x, size_t y,
+			  [[maybe_unused]] size_t W,
+			  [[maybe_unused]] size_t H) noexcept
+	{
+		const uint8_t* row = buf.data[0] + y * buf.stride[0];
+		const auto fields = P::unpack(
+			detail::load_word<P>(row + x * P::bytes_per_pixel));
+		const uint16_t level =
+			detail::decode_norm(P::comps[r_idx].bits, fields[r_idx]);
+
+		return RGB16{ level, level, level, uint16_t(0) };
+	}
+};
+
+// Writes a single-channel RGB format: R is encoded from the RGB16
+// pixel, G/B/alpha are ignored and any X padding field is zeroed.
+template <typename L>
+struct MonoRGBSink {
+	using Layout = L;
+	using Pixel = RGB16;
+
+	static_assert(L::kind == ColorKind::RGB);
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+	static constexpr size_t r_idx = P::template find_pos<C::R>();
+	static constexpr size_t x_idx = P::template find_pos<C::X>();
+	static constexpr bool has_x = (x_idx < P::num_comps);
+	static_assert(r_idx < P::num_comps);
+
+	static constexpr size_t block_h = 1;
+	static constexpr size_t block_w = 1;
+
+	// Store the single pixel block[0][0] at position (bx, by).
+	static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+				const RGB16 (&block)[1][1]) noexcept
+	{
+		const RGB16& px = block[0][0];
+
+		std::array<uint16_t, P::num_comps> fields{};
+		if constexpr (has_x)
+			fields[x_idx] = 0;
+		fields[r_idx] = detail::encode_norm(P::comps[r_idx].bits, px.r);
+
+		uint8_t* row = buf.data[0] + by * buf.stride[0];
+		detail::store_word<P>(row + bx * P::bytes_per_pixel, P::pack(fields));
+	}
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/packed.h b/subprojects/pixpat/pixpat-native/src/io/packed.h
new file mode 100644
index 0000000..9d953bc
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/packed.h
@@ -0,0 +1,106 @@
+#pragma once
+
+// Single-plane, single-pixel-per-storage-word formats. Works for both
+// RGB layouts (XRGB8888, RGB565, ABGR16161616, ...) and YUV
+// single-pixel layouts (XVUY2101010, AVUY16161616). Pixel type follows
+// L::kind; the three mandatory components are R/G/B for RGB or Y/U/V
+// for YUV. Both `RGB16` and `YUV16` are 4 uint16_t with the alpha last,
+// so aggregate-init by position works for either.
+
+#include <array>
+#include <type_traits>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+// Reads single-plane formats with one pixel per storage word. The three
+// colour components are R/G/B or Y/U/V depending on L::kind; alpha is
+// decoded when the plane has a C::A component and is 0 otherwise.
+template <typename L>
+struct PackedSource {
+	using Layout = L;
+	using Pixel = std::conditional_t<L::kind == ColorKind::RGB, RGB16, YUV16>;
+
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+	static constexpr C c0 = (L::kind == ColorKind::RGB) ? C::R : C::Y;
+	static constexpr C c1 = (L::kind == ColorKind::RGB) ? C::G : C::U;
+	static constexpr C c2 = (L::kind == ColorKind::RGB) ? C::B : C::V;
+
+	static constexpr size_t i0 = P::template find_pos<c0>();
+	static constexpr size_t i1 = P::template find_pos<c1>();
+	static constexpr size_t i2 = P::template find_pos<c2>();
+	static constexpr size_t a_idx = P::template find_pos<C::A>();
+	static constexpr bool has_a = (a_idx < P::num_comps);
+	static_assert(i0 < P::num_comps && i1 < P::num_comps && i2 < P::num_comps);
+
+	// Read the pixel at (x, y). W and H are not used.
+	static Pixel read(const Buffer<1>& buf, size_t x, size_t y,
+			  [[maybe_unused]] size_t W,
+			  [[maybe_unused]] size_t H) noexcept
+	{
+		const uint8_t* row = buf.data[0] + y * buf.stride[0];
+		const auto fields = P::unpack(
+			detail::load_word<P>(row + x * P::bytes_per_pixel));
+
+		// Alpha is the fourth member of both RGB16 and YUV16.
+		uint16_t alpha = 0;
+		if constexpr (has_a)
+			alpha = detail::decode_norm(P::comps[a_idx].bits, fields[a_idx]);
+
+		return Pixel{
+			detail::decode_norm(P::comps[i0].bits, fields[i0]),
+			detail::decode_norm(P::comps[i1].bits, fields[i1]),
+			detail::decode_norm(P::comps[i2].bits, fields[i2]),
+			alpha,
+		};
+	}
+};
+
+// Writes single-plane formats with one pixel per storage word. The
+// three colour components are R/G/B or Y/U/V depending on L::kind.
+// Alpha is encoded when the plane has a C::A component; any X padding
+// field is zeroed.
+template <typename L>
+struct PackedSink {
+	using Layout = L;
+	using Pixel = std::conditional_t<L::kind == ColorKind::RGB, RGB16, YUV16>;
+
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+	static constexpr C c0 = (L::kind == ColorKind::RGB) ? C::R : C::Y;
+	static constexpr C c1 = (L::kind == ColorKind::RGB) ? C::G : C::U;
+	static constexpr C c2 = (L::kind == ColorKind::RGB) ? C::B : C::V;
+
+	static constexpr size_t i0 = P::template find_pos<c0>();
+	static constexpr size_t i1 = P::template find_pos<c1>();
+	static constexpr size_t i2 = P::template find_pos<c2>();
+	static constexpr size_t x_idx = P::template find_pos<C::X>();
+	static constexpr size_t a_idx = P::template find_pos<C::A>();
+	static constexpr bool has_x = (x_idx < P::num_comps);
+	static constexpr bool has_a = (a_idx < P::num_comps);
+	static_assert(i0 < P::num_comps && i1 < P::num_comps && i2 < P::num_comps);
+
+	static constexpr size_t block_h = 1;
+	static constexpr size_t block_w = 1;
+
+	// Store the single pixel block[0][0] at position (bx, by). The
+	// member names read from the pixel (.r/.g/.b or .y/.u/.v) follow
+	// L::kind.
+	static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+				const Pixel (&block)[1][1]) noexcept
+	{
+		const Pixel& px = block[0][0];
+
+		// Encode `norm` at the width of component slot `slot`.
+		const auto enc = [](size_t slot, uint16_t norm) noexcept {
+			return detail::encode_norm(P::comps[slot].bits, norm);
+		};
+
+		std::array<uint16_t, P::num_comps> fields{};
+		if constexpr (has_x)
+			fields[x_idx] = 0;
+
+		if constexpr (L::kind != ColorKind::RGB) {
+			fields[i0] = enc(i0, px.y);
+			fields[i1] = enc(i1, px.u);
+			fields[i2] = enc(i2, px.v);
+		} else {
+			fields[i0] = enc(i0, px.r);
+			fields[i1] = enc(i1, px.g);
+			fields[i2] = enc(i2, px.b);
+		}
+
+		if constexpr (has_a)
+			fields[a_idx] = enc(a_idx, px.a);
+
+		uint8_t* row = buf.data[0] + by * buf.stride[0];
+		detail::store_word<P>(row + bx * P::bytes_per_pixel, P::pack(fields));
+	}
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h b/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h
new file mode 100644
index 0000000..90c8b2f
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h
@@ -0,0 +1,89 @@
+#pragma once
+
+// Packed YUV 4:2:2 (YUYV / YVYU / UYVY / VYUY): two pixels per 32-bit
+// word, one shared chroma pair. The Layout uses two C::Y entries plus
+// one each of C::U / C::V; we resolve the duplicate Y via
+// find_pos<C::Y>(n).
+
+#include <array>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+// Reader for single-plane packed YUV 4:2:2 layouts: every storage word
+// carries two luma samples plus one U and one V sample shared by both
+// pixels of the pair.
+template <typename L>
+struct PackedYUVSource {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 1);
+    static_assert(L::h_sub == 2 && L::v_sub == 1);
+
+    using P = typename L::template plane<0>;
+
+    // Slots inside the unpacked word. The plane lists C::Y twice;
+    // find_pos<C::Y>(n) selects the n-th occurrence.
+    static constexpr size_t y0_idx = P::template find_pos<C::Y>(0);
+    static constexpr size_t y1_idx = P::template find_pos<C::Y>(1);
+    static constexpr size_t u_idx = P::template find_pos<C::U>();
+    static constexpr size_t v_idx = P::template find_pos<C::V>();
+
+    // Returns the pixel at (x, y) as normalized YUV16 with alpha set to 0.
+    // W and H are accepted for interface uniformity and are not used.
+    static YUV16 read(const Buffer<1>& buf, size_t x, size_t y,
+                      [[maybe_unused]] size_t W,
+                      [[maybe_unused]] size_t H) noexcept
+    {
+        // Two horizontally adjacent pixels share one storage word.
+        const uint8_t* row = buf.data[0] + y * buf.stride[0];
+        const uint8_t* word = row + (x / 2) * P::bytes_per_pixel;
+        const auto fields = P::unpack(detail::load_word<P>(word));
+
+        // Even x takes the first luma sample, odd x the second.
+        const size_t luma_idx = (x % 2 == 0) ? y0_idx : y1_idx;
+
+        // The bit width of the first Y component is used for whichever
+        // luma sample was picked; both Y components are expected to
+        // have the same width.
+        return YUV16{
+            detail::decode_norm(P::comps[y0_idx].bits, fields[luma_idx]),
+            detail::decode_norm(P::comps[u_idx].bits, fields[u_idx]),
+            detail::decode_norm(P::comps[v_idx].bits, fields[v_idx]),
+            uint16_t(0),
+        };
+    }
+};
+
+// Writer for single-plane packed YUV 4:2:2 layouts. One block is a
+// horizontal pair of pixels and fills exactly one storage word.
+template <typename L>
+struct PackedYUVSink {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 1);
+    static_assert(L::h_sub == 2 && L::v_sub == 1);
+
+    using P = typename L::template plane<0>;
+
+    // Slots inside the word; find_pos<C::Y>(n) selects the n-th Y entry.
+    static constexpr size_t y0_idx = P::template find_pos<C::Y>(0);
+    static constexpr size_t y1_idx = P::template find_pos<C::Y>(1);
+    static constexpr size_t u_idx = P::template find_pos<C::U>();
+    static constexpr size_t v_idx = P::template find_pos<C::V>();
+
+    static constexpr size_t block_h = 1;
+    static constexpr size_t block_w = 2;
+
+    // Encodes the pixel pair starting at column `bx` of row `by` into
+    // the word at column bx / 2.
+    static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+                            const YUV16 (&block)[1][2]) noexcept
+    {
+        const YUV16& left = block[0][0];
+        const YUV16& right = block[0][1];
+
+        // Shared chroma: mean of the two pixels, computed on the
+        // normalized 16-bit values. The division truncates (no
+        // round-half-up).
+        const uint32_t u_sum = uint32_t(left.u) + uint32_t(right.u);
+        const uint32_t v_sum = uint32_t(left.v) + uint32_t(right.v);
+        const uint16_t u_mean = uint16_t(u_sum / 2);
+        const uint16_t v_mean = uint16_t(v_sum / 2);
+
+        std::array<uint16_t, P::num_comps> fields{};
+        fields[y0_idx] = detail::encode_norm(P::comps[y0_idx].bits, left.y);
+        fields[y1_idx] = detail::encode_norm(P::comps[y1_idx].bits, right.y);
+        fields[u_idx] = detail::encode_norm(P::comps[u_idx].bits, u_mean);
+        fields[v_idx] = detail::encode_norm(P::comps[v_idx].bits, v_mean);
+
+        uint8_t* const row = buf.data[0] + by * buf.stride[0];
+        uint8_t* const dst = row + (bx / 2) * P::bytes_per_pixel;
+        detail::store_word<P>(dst, P::pack(fields));
+    }
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/planar.h b/subprojects/pixpat/pixpat-native/src/io/planar.h
new file mode 100644
index 0000000..0dab685
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/planar.h
@@ -0,0 +1,257 @@
+#pragma once
+
+// 3-plane planar YUV. Two flavours:
+//
+// PlanarSource / PlanarSink — YUV/YVU 420/422/444, single Y per word,
+// single chroma per word. Chroma is averaged over h_sub × v_sub
+// on write.
+//
+// MultiPixelPlanarSource / MultiPixelPlanarSink — T430, multi-pixel-
+// per-word planar 4:4:4 (3 samples per uint32_t in each of 3
+// planes, plus 2-bit X padding). block_w = ppw, block_h = 1.
+//
+// Plane indices for Y / U / V are looked up via Layout::find_plane<C>(),
+// so swap_uv layouts (YVU vs YUV) work without separate templates.
+
+#include <array>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+// Reader for 3-plane planar YUV layouts with one sample per storage
+// word in each plane. Chroma planes are addressed at
+// (x / h_sub, y / v_sub).
+template <typename L>
+struct PlanarSource {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 3);
+
+    // Plane holding each component; resolved per layout so that swapped
+    // U/V plane orders need no separate template.
+    static constexpr size_t y_plane = L::template find_plane<C::Y>();
+    static constexpr size_t u_plane = L::template find_plane<C::U>();
+    static constexpr size_t v_plane = L::template find_plane<C::V>();
+
+    using YP = typename L::template plane<y_plane>;
+    using UP = typename L::template plane<u_plane>;
+    using VP = typename L::template plane<v_plane>;
+
+    // Slot of the sample inside each plane's component list. Looked up
+    // by tag instead of assuming slot 0, so a plane that lists padding
+    // before its sample is still decoded from the right field.
+    static constexpr size_t y_pos = YP::template find_pos<C::Y>();
+    static constexpr size_t u_pos = UP::template find_pos<C::U>();
+    static constexpr size_t v_pos = VP::template find_pos<C::V>();
+    static_assert(y_pos < YP::num_comps, "Y plane has no Y component");
+    static_assert(u_pos < UP::num_comps, "U plane has no U component");
+    static_assert(v_pos < VP::num_comps, "V plane has no V component");
+
+    // Returns the pixel at (x, y) as normalized YUV16 with alpha set to 0.
+    // W and H are accepted for interface uniformity and are not used.
+    static YUV16 read(const Buffer<3>& buf, size_t x, size_t y,
+                      [[maybe_unused]] size_t W,
+                      [[maybe_unused]] size_t H) noexcept
+    {
+        // Luma: one word per pixel.
+        const uint8_t* yp = buf.data[y_plane] + y * buf.stride[y_plane]
+                            + x * YP::bytes_per_pixel;
+        const auto y_vals = YP::unpack(detail::load_word<YP>(yp));
+
+        // Chroma: one word per h_sub x v_sub group of pixels.
+        const size_t cx = x / L::h_sub;
+        const size_t cy = y / L::v_sub;
+        const uint8_t* up = buf.data[u_plane] + cy * buf.stride[u_plane]
+                            + cx * UP::bytes_per_pixel;
+        const uint8_t* vp = buf.data[v_plane] + cy * buf.stride[v_plane]
+                            + cx * VP::bytes_per_pixel;
+        const auto u_vals = UP::unpack(detail::load_word<UP>(up));
+        const auto v_vals = VP::unpack(detail::load_word<VP>(vp));
+
+        return YUV16{
+            detail::decode_norm(YP::comps[y_pos].bits, y_vals[y_pos]),
+            detail::decode_norm(UP::comps[u_pos].bits, u_vals[u_pos]),
+            detail::decode_norm(VP::comps[v_pos].bits, v_vals[v_pos]),
+            uint16_t(0),
+        };
+    }
+};
+
+// Writer for 3-plane planar YUV layouts with one sample per storage
+// word in each plane. One block covers h_sub x v_sub pixels: every
+// pixel gets its own Y word, and the block shares a single averaged U
+// word and V word.
+template <typename L>
+struct PlanarSink {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 3);
+
+    // Plane holding each component; resolved per layout so that swapped
+    // U/V plane orders need no separate template.
+    static constexpr size_t y_plane = L::template find_plane<C::Y>();
+    static constexpr size_t u_plane = L::template find_plane<C::U>();
+    static constexpr size_t v_plane = L::template find_plane<C::V>();
+
+    using YP = typename L::template plane<y_plane>;
+    using UP = typename L::template plane<u_plane>;
+    using VP = typename L::template plane<v_plane>;
+
+    // Slot of the sample inside each plane's component list. Looked up
+    // by tag instead of assuming slot 0, so a plane that lists padding
+    // before its sample is still encoded into the right field.
+    static constexpr size_t y_pos = YP::template find_pos<C::Y>();
+    static constexpr size_t u_pos = UP::template find_pos<C::U>();
+    static constexpr size_t v_pos = VP::template find_pos<C::V>();
+    static_assert(y_pos < YP::num_comps, "Y plane has no Y component");
+    static_assert(u_pos < UP::num_comps, "U plane has no U component");
+    static_assert(v_pos < VP::num_comps, "V plane has no V component");
+
+    static constexpr size_t block_h = L::v_sub;
+    static constexpr size_t block_w = L::h_sub;
+
+    // Writes the block whose top-left pixel is (bx, by).
+    static void write_block(Buffer<3>& buf, size_t bx, size_t by,
+                            const YUV16 (&block)[block_h][block_w]) noexcept
+    {
+        // Single pass over the block: store each pixel's luma and
+        // accumulate chroma for the shared sample.
+        uint32_t u_sum = 0, v_sum = 0;
+        for (size_t dy = 0; dy < block_h; ++dy) {
+            uint8_t* y_row = buf.data[y_plane]
+                             + (by + dy) * buf.stride[y_plane];
+            for (size_t dx = 0; dx < block_w; ++dx) {
+                const YUV16& px = block[dy][dx];
+
+                std::array<uint16_t, YP::num_comps> v{};
+                v[y_pos] = detail::encode_norm(YP::comps[y_pos].bits, px.y);
+                detail::store_word<YP>(
+                    y_row + (bx + dx) * YP::bytes_per_pixel,
+                    YP::pack(v));
+
+                u_sum += px.u;
+                v_sum += px.v;
+            }
+        }
+
+        // One averaged U and V sample per block. Integer truncation
+        // (no round-half-up).
+        constexpr uint32_t n = block_h * block_w;
+
+        const size_t cx = bx / L::h_sub;
+        const size_t cy = by / L::v_sub;
+
+        std::array<uint16_t, UP::num_comps> uw{};
+        uw[u_pos] = detail::encode_norm(UP::comps[u_pos].bits,
+                                        uint16_t(u_sum / n));
+        detail::store_word<UP>(
+            buf.data[u_plane] + cy * buf.stride[u_plane]
+                + cx * UP::bytes_per_pixel,
+            UP::pack(uw));
+
+        std::array<uint16_t, VP::num_comps> vw{};
+        vw[v_pos] = detail::encode_norm(VP::comps[v_pos].bits,
+                                        uint16_t(v_sum / n));
+        detail::store_word<VP>(
+            buf.data[v_plane] + cy * buf.stride[v_plane]
+                + cx * VP::bytes_per_pixel,
+            VP::pack(vw));
+    }
+};
+
+// T430-style 3-plane multi-pixel-per-word planar 4:4:4. Each plane has
+// `ppw` samples of the same component (Y in plane 0, U in 1, V in 2 —
+// or whichever ordering find_plane resolves) packed into a single
+// storage word. block_w = ppw, block_h = 1. No chroma subsampling.
+// Reader for 3-plane layouts that pack `ppw` samples of one component
+// into each storage word, without chroma subsampling.
+template <typename L>
+struct MultiPixelPlanarSource {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 3);
+    static_assert(L::h_sub == 1 && L::v_sub == 1);
+
+    static constexpr size_t y_plane = L::template find_plane<C::Y>();
+    static constexpr size_t u_plane = L::template find_plane<C::U>();
+    static constexpr size_t v_plane = L::template find_plane<C::V>();
+
+    using YP = typename L::template plane<y_plane>;
+    using UP = typename L::template plane<u_plane>;
+    using VP = typename L::template plane<v_plane>;
+
+    // Samples per storage word; all three planes must agree.
+    static constexpr size_t ppw = YP::template component_count<C::Y>();
+    static_assert(ppw == UP::template component_count<C::U>());
+    static_assert(ppw == VP::template component_count<C::V>());
+
+    // Bit width of each component, read from its first occurrence; the
+    // remaining occurrences are expected to use the same width.
+    static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits;
+    static constexpr unsigned u_bits = UP::comps[UP::template find_pos<C::U>(0)].bits;
+    static constexpr unsigned v_bits = VP::comps[VP::template find_pos<C::V>(0)].bits;
+
+    // Returns the pixel at (x, y) as normalized YUV16 with alpha set to 0.
+    // W and H are accepted for interface uniformity and are not used.
+    static YUV16 read(const Buffer<3>& buf, size_t x, size_t y,
+                      [[maybe_unused]] size_t W,
+                      [[maybe_unused]] size_t H) noexcept
+    {
+        // Word column, and the sample slot inside that word.
+        const size_t word_col = x / ppw;
+        const size_t slot = x % ppw;
+
+        const uint8_t* y_word = buf.data[y_plane] + y * buf.stride[y_plane]
+                                + word_col * YP::bytes_per_pixel;
+        const auto y_fields = YP::unpack(detail::load_word<YP>(y_word));
+
+        const uint8_t* u_word = buf.data[u_plane] + y * buf.stride[u_plane]
+                                + word_col * UP::bytes_per_pixel;
+        const auto u_fields = UP::unpack(detail::load_word<UP>(u_word));
+
+        const uint8_t* v_word = buf.data[v_plane] + y * buf.stride[v_plane]
+                                + word_col * VP::bytes_per_pixel;
+        const auto v_fields = VP::unpack(detail::load_word<VP>(v_word));
+
+        // Index of the slot-th sample inside each unpacked word.
+        const size_t y_at = YP::template find_pos<C::Y>(slot);
+        const size_t u_at = UP::template find_pos<C::U>(slot);
+        const size_t v_at = VP::template find_pos<C::V>(slot);
+
+        return YUV16{
+            detail::decode_norm(y_bits, y_fields[y_at]),
+            detail::decode_norm(u_bits, u_fields[u_at]),
+            detail::decode_norm(v_bits, v_fields[v_at]),
+            uint16_t(0),
+        };
+    }
+};
+
+// Writer for 3-plane layouts that pack `ppw` samples of one component
+// into each storage word, without chroma subsampling. One block is
+// `ppw` horizontally adjacent pixels and fills exactly one word in
+// each of the three planes.
+template <typename L>
+struct MultiPixelPlanarSink {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 3);
+    static_assert(L::h_sub == 1 && L::v_sub == 1);
+
+    static constexpr size_t y_plane = L::template find_plane<C::Y>();
+    static constexpr size_t u_plane = L::template find_plane<C::U>();
+    static constexpr size_t v_plane = L::template find_plane<C::V>();
+
+    using YP = typename L::template plane<y_plane>;
+    using UP = typename L::template plane<u_plane>;
+    using VP = typename L::template plane<v_plane>;
+
+    // Samples per storage word. The write loop below indexes all three
+    // planes with the same sample number, so the chroma planes must
+    // carry exactly as many samples per word as the luma plane;
+    // otherwise find_pos would be asked for a sample that does not
+    // exist. MultiPixelPlanarSource enforces the same invariant.
+    static constexpr size_t ppw = YP::template component_count<C::Y>();
+    static_assert(ppw >= 1);
+    static_assert(ppw == UP::template component_count<C::U>());
+    static_assert(ppw == VP::template component_count<C::V>());
+
+    // Optional padding component in each plane; find_pos yields a value
+    // >= num_comps when the plane has none.
+    static constexpr size_t y_x_idx = YP::template find_pos<C::X>();
+    static constexpr size_t u_x_idx = UP::template find_pos<C::X>();
+    static constexpr size_t v_x_idx = VP::template find_pos<C::X>();
+    static constexpr bool y_has_x = (y_x_idx < YP::num_comps);
+    static constexpr bool u_has_x = (u_x_idx < UP::num_comps);
+    static constexpr bool v_has_x = (v_x_idx < VP::num_comps);
+
+    static constexpr size_t block_h = 1;
+    static constexpr size_t block_w = ppw;
+
+    // Writes the `ppw` pixels starting at column `bx` of row `by` into
+    // word column bx / ppw of every plane.
+    static void write_block(Buffer<3>& buf, size_t bx, size_t by,
+                            const YUV16 (&block)[1][ppw]) noexcept
+    {
+        std::array<uint16_t, YP::num_comps> yv{};
+        std::array<uint16_t, UP::num_comps> uv{};
+        std::array<uint16_t, VP::num_comps> vv{};
+
+        // Bit width of each component, read from its first occurrence;
+        // the remaining occurrences are expected to use the same width.
+        constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits;
+        constexpr unsigned u_bits = UP::comps[UP::template find_pos<C::U>(0)].bits;
+        constexpr unsigned v_bits = VP::comps[VP::template find_pos<C::V>(0)].bits;
+
+        // Pixel i of the block lands in the i-th sample slot of each word.
+        for (size_t i = 0; i < ppw; ++i) {
+            const YUV16& px = block[0][i];
+            yv[YP::template find_pos<C::Y>(i)] =
+                detail::encode_norm(y_bits, px.y);
+            uv[UP::template find_pos<C::U>(i)] =
+                detail::encode_norm(u_bits, px.u);
+            vv[VP::template find_pos<C::V>(i)] =
+                detail::encode_norm(v_bits, px.v);
+        }
+
+        // Padding bits are written as zero.
+        if constexpr (y_has_x) yv[y_x_idx] = 0;
+        if constexpr (u_has_x) uv[u_x_idx] = 0;
+        if constexpr (v_has_x) vv[v_x_idx] = 0;
+
+        const size_t gx = bx / ppw;
+        detail::store_word<YP>(
+            buf.data[y_plane] + by * buf.stride[y_plane]
+                + gx * YP::bytes_per_pixel,
+            YP::pack(yv));
+        detail::store_word<UP>(
+            buf.data[u_plane] + by * buf.stride[u_plane]
+                + gx * UP::bytes_per_pixel,
+            UP::pack(uv));
+        detail::store_word<VP>(
+            buf.data[v_plane] + by * buf.stride[v_plane]
+                + gx * VP::bytes_per_pixel,
+            VP::pack(vv));
+    }
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/semiplanar.h b/subprojects/pixpat/pixpat-native/src/io/semiplanar.h
new file mode 100644
index 0000000..00e7731
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/semiplanar.h
@@ -0,0 +1,242 @@
+#pragma once
+
+// 2-plane semiplanar YUV. Two flavours:
+//
+// SemiplanarSource / SemiplanarSink — NV12/NV21/NV16/NV61, single
+// pixel per Y storage word, single chroma pair per chroma word.
+//
+// MultiPixelSemiplanarSource / MultiPixelSemiplanarSink — P030/P230,
+// multiple Y pixels per Y word and multiple chroma pairs per
+// chroma word. The Y plane has `ppw_y = component_count<Y>()` Y
+// samples per storage word; the chroma plane has `pairs =
+// component_count<U>()` U/V pairs per storage word. block_w =
+// pairs × h_sub, block_h = v_sub — each block exactly fills one
+// chroma word.
+
+#include <array>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+// Reader for 2-plane semiplanar YUV: plane 0 holds one luma sample per
+// word, plane 1 holds one U/V pair per word at chroma resolution.
+template <typename L>
+struct SemiplanarSource {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 2);
+
+    using YP = typename L::template plane<0>;
+    using CP = typename L::template plane<1>;
+
+    // Component slots inside the unpacked luma and chroma words.
+    static constexpr size_t y_idx = YP::template find_pos<C::Y>();
+    static constexpr size_t u_idx = CP::template find_pos<C::U>();
+    static constexpr size_t v_idx = CP::template find_pos<C::V>();
+
+    // Returns the pixel at (x, y) as normalized YUV16 with alpha set to 0.
+    // W and H are accepted for interface uniformity and are not used.
+    static YUV16 read(const Buffer<2>& buf, size_t x, size_t y,
+                      [[maybe_unused]] size_t W,
+                      [[maybe_unused]] size_t H) noexcept
+    {
+        // Luma word for this pixel.
+        const uint8_t* luma_row = buf.data[0] + y * buf.stride[0];
+        const auto luma = YP::unpack(
+            detail::load_word<YP>(luma_row + x * YP::bytes_per_pixel));
+
+        // Chroma word shared by the h_sub x v_sub group containing (x, y).
+        const size_t chroma_col = x / L::h_sub;
+        const size_t chroma_line = y / L::v_sub;
+        const uint8_t* chroma_row = buf.data[1] + chroma_line * buf.stride[1];
+        const auto chroma = CP::unpack(
+            detail::load_word<CP>(chroma_row + chroma_col * CP::bytes_per_pixel));
+
+        return YUV16{
+            detail::decode_norm(YP::comps[y_idx].bits, luma[y_idx]),
+            detail::decode_norm(CP::comps[u_idx].bits, chroma[u_idx]),
+            detail::decode_norm(CP::comps[v_idx].bits, chroma[v_idx]),
+            uint16_t(0),
+        };
+    }
+};
+
+// Writer for 2-plane semiplanar YUV. One block covers h_sub x v_sub
+// pixels: each pixel gets its own luma word, and the block shares one
+// averaged U/V word.
+template <typename L>
+struct SemiplanarSink {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 2);
+
+    using YP = typename L::template plane<0>;
+    using CP = typename L::template plane<1>;
+
+    // Component slots inside the luma and chroma words.
+    static constexpr size_t y_idx = YP::template find_pos<C::Y>();
+    static constexpr size_t u_idx = CP::template find_pos<C::U>();
+    static constexpr size_t v_idx = CP::template find_pos<C::V>();
+
+    static constexpr size_t block_h = L::v_sub;
+    static constexpr size_t block_w = L::h_sub;
+
+    // Writes the block whose top-left pixel is (bx, by).
+    static void write_block(Buffer<2>& buf, size_t bx, size_t by,
+                            const YUV16 (&block)[block_h][block_w]) noexcept
+    {
+        // Single pass over the block: emit luma per pixel and gather
+        // the chroma totals for the shared pair.
+        uint32_t u_total = 0;
+        uint32_t v_total = 0;
+        for (size_t row = 0; row < block_h; ++row) {
+            uint8_t* luma_row = buf.data[0] + (by + row) * buf.stride[0];
+            for (size_t col = 0; col < block_w; ++col) {
+                const YUV16& px = block[row][col];
+
+                std::array<uint16_t, YP::num_comps> luma{};
+                luma[y_idx] = detail::encode_norm(YP::comps[y_idx].bits, px.y);
+                detail::store_word<YP>(
+                    luma_row + (bx + col) * YP::bytes_per_pixel,
+                    YP::pack(luma));
+
+                u_total += px.u;
+                v_total += px.v;
+            }
+        }
+
+        // Mean over the block; the integer division truncates (no
+        // round-half-up).
+        constexpr uint32_t count = block_h * block_w;
+        const uint16_t u_mean = uint16_t(u_total / count);
+        const uint16_t v_mean = uint16_t(v_total / count);
+
+        std::array<uint16_t, CP::num_comps> chroma{};
+        chroma[u_idx] = detail::encode_norm(CP::comps[u_idx].bits, u_mean);
+        chroma[v_idx] = detail::encode_norm(CP::comps[v_idx].bits, v_mean);
+
+        // Chroma plane is addressed at subsampled coordinates.
+        const size_t chroma_col = bx / L::h_sub;
+        const size_t chroma_line = by / L::v_sub;
+        uint8_t* const dst = buf.data[1] + chroma_line * buf.stride[1]
+                             + chroma_col * CP::bytes_per_pixel;
+        detail::store_word<CP>(dst, CP::pack(chroma));
+    }
+};
+
+// Multi-pixel-per-word semiplanar (P030: 4:2:0, P230: 4:2:2). All Y
+// components share the same bit width; same for U and V.
+// Reader for 2-plane semiplanar layouts that pack several samples per
+// storage word: `ppw_y` luma samples per luma word and `pairs` U/V
+// pairs per chroma word.
+template <typename L>
+struct MultiPixelSemiplanarSource {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 2);
+
+    using YP = typename L::template plane<0>;
+    using CP = typename L::template plane<1>;
+
+    static constexpr size_t ppw_y = YP::template component_count<C::Y>();
+    static constexpr size_t pairs = CP::template component_count<C::U>();
+    static_assert(ppw_y >= 1 && pairs >= 1);
+    static_assert(pairs == CP::template component_count<C::V>());
+
+    // Bit width of each component, read from its first occurrence; the
+    // remaining occurrences are expected to use the same width.
+    static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits;
+    static constexpr unsigned u_bits = CP::comps[CP::template find_pos<C::U>(0)].bits;
+    static constexpr unsigned v_bits = CP::comps[CP::template find_pos<C::V>(0)].bits;
+
+    // Returns the pixel at (x, y) as normalized YUV16 with alpha set to 0.
+    // W and H are accepted for interface uniformity and are not used.
+    static YUV16 read(const Buffer<2>& buf, size_t x, size_t y,
+                      [[maybe_unused]] size_t W,
+                      [[maybe_unused]] size_t H) noexcept
+    {
+        // Luma: word column and sample slot inside that word.
+        const size_t luma_word = x / ppw_y;
+        const size_t luma_slot = x % ppw_y;
+        const uint8_t* luma_ptr = buf.data[0] + y * buf.stride[0]
+                                  + luma_word * YP::bytes_per_pixel;
+        const auto luma = YP::unpack(detail::load_word<YP>(luma_ptr));
+
+        // Chroma: subsampled coordinates first, then word column and
+        // pair slot inside that word.
+        const size_t chroma_x = x / L::h_sub;
+        const size_t chroma_y = y / L::v_sub;
+        const size_t chroma_word = chroma_x / pairs;
+        const size_t chroma_slot = chroma_x % pairs;
+        const uint8_t* chroma_ptr = buf.data[1] + chroma_y * buf.stride[1]
+                                    + chroma_word * CP::bytes_per_pixel;
+        const auto chroma = CP::unpack(detail::load_word<CP>(chroma_ptr));
+
+        const size_t y_at = YP::template find_pos<C::Y>(luma_slot);
+        const size_t u_at = CP::template find_pos<C::U>(chroma_slot);
+        const size_t v_at = CP::template find_pos<C::V>(chroma_slot);
+
+        return YUV16{
+            detail::decode_norm(y_bits, luma[y_at]),
+            detail::decode_norm(u_bits, chroma[u_at]),
+            detail::decode_norm(v_bits, chroma[v_at]),
+            uint16_t(0),
+        };
+    }
+};
+
+// Writer for 2-plane semiplanar layouts that pack several samples per
+// storage word: `ppw_y` luma samples per luma word and `pairs` U/V
+// pairs per chroma word. One block fills exactly one chroma word and
+// the luma words that cover the same pixels.
+template <typename L>
+struct MultiPixelSemiplanarSink {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 2);
+
+    using YP = typename L::template plane<0>;
+    using CP = typename L::template plane<1>;
+    static constexpr size_t ppw_y = YP::template component_count<C::Y>();
+    static constexpr size_t pairs = CP::template component_count<C::U>();
+    static_assert(ppw_y >= 1 && pairs >= 1);
+    // The chroma loop below looks up the p-th U and the p-th V for every
+    // p < pairs, so the chroma word must carry as many V samples as U
+    // samples. MultiPixelSemiplanarSource enforces the same invariant.
+    static_assert(pairs == CP::template component_count<C::V>());
+
+    // One block exactly fills one chroma word: `pairs` chroma pairs,
+    // each covering h_sub luma columns × v_sub rows.
+    static constexpr size_t block_w = pairs * L::h_sub;
+    static constexpr size_t block_h = L::v_sub;
+    static_assert(block_w % ppw_y == 0,
+                  "block width must be a multiple of Y-pixels-per-word");
+    static constexpr size_t y_words_per_row = block_w / ppw_y;
+
+    // Bit width of each component, read from its first occurrence; the
+    // remaining occurrences are expected to use the same width.
+    static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits;
+    static constexpr unsigned u_bits = CP::comps[CP::template find_pos<C::U>(0)].bits;
+    static constexpr unsigned v_bits = CP::comps[CP::template find_pos<C::V>(0)].bits;
+
+    // Writes the block whose top-left pixel is (bx, by).
+    static void write_block(Buffer<2>& buf, size_t bx, size_t by,
+                            const YUV16 (&block)[block_h][block_w]) noexcept
+    {
+        // Y plane: y_words_per_row Y-words per row, block_h rows.
+        const size_t first_y_word = bx / ppw_y;
+        for (size_t dy = 0; dy < block_h; ++dy) {
+            uint8_t* y_row = buf.data[0]
+                             + (by + dy) * buf.stride[0];
+            for (size_t w = 0; w < y_words_per_row; ++w) {
+                // Components not assigned here (e.g. padding) stay zero.
+                std::array<uint16_t, YP::num_comps> v{};
+                for (size_t i = 0; i < ppw_y; ++i) {
+                    const size_t pos = YP::template find_pos<C::Y>(i);
+                    v[pos] = detail::encode_norm(
+                        y_bits, block[dy][w * ppw_y + i].y);
+                }
+                detail::store_word<YP>(
+                    y_row + (first_y_word + w) * YP::bytes_per_pixel,
+                    YP::pack(v));
+            }
+        }
+
+        // One UV-word: `pairs` chroma pairs. Each pair is the average of
+        // the chroma of the h_sub × v_sub pixels it covers. Integer
+        // truncation (no round-half-up).
+        std::array<uint16_t, CP::num_comps> uv{};
+        constexpr uint32_t n = L::h_sub * L::v_sub;
+        for (size_t p = 0; p < pairs; ++p) {
+            uint32_t u_sum = 0, v_sum = 0;
+            for (size_t dy = 0; dy < block_h; ++dy) {
+                for (size_t dx = 0; dx < L::h_sub; ++dx) {
+                    const YUV16& px = block[dy][p * L::h_sub + dx];
+                    u_sum += px.u;
+                    v_sum += px.v;
+                }
+            }
+            uv[CP::template find_pos<C::U>(p)] =
+                detail::encode_norm(u_bits, uint16_t(u_sum / n));
+            uv[CP::template find_pos<C::V>(p)] =
+                detail::encode_norm(v_bits, uint16_t(v_sum / n));
+        }
+
+        const size_t cy = by / L::v_sub;
+        const size_t uv_word_idx = bx / block_w;
+        detail::store_word<CP>(
+            buf.data[1] + cy * buf.stride[1]
+                + uv_word_idx * CP::bytes_per_pixel,
+            CP::pack(uv));
+    }
+};
+
+} // namespace pixpat