summaryrefslogtreecommitdiff
path: root/subprojects/pixpat/pixpat-native/src
diff options
context:
space:
mode:
author Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> 2026-05-08 17:22:58 +0300
committer Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> 2026-05-08 17:22:58 +0300
commit 4e2b291a4acdc2cbd39f005c88bda363bc06bd34 (patch)
tree e90048d5973ad1164b109d575cf577af7daf50be /subprojects/pixpat/pixpat-native/src
parent 8f94b39040e79eccd9312ed1e467fe8ebfab8860 (diff)
parent e0b7d30fd437292c88141fb08d60681870b86c6e (diff)
Merge commit 'e0b7d30fd437292c88141fb08d60681870b86c6e' as 'subprojects/pixpat'
Diffstat (limited to 'subprojects/pixpat/pixpat-native/src')
-rw-r--r--subprojects/pixpat/pixpat-native/src/color.h199
-rw-r--r--subprojects/pixpat/pixpat-native/src/error.h16
-rw-r--r--subprojects/pixpat/pixpat-native/src/format_catalog.h140
-rw-r--r--subprojects/pixpat/pixpat-native/src/formats.h13
-rw-r--r--subprojects/pixpat/pixpat-native/src/formats/bayer.h97
-rw-r--r--subprojects/pixpat/pixpat-native/src/formats/grayscale.h78
-rw-r--r--subprojects/pixpat/pixpat-native/src/formats/rgb.h267
-rw-r--r--subprojects/pixpat/pixpat-native/src/formats/yuv_packed.h136
-rw-r--r--subprojects/pixpat/pixpat-native/src/formats/yuv_planar.h76
-rw-r--r--subprojects/pixpat/pixpat-native/src/formats/yuv_semiplanar.h79
-rw-r--r--subprojects/pixpat/pixpat-native/src/io.h13
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/bayer.h318
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/csi2.h80
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/detail.h62
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/gray.h153
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/gray_packed.h78
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/mono_rgb.h72
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/packed.h106
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/packed_yuv.h89
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/planar.h257
-rw-r--r--subprojects/pixpat/pixpat-native/src/io/semiplanar.h242
-rw-r--r--subprojects/pixpat/pixpat-native/src/layout.h141
-rw-r--r--subprojects/pixpat/pixpat-native/src/params.h219
-rw-r--r--subprojects/pixpat/pixpat-native/src/pattern.h597
-rw-r--r--subprojects/pixpat/pixpat-native/src/pattern_catalog.h64
-rw-r--r--subprojects/pixpat/pixpat-native/src/pipeline.h44
-rw-r--r--subprojects/pixpat/pixpat-native/src/pixpat.cpp355
-rw-r--r--subprojects/pixpat/pixpat-native/src/pixpat_convert.cpp201
-rw-r--r--subprojects/pixpat/pixpat-native/src/pixpat_internal.h89
-rw-r--r--subprojects/pixpat/pixpat-native/src/pixpat_pattern.cpp168
-rw-r--r--subprojects/pixpat/pixpat-native/src/threading.h95
31 files changed, 4544 insertions, 0 deletions
diff --git a/subprojects/pixpat/pixpat-native/src/color.h b/subprojects/pixpat/pixpat-native/src/color.h
new file mode 100644
index 0000000..16dfb7d
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/color.h
@@ -0,0 +1,199 @@
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+
+#include "layout.h"
+
+namespace pixpat
+{
+
+// BT.601 / BT.709 / BT.2020 × Limited / Full range, dispatched at
+// runtime via a small `ColorCoeffs` struct that the caller hoists out
+// of the per-pixel loop. The convert and pattern entry points compute
+// `coeffs_for(spec)` once before the stripe loop, then pass the
+// resulting struct into every `ColorXfm::apply()` in the inner loop.
+// This avoids per-pixel matrix branching and also the alternative of
+// a 6×-instantiated template (would push the hot pivot from 121 to
+// 726 `Converter` bodies). The coefficient values are loop-invariant
+// broadcast scalars, so the compiler vectorizes the inner loop with
+// vbroadcastss + vmulps in place of constant folds.
+//
+// Math runs in float.
+
+enum class Rec : uint8_t { BT601, BT709, BT2020 }; // matrix standard; selects the kr/kg/kb luma weights in coeffs_for()
+enum class Range : uint8_t { Limited, Full }; // Limited: Y in 16..235 and chroma in 16..240 (8-bit code values); Full: 0..255
+
+struct ColorSpec { // (matrix standard, quantization range) pair consumed by coeffs_for()
+ Rec rec; // which luma-weight set to use
+ Range range; // limited or full quantization range
+ constexpr bool operator==(const ColorSpec&) const = default; // defaulted memberwise equality (C++20)
+};
+
+inline constexpr ColorSpec kDefaultColorSpec{ Rec::BT601, Range::Limited }; // named default: BT.601 matrix, limited range
+
+struct ColorCoeffs { // precomputed constants read by the cross-kind ColorXfm::apply() overloads; built by detail::make_coeffs()
+ // RGB->YUV
+ float kr, kg, kb; // luma weights applied to normalized R, G, B
+ float y_scale, y_offset; // luma output = Y' * y_scale + y_offset (still normalized to 0..1)
+ float c_scale, c_offset; // chroma output = C * c_scale + c_offset (still normalized to 0..1)
+ float u_factor, v_factor; // scale (B - Y') into U and (R - Y') into V
+ // YUV->RGB
+ float y_inv, c_inv; // reciprocals of y_scale and c_scale
+ float gu, gv, ru, bv; // R = Y' + ru*V, G = Y' + gu*U + gv*V, B = Y' + bv*U
+ // normalized 16-bit scale (kNormMax in float, plus its inverse)
+ float norm_scale, norm_inv_scale;
+};
+
+namespace detail
+{
+constexpr ColorCoeffs make_coeffs(float kr, float kg, float kb, bool full) noexcept // derives every ColorCoeffs field from the luma weights and the range flag
+{
+ const float y_min = full ? 0.0f : 16.0f / 255.0f; // limited-range bounds are 8-bit code values divided by 255
+ const float y_max = full ? 1.0f : 235.0f / 255.0f;
+ const float c_min = full ? 0.0f : 16.0f / 255.0f;
+ const float c_max = full ? 1.0f : 240.0f / 255.0f;
+
+ const float y_scale = y_max - y_min; // width of the luma output range
+ const float y_offset = y_min; // luma output floor
+ const float c_scale = c_max - c_min; // width of the chroma output range
+ const float c_offset = (c_max + c_min) * 0.5f; // chroma midpoint: 128/255 limited, 0.5 full
+
+ const float u_factor = 1.0f / (2.0f * (1.0f - kb)); // for weights summing to 1, (B - Y') spans +-(1 - kb); this maps it to +-0.5
+ const float v_factor = 1.0f / (2.0f * (1.0f - kr)); // same mapping for (R - Y') with kr
+ const float y_inv = 1.0f / y_scale; // reciprocals let the inverse transform multiply instead of divide
+ const float c_inv = 1.0f / c_scale;
+ const float gu = -2.0f * (1.0f - kb) * kb / kg; // U contribution to G in the YUV->RGB direction
+ const float gv = -2.0f * (1.0f - kr) * kr / kg; // V contribution to G
+ const float ru = 2.0f * (1.0f - kr); // V contribution to R (inverse of v_factor)
+ const float bv = 2.0f * (1.0f - kb); // U contribution to B (inverse of u_factor)
+
+ const float norm_scale = float(kNormMax); // kNormMax comes from layout.h
+ const float norm_inv_scale = 1.0f / norm_scale;
+
+ return ColorCoeffs{ // aggregate init: order must match the member order of ColorCoeffs
+ kr, kg, kb,
+ y_scale, y_offset,
+ c_scale, c_offset,
+ u_factor, v_factor,
+ y_inv, c_inv,
+ gu, gv, ru, bv,
+ norm_scale, norm_inv_scale,
+ };
+}
+} // namespace detail
+
+constexpr ColorCoeffs coeffs_for(ColorSpec spec) noexcept // maps a ColorSpec to its coefficient set; intended to be called once, outside the pixel loop
+{
+ const bool full = spec.range == Range::Full;
+ switch (spec.rec) {
+ case Rec::BT601: return detail::make_coeffs(0.299f, 0.587f, 0.114f, full);
+ case Rec::BT2020: return detail::make_coeffs(0.2627f, 0.6780f, 0.0593f, full);
+ default: return detail::make_coeffs(0.2126f, 0.7152f, 0.0722f, full); // Rec::BT709, and the fallback for any other value
+ }
+}
+
+template <typename SrcPix, typename DstPix> // SrcPix/DstPix: normalized pixel types (this file specializes RGB16 and YUV16)
+struct ColorXfm; // declared only: using an unspecialized pairing is a compile error
+
+template <>
+struct ColorXfm<RGB16, RGB16> { // identity: RGB to RGB needs no conversion
+ static constexpr RGB16 apply(RGB16 p) noexcept { // coefficient-free overload
+ return p;
+ }
+ static constexpr RGB16 apply(RGB16 p, const ColorCoeffs&) noexcept { // same signature shape as the cross-kind specializations; coefficients ignored
+ return p;
+ }
+};
+
+template <>
+struct ColorXfm<YUV16, YUV16> { // identity: YUV to YUV needs no conversion
+ static constexpr YUV16 apply(YUV16 p) noexcept { // coefficient-free overload
+ return p;
+ }
+ static constexpr YUV16 apply(YUV16 p, const ColorCoeffs&) noexcept { // same signature shape as the cross-kind specializations; coefficients ignored
+ return p;
+ }
+};
+
+// Cross-color-kind conversions reset `a` to kNormMax (sinks with X
+// write 0; sinks with A see fully opaque pixels). Within the same
+// color kind, identity ColorXfm propagates `a` unchanged.
+template <>
+struct ColorXfm<RGB16, YUV16> { // RGB -> YUV using the matrix and range carried in ColorCoeffs
+ static YUV16 apply(RGB16 rgb, const ColorCoeffs& c) noexcept // returns the converted pixel with alpha forced to kNormMax; rgb.a is not read
+ {
+ const float r = float(rgb.r) * c.norm_inv_scale; // normalize 0..kNormMax to 0..1
+ const float g = float(rgb.g) * c.norm_inv_scale;
+ const float b = float(rgb.b) * c.norm_inv_scale;
+
+ const float yp = c.kr * r + c.kg * g + c.kb * b; // luma as the weighted sum of R, G, B
+ const float u = (b - yp) * c.u_factor; // colour differences, scaled by the precomputed factors
+ const float v = (r - yp) * c.v_factor;
+
+ // No clamp on RGB→YUV: for any uint16_t (RGB) input the
+ // output Y/U/V is structurally in [0, 1] (limited-range
+ // chroma stays within [c_min, c_max] ⊂ [0, 1]). The +0.5
+ // rounds half-up before the integer cast.
+ return YUV16{
+ uint16_t((yp * c.y_scale + c.y_offset) * c.norm_scale + 0.5f), // apply range scale/offset, then back to integer scale
+ uint16_t((u * c.c_scale + c.c_offset) * c.norm_scale + 0.5f),
+ uint16_t((v * c.c_scale + c.c_offset) * c.norm_scale + 0.5f),
+ kNormMax, // alpha reset to maximum
+ };
+ }
+};
+
+template <>
+struct ColorXfm<YUV16, RGB16> { // YUV -> RGB using the matrix and range carried in ColorCoeffs
+ static RGB16 apply(YUV16 yuv, const ColorCoeffs& c) noexcept // returns the converted, clamped pixel with alpha forced to kNormMax; yuv.a is not read
+ {
+ const float yp = (float(yuv.y) * c.norm_inv_scale - c.y_offset) * c.y_inv; // undo range offset/scale: luma back to nominal 0..1
+ const float u = (float(yuv.u) * c.norm_inv_scale - c.c_offset) * c.c_inv; // chroma re-centred around 0
+ const float v = (float(yuv.v) * c.norm_inv_scale - c.c_offset) * c.c_inv;
+
+ const float r = yp + c.ru * v; // inverse matrix
+ const float g = yp + c.gu * u + c.gv * v;
+ const float b = yp + c.bv * u;
+
+ // Clamp on YUV→RGB: the inverse matrix produces out-of-range
+ // RGB for some valid YUV inputs. Written as min/max so it
+ // vectorizes to vminps/vmaxps; std::clamp can defeat that.
+ auto pack = [&](float x) -> uint16_t { // scale to integer range, round, clamp, then truncate
+ x = x * c.norm_scale + 0.5f; // +0.5 so the truncating cast rounds to nearest
+ x = std::min(std::max(x, 0.0f), c.norm_scale); // clamp to [0, norm_scale] before the cast
+ return uint16_t(x);
+ };
+
+ return RGB16{
+ pack(r), pack(g), pack(b),
+ kNormMax, // alpha reset to maximum
+ };
+ }
+};
+
+// In-place cross-color-kind passes over a normalized line buffer.
+// RGB16 and YUV16 are both 4 uint16_t with identical layout, so we
+// can memcpy through the same buffer pixel-by-pixel without aliasing.
+inline void norm_rgb_to_yuv(uint8_t* buf, size_t n, const ColorCoeffs& c) noexcept // converts n RGB16 pixels in buf to YUV16 in place; buf must hold n * sizeof(RGB16) bytes
+{
+ for (size_t i = 0; i < n; ++i) {
+ RGB16 rgb;
+ std::memcpy(&rgb, buf + i * sizeof(RGB16), sizeof(RGB16)); // memcpy in: no alignment or aliasing assumptions on buf
+ YUV16 yuv = ColorXfm<RGB16, YUV16>::apply(rgb, c);
+ std::memcpy(buf + i * sizeof(YUV16), &yuv, sizeof(YUV16)); // written back to the slot it was read from
+ }
+}
+
+inline void norm_yuv_to_rgb(uint8_t* buf, size_t n, const ColorCoeffs& c) noexcept // converts n YUV16 pixels in buf to RGB16 in place; buf must hold n * sizeof(YUV16) bytes
+{
+ for (size_t i = 0; i < n; ++i) {
+ YUV16 yuv;
+ std::memcpy(&yuv, buf + i * sizeof(YUV16), sizeof(YUV16)); // memcpy in: no alignment or aliasing assumptions on buf
+ RGB16 rgb = ColorXfm<YUV16, RGB16>::apply(yuv, c);
+ std::memcpy(buf + i * sizeof(RGB16), &rgb, sizeof(RGB16)); // written back to the slot it was read from
+ }
+}
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/error.h b/subprojects/pixpat/pixpat-native/src/error.h
new file mode 100644
index 0000000..83a3596
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/error.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <stdexcept>
+
+namespace pixpat
+{
+
+struct error : std::runtime_error { // pixpat exception type; invalid_argument derives from it
+ using std::runtime_error::runtime_error; // inherit the std::runtime_error constructors
+};
+
+struct invalid_argument : error { // distinct type so callers can catch it separately from the generic pixpat::error
+ using error::error; // inherit constructors
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/format_catalog.h b/subprojects/pixpat/pixpat-native/src/format_catalog.h
new file mode 100644
index 0000000..287d773
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/format_catalog.h
@@ -0,0 +1,140 @@
+#pragma once
+
+// Catalog of every pixel format the C++ side handles. The X-macro is a
+// flat list of names:
+//
+// X(name)
+//
+// `name` is the canonical format identifier — both an internal FormatId
+// enum entry and the public string accepted by pixpat_buffer::format.
+// Each name resolves to a struct in `pixpat::formats::` (defined under
+// pixpat-native/src/formats/) that carries:
+//
+// - the layout (subsampling, planes, components)
+// - nested `Source` / `Sink` aliases for the matching I/O templates
+//
+// Adding a format = a row here AND a struct in the right
+// pixpat-native/src/formats/*.h. The codegen
+// (pixpat-native/codegen/gen_pixpat.py) parses this X-macro to learn
+// the format set; pixpat.cpp re-expands it to build s_format_info via
+// `formats::name::Source` / `formats::name::Sink`.
+//
+// FormatId is internal — the public C ABI deals in format names only.
+
+#include <cstddef>
+
+namespace pixpat
+{
+
+#define PIXPAT_FORMAT_LIST(X) /* one X(name) row per format; row order fixes FormatId values and s_format_table order */ \
+ X(XRGB8888) \
+ X(ARGB8888) \
+ X(XBGR8888) \
+ X(ABGR8888) \
+ X(RGBX8888) \
+ X(RGBA8888) \
+ X(BGRX8888) \
+ X(BGRA8888) \
+ X(RGB888) \
+ X(BGR888) \
+ X(RGB332) \
+ X(RGB565) \
+ X(BGR565) \
+ X(XRGB1555) \
+ X(ARGB1555) \
+ X(XBGR1555) \
+ X(ABGR1555) \
+ X(XRGB4444) \
+ X(ARGB4444) \
+ X(XBGR4444) \
+ X(ABGR4444) \
+ X(RGBX4444) \
+ X(RGBA4444) \
+ X(XRGB2101010) \
+ X(ARGB2101010) \
+ X(XBGR2101010) \
+ X(ABGR2101010) \
+ X(RGBX1010102) \
+ X(RGBA1010102) \
+ X(BGRX1010102) \
+ X(BGRA1010102) \
+ X(ABGR16161616) \
+ X(NV12) \
+ X(NV21) \
+ X(NV16) \
+ X(NV61) \
+ X(P030) \
+ X(P230) \
+ X(YUV420) \
+ X(YVU420) \
+ X(YUV422) \
+ X(YVU422) \
+ X(YUV444) \
+ X(YVU444) \
+ X(T430) \
+ X(VUY888) \
+ X(XVUY8888) \
+ X(XVUY2101010) \
+ X(AVUY16161616) \
+ X(YUYV) \
+ X(YVYU) \
+ X(UYVY) \
+ X(VYUY) \
+ X(Y210) \
+ X(Y212) \
+ X(Y216) \
+ X(Y8) \
+ X(Y10) \
+ X(Y12) \
+ X(Y16) \
+ X(R8) \
+ X(XYYY2101010) \
+ X(Y10P) \
+ X(Y12P) \
+ X(SRGGB8) \
+ X(SBGGR8) \
+ X(SGRBG8) \
+ X(SGBRG8) \
+ X(SRGGB10) \
+ X(SBGGR10) \
+ X(SGRBG10) \
+ X(SGBRG10) \
+ X(SRGGB12) \
+ X(SBGGR12) \
+ X(SGRBG12) \
+ X(SGBRG12) \
+ X(SRGGB16) \
+ X(SBGGR16) \
+ X(SGRBG16) \
+ X(SGBRG16) \
+ X(SRGGB10P) \
+ X(SBGGR10P) \
+ X(SGRBG10P) \
+ X(SGBRG10P) \
+ X(SRGGB12P) \
+ X(SBGGR12P) \
+ X(SGRBG12P) \
+ X(SGBRG12P)
+
+enum class FormatId { // one enumerator per PIXPAT_FORMAT_LIST row, in list order
+#define X(name) name,
+ PIXPAT_FORMAT_LIST(X)
+#undef X
+ Unknown, // sentinel placed after every catalog entry
+};
+
+struct FormatEntry { // one name <-> id row of s_format_table
+ const char* name; // format name string (the stringized catalog identifier)
+ FormatId id; // matching enum value
+};
+
+inline constexpr FormatEntry s_format_table[] = { // name/id pairs in catalog order; FormatId::Unknown has no row
+#define X(name) { #name, FormatId::name },
+ PIXPAT_FORMAT_LIST(X)
+#undef X
+};
+
+inline constexpr size_t s_format_catalog_count = // number of rows in s_format_table
+ sizeof(s_format_table) / sizeof(s_format_table[0]);
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/formats.h b/subprojects/pixpat/pixpat-native/src/formats.h
new file mode 100644
index 0000000..68bdeec
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/formats.h
@@ -0,0 +1,13 @@
+#pragma once
+
+// Aggregator: every named layout the X-macro registers lives in one of
+// the headers under formats/, organized by color kind. Format names
+// follow the kms++/pixutils convention (see formats/rgb.h for the
+// longer note; the YUYV group is an exception, see formats/yuv_packed.h).
+
+#include "formats/rgb.h"
+#include "formats/yuv_semiplanar.h"
+#include "formats/yuv_planar.h"
+#include "formats/yuv_packed.h"
+#include "formats/grayscale.h"
+#include "formats/bayer.h"
diff --git a/subprojects/pixpat/pixpat-native/src/formats/bayer.h b/subprojects/pixpat/pixpat-native/src/formats/bayer.h
new file mode 100644
index 0000000..057c342
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/formats/bayer.h
@@ -0,0 +1,97 @@
+#pragma once
+
+// Bayer raw layouts. Each pixel carries one of R/G/B selected by
+// (x mod 2, y mod 2) and BayerOrder; the pattern is on the
+// BayerSource/BayerSink template, not the layout itself. Storage shape
+// is single-component (C::Y reused as the storage tag) so the same
+// 8/10/12/16-bit shapes apply across all four phase patterns.
+//
+// Each format is a distinct struct (rather than a type alias of one
+// another) so each format type can carry its own pattern-specific
+// Source/Sink aliases. The shared bit layout lives in a base struct per
+// (depth,packing) combination.
+//
+// ColorKind is RGB because the normalized pixel passed through ColorXfm
+// is RGB16 — the sink picks one of r/g/b at write time, and the
+// source nearest-neighbor demosaics into RGB16 at read time.
+
+#include "../layout.h"
+#include "../io/bayer.h"
+
+namespace pixpat::formats
+{
+
+namespace bayer_detail
+{
+
+// Per-(depth,packing) base layouts. Every Bayer format derives from
+// one of these and pins its own pattern-specific I/O templates.
+using Bayer8 = Layout<ColorKind::RGB, 1, 1,
+ Plane<uint8_t, Comp { C::Y, 8, 0 }> >; // one 8-bit sample per byte
+using Bayer10 = Layout<ColorKind::RGB, 1, 1,
+ Plane<uint16_t, Comp { C::Y, 10, 0 }, Comp { C::X, 6, 10 }> >; // sample in bits 9..0, bits 15..10 padding
+using Bayer12 = Layout<ColorKind::RGB, 1, 1,
+ Plane<uint16_t, Comp { C::Y, 12, 0 }, Comp { C::X, 4, 12 }> >; // sample in bits 11..0, bits 15..12 padding
+using Bayer16 = Layout<ColorKind::RGB, 1, 1,
+ Plane<uint16_t, Comp { C::Y, 16, 0 }> >; // full 16-bit sample
+// MIPI CSI-2 packed Bayer (10P: 4 pix in 5 bytes; 12P: 2 pix in 3
+// bytes). The Layout doesn't capture the packed bit layout — the
+// BayerPackedSink hand-rolls the byte writes. uint8_t plane shape is
+// a placeholder so the dispatch plumbing is uniform.
+using Bayer10P = Layout<ColorKind::RGB, 1, 1,
+ Plane<uint8_t, Comp { C::Y, 8, 0 }> >; // placeholder shape, identical to Bayer8
+using Bayer12P = Layout<ColorKind::RGB, 1, 1,
+ Plane<uint8_t, Comp { C::Y, 8, 0 }> >; // placeholder shape, identical to Bayer8
+
+} // namespace bayer_detail
+
+// Unpacked Bayer (4 patterns × 4 bit depths).
+#define PIXPAT_BAYER(name, base, pat) /* name: format struct; base: bayer_detail layout alias; pat: suffix of the BayerSource_/BayerSink_ templates */ \
+ struct name : bayer_detail::base { \
+ using Source = BayerSource_ ## pat<name>; \
+ using Sink = BayerSink_ ## pat<name>; \
+ }
+
+PIXPAT_BAYER(SRGGB8, Bayer8, RGGB); // 8-bit samples
+PIXPAT_BAYER(SBGGR8, Bayer8, BGGR);
+PIXPAT_BAYER(SGRBG8, Bayer8, GRBG);
+PIXPAT_BAYER(SGBRG8, Bayer8, GBRG);
+
+PIXPAT_BAYER(SRGGB10, Bayer10, RGGB); // 10-bit samples in 16-bit words
+PIXPAT_BAYER(SBGGR10, Bayer10, BGGR);
+PIXPAT_BAYER(SGRBG10, Bayer10, GRBG);
+PIXPAT_BAYER(SGBRG10, Bayer10, GBRG);
+
+PIXPAT_BAYER(SRGGB12, Bayer12, RGGB); // 12-bit samples in 16-bit words
+PIXPAT_BAYER(SBGGR12, Bayer12, BGGR);
+PIXPAT_BAYER(SGRBG12, Bayer12, GRBG);
+PIXPAT_BAYER(SGBRG12, Bayer12, GBRG);
+
+PIXPAT_BAYER(SRGGB16, Bayer16, RGGB); // 16-bit samples
+PIXPAT_BAYER(SBGGR16, Bayer16, BGGR);
+PIXPAT_BAYER(SGRBG16, Bayer16, GRBG);
+PIXPAT_BAYER(SGBRG16, Bayer16, GBRG);
+
+#undef PIXPAT_BAYER
+
+// MIPI-packed Bayer: pattern + bit depth both encoded in the I/O
+// template name (BayerPackedSource_RGGB10, ...).
+#define PIXPAT_BAYER_PACKED(name, base, pat_depth) /* pat_depth: pattern+depth suffix of the BayerPackedSource_/BayerPackedSink_ templates */ \
+ struct name : bayer_detail::base { \
+ using Source = BayerPackedSource_ ## pat_depth<name>; \
+ using Sink = BayerPackedSink_ ## pat_depth<name>; \
+ }
+
+PIXPAT_BAYER_PACKED(SRGGB10P, Bayer10P, RGGB10); // 10-bit packed variants
+PIXPAT_BAYER_PACKED(SBGGR10P, Bayer10P, BGGR10);
+PIXPAT_BAYER_PACKED(SGRBG10P, Bayer10P, GRBG10);
+PIXPAT_BAYER_PACKED(SGBRG10P, Bayer10P, GBRG10);
+
+PIXPAT_BAYER_PACKED(SRGGB12P, Bayer12P, RGGB12); // 12-bit packed variants
+PIXPAT_BAYER_PACKED(SBGGR12P, Bayer12P, BGGR12);
+PIXPAT_BAYER_PACKED(SGRBG12P, Bayer12P, GRBG12);
+PIXPAT_BAYER_PACKED(SGBRG12P, Bayer12P, GBRG12);
+
+#undef PIXPAT_BAYER_PACKED
+
+} // namespace pixpat::formats
diff --git a/subprojects/pixpat/pixpat-native/src/formats/grayscale.h b/subprojects/pixpat/pixpat-native/src/formats/grayscale.h
new file mode 100644
index 0000000..b1cd294
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/formats/grayscale.h
@@ -0,0 +1,78 @@
+#pragma once
+
+// Single-component-per-pixel formats. Most are grayscale (Y) modeled as
+// a YUV format with synthesized neutral chroma; R8 is the RGB-kind
+// counterpart, modeled grey-style with G=B=R on read. Y10/Y12 carry an
+// explicit X padding bitfield. XYYY2101010 is multi-pixel-per-word: 3 Y
+// samples in 32 bits.
+
+#include "../layout.h"
+#include "../io/gray.h"
+#include "../io/gray_packed.h"
+#include "../io/mono_rgb.h"
+
+namespace pixpat::formats
+{
+
+#define PIXPAT_GRAY(name, ...) /* name: format struct; __VA_ARGS__: the Plane<> list; always YUV kind, no subsampling */ \
+ struct name : Layout<ColorKind::YUV, 1, 1, __VA_ARGS__> { \
+ using Source = GraySource<name>; \
+ using Sink = GraySink<name>; \
+ }
+
+PIXPAT_GRAY(Y8, // one 8-bit Y sample per byte
+ Plane<uint8_t, Comp{ C::Y, 8, 0 }>);
+
+PIXPAT_GRAY(Y10, // Y in bits 9..0 of a 16-bit word, bits 15..10 padding
+ Plane<uint16_t, Comp{ C::Y, 10, 0 }, Comp{ C::X, 6, 10 }>);
+
+PIXPAT_GRAY(Y12, // Y in bits 11..0 of a 16-bit word, bits 15..12 padding
+ Plane<uint16_t, Comp{ C::Y, 12, 0 }, Comp{ C::X, 4, 12 }>);
+
+PIXPAT_GRAY(Y16, // full 16-bit Y sample
+ Plane<uint16_t, Comp{ C::Y, 16, 0 }>);
+
+#undef PIXPAT_GRAY
+
+// R8: single 8-bit R channel. Read synthesizes G=B=R; write encodes R
+// and drops G/B/A. Symmetric to Y8 but ColorKind::RGB so cross-pipeline
+// conversions go through the RGB->YUV ColorXfm direction.
+struct R8 : Layout<ColorKind::RGB, 1, 1, // RGB kind, no subsampling
+ Plane<uint8_t, Comp{ C::R, 8, 0 }> > { // single plane: one 8-bit R sample per byte
+ using Source = MonoRGBSource<R8>; // reader from io/mono_rgb.h
+ using Sink = MonoRGBSink<R8>; // writer from io/mono_rgb.h
+};
+
+struct XYYY2101010 : Layout<ColorKind::YUV, 1, 1, // MSB->LSB: X2 Y10 Y10 Y10
+ Plane<uint32_t,
+ Comp{ C::Y, 10, 0 }, // three Y samples share one 32-bit word
+ Comp{ C::Y, 10, 10 },
+ Comp{ C::Y, 10, 20 },
+ Comp{ C::X, 2, 30 }> > { // top two bits are padding
+ using Source = MultiPixelGraySource<XYYY2101010>;
+ using Sink = MultiPixelGraySink<XYYY2101010>;
+};
+
+// MIPI CSI-2 packed grayscale (Y10P / Y12P). The Layout doesn't capture
+// the packed bit layout — GrayPackedSource/Sink delegate to the shared
+// CSI-2 helper (io/csi2.h). uint8_t plane shape is a placeholder so
+// dispatch plumbing is uniform (mirrors bayer_detail::Bayer10P/12P).
+namespace gray_csi2_detail
+{
+using Gray10P = Layout<ColorKind::YUV, 1, 1,
+ Plane<uint8_t, Comp { C::Y, 8, 0 }> >; // placeholder 8-bit shape, not the real packed layout
+using Gray12P = Layout<ColorKind::YUV, 1, 1,
+ Plane<uint8_t, Comp { C::Y, 8, 0 }> >; // placeholder 8-bit shape, not the real packed layout
+} // namespace gray_csi2_detail
+
+struct Y10P : gray_csi2_detail::Gray10P { // packed 10-bit grayscale
+ using Source = GrayPackedSource<Y10P, 10>; // second template argument is the sample bit depth
+ using Sink = GrayPackedSink<Y10P, 10>;
+};
+
+struct Y12P : gray_csi2_detail::Gray12P { // packed 12-bit grayscale
+ using Source = GrayPackedSource<Y12P, 12>; // second template argument is the sample bit depth
+ using Sink = GrayPackedSink<Y12P, 12>;
+};
+
+} // namespace pixpat::formats
diff --git a/subprojects/pixpat/pixpat-native/src/formats/rgb.h b/subprojects/pixpat/pixpat-native/src/formats/rgb.h
new file mode 100644
index 0000000..19d007a
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/formats/rgb.h
@@ -0,0 +1,267 @@
+#pragma once
+
+// RGB packed layouts: 8-bit (RGB332), 16-bit (sub-byte components),
+// 24/32-bit (8- or 10-bit components) and 64-bit normalized (16-bit components);
+// all single-plane, one pixel per storage word. Names follow the kms++/pixutils
+// register-order convention (MSB-first in the storage word), so XRGB8888 has X at bits 31..24 and B at 7..0.
+
+#include "../layout.h"
+#include "../io/packed.h"
+
+namespace pixpat::formats
+{
+
+// Helper: every format in this file pairs with PackedSource/PackedSink.
+// Each format struct exposes Source / Sink aliases so the catalog row
+// in format_catalog.h can stay name-only.
+#define PIXPAT_RGB_PACKED(name, ...) /* name: format struct; __VA_ARGS__: the Plane<> list; always RGB kind, no subsampling */ \
+ struct name : Layout<ColorKind::RGB, 1, 1, __VA_ARGS__> { \
+ using Source = PackedSource<name>; \
+ using Sink = PackedSink<name>; \
+ }
+
+// ---------------------------------------------------------------------
+// 32-bit packed RGB, 8-bit components.
+// ---------------------------------------------------------------------
+
+PIXPAT_RGB_PACKED(XRGB8888, // MSB->LSB: X8 R8 G8 B8
+ Plane<uint32_t,
+ Comp{ C::B, 8, 0 },
+ Comp{ C::G, 8, 8 },
+ Comp{ C::R, 8, 16 },
+ Comp{ C::X, 8, 24 }>);
+
+PIXPAT_RGB_PACKED(ARGB8888, // MSB->LSB: A8 R8 G8 B8
+ Plane<uint32_t,
+ Comp{ C::B, 8, 0 },
+ Comp{ C::G, 8, 8 },
+ Comp{ C::R, 8, 16 },
+ Comp{ C::A, 8, 24 }>);
+
+PIXPAT_RGB_PACKED(XBGR8888, // MSB->LSB: X8 B8 G8 R8
+ Plane<uint32_t,
+ Comp{ C::R, 8, 0 },
+ Comp{ C::G, 8, 8 },
+ Comp{ C::B, 8, 16 },
+ Comp{ C::X, 8, 24 }>);
+
+PIXPAT_RGB_PACKED(ABGR8888, // MSB->LSB: A8 B8 G8 R8
+ Plane<uint32_t,
+ Comp{ C::R, 8, 0 },
+ Comp{ C::G, 8, 8 },
+ Comp{ C::B, 8, 16 },
+ Comp{ C::A, 8, 24 }>);
+
+PIXPAT_RGB_PACKED(RGBX8888, // MSB->LSB: R8 G8 B8 X8
+ Plane<uint32_t,
+ Comp{ C::X, 8, 0 },
+ Comp{ C::B, 8, 8 },
+ Comp{ C::G, 8, 16 },
+ Comp{ C::R, 8, 24 }>);
+
+PIXPAT_RGB_PACKED(RGBA8888, // MSB->LSB: R8 G8 B8 A8
+ Plane<uint32_t,
+ Comp{ C::A, 8, 0 },
+ Comp{ C::B, 8, 8 },
+ Comp{ C::G, 8, 16 },
+ Comp{ C::R, 8, 24 }>);
+
+PIXPAT_RGB_PACKED(BGRX8888, // MSB->LSB: B8 G8 R8 X8
+ Plane<uint32_t,
+ Comp{ C::X, 8, 0 },
+ Comp{ C::R, 8, 8 },
+ Comp{ C::G, 8, 16 },
+ Comp{ C::B, 8, 24 }>);
+
+PIXPAT_RGB_PACKED(BGRA8888, // MSB->LSB: B8 G8 R8 A8
+ Plane<uint32_t,
+ Comp{ C::A, 8, 0 },
+ Comp{ C::R, 8, 8 },
+ Comp{ C::G, 8, 16 },
+ Comp{ C::B, 8, 24 }>);
+
+// ---------------------------------------------------------------------
+// 24-bit packed RGB, three bytes per pixel. storage_t is uint32_t but
+// only bytes_per_pixel = 3 are read/written via memcpy.
+// ---------------------------------------------------------------------
+
+PIXPAT_RGB_PACKED(RGB888, // MSB->LSB of the 24 used bits: R8 G8 B8
+ Plane<uint32_t,
+ Comp{ C::B, 8, 0 },
+ Comp{ C::G, 8, 8 },
+ Comp{ C::R, 8, 16 }>);
+
+PIXPAT_RGB_PACKED(BGR888, // MSB->LSB of the 24 used bits: B8 G8 R8
+ Plane<uint32_t,
+ Comp{ C::R, 8, 0 },
+ Comp{ C::G, 8, 8 },
+ Comp{ C::B, 8, 16 }>);
+
+// ---------------------------------------------------------------------
+// 16-bit packed RGB, sub-byte components.
+// ---------------------------------------------------------------------
+
+PIXPAT_RGB_PACKED(RGB565, // MSB->LSB: R5 G6 B5
+ Plane<uint16_t,
+ Comp{ C::B, 5, 0 },
+ Comp{ C::G, 6, 5 },
+ Comp{ C::R, 5, 11 }>);
+
+PIXPAT_RGB_PACKED(BGR565, // MSB->LSB: B5 G6 R5
+ Plane<uint16_t,
+ Comp{ C::R, 5, 0 },
+ Comp{ C::G, 6, 5 },
+ Comp{ C::B, 5, 11 }>);
+
+// 8-bit packed RGB: 3-bit R / 3-bit G / 2-bit B in a single byte.
+
+PIXPAT_RGB_PACKED(RGB332, // MSB->LSB: R3 G3 B2
+ Plane<uint8_t,
+ Comp{ C::B, 2, 0 },
+ Comp{ C::G, 3, 2 },
+ Comp{ C::R, 3, 5 }>);
+
+PIXPAT_RGB_PACKED(XRGB1555, // MSB->LSB: X1 R5 G5 B5
+ Plane<uint16_t,
+ Comp{ C::B, 5, 0 },
+ Comp{ C::G, 5, 5 },
+ Comp{ C::R, 5, 10 },
+ Comp{ C::X, 1, 15 }>);
+
+PIXPAT_RGB_PACKED(ARGB1555, // MSB->LSB: A1 R5 G5 B5
+ Plane<uint16_t,
+ Comp{ C::B, 5, 0 },
+ Comp{ C::G, 5, 5 },
+ Comp{ C::R, 5, 10 },
+ Comp{ C::A, 1, 15 }>);
+
+PIXPAT_RGB_PACKED(XBGR1555, // MSB->LSB: X1 B5 G5 R5
+ Plane<uint16_t,
+ Comp{ C::R, 5, 0 },
+ Comp{ C::G, 5, 5 },
+ Comp{ C::B, 5, 10 },
+ Comp{ C::X, 1, 15 }>);
+
+PIXPAT_RGB_PACKED(ABGR1555, // MSB->LSB: A1 B5 G5 R5
+ Plane<uint16_t,
+ Comp{ C::R, 5, 0 },
+ Comp{ C::G, 5, 5 },
+ Comp{ C::B, 5, 10 },
+ Comp{ C::A, 1, 15 }>);
+
+PIXPAT_RGB_PACKED(XRGB4444, // MSB->LSB: X4 R4 G4 B4
+ Plane<uint16_t,
+ Comp{ C::B, 4, 0 },
+ Comp{ C::G, 4, 4 },
+ Comp{ C::R, 4, 8 },
+ Comp{ C::X, 4, 12 }>);
+
+PIXPAT_RGB_PACKED(ARGB4444, // MSB->LSB: A4 R4 G4 B4
+ Plane<uint16_t,
+ Comp{ C::B, 4, 0 },
+ Comp{ C::G, 4, 4 },
+ Comp{ C::R, 4, 8 },
+ Comp{ C::A, 4, 12 }>);
+
+PIXPAT_RGB_PACKED(XBGR4444, // MSB->LSB: X4 B4 G4 R4
+ Plane<uint16_t,
+ Comp{ C::R, 4, 0 },
+ Comp{ C::G, 4, 4 },
+ Comp{ C::B, 4, 8 },
+ Comp{ C::X, 4, 12 }>);
+
+PIXPAT_RGB_PACKED(ABGR4444, // MSB->LSB: A4 B4 G4 R4
+ Plane<uint16_t,
+ Comp{ C::R, 4, 0 },
+ Comp{ C::G, 4, 4 },
+ Comp{ C::B, 4, 8 },
+ Comp{ C::A, 4, 12 }>);
+
+PIXPAT_RGB_PACKED(RGBX4444, // MSB->LSB: R4 G4 B4 X4
+ Plane<uint16_t,
+ Comp{ C::X, 4, 0 },
+ Comp{ C::B, 4, 4 },
+ Comp{ C::G, 4, 8 },
+ Comp{ C::R, 4, 12 }>);
+
+PIXPAT_RGB_PACKED(RGBA4444, // MSB->LSB: R4 G4 B4 A4
+ Plane<uint16_t,
+ Comp{ C::A, 4, 0 },
+ Comp{ C::B, 4, 4 },
+ Comp{ C::G, 4, 8 },
+ Comp{ C::R, 4, 12 }>);
+
+// ---------------------------------------------------------------------
+// 32-bit packed RGB, 10-bit components.
+// ---------------------------------------------------------------------
+
+PIXPAT_RGB_PACKED(XRGB2101010, // MSB->LSB: X2 R10 G10 B10
+ Plane<uint32_t,
+ Comp{ C::B, 10, 0 },
+ Comp{ C::G, 10, 10 },
+ Comp{ C::R, 10, 20 },
+ Comp{ C::X, 2, 30 }>);
+
+PIXPAT_RGB_PACKED(ARGB2101010, // MSB->LSB: A2 R10 G10 B10
+ Plane<uint32_t,
+ Comp{ C::B, 10, 0 },
+ Comp{ C::G, 10, 10 },
+ Comp{ C::R, 10, 20 },
+ Comp{ C::A, 2, 30 }>);
+
+PIXPAT_RGB_PACKED(XBGR2101010, // MSB->LSB: X2 B10 G10 R10
+ Plane<uint32_t,
+ Comp{ C::R, 10, 0 },
+ Comp{ C::G, 10, 10 },
+ Comp{ C::B, 10, 20 },
+ Comp{ C::X, 2, 30 }>);
+
+PIXPAT_RGB_PACKED(ABGR2101010, // MSB->LSB: A2 B10 G10 R10
+ Plane<uint32_t,
+ Comp{ C::R, 10, 0 },
+ Comp{ C::G, 10, 10 },
+ Comp{ C::B, 10, 20 },
+ Comp{ C::A, 2, 30 }>);
+
+PIXPAT_RGB_PACKED(RGBX1010102, // MSB->LSB: R10 G10 B10 X2
+ Plane<uint32_t,
+ Comp{ C::X, 2, 0 },
+ Comp{ C::B, 10, 2 },
+ Comp{ C::G, 10, 12 },
+ Comp{ C::R, 10, 22 }>);
+
+PIXPAT_RGB_PACKED(RGBA1010102, // MSB->LSB: R10 G10 B10 A2
+ Plane<uint32_t,
+ Comp{ C::A, 2, 0 },
+ Comp{ C::B, 10, 2 },
+ Comp{ C::G, 10, 12 },
+ Comp{ C::R, 10, 22 }>);
+
+PIXPAT_RGB_PACKED(BGRX1010102, // MSB->LSB: B10 G10 R10 X2
+ Plane<uint32_t,
+ Comp{ C::X, 2, 0 },
+ Comp{ C::R, 10, 2 },
+ Comp{ C::G, 10, 12 },
+ Comp{ C::B, 10, 22 }>);
+
+PIXPAT_RGB_PACKED(BGRA1010102, // MSB->LSB: B10 G10 R10 A2
+ Plane<uint32_t,
+ Comp{ C::A, 2, 0 },
+ Comp{ C::R, 10, 2 },
+ Comp{ C::G, 10, 12 },
+ Comp{ C::B, 10, 22 }>);
+
+// ---------------------------------------------------------------------
+// 64-bit normalized wide RGB (16 bits per component).
+// ---------------------------------------------------------------------
+
+PIXPAT_RGB_PACKED(ABGR16161616, // MSB->LSB: A16 B16 G16 R16
+ Plane<uint64_t,
+ Comp{ C::R, 16, 0 },
+ Comp{ C::G, 16, 16 },
+ Comp{ C::B, 16, 32 },
+ Comp{ C::A, 16, 48 }>);
+
+#undef PIXPAT_RGB_PACKED
+
+} // namespace pixpat::formats
diff --git a/subprojects/pixpat/pixpat-native/src/formats/yuv_packed.h b/subprojects/pixpat/pixpat-native/src/formats/yuv_packed.h
new file mode 100644
index 0000000..8e88f10
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/formats/yuv_packed.h
@@ -0,0 +1,136 @@
+#pragma once
+
+// Packed YUV layouts:
+// VUY888 — 1 pixel / 24-bit, 8-bit Y/U/V (storage uint32_t,
+// bytes_per_pixel = 3; parallels BGR888 in the YUV
+// register order)
+// XVUY8888 — 1 pixel / 32-bit word, 8-bit Y/U/V + 8-bit padding
+// XVUY2101010 — 1 pixel / 32-bit word, 10-bit Y/U/V + 2-bit padding
+// AVUY16161616 — 1 pixel / 64-bit word, 16-bit Y/U/V/A (normalized)
+// YUYV / YVYU / UYVY / VYUY — 4:2:2, 2 pixels / 32-bit word
+// Y210 / Y212 / Y216 — 4:2:2, 2 pixels / 64-bit word, with
+// each component MSB-aligned in a 16-bit slot
+//
+// XVUY/AVUY name is register MSB-first (X/A in the top bits). The
+// YUYV names follow V4L2 / pixpat memory-byte order (Y0 in byte 0),
+// so shifts ascend in name order — opposite of XRGB-style.
+
+#include "../layout.h"
+#include "../io/packed.h"
+#include "../io/packed_yuv.h"
+
+namespace pixpat::formats
+{
+
+// 1-pixel-per-word packed (single Pixel/Word; uses PackedSource/Sink).
+
+struct VUY888 : Layout<ColorKind::YUV, 1, 1, // MSB->LSB of the 24 used bits: V8 U8 Y8
+ Plane<uint32_t,
+ Comp{ C::Y, 8, 0 },
+ Comp{ C::U, 8, 8 },
+ Comp{ C::V, 8, 16 }> > {
+ using Source = PackedSource<VUY888>; // same I/O templates as the packed RGB formats
+ using Sink = PackedSink<VUY888>;
+};
+
+struct XVUY8888 : Layout<ColorKind::YUV, 1, 1, // MSB->LSB: X8 V8 U8 Y8
+ Plane<uint32_t,
+ Comp{ C::Y, 8, 0 },
+ Comp{ C::U, 8, 8 },
+ Comp{ C::V, 8, 16 },
+ Comp{ C::X, 8, 24 }> > { // top byte is padding
+ using Source = PackedSource<XVUY8888>;
+ using Sink = PackedSink<XVUY8888>;
+};
+
+struct XVUY2101010 : Layout<ColorKind::YUV, 1, 1, // MSB->LSB: X2 V10 U10 Y10
+ Plane<uint32_t,
+ Comp{ C::Y, 10, 0 },
+ Comp{ C::U, 10, 10 },
+ Comp{ C::V, 10, 20 },
+ Comp{ C::X, 2, 30 }> > { // top two bits are padding
+ using Source = PackedSource<XVUY2101010>;
+ using Sink = PackedSink<XVUY2101010>;
+};
+
+struct AVUY16161616 : Layout<ColorKind::YUV, 1, 1, // MSB->LSB: A16 V16 U16 Y16
+ Plane<uint64_t,
+ Comp{ C::Y, 16, 0 },
+ Comp{ C::U, 16, 16 },
+ Comp{ C::V, 16, 32 },
+ Comp{ C::A, 16, 48 }> > { // the only YUV layout in this file with an alpha component
+ using Source = PackedSource<AVUY16161616>;
+ using Sink = PackedSink<AVUY16161616>;
+};
+
+// 2-pixel-per-word 4:2:2 (uses PackedYUVSource/Sink).
+
+#define PIXPAT_PACKED_YUV422(name, ...) /* name: format struct; __VA_ARGS__: the four Comp entries of the 32-bit word; h_sub=2, v_sub=1 */ \
+ struct name : Layout<ColorKind::YUV, 2, 1, \
+ Plane<uint32_t, __VA_ARGS__> > { \
+ using Source = PackedYUVSource<name>; \
+ using Sink = PackedYUVSink<name>; \
+ }
+
+PIXPAT_PACKED_YUV422(YUYV, // LSB->MSB: Y U Y V
+ Comp{ C::Y, 8, 0 }, Comp{ C::U, 8, 8 },
+ Comp{ C::Y, 8, 16 }, Comp{ C::V, 8, 24 });
+
+PIXPAT_PACKED_YUV422(YVYU, // LSB->MSB: Y V Y U
+ Comp{ C::Y, 8, 0 }, Comp{ C::V, 8, 8 },
+ Comp{ C::Y, 8, 16 }, Comp{ C::U, 8, 24 });
+
+PIXPAT_PACKED_YUV422(UYVY, // LSB->MSB: U Y V Y
+ Comp{ C::U, 8, 0 }, Comp{ C::Y, 8, 8 },
+ Comp{ C::V, 8, 16 }, Comp{ C::Y, 8, 24 });
+
+PIXPAT_PACKED_YUV422(VYUY, // LSB->MSB: V Y U Y
+ Comp{ C::V, 8, 0 }, Comp{ C::Y, 8, 8 },
+ Comp{ C::U, 8, 16 }, Comp{ C::Y, 8, 24 });
+
+#undef PIXPAT_PACKED_YUV422
+
+// Y210 / Y212 / Y216: 4:2:2, 2 pixels per 64-bit word, MSB-aligned in
+// 16-bit slots. Y210 has 6 unused LSBs per slot, Y212 has 4, Y216 has
+// none. The X padding entries pad total_bits to 64 so bytes_per_pixel
+// resolves to 8; PackedYUVSink leaves their slots zero via the
+// value-array zero-init (see io/packed_yuv.h).
+struct Y210 : Layout<ColorKind::YUV, 2, 1, // 16-bit slots LSB->MSB: Y, U, Y, V; sample in the top 10 bits of each slot
+ Plane<uint64_t,
+ Comp{ C::X, 6, 0 }, // low 6 bits of slot 0 unused
+ Comp{ C::Y, 10, 6 },
+ Comp{ C::X, 6, 16 }, // low 6 bits of slot 1 unused
+ Comp{ C::U, 10, 22 },
+ Comp{ C::X, 6, 32 }, // low 6 bits of slot 2 unused
+ Comp{ C::Y, 10, 38 },
+ Comp{ C::X, 6, 48 }, // low 6 bits of slot 3 unused
+ Comp{ C::V, 10, 54 }> > {
+ using Source = PackedYUVSource<Y210>;
+ using Sink = PackedYUVSink<Y210>;
+};
+
+struct Y212 : Layout<ColorKind::YUV, 2, 1, // 16-bit slots LSB->MSB: Y, U, Y, V; sample in the top 12 bits of each slot
+ Plane<uint64_t,
+ Comp{ C::X, 4, 0 }, // low 4 bits of slot 0 unused
+ Comp{ C::Y, 12, 4 },
+ Comp{ C::X, 4, 16 }, // low 4 bits of slot 1 unused
+ Comp{ C::U, 12, 20 },
+ Comp{ C::X, 4, 32 }, // low 4 bits of slot 2 unused
+ Comp{ C::Y, 12, 36 },
+ Comp{ C::X, 4, 48 }, // low 4 bits of slot 3 unused
+ Comp{ C::V, 12, 52 }> > {
+ using Source = PackedYUVSource<Y212>;
+ using Sink = PackedYUVSink<Y212>;
+};
+
+struct Y216 : Layout<ColorKind::YUV, 2, 1, // 16-bit slots LSB->MSB: Y, U, Y, V; every slot fully used
+ Plane<uint64_t,
+ Comp{ C::Y, 16, 0 },
+ Comp{ C::U, 16, 16 },
+ Comp{ C::Y, 16, 32 },
+ Comp{ C::V, 16, 48 }> > { // no X padding entries needed: the four components already total 64 bits
+ using Source = PackedYUVSource<Y216>;
+ using Sink = PackedYUVSink<Y216>;
+};
+
+} // namespace pixpat::formats
diff --git a/subprojects/pixpat/pixpat-native/src/formats/yuv_planar.h b/subprojects/pixpat/pixpat-native/src/formats/yuv_planar.h
new file mode 100644
index 0000000..bb6a415
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/formats/yuv_planar.h
@@ -0,0 +1,76 @@
+#pragma once
+
+// YUV planar layouts: 3 separate planes (Y, then U/V or V/U). The
+// YUVxxx / YVUxxx layouts use 8-bit components; T430 uses 10-bit ones.
+// YUV420/YVU420 — h_sub=2, v_sub=2 (a.k.a. I420 / YV12)
+// YUV422/YVU422 — h_sub=2, v_sub=1
+// YUV444/YVU444 — h_sub=1, v_sub=1
+// T430 — multi-pixel-per-word planar 4:4:4.
+
+#include "../layout.h"
+#include "../io/planar.h"
+
+namespace pixpat::formats
+{
+
+// Declares a planar YUV layout called `name`. The variadic arguments are
+// forwarded verbatim to Layout<ColorKind::YUV, ...>: first the two
+// subsampling factors (h_sub, v_sub per the table at the top of this
+// header), then one Plane per plane. Source/Sink are wired to the
+// planar reader/writer templates from io/planar.h.
+#define PIXPAT_PLANAR(name, ...) \
+ struct name : Layout<ColorKind::YUV, __VA_ARGS__> { \
+ using Source = PlanarSource<name>; \
+ using Sink = PlanarSink<name>; \
+ }
+
+// 4:2:0, plane order Y, U, V.
+PIXPAT_PLANAR(YUV420, 2, 2,
+ Plane<uint8_t, Comp{ C::Y, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::U, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::V, 8, 0 }>);
+
+// 4:2:0, plane order Y, V, U.
+PIXPAT_PLANAR(YVU420, 2, 2,
+ Plane<uint8_t, Comp{ C::Y, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::V, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::U, 8, 0 }>);
+
+// 4:2:2, plane order Y, U, V.
+PIXPAT_PLANAR(YUV422, 2, 1,
+ Plane<uint8_t, Comp{ C::Y, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::U, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::V, 8, 0 }>);
+
+// 4:2:2, plane order Y, V, U.
+PIXPAT_PLANAR(YVU422, 2, 1,
+ Plane<uint8_t, Comp{ C::Y, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::V, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::U, 8, 0 }>);
+
+// 4:4:4, plane order Y, U, V.
+PIXPAT_PLANAR(YUV444, 1, 1,
+ Plane<uint8_t, Comp{ C::Y, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::U, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::V, 8, 0 }>);
+
+// 4:4:4, plane order Y, V, U.
+PIXPAT_PLANAR(YVU444, 1, 1,
+ Plane<uint8_t, Comp{ C::Y, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::V, 8, 0 }>,
+ Plane<uint8_t, Comp{ C::U, 8, 0 }>);
+
+// The helper macro is only needed for the declarations above.
+#undef PIXPAT_PLANAR
+
+// T430: 3-plane multi-pixel-per-word planar 4:4:4. Each plane carries
+// 3 × 10-bit samples per uint32_t plus a 2-bit X padding bit-field.
+// In every plane the three samples sit at bit offsets 0, 10 and 20 and
+// the X pad at bits 30-31; plane order is Y, U, V. Unlike the 8-bit
+// planar layouts above it uses the MultiPixelPlanar source/sink.
+struct T430 : Layout<ColorKind::YUV, 1, 1,
+ Plane<uint32_t,
+ Comp{ C::Y, 10, 0 },
+ Comp{ C::Y, 10, 10 },
+ Comp{ C::Y, 10, 20 },
+ Comp{ C::X, 2, 30 }>,
+ Plane<uint32_t,
+ Comp{ C::U, 10, 0 },
+ Comp{ C::U, 10, 10 },
+ Comp{ C::U, 10, 20 },
+ Comp{ C::X, 2, 30 }>,
+ Plane<uint32_t,
+ Comp{ C::V, 10, 0 },
+ Comp{ C::V, 10, 10 },
+ Comp{ C::V, 10, 20 },
+ Comp{ C::X, 2, 30 }> > {
+ using Source = MultiPixelPlanarSource<T430>;
+ using Sink = MultiPixelPlanarSink<T430>;
+};
+
+} // namespace pixpat::formats
diff --git a/subprojects/pixpat/pixpat-native/src/formats/yuv_semiplanar.h b/subprojects/pixpat/pixpat-native/src/formats/yuv_semiplanar.h
new file mode 100644
index 0000000..34aea22
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/formats/yuv_semiplanar.h
@@ -0,0 +1,79 @@
+#pragma once
+
+// YUV semiplanar layouts: Y plane + interleaved UV plane.
+// NV12/NV21 — 4:2:0 (h_sub=2, v_sub=2)
+// NV16/NV61 — 4:2:2 (h_sub=2, v_sub=1)
+// P030/P230 — multi-pixel-per-word semiplanar (10-bit Y triplets).
+
+#include "../layout.h"
+#include "../io/semiplanar.h"
+
+namespace pixpat::formats
+{
+
+// NV12: 4:2:0. The chroma plane stores one (U, V) pair per uint16_t with
+// U in bits 0-7 and V in bits 8-15.
+struct NV12 : Layout<ColorKind::YUV, 2, 2,
+ Plane<uint8_t, Comp{ C::Y, 8, 0 }>,
+ Plane<uint16_t, Comp{ C::U, 8, 0 }, Comp{ C::V, 8, 8 }> > {
+ using Source = SemiplanarSource<NV12>;
+ using Sink = SemiplanarSink<NV12>;
+};
+
+// NV21: 4:2:0 with the chroma pair swapped relative to NV12 — V in bits
+// 0-7 and U in bits 8-15 of each uint16_t.
+struct NV21 : Layout<ColorKind::YUV, 2, 2,
+ Plane<uint8_t, Comp{ C::Y, 8, 0 }>,
+ Plane<uint16_t, Comp{ C::V, 8, 0 }, Comp{ C::U, 8, 8 }> > {
+ using Source = SemiplanarSource<NV21>;
+ using Sink = SemiplanarSink<NV21>;
+};
+
+// NV16: 4:2:2. Same chroma word as NV12 (U in bits 0-7, V in bits 8-15)
+// but without vertical subsampling.
+struct NV16 : Layout<ColorKind::YUV, 2, 1,
+ Plane<uint8_t, Comp{ C::Y, 8, 0 }>,
+ Plane<uint16_t, Comp{ C::U, 8, 0 }, Comp{ C::V, 8, 8 }> > {
+ using Source = SemiplanarSource<NV16>;
+ using Sink = SemiplanarSink<NV16>;
+};
+
+// NV61: 4:2:2 with the chroma pair swapped relative to NV16 — V in bits
+// 0-7 and U in bits 8-15 of each uint16_t.
+struct NV61 : Layout<ColorKind::YUV, 2, 1,
+ Plane<uint8_t, Comp{ C::Y, 8, 0 }>,
+ Plane<uint16_t, Comp{ C::V, 8, 0 }, Comp{ C::U, 8, 8 }> > {
+ using Source = SemiplanarSource<NV61>;
+ using Sink = SemiplanarSink<NV61>;
+};
+
+// Multi-pixel-per-word semiplanar (P030: 4:2:0, P230: 4:2:2). Y plane
+// holds 3 × 10-bit Y samples per uint32_t (top 2 bits unused). UV plane
+// holds 3 × (Cb,Cr) pairs per uint64_t (10 bits each, with 2-bit gaps
+// at bits 30-31 and 62-63 — left implicit, no X declared).
+
+// P030: the 4:2:0 variant (subsampling arguments 2, 2). In the chroma
+// word the components alternate U, V, U, V, U, V; the lower 32 bits use
+// offsets 0/10/20 and the upper 32 bits use offsets 32/42/52.
+struct P030 : Layout<ColorKind::YUV, 2, 2,
+ Plane<uint32_t,
+ Comp{ C::Y, 10, 0 },
+ Comp{ C::Y, 10, 10 },
+ Comp{ C::Y, 10, 20 }>,
+ Plane<uint64_t,
+ Comp{ C::U, 10, 0 },
+ Comp{ C::V, 10, 10 },
+ Comp{ C::U, 10, 20 },
+ Comp{ C::V, 10, 32 },
+ Comp{ C::U, 10, 42 },
+ Comp{ C::V, 10, 52 }> > {
+ using Source = MultiPixelSemiplanarSource<P030>;
+ using Sink = MultiPixelSemiplanarSink<P030>;
+};
+
+// P230: the 4:2:2 variant (subsampling arguments 2, 1). Plane contents
+// are bit-for-bit the same as P030; only the vertical subsampling
+// factor differs.
+struct P230 : Layout<ColorKind::YUV, 2, 1,
+ Plane<uint32_t,
+ Comp{ C::Y, 10, 0 },
+ Comp{ C::Y, 10, 10 },
+ Comp{ C::Y, 10, 20 }>,
+ Plane<uint64_t,
+ Comp{ C::U, 10, 0 },
+ Comp{ C::V, 10, 10 },
+ Comp{ C::U, 10, 20 },
+ Comp{ C::V, 10, 32 },
+ Comp{ C::U, 10, 42 },
+ Comp{ C::V, 10, 52 }> > {
+ using Source = MultiPixelSemiplanarSource<P230>;
+ using Sink = MultiPixelSemiplanarSink<P230>;
+};
+
+} // namespace pixpat::formats
diff --git a/subprojects/pixpat/pixpat-native/src/io.h b/subprojects/pixpat/pixpat-native/src/io.h
new file mode 100644
index 0000000..af24232
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io.h
@@ -0,0 +1,13 @@
+#pragma once
+
+// Aggregator: every Source / Sink template lives in one of the
+// per-iteration-shape headers under io/. Encode/decode helpers and
+// load_word/store_word are in io/detail.h, used by all the others.
+
+#include "io/detail.h"
+#include "io/packed.h"
+#include "io/semiplanar.h"
+#include "io/planar.h"
+#include "io/packed_yuv.h"
+#include "io/gray.h"
+#include "io/bayer.h"
diff --git a/subprojects/pixpat/pixpat-native/src/io/bayer.h b/subprojects/pixpat/pixpat-native/src/io/bayer.h
new file mode 100644
index 0000000..6b30c0e
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/bayer.h
@@ -0,0 +1,318 @@
+#pragma once
+
+// Bayer raw read/write support.
+//
+// Write side: each pixel carries one of R/G/B selected by (x mod 2,
+// y mod 2) and a fixed BayerOrder. Two missing channels per pixel are
+// dropped on encode.
+//
+// Read side: bilinear demosaic over a 3x3 window. The pixel's own
+// channel comes from self; missing channels are averaged from the
+// same-channel neighbours that the Bayer phase guarantees to exist:
+//
+// * At an R or B pixel, all four cardinal (N, E, S, W) neighbours
+// carry G and all four diagonal (NE, NW, SE, SW) neighbours carry
+// the other colour, so each missing channel averages four samples.
+// * At a G pixel, one missing colour sits in the row neighbours
+// (W, E) and the other in the column neighbours (N, S), so each
+// missing channel averages two samples.
+//
+// Sampled coordinates are clamped to the image bounds.
+//
+// The Layout shape is the same as a Y-only single-plane format
+// (storage carries one component plus optional X padding); the
+// BayerOrder is a separate template parameter on the Source / Sink.
+
+#include <array>
+#include <cstdint>
+
+#include "../layout.h"
+#include "csi2.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+// Colour-filter arrangement of the 2x2 Bayer tile anchored at pixel
+// (0, 0). The four letters list the tile row-major: top row left to
+// right, then bottom row (see detail::bayer_pick).
+enum class BayerOrder { RGGB, BGGR, GRBG, GBRG };
+
+namespace detail
+{
+// Returns which colour channel the sensor samples at a pixel, given the
+// tile order and the parity of the pixel's column (x_even) and row
+// (y_even).
+constexpr C bayer_pick(BayerOrder o, bool x_even, bool y_even) noexcept
+{
+    // Position inside the row-major 2x2 tile: bit 0 is set on odd
+    // columns, bit 1 on odd rows.
+    const unsigned phase = (x_even ? 0u : 1u) | (y_even ? 0u : 2u);
+
+    if (o == BayerOrder::RGGB) {
+        constexpr C tile[4] = { C::R, C::G, C::G, C::B };
+        return tile[phase];
+    }
+    if (o == BayerOrder::BGGR) {
+        constexpr C tile[4] = { C::B, C::G, C::G, C::R };
+        return tile[phase];
+    }
+    if (o == BayerOrder::GRBG) {
+        constexpr C tile[4] = { C::G, C::R, C::B, C::G };
+        return tile[phase];
+    }
+    if (o == BayerOrder::GBRG) {
+        constexpr C tile[4] = { C::G, C::B, C::R, C::G };
+        return tile[phase];
+    }
+
+    // Not reachable for the four enumerators; keeps the result defined.
+    return C::G;
+}
+
+// Clamps a signed coordinate into the index range [0, max_excl - 1].
+//
+//   v        - coordinate, may be negative (e.g. x - 1 at the left edge)
+//   max_excl - number of valid indices (image width or height)
+//
+// Returns 0 for negative input and max_excl - 1 for input past the end.
+// An empty range (max_excl == 0) has no valid index; 0 is returned in
+// that case rather than letting `max_excl - 1` wrap around to SIZE_MAX.
+constexpr size_t clamp_coord(int v, size_t max_excl) noexcept
+{
+    if (v < 0 || max_excl == 0)
+        return 0;
+
+    const size_t idx = size_t(v);
+    return idx < max_excl ? idx : max_excl - 1;
+}
+} // namespace detail
+
+// Reader for unpacked Bayer raw data. Every stored pixel holds a single
+// sample (declared as C::Y in the layout); read() rebuilds a full RGB16
+// pixel by bilinear interpolation over the 3x3 neighbourhood.
+//   L     - single-plane layout whose plane 0 has a C::Y component
+//   Order - colour arrangement of the 2x2 Bayer tile
+template <typename L, BayerOrder Order>
+struct BayerSource {
+ using Layout = L;
+ using Pixel = RGB16;
+
+ static_assert(L::kind == ColorKind::RGB);
+ static_assert(L::num_planes == 1);
+
+ using P = typename L::template plane<0>;
+ // Index of the sample component inside the plane's component list.
+ static constexpr size_t y_idx = P::template find_pos<C::Y>();
+ static_assert(y_idx < P::num_comps);
+
+ // Loads the storage word of pixel (x, y) and returns its sample
+ // expanded to the 16-bit normalized range via decode_norm.
+ static uint16_t read_sample(const Buffer<1>& buf, size_t x, size_t y) noexcept
+ {
+ const uint8_t* p = buf.data[0] + y * buf.stride[0]
+ + x * P::bytes_per_pixel;
+ const auto vals = P::unpack(detail::load_word<P>(p));
+ return detail::decode_norm(P::comps[y_idx].bits, vals[y_idx]);
+ }
+
+ // Demosaics pixel (x, y) of a W x H image. The fourth RGB16 field is
+ // always returned as 0.
+ //
+ // NOTE(review): neighbour coordinates are clamped, so on the border
+ // an out-of-range neighbour collapses onto the pixel's own row or
+ // column (e.g. xL == x at x == 0). The averages then include a
+ // sample of a different colour than the interior case.
+ // NOTE(review): x and y are cast to int before the +/-1 step, which
+ // assumes coordinates fit in int.
+ static RGB16 read(const Buffer<1>& buf, size_t x, size_t y,
+ size_t W, size_t H) noexcept
+ {
+ const bool x_even = (x & 1) == 0;
+ const bool y_even = (y & 1) == 0;
+ // Channel physically sampled at this pixel.
+ const C self = detail::bayer_pick(Order, x_even, y_even);
+
+ // Clamped coordinates of the left/right/top/bottom neighbours.
+ const size_t xL = detail::clamp_coord(int(x) - 1, W);
+ const size_t xR = detail::clamp_coord(int(x) + 1, W);
+ const size_t yT = detail::clamp_coord(int(y) - 1, H);
+ const size_t yB = detail::clamp_coord(int(y) + 1, H);
+
+ const uint16_t s = read_sample(buf, x, y);
+
+ uint16_t r = 0, g = 0, b = 0;
+
+ if (self == C::G) {
+ // Green pixel: one missing colour comes from the W/E pair, the
+ // other from the N/S pair. h_color is the colour of the
+ // horizontal neighbours. Sums are widened to 32 bits and
+ // rounded to nearest (+1 before >> 1).
+ const C h_color = detail::bayer_pick(Order, !x_even, y_even);
+ const uint16_t h_avg = uint16_t(
+ (uint32_t(read_sample(buf, xL, y))
+ + read_sample(buf, xR, y) + 1u) >> 1);
+ const uint16_t v_avg = uint16_t(
+ (uint32_t(read_sample(buf, x, yT))
+ + read_sample(buf, x, yB) + 1u) >> 1);
+ g = s;
+ if (h_color == C::R) { r = h_avg; b = v_avg; }
+ else { b = h_avg; r = v_avg; }
+ } else {
+ // Red or blue pixel: green is the mean of the four cardinal
+ // neighbours, the opposite colour the mean of the four
+ // diagonal neighbours (+2 before >> 2 rounds to nearest).
+ const uint16_t g_avg = uint16_t(
+ (uint32_t(read_sample(buf, x, yT))
+ + read_sample(buf, x, yB)
+ + read_sample(buf, xL, y)
+ + read_sample(buf, xR, y) + 2u) >> 2);
+ const uint16_t o_avg = uint16_t(
+ (uint32_t(read_sample(buf, xL, yT))
+ + read_sample(buf, xR, yT)
+ + read_sample(buf, xL, yB)
+ + read_sample(buf, xR, yB) + 2u) >> 2);
+ g = g_avg;
+ if (self == C::R) { r = s; b = o_avg; }
+ else { b = s; r = o_avg; }
+ }
+
+ return RGB16{ r, g, b, uint16_t(0) };
+ }
+};
+
+// Writer for unpacked Bayer raw data: for each pixel, keeps only the
+// channel the colour filter passes at that position and stores it in
+// the layout's C::Y component. X padding, if declared, is written as 0.
+template <typename L, BayerOrder Order>
+struct BayerSink {
+    using Layout = L;
+    using Pixel = RGB16;
+
+    static_assert(L::kind == ColorKind::RGB);
+    static_assert(L::num_planes == 1);
+
+    using P = typename L::template plane<0>;
+    static constexpr size_t y_idx = P::template find_pos<C::Y>();
+    static constexpr size_t x_idx = P::template find_pos<C::X>();
+    static constexpr bool has_x = (x_idx < P::num_comps);
+    static_assert(y_idx < P::num_comps);
+
+    // One pixel per block.
+    static constexpr size_t block_h = 1;
+    static constexpr size_t block_w = 1;
+
+    // Encodes the single pixel of `block` into the storage word at
+    // column `bx`, row `by`.
+    static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+                            const RGB16 (&block)[1][1]) noexcept
+    {
+        const RGB16& px = block[0][0];
+        const bool col_even = (bx & 1) == 0;
+        const bool row_even = (by & 1) == 0;
+        const C chan = detail::bayer_pick(Order, col_even, row_even);
+
+        // Keep the filtered channel; the other two are discarded.
+        uint16_t sample = px.b;
+        if (chan == C::R)
+            sample = px.r;
+        else if (chan == C::G)
+            sample = px.g;
+
+        std::array<uint16_t, P::num_comps> fields{};
+        fields[y_idx] = detail::encode_norm(P::comps[y_idx].bits, sample);
+        if constexpr (has_x)
+            fields[x_idx] = 0;
+
+        uint8_t* const row = buf.data[0] + by * buf.stride[0];
+        detail::store_word<P>(row + bx * P::bytes_per_pixel, P::pack(fields));
+    }
+};
+
+// Aliases so X-macro can register without nested template-template params.
+// Each alias fixes the BayerOrder argument and leaves the layout L open,
+// giving a single-parameter template per tile order.
+template <typename L> using BayerSource_RGGB = BayerSource<L, BayerOrder::RGGB>;
+template <typename L> using BayerSource_BGGR = BayerSource<L, BayerOrder::BGGR>;
+template <typename L> using BayerSource_GRBG = BayerSource<L, BayerOrder::GRBG>;
+template <typename L> using BayerSource_GBRG = BayerSource<L, BayerOrder::GBRG>;
+
+// Matching sink aliases, one per tile order.
+template <typename L> using BayerSink_RGGB = BayerSink<L, BayerOrder::RGGB>;
+template <typename L> using BayerSink_BGGR = BayerSink<L, BayerOrder::BGGR>;
+template <typename L> using BayerSink_GRBG = BayerSink<L, BayerOrder::GRBG>;
+template <typename L> using BayerSink_GBRG = BayerSink<L, BayerOrder::GBRG>;
+
+// MIPI CSI-2 packed Bayer. The bit layout doesn't fit
+// `Plane<Storage, Comp...>` because each pixel's bits span two
+// non-contiguous bytes, so we use the shared CSI-2 helper (io/csi2.h)
+// to (un)pack samples.
+//
+// The Layout slot is a placeholder (matches the unpacked Bayer of the
+// same bit-depth so the user-facing API can pick the right buffer
+// shape); bytes_per_pixel from the Plane is unused.
+//
+//   L        - placeholder single-plane layout
+//   Order    - colour arrangement of the 2x2 Bayer tile
+//   BitDepth - stored bits per sample, 10 or 12
+template <typename L, BayerOrder Order, size_t BitDepth>
+struct BayerPackedSource {
+    using Layout = L;
+    using Pixel = RGB16;
+
+    static_assert(L::kind == ColorKind::RGB);
+    static_assert(L::num_planes == 1);
+    static_assert(BitDepth == 10 || BitDepth == 12);
+
+    using Traits = detail::csi2::packed_traits<BitDepth>;
+    static constexpr size_t ppg = Traits::ppg;  // pixels per packed group
+    static constexpr size_t bpg = Traits::bpg;  // bytes per packed group
+
+    // Number of bits between the stored depth and the 16-bit pivot.
+    // Kept as a public constant; decoding itself goes through
+    // detail::decode_norm (see read_sample).
+    static constexpr unsigned shift = 16 - BitDepth;
+
+    // Returns the sample at (x, y) expanded to the 16-bit normalized
+    // range. Uses detail::decode_norm, the same bit-replicating decode
+    // as the unpacked BayerSource, so that full-scale input maps to
+    // 0xFFFF for both (a plain `<< shift` would give 0xFFC0 / 0xFFF0).
+    // BayerPackedSink's truncating `>> (16 - BitDepth)` drops the
+    // replicated low bits again, so the round trip stays exact.
+    static uint16_t read_sample(const Buffer<1>& buf, size_t x, size_t y) noexcept
+    {
+        const uint8_t* src = buf.data[0] + y * buf.stride[0]
+                             + (x / ppg) * bpg;
+        const uint16_t val = detail::csi2::unpack_sample<BitDepth>(src, x % ppg);
+        return detail::decode_norm(unsigned(BitDepth), val);
+    }
+
+    // Demosaics pixel (x, y) of a W x H image by bilinear interpolation
+    // over the 3x3 neighbourhood. The fourth RGB16 field is returned
+    // as 0.
+    //
+    // NOTE(review): neighbour coordinates are clamped, so on the border
+    // an out-of-range neighbour collapses onto the pixel's own row or
+    // column and the averages include a sample of a different colour
+    // than in the interior case.
+    static RGB16 read(const Buffer<1>& buf, size_t x, size_t y,
+                      size_t W, size_t H) noexcept
+    {
+        const bool x_even = (x & 1) == 0;
+        const bool y_even = (y & 1) == 0;
+        // Channel physically sampled at this pixel.
+        const C self = detail::bayer_pick(Order, x_even, y_even);
+
+        const size_t xL = detail::clamp_coord(int(x) - 1, W);
+        const size_t xR = detail::clamp_coord(int(x) + 1, W);
+        const size_t yT = detail::clamp_coord(int(y) - 1, H);
+        const size_t yB = detail::clamp_coord(int(y) + 1, H);
+
+        const uint16_t s = read_sample(buf, x, y);
+
+        uint16_t r = 0, g = 0, b = 0;
+
+        if (self == C::G) {
+            // Green pixel: one missing colour comes from the W/E pair,
+            // the other from the N/S pair; averages round to nearest.
+            const C h_color = detail::bayer_pick(Order, !x_even, y_even);
+            const uint16_t h_avg = uint16_t(
+                (uint32_t(read_sample(buf, xL, y))
+                 + read_sample(buf, xR, y) + 1u) >> 1);
+            const uint16_t v_avg = uint16_t(
+                (uint32_t(read_sample(buf, x, yT))
+                 + read_sample(buf, x, yB) + 1u) >> 1);
+            g = s;
+            if (h_color == C::R) { r = h_avg; b = v_avg; }
+            else { b = h_avg; r = v_avg; }
+        } else {
+            // Red or blue pixel: green from the four cardinal
+            // neighbours, the opposite colour from the four diagonals.
+            const uint16_t g_avg = uint16_t(
+                (uint32_t(read_sample(buf, x, yT))
+                 + read_sample(buf, x, yB)
+                 + read_sample(buf, xL, y)
+                 + read_sample(buf, xR, y) + 2u) >> 2);
+            const uint16_t o_avg = uint16_t(
+                (uint32_t(read_sample(buf, xL, yT))
+                 + read_sample(buf, xR, yT)
+                 + read_sample(buf, xL, yB)
+                 + read_sample(buf, xR, yB) + 2u) >> 2);
+            g = g_avg;
+            if (self == C::R) { r = s; b = o_avg; }
+            else { b = s; r = o_avg; }
+        }
+
+        return RGB16{ r, g, b, uint16_t(0) };
+    }
+};
+
+// Writer for MIPI CSI-2 packed Bayer. One block is one packed group of
+// `ppg` horizontally adjacent pixels; for each pixel only the channel
+// selected by the Bayer tile is kept, truncated to BitDepth bits and
+// handed to the shared CSI-2 packer.
+template <typename L, BayerOrder Order, size_t BitDepth>
+struct BayerPackedSink {
+    using Layout = L;
+    using Pixel = RGB16;
+
+    static_assert(L::kind == ColorKind::RGB);
+    static_assert(L::num_planes == 1);
+    static_assert(BitDepth == 10 || BitDepth == 12);
+
+    using Traits = detail::csi2::packed_traits<BitDepth>;
+    static constexpr size_t ppg = Traits::ppg;
+    static constexpr size_t bpg = Traits::bpg;
+
+    static constexpr size_t block_h = 1;
+    static constexpr size_t block_w = ppg;
+
+    // Packs the `ppg` pixels of `block`, whose first pixel is at column
+    // `bx` of row `by`, into the group's `bpg` bytes.
+    static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+                            const RGB16 (&block)[1][ppg]) noexcept
+    {
+        // Bits dropped when going from the 16-bit pivot to BitDepth.
+        constexpr unsigned drop = 16 - BitDepth;
+        const bool row_even = (by & 1) == 0;
+
+        std::array<uint16_t, ppg> group{};
+        size_t i = 0;
+        for (const RGB16& px : block[0]) {
+            const bool col_even = ((bx + i) & 1) == 0;
+            const C chan = detail::bayer_pick(Order, col_even, row_even);
+
+            uint16_t norm = px.b;
+            if (chan == C::R)
+                norm = px.r;
+            else if (chan == C::G)
+                norm = px.g;
+
+            group[i] = uint16_t(norm >> drop);
+            ++i;
+        }
+
+        uint8_t* const row = buf.data[0] + by * buf.stride[0];
+        detail::csi2::pack_group<BitDepth>(row + (bx / ppg) * bpg, group);
+    }
+};
+
+// Single-parameter aliases for the CSI-2 packed Bayer source: one per
+// combination of tile order and bit depth (10 or 12), layout L left open.
+template <typename L> using BayerPackedSource_RGGB10 = BayerPackedSource<L, BayerOrder::RGGB, 10>;
+template <typename L> using BayerPackedSource_BGGR10 = BayerPackedSource<L, BayerOrder::BGGR, 10>;
+template <typename L> using BayerPackedSource_GRBG10 = BayerPackedSource<L, BayerOrder::GRBG, 10>;
+template <typename L> using BayerPackedSource_GBRG10 = BayerPackedSource<L, BayerOrder::GBRG, 10>;
+template <typename L> using BayerPackedSource_RGGB12 = BayerPackedSource<L, BayerOrder::RGGB, 12>;
+template <typename L> using BayerPackedSource_BGGR12 = BayerPackedSource<L, BayerOrder::BGGR, 12>;
+template <typename L> using BayerPackedSource_GRBG12 = BayerPackedSource<L, BayerOrder::GRBG, 12>;
+template <typename L> using BayerPackedSource_GBRG12 = BayerPackedSource<L, BayerOrder::GBRG, 12>;
+
+// Matching aliases for the CSI-2 packed Bayer sink.
+template <typename L> using BayerPackedSink_RGGB10 = BayerPackedSink<L, BayerOrder::RGGB, 10>;
+template <typename L> using BayerPackedSink_BGGR10 = BayerPackedSink<L, BayerOrder::BGGR, 10>;
+template <typename L> using BayerPackedSink_GRBG10 = BayerPackedSink<L, BayerOrder::GRBG, 10>;
+template <typename L> using BayerPackedSink_GBRG10 = BayerPackedSink<L, BayerOrder::GBRG, 10>;
+template <typename L> using BayerPackedSink_RGGB12 = BayerPackedSink<L, BayerOrder::RGGB, 12>;
+template <typename L> using BayerPackedSink_BGGR12 = BayerPackedSink<L, BayerOrder::BGGR, 12>;
+template <typename L> using BayerPackedSink_GRBG12 = BayerPackedSink<L, BayerOrder::GRBG, 12>;
+template <typename L> using BayerPackedSink_GBRG12 = BayerPackedSink<L, BayerOrder::GBRG, 12>;
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/csi2.h b/subprojects/pixpat/pixpat-native/src/io/csi2.h
new file mode 100644
index 0000000..59a8f8d
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/csi2.h
@@ -0,0 +1,80 @@
+#pragma once
+
+// Shared MIPI CSI-2 byte (un)packing for the 10P / 12P forms used by
+// Bayer raw and Y-only grayscale.
+//
+// 10P: 4 samples in 5 bytes — bytes 0..3 hold the high 8 bits of
+//      samples 0..3; byte 4 holds 4 x 2 LSBs in the same order, lowest
+//      bits first (sample 0 in bits 0..1, sample 1 in bits 2..3,
+//      sample 2 in bits 4..5, sample 3 in bits 6..7).
+// 12P: 2 samples in 3 bytes — bytes 0..1 hold the high 8 bits of
+//      samples 0..1; byte 2 holds 2 x 4 LSBs (sample 0 in bits 0..3,
+//      sample 1 in bits 4..7).
+//
+// This is the arrangement the V4L2 documentation gives for the *10P and
+// *12P pixel formats (e.g. SRGGB10P: "B00low (bits 1--0)" in byte 4).
+//
+// Helpers deal in the stored integer (low BitDepth bits set);
+// normalization to/from the 16-bit pivot stays in the caller.
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+
+namespace pixpat::detail::csi2
+{
+
+// Group geometry per bit depth: `ppg` pixels are stored in `bpg` bytes.
+// Only depths 10 and 12 are specialized; any other depth fails to
+// compile.
+template <size_t BitDepth>
+struct packed_traits;
+
+template <>
+struct packed_traits<10> {
+    static constexpr size_t ppg = 4;
+    static constexpr size_t bpg = 5;
+};
+
+template <>
+struct packed_traits<12> {
+    static constexpr size_t ppg = 2;
+    static constexpr size_t bpg = 3;
+};
+
+// Extract one BitDepth-bit sample from a packed group, where `i` is the
+// in-group index (0..ppg-1) and `src` points at the group's first byte.
+// The returned value occupies the low BitDepth bits.
+template <size_t BitDepth>
+inline uint16_t unpack_sample(const uint8_t* src, size_t i) noexcept
+{
+    // Each sample contributes (BitDepth - 8) low bits to the group's
+    // last byte; sample i's field starts at bit i * lsb_bits.
+    constexpr unsigned lsb_bits = BitDepth - 8;
+    constexpr unsigned lsb_mask = (1u << lsb_bits) - 1u;
+    constexpr size_t lsb_byte = packed_traits<BitDepth>::bpg - 1;
+
+    const unsigned hi = src[i];
+    const unsigned lo = (unsigned(src[lsb_byte]) >> (i * lsb_bits)) & lsb_mask;
+    return uint16_t((hi << lsb_bits) | lo);
+}
+
+// Write `ppg` BitDepth-bit samples (low BitDepth bits significant) into
+// a packed group of `bpg` bytes starting at `dst`. Bits above BitDepth
+// in the input values are ignored.
+template <size_t BitDepth>
+inline void pack_group(
+    uint8_t* dst,
+    const std::array<uint16_t, packed_traits<BitDepth>::ppg>& vals) noexcept
+{
+    constexpr unsigned lsb_bits = BitDepth - 8;
+    constexpr unsigned lsb_mask = (1u << lsb_bits) - 1u;
+    constexpr size_t ppg = packed_traits<BitDepth>::ppg;
+    constexpr size_t lsb_byte = packed_traits<BitDepth>::bpg - 1;
+
+    unsigned lsbs = 0;
+    for (size_t i = 0; i < ppg; ++i) {
+        // High 8 bits go to the sample's own byte ...
+        dst[i] = uint8_t((vals[i] >> lsb_bits) & 0xFF);
+        // ... the remaining low bits are gathered, sample 0 lowest.
+        lsbs |= (vals[i] & lsb_mask) << (i * lsb_bits);
+    }
+    dst[lsb_byte] = uint8_t(lsbs);
+}
+
+} // namespace pixpat::detail::csi2
diff --git a/subprojects/pixpat/pixpat-native/src/io/detail.h b/subprojects/pixpat/pixpat-native/src/io/detail.h
new file mode 100644
index 0000000..cb2b9fb
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/detail.h
@@ -0,0 +1,62 @@
+#pragma once
+
+// Per-component encode/decode against the descriptor + memcpy-based
+// load/store_word helpers. Shared by every Source / Sink template.
+
+#include <cstdint>
+#include <cstring>
+
+#include "../layout.h"
+
+namespace pixpat::detail
+{
+
+// Decode an N-bit stored value into the 16-bit normalized space and
+// encode it back. Decode bit-replicates the stored value across the 16
+// bits so that N-bit max maps to normalized max (e.g. 8-bit 0xFF →
+// 0xFFFF, not 0xFF00). Encode is a plain truncating right-shift: the
+// replicated bits land in the low (16-N) bits and get dropped, so
+// stored→norm→stored is exact for any N in [1, 16].
+//
+// `bits` is taken at runtime; in every call site it traces back to a
+// constexpr Plane::comps[I].bits read, which the optimizer constant-
+// folds after inlining.
+
+// Expands an N-bit stored value to the 16-bit normalized range by
+// tiling its bit pattern downwards from bit 15.
+//
+//   bits   - width N of the stored value
+//   stored - the value, in the low N bits
+//
+// Returns the normalized value; N-bit maximum maps to 0xFFFF. A width
+// of 0 yields 0.
+constexpr uint16_t decode_norm(unsigned bits, uint16_t stored) noexcept
+{
+    // A zero-width component carries no information. Without this guard
+    // the loop below would step by 0 and never terminate.
+    if (bits == 0)
+        return 0;
+
+    const int N = int(bits);
+    // Loop, not a single OR: one replication only covers 2N bits, so
+    // N < 8 (RGB565, RGBA4444, 1-bit alpha, ...) needs multiple tiles.
+    uint32_t result = 0;
+    for (int s = 16 - N; s > -N; s -= N) {
+        if (s >= 0)
+            result |= uint32_t(stored) << s;
+        else
+            // Last, partial tile: only its top bits still fit.
+            result |= uint32_t(stored) >> -s;
+    }
+    return uint16_t(result);
+}
+
+// Reduces a 16-bit normalized value to `bits` bits by discarding the
+// low (16 - bits) bits; the result sits in the low `bits` bits.
+constexpr uint16_t encode_norm(unsigned bits, uint16_t norm) noexcept
+{
+    const unsigned dropped = 16u - bits;
+    return static_cast<uint16_t>(norm >> dropped);
+}
+
+// Fetches one storage word for `Plane` from the bytes at `p`. Only
+// Plane::bytes_per_pixel bytes are copied into a zero-initialized word,
+// so layouts narrower than their storage type (e.g. 24-bit BGR888) are
+// handled the same way as tight ones; memcpy also avoids any alignment
+// requirement on `p`.
+template <typename Plane>
+inline typename Plane::storage_t load_word(const uint8_t* p) noexcept
+{
+    using Word = typename Plane::storage_t;
+
+    Word value = Word{};
+    std::memcpy(&value, p, Plane::bytes_per_pixel);
+    return value;
+}
+
+// Writes the first Plane::bytes_per_pixel bytes of `word` to `p`;
+// bytes beyond that in the destination are left untouched.
+template <typename Plane>
+inline void store_word(uint8_t* p, typename Plane::storage_t word) noexcept
+{
+    const void* const from = &word;
+    std::memcpy(p, from, Plane::bytes_per_pixel);
+}
+
+} // namespace pixpat::detail
diff --git a/subprojects/pixpat/pixpat-native/src/io/gray.h b/subprojects/pixpat/pixpat-native/src/io/gray.h
new file mode 100644
index 0000000..d175b68
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/gray.h
@@ -0,0 +1,153 @@
+#pragma once
+
+// Grayscale (Y8 / Y10 / Y12 / Y16) and multi-pixel-per-word grayscale
+// (XYYY2101010: 3 Y components in one uint32_t). Modeled as a YUV format
+// with neutral chroma synthesized on read so cross-color-kind ColorXfm
+// produces R=G=B=Y'. The sink encodes Y from YUV16 and ignores U/V.
+// Y10/Y12 carry an X padding bitfield which we zero out on write.
+// Neutral chroma in normalized-16 is 0x8000 (the midpoint of [0, 0xFFFF]).
+
+#include <array>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+// Reader for single-plane grayscale layouts. Produces a YUV16 pixel
+// whose luma comes from the layout's C::Y component and whose chroma is
+// the neutral midpoint 0x8000; the fourth field is 0.
+template <typename L>
+struct GraySource {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 1);
+
+    using P = typename L::template plane<0>;
+    static constexpr size_t y_idx = P::template find_pos<C::Y>();
+    static_assert(y_idx < P::num_comps);
+
+    // Reads pixel (x, y). W and H are part of the common Source
+    // signature and are not needed here.
+    static YUV16 read(const Buffer<1>& buf, size_t x, size_t y,
+                      [[maybe_unused]] size_t W,
+                      [[maybe_unused]] size_t H) noexcept
+    {
+        const uint8_t* const row = buf.data[0] + y * buf.stride[0];
+        const auto fields = P::unpack(
+            detail::load_word<P>(row + x * P::bytes_per_pixel));
+
+        const uint16_t luma =
+            detail::decode_norm(P::comps[y_idx].bits, fields[y_idx]);
+        return YUV16{ luma, 0x8000, 0x8000, uint16_t(0) };
+    }
+};
+
+// Writer for single-plane grayscale layouts. Stores only the luma of
+// the incoming YUV16 pixel; chroma is ignored and any X padding
+// component is written as 0.
+template <typename L>
+struct GraySink {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 1);
+
+    using P = typename L::template plane<0>;
+    static constexpr size_t y_idx = P::template find_pos<C::Y>();
+    static constexpr size_t x_idx = P::template find_pos<C::X>();
+    static constexpr bool has_x = (x_idx < P::num_comps);
+    static_assert(y_idx < P::num_comps);
+
+    // One pixel per block.
+    static constexpr size_t block_h = 1;
+    static constexpr size_t block_w = 1;
+
+    // Encodes the single pixel of `block` into the storage word at
+    // column `bx`, row `by`.
+    static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+                            const YUV16 (&block)[1][1]) noexcept
+    {
+        const YUV16& px = block[0][0];
+
+        std::array<uint16_t, P::num_comps> fields{};
+        fields[y_idx] = detail::encode_norm(P::comps[y_idx].bits, px.y);
+        if constexpr (has_x)
+            fields[x_idx] = 0;
+
+        uint8_t* const row = buf.data[0] + by * buf.stride[0];
+        detail::store_word<P>(row + bx * P::bytes_per_pixel, P::pack(fields));
+    }
+};
+
+// Multi-pixel-per-word grayscale. The layout lists one C::Y component
+// per pixel of a group, so the number of pixels per storage word (ppw)
+// equals the number of C::Y entries. All Y components are expected to
+// have the same bit width, which lets a single width drive the
+// encode/decode. The matching sink uses block_w = ppw so it writes one
+// whole storage word per block.
+template <typename L>
+struct MultiPixelGraySource {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 1);
+
+    using P = typename L::template plane<0>;
+    static constexpr size_t ppw = P::template component_count<C::Y>();
+    static_assert(ppw >= 1);
+
+    // Width of the first Y component, used for every Y slot.
+    static constexpr unsigned y_bits = P::comps[P::template find_pos<C::Y>(0)].bits;
+
+    // Reads pixel (x, y): luma from the matching Y slot of its storage
+    // word, neutral chroma 0x8000, fourth field 0. W and H are unused.
+    static YUV16 read(const Buffer<1>& buf, size_t x, size_t y,
+                      [[maybe_unused]] size_t W,
+                      [[maybe_unused]] size_t H) noexcept
+    {
+        const size_t word_idx = x / ppw;  // which storage word in the row
+        const size_t slot = x % ppw;      // which Y inside that word
+
+        const uint8_t* const row = buf.data[0] + y * buf.stride[0];
+        const auto fields = P::unpack(
+            detail::load_word<P>(row + word_idx * P::bytes_per_pixel));
+
+        // Component index of the slot-th Y entry; the lookup runs over a
+        // constexpr component table.
+        const size_t comp = P::template find_pos<C::Y>(slot);
+        const uint16_t luma = detail::decode_norm(y_bits, fields[comp]);
+
+        return YUV16{ luma, 0x8000, 0x8000, uint16_t(0) };
+    }
+};
+
+// Writer for multi-pixel-per-word grayscale. A block is one storage
+// word's worth of pixels (ppw); each pixel's luma goes to its own Y
+// slot, chroma is ignored, and the first X padding component (if any)
+// is written as 0.
+template <typename L>
+struct MultiPixelGraySink {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 1);
+
+    using P = typename L::template plane<0>;
+    static constexpr size_t ppw = P::template component_count<C::Y>();
+    static constexpr size_t x_idx = P::template find_pos<C::X>();
+    static constexpr bool has_x = (x_idx < P::num_comps);
+    static_assert(ppw >= 1);
+
+    static constexpr size_t block_h = 1;
+    static constexpr size_t block_w = ppw;
+
+    // Packs the `ppw` pixels of `block`, whose first pixel is at column
+    // `bx` of row `by`, into a single storage word.
+    static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+                            const YUV16 (&block)[1][ppw]) noexcept
+    {
+        // Width of the first Y component, used for every Y slot.
+        constexpr unsigned y_bits = P::comps[P::template find_pos<C::Y>(0)].bits;
+
+        std::array<uint16_t, P::num_comps> fields{};
+        size_t slot = 0;
+        for (const YUV16& px : block[0]) {
+            const size_t comp = P::template find_pos<C::Y>(slot);
+            fields[comp] = detail::encode_norm(y_bits, px.y);
+            ++slot;
+        }
+
+        if constexpr (has_x)
+            fields[x_idx] = 0;
+
+        uint8_t* const row = buf.data[0] + by * buf.stride[0];
+        detail::store_word<P>(row + (bx / ppw) * P::bytes_per_pixel,
+                              P::pack(fields));
+    }
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/gray_packed.h b/subprojects/pixpat/pixpat-native/src/io/gray_packed.h
new file mode 100644
index 0000000..dc1fa68
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/gray_packed.h
@@ -0,0 +1,78 @@
+#pragma once
+
+// MIPI CSI-2 packed grayscale (Y10P / Y12P). Same byte packing as
+// Bayer10P/Bayer12P (see io/csi2.h) but every sample is Y; the source
+// emits neutral chroma to keep cross-color-kind ColorXfm consistent
+// with GraySource.
+//
+// The Layout slot is a placeholder (matches the unpacked Y8 storage
+// shape so dispatch plumbing is uniform); bytes_per_pixel from the
+// Plane is unused.
+
+#include <array>
+#include <cstdint>
+
+#include "../layout.h"
+#include "csi2.h"
+
+namespace pixpat
+{
+
+// Reader for MIPI CSI-2 packed grayscale (Y10P / Y12P).
+//   L        - placeholder single-plane YUV layout
+//   BitDepth - stored bits per sample, 10 or 12
+// Produces YUV16 with neutral chroma (0x8000) and the fourth field 0.
+template <typename L, size_t BitDepth>
+struct GrayPackedSource {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 1);
+    static_assert(BitDepth == 10 || BitDepth == 12);
+
+    using Traits = detail::csi2::packed_traits<BitDepth>;
+    static constexpr size_t ppg = Traits::ppg;  // pixels per packed group
+    static constexpr size_t bpg = Traits::bpg;  // bytes per packed group
+    // Number of bits between the stored depth and the 16-bit pivot.
+    static constexpr unsigned shift = 16 - BitDepth;
+
+    // Reads pixel (x, y). W and H are unused.
+    //
+    // The stored value is expanded to 16 bits by bit replication: the
+    // sample is placed in the top BitDepth bits and its own top `shift`
+    // bits fill the remaining low bits. This matches the decode used by
+    // GraySource for unpacked Y10 / Y12, so full-scale input maps to
+    // 0xFFFF in both (a plain `<< shift` would give 0xFFC0 / 0xFFF0).
+    // GrayPackedSink's truncating `>> (16 - BitDepth)` drops the
+    // replicated bits again, so the round trip stays exact.
+    static YUV16 read(const Buffer<1>& buf, size_t x, size_t y,
+                      [[maybe_unused]] size_t W,
+                      [[maybe_unused]] size_t H) noexcept
+    {
+        const uint8_t* src = buf.data[0] + y * buf.stride[0]
+                             + (x / ppg) * bpg;
+        const unsigned val = detail::csi2::unpack_sample<BitDepth>(src, x % ppg);
+        const uint16_t luma =
+            uint16_t((val << shift) | (val >> (unsigned(BitDepth) - shift)));
+        return YUV16{
+            luma,
+            0x8000, 0x8000, uint16_t(0),
+        };
+    }
+};
+
+// Writer for MIPI CSI-2 packed grayscale (Y10P / Y12P). One block is
+// one packed group of `ppg` pixels; each pixel's luma is truncated to
+// BitDepth bits and the group is handed to the shared CSI-2 packer.
+// Chroma is ignored.
+template <typename L, size_t BitDepth>
+struct GrayPackedSink {
+    using Layout = L;
+    using Pixel = YUV16;
+
+    static_assert(L::kind == ColorKind::YUV);
+    static_assert(L::num_planes == 1);
+    static_assert(BitDepth == 10 || BitDepth == 12);
+
+    using Traits = detail::csi2::packed_traits<BitDepth>;
+    static constexpr size_t ppg = Traits::ppg;
+    static constexpr size_t bpg = Traits::bpg;
+
+    static constexpr size_t block_h = 1;
+    static constexpr size_t block_w = ppg;
+
+    // Packs the `ppg` pixels of `block`, whose first pixel is at column
+    // `bx` of row `by`, into the group's `bpg` bytes.
+    static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+                            const YUV16 (&block)[1][ppg]) noexcept
+    {
+        // Bits dropped when going from the 16-bit pivot to BitDepth.
+        constexpr unsigned drop = 16 - BitDepth;
+
+        std::array<uint16_t, ppg> group{};
+        size_t slot = 0;
+        for (const YUV16& px : block[0])
+            group[slot++] = uint16_t(px.y >> drop);
+
+        uint8_t* const row = buf.data[0] + by * buf.stride[0];
+        detail::csi2::pack_group<BitDepth>(row + (bx / ppg) * bpg, group);
+    }
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h b/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h
new file mode 100644
index 0000000..f2f8206
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h
@@ -0,0 +1,72 @@
+#pragma once
+
+// Single-channel RGB formats (R8). Storage carries one R component;
+// MonoRGBSource synthesizes G=B=R on read so cross-color-kind ColorXfm
+// produces sensible Y from R alone. MonoRGBSink encodes R and ignores
+// G/B/A (and zeroes any X padding). Symmetric to GraySource/GraySink
+// (io/gray.h) but for ColorKind::RGB on C::R.
+
+#include <array>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+// Reader for single-channel RGB layouts. The stored C::R component is
+// decoded once and copied to all three colour channels; the fourth
+// RGB16 field is 0.
+template <typename L>
+struct MonoRGBSource {
+    using Layout = L;
+    using Pixel = RGB16;
+
+    static_assert(L::kind == ColorKind::RGB);
+    static_assert(L::num_planes == 1);
+
+    using P = typename L::template plane<0>;
+    static constexpr size_t r_idx = P::template find_pos<C::R>();
+    static_assert(r_idx < P::num_comps);
+
+    // Reads pixel (x, y). W and H are unused.
+    static RGB16 read(const Buffer<1>& buf, size_t x, size_t y,
+                      [[maybe_unused]] size_t W,
+                      [[maybe_unused]] size_t H) noexcept
+    {
+        const uint8_t* const row = buf.data[0] + y * buf.stride[0];
+        const auto fields = P::unpack(
+            detail::load_word<P>(row + x * P::bytes_per_pixel));
+
+        const uint16_t level =
+            detail::decode_norm(P::comps[r_idx].bits, fields[r_idx]);
+        return RGB16{ level, level, level, uint16_t(0) };
+    }
+};
+
+// Writer for single-channel RGB layouts. Stores only the red channel of
+// the incoming RGB16 pixel; the other fields are ignored and any X
+// padding component is written as 0.
+template <typename L>
+struct MonoRGBSink {
+    using Layout = L;
+    using Pixel = RGB16;
+
+    static_assert(L::kind == ColorKind::RGB);
+    static_assert(L::num_planes == 1);
+
+    using P = typename L::template plane<0>;
+    static constexpr size_t r_idx = P::template find_pos<C::R>();
+    static constexpr size_t x_idx = P::template find_pos<C::X>();
+    static constexpr bool has_x = (x_idx < P::num_comps);
+    static_assert(r_idx < P::num_comps);
+
+    // One pixel per block.
+    static constexpr size_t block_h = 1;
+    static constexpr size_t block_w = 1;
+
+    // Encodes the single pixel of `block` into the storage word at
+    // column `bx`, row `by`.
+    static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+                            const RGB16 (&block)[1][1]) noexcept
+    {
+        const RGB16& px = block[0][0];
+
+        std::array<uint16_t, P::num_comps> fields{};
+        fields[r_idx] = detail::encode_norm(P::comps[r_idx].bits, px.r);
+        if constexpr (has_x)
+            fields[x_idx] = 0;
+
+        uint8_t* const row = buf.data[0] + by * buf.stride[0];
+        detail::store_word<P>(row + bx * P::bytes_per_pixel, P::pack(fields));
+    }
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/packed.h b/subprojects/pixpat/pixpat-native/src/io/packed.h
new file mode 100644
index 0000000..9d953bc
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/packed.h
@@ -0,0 +1,106 @@
+#pragma once
+
+// Single-plane, single-pixel-per-storage-word formats. Works for both
+// RGB layouts (XRGB8888, RGB565, ABGR16161616, ...) and YUV
+// single-pixel layouts (XVUY2101010, AVUY16161616). Pixel type follows
+// L::kind; the three mandatory components are R/G/B for RGB or Y/U/V
+// for YUV. Both `RGB16` and `YUV16` are 4 uint16_t with the alpha last,
+// so aggregate-init by position works for either.
+
+#include <array>
+#include <type_traits>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+template <typename L>
+struct PackedSource {
+	// Reader for single-plane layouts with one pixel per storage word.
+	// The pixel type and the three mandatory component tags follow
+	// L::kind: R/G/B for RGB layouts, Y/U/V otherwise.
+	using Layout = L;
+	using Pixel = std::conditional_t<L::kind == ColorKind::RGB, RGB16, YUV16>;
+
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+
+	// Tags of the three colour components, in pixel-field order.
+	static constexpr C c0 = (L::kind == ColorKind::RGB) ? C::R : C::Y;
+	static constexpr C c1 = (L::kind == ColorKind::RGB) ? C::G : C::U;
+	static constexpr C c2 = (L::kind == ColorKind::RGB) ? C::B : C::V;
+
+	// Component positions; alpha is optional, the other three are not.
+	static constexpr size_t i0 = P::template find_pos<c0>();
+	static constexpr size_t i1 = P::template find_pos<c1>();
+	static constexpr size_t i2 = P::template find_pos<c2>();
+	static constexpr size_t a_idx = P::template find_pos<C::A>();
+	static constexpr bool has_a = (a_idx < P::num_comps);
+	static_assert(i0 < P::num_comps && i1 < P::num_comps && i2 < P::num_comps);
+
+	// Decode the pixel at column x, row y. W and H are not used here.
+	static Pixel read(const Buffer<1>& buf, size_t x, size_t y,
+			  [[maybe_unused]] size_t W,
+			  [[maybe_unused]] size_t H) noexcept
+	{
+		const uint8_t* row = buf.data[0] + y * buf.stride[0];
+		const auto fields = P::unpack(
+			detail::load_word<P>(row + x * P::bytes_per_pixel));
+
+		// Alpha stays 0 when the layout carries no A component.
+		uint16_t alpha = 0;
+		if constexpr (has_a)
+			alpha = detail::decode_norm(P::comps[a_idx].bits, fields[a_idx]);
+
+		// Positional aggregate init: (r|y, g|u, b|v, a).
+		return Pixel{
+			detail::decode_norm(P::comps[i0].bits, fields[i0]),
+			detail::decode_norm(P::comps[i1].bits, fields[i1]),
+			detail::decode_norm(P::comps[i2].bits, fields[i2]),
+			alpha,
+		};
+	}
+};
+
+template <typename L>
+struct PackedSink {
+	// Writer for single-plane layouts with one pixel per storage word.
+	// Mirrors PackedSource: the three colour components are R/G/B or
+	// Y/U/V depending on L::kind; X (if present) is written as 0 and A
+	// (if present) is encoded from the pixel's `a`.
+	using Layout = L;
+	using Pixel = std::conditional_t<L::kind == ColorKind::RGB, RGB16, YUV16>;
+
+	static_assert(L::num_planes == 1);
+
+	using P = typename L::template plane<0>;
+
+	// Tags of the three colour components, in pixel-field order.
+	static constexpr C c0 = (L::kind == ColorKind::RGB) ? C::R : C::Y;
+	static constexpr C c1 = (L::kind == ColorKind::RGB) ? C::G : C::U;
+	static constexpr C c2 = (L::kind == ColorKind::RGB) ? C::B : C::V;
+
+	// Component positions; X and A are optional.
+	static constexpr size_t i0 = P::template find_pos<c0>();
+	static constexpr size_t i1 = P::template find_pos<c1>();
+	static constexpr size_t i2 = P::template find_pos<c2>();
+	static constexpr size_t x_idx = P::template find_pos<C::X>();
+	static constexpr size_t a_idx = P::template find_pos<C::A>();
+	static constexpr bool has_x = (x_idx < P::num_comps);
+	static constexpr bool has_a = (a_idx < P::num_comps);
+	static_assert(i0 < P::num_comps && i1 < P::num_comps && i2 < P::num_comps);
+
+	// One pixel per block.
+	static constexpr size_t block_h = 1;
+	static constexpr size_t block_w = 1;
+
+	// Encode block[0][0] into the storage word at column bx, row by.
+	static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+				const Pixel (&block)[1][1]) noexcept
+	{
+		const Pixel& px = block[0][0];
+		uint8_t* dst = buf.data[0] + by * buf.stride[0] + bx * P::bytes_per_pixel;
+
+		std::array<uint16_t, P::num_comps> fields{};
+
+		// RGB16 and YUV16 name their fields differently, so pick the
+		// member names that match L::kind.
+		if constexpr (L::kind != ColorKind::RGB) {
+			fields[i0] = detail::encode_norm(P::comps[i0].bits, px.y);
+			fields[i1] = detail::encode_norm(P::comps[i1].bits, px.u);
+			fields[i2] = detail::encode_norm(P::comps[i2].bits, px.v);
+		} else {
+			fields[i0] = detail::encode_norm(P::comps[i0].bits, px.r);
+			fields[i1] = detail::encode_norm(P::comps[i1].bits, px.g);
+			fields[i2] = detail::encode_norm(P::comps[i2].bits, px.b);
+		}
+
+		if constexpr (has_a)
+			fields[a_idx] = detail::encode_norm(P::comps[a_idx].bits, px.a);
+		if constexpr (has_x)
+			fields[x_idx] = 0;
+
+		detail::store_word<P>(dst, P::pack(fields));
+	}
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h b/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h
new file mode 100644
index 0000000..90c8b2f
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h
@@ -0,0 +1,89 @@
+#pragma once
+
+// Packed YUV 4:2:2 (YUYV / YVYU / UYVY / VYUY): two pixels per 32-bit
+// word, one shared chroma pair. The Layout uses two C::Y entries plus
+// one each of C::U / C::V; we resolve the duplicate Y via
+// find_pos<C::Y>(n).
+
+#include <array>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+template <typename L>
+struct PackedYUVSource {
+	// Reader for packed 4:2:2 YUV: one storage word holds two luma
+	// samples (Y0, Y1) plus a single U/V pair shared by both pixels.
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 1);
+	static_assert(L::h_sub == 2 && L::v_sub == 1);
+
+	using P = typename L::template plane<0>;
+	static constexpr size_t y0_idx = P::template find_pos<C::Y>(0);
+	static constexpr size_t y1_idx = P::template find_pos<C::Y>(1);
+	static constexpr size_t u_idx = P::template find_pos<C::U>();
+	static constexpr size_t v_idx = P::template find_pos<C::V>();
+
+	// find_pos returns num_comps for a missing tag; reject such layouts
+	// at compile time instead of indexing past the component arrays.
+	static_assert(y0_idx < P::num_comps && y1_idx < P::num_comps,
+		      "packed YUV 4:2:2 layout needs two Y components");
+	static_assert(u_idx < P::num_comps && v_idx < P::num_comps,
+		      "packed YUV 4:2:2 layout needs a U and a V component");
+	// read() decodes whichever luma sample it picks with Y0's width.
+	static_assert(P::comps[y0_idx].bits == P::comps[y1_idx].bits,
+		      "both Y components must have the same bit width");
+
+	// Decode the pixel at column x, row y. Even x selects Y0, odd x
+	// selects Y1; both share the word's U/V. Alpha is emitted as 0.
+	// W and H are not used here.
+	static YUV16 read(const Buffer<1>& buf, size_t x, size_t y,
+			  [[maybe_unused]] size_t W,
+			  [[maybe_unused]] size_t H) noexcept
+	{
+		// Two pixels per storage word, hence x / 2.
+		const uint8_t* p = buf.data[0] + y * buf.stride[0]
+				   + (x / 2) * P::bytes_per_pixel;
+		const auto vals = P::unpack(detail::load_word<P>(p));
+		const size_t y_pick = (x & 1) ? y1_idx : y0_idx;
+		return YUV16{
+			detail::decode_norm(P::comps[y0_idx].bits, vals[y_pick]),
+			detail::decode_norm(P::comps[u_idx].bits, vals[u_idx]),
+			detail::decode_norm(P::comps[v_idx].bits, vals[v_idx]),
+			uint16_t(0),
+		};
+	}
+};
+
+template <typename L>
+struct PackedYUVSink {
+	// Writer for packed 4:2:2 YUV: a 1x2 pixel block becomes one
+	// storage word with two luma samples and one averaged U/V pair.
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 1);
+	static_assert(L::h_sub == 2 && L::v_sub == 1);
+
+	using P = typename L::template plane<0>;
+	static constexpr size_t y0_idx = P::template find_pos<C::Y>(0);
+	static constexpr size_t y1_idx = P::template find_pos<C::Y>(1);
+	static constexpr size_t u_idx = P::template find_pos<C::U>();
+	static constexpr size_t v_idx = P::template find_pos<C::V>();
+
+	// find_pos returns num_comps for a missing tag; without these checks
+	// write_block would store one element past the value array.
+	static_assert(y0_idx < P::num_comps && y1_idx < P::num_comps,
+		      "packed YUV 4:2:2 layout needs two Y components");
+	static_assert(u_idx < P::num_comps && v_idx < P::num_comps,
+		      "packed YUV 4:2:2 layout needs a U and a V component");
+
+	static constexpr size_t block_h = 1;
+	static constexpr size_t block_w = 2;
+
+	// Encode the two pixels of `block` into the word that covers
+	// columns bx and bx + 1 of row by.
+	static void write_block(Buffer<1>& buf, size_t bx, size_t by,
+				const YUV16 (&block)[1][2]) noexcept
+	{
+		const YUV16& left = block[0][0];
+		const YUV16& right = block[0][1];
+
+		// Chroma is averaged in normalized 16-bit space; the sum is
+		// widened to 32 bits and the division truncates (no rounding).
+		const uint16_t u_avg = uint16_t((uint32_t(left.u) + uint32_t(right.u)) / 2);
+		const uint16_t v_avg = uint16_t((uint32_t(left.v) + uint32_t(right.v)) / 2);
+
+		// Value-initialized, so any component not assigned packs as 0.
+		std::array<uint16_t, P::num_comps> v{};
+		v[y0_idx] = detail::encode_norm(P::comps[y0_idx].bits, left.y);
+		v[y1_idx] = detail::encode_norm(P::comps[y1_idx].bits, right.y);
+		v[u_idx] = detail::encode_norm(P::comps[u_idx].bits, u_avg);
+		v[v_idx] = detail::encode_norm(P::comps[v_idx].bits, v_avg);
+
+		// Two pixels per storage word, hence bx / 2.
+		uint8_t* p = buf.data[0] + by * buf.stride[0]
+			     + (bx / 2) * P::bytes_per_pixel;
+		detail::store_word<P>(p, P::pack(v));
+	}
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/planar.h b/subprojects/pixpat/pixpat-native/src/io/planar.h
new file mode 100644
index 0000000..0dab685
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/planar.h
@@ -0,0 +1,257 @@
+#pragma once
+
+// 3-plane planar YUV. Two flavours:
+//
+// PlanarSource / PlanarSink — YUV/YVU 420/422/444, single Y per word,
+// single chroma per word. Chroma is averaged over h_sub × v_sub
+// on write.
+//
+// MultiPixelPlanarSource / MultiPixelPlanarSink — T430, multi-pixel-
+// per-word planar 4:4:4 (3 samples per uint32_t in each of 3
+// planes, plus 2-bit X padding). block_w = ppw, block_h = 1.
+//
+// Plane indices for Y / U / V are looked up via Layout::find_plane<C>(),
+// so swap_uv layouts (YVU vs YUV) work without separate templates.
+
+#include <array>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+template <typename L>
+struct PlanarSource {
+	// Reader for 3-plane YUV with one sample per storage word in each
+	// plane. Chroma planes are subsampled by L::h_sub x L::v_sub.
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 3);
+
+	// Planes are located by tag, so YUV and YVU orderings both work.
+	static constexpr size_t y_plane = L::template find_plane<C::Y>();
+	static constexpr size_t u_plane = L::template find_plane<C::U>();
+	static constexpr size_t v_plane = L::template find_plane<C::V>();
+
+	using YP = typename L::template plane<y_plane>;
+	using UP = typename L::template plane<u_plane>;
+	using VP = typename L::template plane<v_plane>;
+
+	// Position of the sample inside each plane's word, looked up by tag
+	// rather than assumed to be component 0, so a plane that lists
+	// another field (e.g. padding) first still decodes the right bits.
+	static constexpr size_t y_idx = YP::template find_pos<C::Y>();
+	static constexpr size_t u_idx = UP::template find_pos<C::U>();
+	static constexpr size_t v_idx = VP::template find_pos<C::V>();
+	static_assert(y_idx < YP::num_comps && u_idx < UP::num_comps &&
+		      v_idx < VP::num_comps);
+
+	// Decode the pixel at column x, row y: its own luma sample plus the
+	// chroma sample covering it. Alpha is emitted as 0. W and H are
+	// not used here.
+	static YUV16 read(const Buffer<3>& buf, size_t x, size_t y,
+			  [[maybe_unused]] size_t W,
+			  [[maybe_unused]] size_t H) noexcept
+	{
+		const uint8_t* yp = buf.data[y_plane] + y * buf.stride[y_plane]
+				    + x * YP::bytes_per_pixel;
+		const auto y_vals = YP::unpack(detail::load_word<YP>(yp));
+
+		// Chroma coordinates in the subsampled planes.
+		const size_t cx = x / L::h_sub;
+		const size_t cy = y / L::v_sub;
+		const uint8_t* up = buf.data[u_plane] + cy * buf.stride[u_plane]
+				    + cx * UP::bytes_per_pixel;
+		const uint8_t* vp = buf.data[v_plane] + cy * buf.stride[v_plane]
+				    + cx * VP::bytes_per_pixel;
+		const auto u_vals = UP::unpack(detail::load_word<UP>(up));
+		const auto v_vals = VP::unpack(detail::load_word<VP>(vp));
+
+		return YUV16{
+			detail::decode_norm(YP::comps[y_idx].bits, y_vals[y_idx]),
+			detail::decode_norm(UP::comps[u_idx].bits, u_vals[u_idx]),
+			detail::decode_norm(VP::comps[v_idx].bits, v_vals[v_idx]),
+			uint16_t(0),
+		};
+	}
+};
+
+template <typename L>
+struct PlanarSink {
+	// Writer for 3-plane YUV with one sample per storage word in each
+	// plane. A block is h_sub x v_sub pixels: every pixel gets its own
+	// luma word and the whole block shares one averaged U and V sample.
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 3);
+
+	// Planes are located by tag, so YUV and YVU orderings both work.
+	static constexpr size_t y_plane = L::template find_plane<C::Y>();
+	static constexpr size_t u_plane = L::template find_plane<C::U>();
+	static constexpr size_t v_plane = L::template find_plane<C::V>();
+
+	using YP = typename L::template plane<y_plane>;
+	using UP = typename L::template plane<u_plane>;
+	using VP = typename L::template plane<v_plane>;
+
+	// Position of the sample inside each plane's word, looked up by tag
+	// rather than assumed to be component 0.
+	static constexpr size_t y_idx = YP::template find_pos<C::Y>();
+	static constexpr size_t u_idx = UP::template find_pos<C::U>();
+	static constexpr size_t v_idx = VP::template find_pos<C::V>();
+	static_assert(y_idx < YP::num_comps && u_idx < UP::num_comps &&
+		      v_idx < VP::num_comps);
+
+	static constexpr size_t block_h = L::v_sub;
+	static constexpr size_t block_w = L::h_sub;
+
+	// Encode `block`, whose top-left pixel is at column bx, row by.
+	static void write_block(Buffer<3>& buf, size_t bx, size_t by,
+				const YUV16 (&block)[block_h][block_w]) noexcept
+	{
+		// Luma: one storage word per pixel of the block.
+		for (size_t dy = 0; dy < block_h; ++dy) {
+			uint8_t* y_row = buf.data[y_plane]
+					 + (by + dy) * buf.stride[y_plane];
+			for (size_t dx = 0; dx < block_w; ++dx) {
+				std::array<uint16_t, YP::num_comps> v{};
+				v[y_idx] = detail::encode_norm(YP::comps[y_idx].bits,
+							       block[dy][dx].y);
+				detail::store_word<YP>(
+					y_row + (bx + dx) * YP::bytes_per_pixel,
+					YP::pack(v));
+			}
+		}
+
+		// Chroma: sum over the block in 32 bits, then divide. The
+		// division truncates (no round-half-up).
+		uint32_t u_sum = 0, v_sum = 0;
+		for (size_t dy = 0; dy < block_h; ++dy) {
+			for (size_t dx = 0; dx < block_w; ++dx) {
+				u_sum += block[dy][dx].u;
+				v_sum += block[dy][dx].v;
+			}
+		}
+		constexpr uint32_t n = block_h * block_w;
+
+		// Chroma coordinates in the subsampled planes.
+		const size_t cx = bx / L::h_sub;
+		const size_t cy = by / L::v_sub;
+
+		std::array<uint16_t, UP::num_comps> uw{};
+		uw[u_idx] = detail::encode_norm(UP::comps[u_idx].bits, uint16_t(u_sum / n));
+		detail::store_word<UP>(
+			buf.data[u_plane] + cy * buf.stride[u_plane]
+			+ cx * UP::bytes_per_pixel,
+			UP::pack(uw));
+
+		std::array<uint16_t, VP::num_comps> vw{};
+		vw[v_idx] = detail::encode_norm(VP::comps[v_idx].bits, uint16_t(v_sum / n));
+		detail::store_word<VP>(
+			buf.data[v_plane] + cy * buf.stride[v_plane]
+			+ cx * VP::bytes_per_pixel,
+			VP::pack(vw));
+	}
+};
+
+// T430-style 3-plane multi-pixel-per-word planar 4:4:4. Each plane has
+// `ppw` samples of the same component (Y in plane 0, U in 1, V in 2 —
+// or whichever ordering find_plane resolves) packed into a single
+// storage word. block_w = ppw, block_h = 1. No chroma subsampling.
+template <typename L>
+struct MultiPixelPlanarSource {
+	// Reader for 3-plane 4:4:4 YUV in which every storage word carries
+	// `ppw` samples of one component.
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 3);
+	static_assert(L::h_sub == 1 && L::v_sub == 1);
+
+	// Planes are located by tag rather than by fixed position.
+	static constexpr size_t y_plane = L::template find_plane<C::Y>();
+	static constexpr size_t u_plane = L::template find_plane<C::U>();
+	static constexpr size_t v_plane = L::template find_plane<C::V>();
+
+	using YP = typename L::template plane<y_plane>;
+	using UP = typename L::template plane<u_plane>;
+	using VP = typename L::template plane<v_plane>;
+
+	// Samples per word; all three planes must agree.
+	static constexpr size_t ppw = YP::template component_count<C::Y>();
+	static_assert(ppw == UP::template component_count<C::U>());
+	static_assert(ppw == VP::template component_count<C::V>());
+
+	// Width of the first sample of each tag, used for every sample of
+	// that tag (they are expected to share one width).
+	static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits;
+	static constexpr unsigned u_bits = UP::comps[UP::template find_pos<C::U>(0)].bits;
+	static constexpr unsigned v_bits = VP::comps[VP::template find_pos<C::V>(0)].bits;
+
+	// Decode the pixel at column x, row y. Alpha is emitted as 0.
+	// W and H are not used here.
+	static YUV16 read(const Buffer<3>& buf, size_t x, size_t y,
+			  [[maybe_unused]] size_t W,
+			  [[maybe_unused]] size_t H) noexcept
+	{
+		const size_t word = x / ppw;	// storage word holding pixel x
+		const size_t slot = x % ppw;	// sample index inside that word
+
+		const auto y_vals = YP::unpack(detail::load_word<YP>(
+			buf.data[y_plane] + y * buf.stride[y_plane]
+			+ word * YP::bytes_per_pixel));
+		const auto u_vals = UP::unpack(detail::load_word<UP>(
+			buf.data[u_plane] + y * buf.stride[u_plane]
+			+ word * UP::bytes_per_pixel));
+		const auto v_vals = VP::unpack(detail::load_word<VP>(
+			buf.data[v_plane] + y * buf.stride[v_plane]
+			+ word * VP::bytes_per_pixel));
+
+		const size_t y_pos = YP::template find_pos<C::Y>(slot);
+		const size_t u_pos = UP::template find_pos<C::U>(slot);
+		const size_t v_pos = VP::template find_pos<C::V>(slot);
+
+		return YUV16{
+			detail::decode_norm(y_bits, y_vals[y_pos]),
+			detail::decode_norm(u_bits, u_vals[u_pos]),
+			detail::decode_norm(v_bits, v_vals[v_pos]),
+			uint16_t(0),
+		};
+	}
+};
+
+template <typename L>
+struct MultiPixelPlanarSink {
+	// Writer for 3-plane 4:4:4 YUV in which every storage word carries
+	// `ppw` samples of one component. A block is 1 x ppw pixels and
+	// fills exactly one word in each of the three planes.
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 3);
+	static_assert(L::h_sub == 1 && L::v_sub == 1);
+
+	// Planes are located by tag rather than by fixed position.
+	static constexpr size_t y_plane = L::template find_plane<C::Y>();
+	static constexpr size_t u_plane = L::template find_plane<C::U>();
+	static constexpr size_t v_plane = L::template find_plane<C::V>();
+
+	using YP = typename L::template plane<y_plane>;
+	using UP = typename L::template plane<u_plane>;
+	using VP = typename L::template plane<v_plane>;
+
+	// Samples per word. write_block indexes all three planes with
+	// 0..ppw-1, so the chroma planes must carry exactly as many samples
+	// as the luma plane; otherwise find_pos would return num_comps and
+	// the store would land past the value array.
+	static constexpr size_t ppw = YP::template component_count<C::Y>();
+	static_assert(ppw >= 1);
+	static_assert(ppw == UP::template component_count<C::U>());
+	static_assert(ppw == VP::template component_count<C::V>());
+
+	// Optional X padding in each plane (num_comps means "absent").
+	static constexpr size_t y_x_idx = YP::template find_pos<C::X>();
+	static constexpr size_t u_x_idx = UP::template find_pos<C::X>();
+	static constexpr size_t v_x_idx = VP::template find_pos<C::X>();
+	static constexpr bool y_has_x = (y_x_idx < YP::num_comps);
+	static constexpr bool u_has_x = (u_x_idx < UP::num_comps);
+	static constexpr bool v_has_x = (v_x_idx < VP::num_comps);
+
+	static constexpr size_t block_h = 1;
+	static constexpr size_t block_w = ppw;
+
+	// Encode the ppw pixels of `block`, starting at column bx, row by.
+	static void write_block(Buffer<3>& buf, size_t bx, size_t by,
+				const YUV16 (&block)[1][ppw]) noexcept
+	{
+		std::array<uint16_t, YP::num_comps> yv{};
+		std::array<uint16_t, UP::num_comps> uv{};
+		std::array<uint16_t, VP::num_comps> vv{};
+
+		// Width of the first sample of each tag, used for every sample
+		// of that tag (they are expected to share one width).
+		constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits;
+		constexpr unsigned u_bits = UP::comps[UP::template find_pos<C::U>(0)].bits;
+		constexpr unsigned v_bits = VP::comps[VP::template find_pos<C::V>(0)].bits;
+		for (size_t i = 0; i < ppw; ++i) {
+			yv[YP::template find_pos<C::Y>(i)] =
+				detail::encode_norm(y_bits, block[0][i].y);
+			uv[UP::template find_pos<C::U>(i)] =
+				detail::encode_norm(u_bits, block[0][i].u);
+			vv[VP::template find_pos<C::V>(i)] =
+				detail::encode_norm(v_bits, block[0][i].v);
+		}
+
+		// Padding is written as 0.
+		if constexpr (y_has_x) yv[y_x_idx] = 0;
+		if constexpr (u_has_x) uv[u_x_idx] = 0;
+		if constexpr (v_has_x) vv[v_x_idx] = 0;
+
+		// Index of the storage word that holds this block.
+		const size_t gx = bx / ppw;
+		detail::store_word<YP>(
+			buf.data[y_plane] + by * buf.stride[y_plane]
+			+ gx * YP::bytes_per_pixel,
+			YP::pack(yv));
+		detail::store_word<UP>(
+			buf.data[u_plane] + by * buf.stride[u_plane]
+			+ gx * UP::bytes_per_pixel,
+			UP::pack(uv));
+		detail::store_word<VP>(
+			buf.data[v_plane] + by * buf.stride[v_plane]
+			+ gx * VP::bytes_per_pixel,
+			VP::pack(vv));
+	}
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/io/semiplanar.h b/subprojects/pixpat/pixpat-native/src/io/semiplanar.h
new file mode 100644
index 0000000..00e7731
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/io/semiplanar.h
@@ -0,0 +1,242 @@
+#pragma once
+
+// 2-plane semiplanar YUV. Two flavours:
+//
+// SemiplanarSource / SemiplanarSink — NV12/NV21/NV16/NV61, single
+// pixel per Y storage word, single chroma pair per chroma word.
+//
+// MultiPixelSemiplanarSource / MultiPixelSemiplanarSink — P030/P230,
+// multiple Y pixels per Y word and multiple chroma pairs per
+// chroma word. The Y plane has `ppw_y = component_count<Y>()` Y
+// samples per storage word; the chroma plane has `pairs =
+// component_count<U>()` U/V pairs per storage word. block_w =
+// pairs × h_sub, block_h = v_sub — each block exactly fills one
+// chroma word.
+
+#include <array>
+
+#include "../layout.h"
+#include "detail.h"
+
+namespace pixpat
+{
+
+template <typename L>
+struct SemiplanarSource {
+	// Reader for 2-plane YUV: plane 0 holds one luma sample per storage
+	// word, plane 1 holds one U/V pair per storage word, subsampled by
+	// L::h_sub x L::v_sub.
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 2);
+
+	using YP = typename L::template plane<0>;
+	using CP = typename L::template plane<1>;
+	static constexpr size_t y_idx = YP::template find_pos<C::Y>();
+	static constexpr size_t u_idx = CP::template find_pos<C::U>();
+	static constexpr size_t v_idx = CP::template find_pos<C::V>();
+
+	// find_pos returns num_comps for a missing tag; reject such layouts
+	// at compile time instead of indexing past the component arrays.
+	static_assert(y_idx < YP::num_comps, "plane 0 must contain a Y component");
+	static_assert(u_idx < CP::num_comps && v_idx < CP::num_comps,
+		      "plane 1 must contain a U and a V component");
+
+	// Decode the pixel at column x, row y: its own luma sample plus the
+	// chroma pair covering it. Alpha is emitted as 0. W and H are not
+	// used here.
+	static YUV16 read(const Buffer<2>& buf, size_t x, size_t y,
+			  [[maybe_unused]] size_t W,
+			  [[maybe_unused]] size_t H) noexcept
+	{
+		const uint8_t* yp = buf.data[0] + y * buf.stride[0] + x * YP::bytes_per_pixel;
+		const auto y_vals = YP::unpack(detail::load_word<YP>(yp));
+
+		// Chroma coordinates in the subsampled plane.
+		const size_t cx = x / L::h_sub;
+		const size_t cy = y / L::v_sub;
+		const uint8_t* cp = buf.data[1] + cy * buf.stride[1] + cx * CP::bytes_per_pixel;
+		const auto c_vals = CP::unpack(detail::load_word<CP>(cp));
+
+		return YUV16{
+			detail::decode_norm(YP::comps[y_idx].bits, y_vals[y_idx]),
+			detail::decode_norm(CP::comps[u_idx].bits, c_vals[u_idx]),
+			detail::decode_norm(CP::comps[v_idx].bits, c_vals[v_idx]),
+			uint16_t(0),
+		};
+	}
+};
+
+template <typename L>
+struct SemiplanarSink {
+	// Writer for 2-plane YUV: plane 0 holds one luma sample per storage
+	// word, plane 1 holds one U/V pair per storage word. A block is
+	// h_sub x v_sub pixels and produces exactly one chroma word.
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 2);
+
+	using YP = typename L::template plane<0>;
+	using CP = typename L::template plane<1>;
+	static constexpr size_t y_idx = YP::template find_pos<C::Y>();
+	static constexpr size_t u_idx = CP::template find_pos<C::U>();
+	static constexpr size_t v_idx = CP::template find_pos<C::V>();
+
+	// find_pos returns num_comps for a missing tag; without these checks
+	// write_block would store one element past the value arrays.
+	static_assert(y_idx < YP::num_comps, "plane 0 must contain a Y component");
+	static_assert(u_idx < CP::num_comps && v_idx < CP::num_comps,
+		      "plane 1 must contain a U and a V component");
+
+	static constexpr size_t block_h = L::v_sub;
+	static constexpr size_t block_w = L::h_sub;
+
+	// Encode `block`, whose top-left pixel is at column bx, row by.
+	static void write_block(Buffer<2>& buf, size_t bx, size_t by,
+				const YUV16 (&block)[block_h][block_w]) noexcept
+	{
+		// Luma: one storage word per pixel of the block.
+		for (size_t dy = 0; dy < block_h; ++dy) {
+			uint8_t* y_row = buf.data[0] + (by + dy) * buf.stride[0];
+			for (size_t dx = 0; dx < block_w; ++dx) {
+				std::array<uint16_t, YP::num_comps> v{};
+				v[y_idx] = detail::encode_norm(YP::comps[y_idx].bits,
+							       block[dy][dx].y);
+				detail::store_word<YP>(
+					y_row + (bx + dx) * YP::bytes_per_pixel,
+					YP::pack(v));
+			}
+		}
+
+		// Chroma: sum over the block in 32 bits, then divide. The
+		// division truncates (no round-half-up).
+		uint32_t u_sum = 0, v_sum = 0;
+		for (size_t dy = 0; dy < block_h; ++dy) {
+			for (size_t dx = 0; dx < block_w; ++dx) {
+				u_sum += block[dy][dx].u;
+				v_sum += block[dy][dx].v;
+			}
+		}
+		constexpr uint32_t n = block_h * block_w;
+		const uint16_t u_avg = uint16_t(u_sum / n);
+		const uint16_t v_avg = uint16_t(v_sum / n);
+
+		std::array<uint16_t, CP::num_comps> uv{};
+		uv[u_idx] = detail::encode_norm(CP::comps[u_idx].bits, u_avg);
+		uv[v_idx] = detail::encode_norm(CP::comps[v_idx].bits, v_avg);
+
+		// Chroma coordinates in the subsampled plane.
+		const size_t cx = bx / L::h_sub;
+		const size_t cy = by / L::v_sub;
+		uint8_t* cp = buf.data[1] + cy * buf.stride[1] + cx * CP::bytes_per_pixel;
+		detail::store_word<CP>(cp, CP::pack(uv));
+	}
+};
+
+// Multi-pixel-per-word semiplanar (P030: 4:2:0, P230: 4:2:2). All Y
+// components share the same bit width; same for U and V.
+template <typename L>
+struct MultiPixelSemiplanarSource {
+	// Reader for 2-plane YUV where a luma word carries `ppw_y` Y
+	// samples and a chroma word carries `pairs` U/V pairs.
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 2);
+
+	using YP = typename L::template plane<0>;
+	using CP = typename L::template plane<1>;
+
+	// Samples per luma word and U/V pairs per chroma word.
+	static constexpr size_t ppw_y = YP::template component_count<C::Y>();
+	static constexpr size_t pairs = CP::template component_count<C::U>();
+	static_assert(ppw_y >= 1 && pairs >= 1);
+	static_assert(pairs == CP::template component_count<C::V>());
+
+	// Width of the first sample of each tag, used for every sample of
+	// that tag (they are expected to share one width).
+	static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits;
+	static constexpr unsigned u_bits = CP::comps[CP::template find_pos<C::U>(0)].bits;
+	static constexpr unsigned v_bits = CP::comps[CP::template find_pos<C::V>(0)].bits;
+
+	// Decode the pixel at column x, row y. Alpha is emitted as 0.
+	// W and H are not used here.
+	static YUV16 read(const Buffer<2>& buf, size_t x, size_t y,
+			  [[maybe_unused]] size_t W,
+			  [[maybe_unused]] size_t H) noexcept
+	{
+		// Luma: word index and sample slot within the word.
+		const size_t luma_word = x / ppw_y;
+		const size_t luma_slot = x % ppw_y;
+		const auto luma = YP::unpack(detail::load_word<YP>(
+			buf.data[0] + y * buf.stride[0]
+			+ luma_word * YP::bytes_per_pixel));
+
+		// Chroma: subsampled coordinates first, then word and slot.
+		const size_t chroma_x = x / L::h_sub;
+		const size_t chroma_y = y / L::v_sub;
+		const size_t chroma_word = chroma_x / pairs;
+		const size_t chroma_slot = chroma_x % pairs;
+		const auto chroma = CP::unpack(detail::load_word<CP>(
+			buf.data[1] + chroma_y * buf.stride[1]
+			+ chroma_word * CP::bytes_per_pixel));
+
+		const size_t y_pos = YP::template find_pos<C::Y>(luma_slot);
+		const size_t u_pos = CP::template find_pos<C::U>(chroma_slot);
+		const size_t v_pos = CP::template find_pos<C::V>(chroma_slot);
+
+		return YUV16{
+			detail::decode_norm(y_bits, luma[y_pos]),
+			detail::decode_norm(u_bits, chroma[u_pos]),
+			detail::decode_norm(v_bits, chroma[v_pos]),
+			uint16_t(0),
+		};
+	}
+};
+
+template <typename L>
+struct MultiPixelSemiplanarSink {
+	// Writer for 2-plane YUV where a luma word carries `ppw_y` Y
+	// samples and a chroma word carries `pairs` U/V pairs. One block
+	// produces exactly one chroma word plus the luma words it covers.
+	using Layout = L;
+	using Pixel = YUV16;
+
+	static_assert(L::kind == ColorKind::YUV);
+	static_assert(L::num_planes == 2);
+
+	using YP = typename L::template plane<0>;
+	using CP = typename L::template plane<1>;
+	static constexpr size_t ppw_y = YP::template component_count<C::Y>();
+	static constexpr size_t pairs = CP::template component_count<C::U>();
+	static_assert(ppw_y >= 1 && pairs >= 1);
+	// write_block indexes U and V with the same 0..pairs-1 range; with
+	// fewer V than U components find_pos<C::V>(p) would return
+	// num_comps and the store would land past the value array.
+	static_assert(pairs == CP::template component_count<C::V>());
+
+	// One block exactly fills one chroma word: `pairs` chroma pairs,
+	// each covering h_sub luma columns x v_sub rows.
+	static constexpr size_t block_w = pairs * L::h_sub;
+	static constexpr size_t block_h = L::v_sub;
+	static_assert(block_w % ppw_y == 0,
+		      "block width must be a multiple of Y-pixels-per-word");
+	static constexpr size_t y_words_per_row = block_w / ppw_y;
+
+	// Width of the first sample of each tag, used for every sample of
+	// that tag (they are expected to share one width).
+	static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits;
+	static constexpr unsigned u_bits = CP::comps[CP::template find_pos<C::U>(0)].bits;
+	static constexpr unsigned v_bits = CP::comps[CP::template find_pos<C::V>(0)].bits;
+
+	// Encode `block`, whose top-left pixel is at column bx, row by.
+	static void write_block(Buffer<2>& buf, size_t bx, size_t by,
+				const YUV16 (&block)[block_h][block_w]) noexcept
+	{
+		// Luma plane: y_words_per_row words per row, block_h rows.
+		for (size_t dy = 0; dy < block_h; ++dy) {
+			uint8_t* y_row = buf.data[0]
+					 + (by + dy) * buf.stride[0];
+			for (size_t w = 0; w < y_words_per_row; ++w) {
+				// Value-initialized, so fields other than Y pack as 0.
+				std::array<uint16_t, YP::num_comps> v{};
+				for (size_t i = 0; i < ppw_y; ++i) {
+					const size_t pos = YP::template find_pos<C::Y>(i);
+					v[pos] = detail::encode_norm(
+						y_bits, block[dy][w * ppw_y + i].y);
+				}
+				detail::store_word<YP>(
+					y_row + (bx / ppw_y + w)
+					* YP::bytes_per_pixel,
+					YP::pack(v));
+			}
+		}
+
+		// Chroma plane: one word of `pairs` U/V pairs. Each pair is
+		// the average of the U and V values of the h_sub x v_sub
+		// pixels it covers; the division truncates.
+		std::array<uint16_t, CP::num_comps> uv{};
+		constexpr uint32_t n = L::h_sub * L::v_sub;
+		for (size_t p = 0; p < pairs; ++p) {
+			uint32_t u_sum = 0, v_sum = 0;
+			for (size_t dy = 0; dy < block_h; ++dy) {
+				for (size_t dx = 0; dx < L::h_sub; ++dx) {
+					u_sum += block[dy][p * L::h_sub + dx].u;
+					v_sum += block[dy][p * L::h_sub + dx].v;
+				}
+			}
+			uv[CP::template find_pos<C::U>(p)] =
+				detail::encode_norm(u_bits, uint16_t(u_sum / n));
+			uv[CP::template find_pos<C::V>(p)] =
+				detail::encode_norm(v_bits, uint16_t(v_sum / n));
+		}
+
+		// One chroma word per block, so the word index is bx / block_w.
+		const size_t cy = by / L::v_sub;
+		const size_t uv_word_idx = bx / block_w;
+		detail::store_word<CP>(
+			buf.data[1] + cy * buf.stride[1]
+			+ uv_word_idx * CP::bytes_per_pixel,
+			CP::pack(uv));
+	}
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/layout.h b/subprojects/pixpat/pixpat-native/src/layout.h
new file mode 100644
index 0000000..d092bb1
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/layout.h
@@ -0,0 +1,141 @@
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+#include <utility>
+
+namespace pixpat
+{
+
+enum class ColorKind { RGB, YUV }; // Colour model of a pixel type or layout (see RGB16::kind, YUV16::kind, Layout::kind).
+
+// Normalized inter-stage pixel types. 16-bit-per-component integer.
+// N-bit storage values bit-replicate up to normalized 16-bit (so
+// N-bit max maps to 0xFFFF); encoding back is a truncating
+// `normalized >> (16 - N)`. See io/detail.h for the round-trip
+// argument. Sources without an A component emit a=0; cross-color-kind
+// ColorXfm resets a=0xFFFF; sinks with X write 0, sinks with A
+// encode `a`.
+struct RGB16 {
+	// Normalized RGB pixel: four 16-bit values in the order r, g, b, a.
+	// The member order matters because pixels are built by positional
+	// aggregate initialization.
+	static constexpr ColorKind kind = ColorKind::RGB;
+	uint16_t r;
+	uint16_t g;
+	uint16_t b;
+	uint16_t a;
+};
+
+struct YUV16 {
+	// Normalized YUV pixel: four 16-bit values in the order y, u, v, a.
+	// The member order matters because pixels are built by positional
+	// aggregate initialization.
+	static constexpr ColorKind kind = ColorKind::YUV;
+	uint16_t y;
+	uint16_t u;
+	uint16_t v;
+	uint16_t a;
+};
+
+inline constexpr uint16_t kNormMax = 0xFFFF; // Largest value of a normalized 16-bit component.
+
+enum class C : uint8_t { X, A, R, G, B, Y, U, V }; // Component tags: X is padding (sinks write 0), A is alpha, R/G/B and Y/U/V are the colour channels.
+
+struct Comp { // Describes one bit-field of a Plane storage word.
+ C c; // Which component the field carries.
+ uint8_t bits; // Field width in bits; Plane::pack/unpack mask values to this width.
+ uint8_t shift; // Bit offset of the field in the storage word; pack shifts left by it, unpack shifts right.
+};
+
+template <typename Storage, Comp... Cs>
+struct Plane {
+	// Compile-time description of one plane's storage word: an unsigned
+	// word type plus the list of bit-fields (Comp) packed into it.
+	using storage_t = Storage;
+
+	static constexpr size_t num_comps = sizeof...(Cs);
+	static constexpr std::array<Comp, num_comps> comps{ Cs ... };
+	// Sum of all field widths.
+	static constexpr size_t total_bits = (size_t(Cs.bits) + ... + 0);
+	static constexpr size_t storage_bits = sizeof(Storage) * 8;
+	// Bytes occupied by one packed word: total_bits rounded up to bytes.
+	static constexpr size_t bytes_per_pixel = (total_bits + 7) / 8;
+
+	static_assert(total_bits <= storage_bits, "components overflow storage word");
+
+	// Returns the index of the n-th (0-based) component tagged Tag, or
+	// num_comps when there are not that many.
+	template <C Tag>
+	static constexpr size_t find_pos(size_t n = 0)
+	{
+		for (size_t i = 0; i != num_comps; ++i) {
+			if (comps[i].c != Tag)
+				continue;
+			// Matches still to be skipped are counted down in n.
+			if (n-- == 0)
+				return i;
+		}
+		return num_comps;
+	}
+
+	// Returns how many components are tagged Tag. Multi-pixel-per-word
+	// formats use this to derive their pixels-per-word.
+	template <C Tag>
+	static constexpr size_t component_count()
+	{
+		size_t matches = 0;
+		for (const Comp& comp : comps)
+			matches += (comp.c == Tag) ? 1 : 0;
+		return matches;
+	}
+
+	// Builds a storage word from per-component values: each value is
+	// masked to its field width and OR-ed in at the field's shift.
+	static constexpr Storage pack(const std::array<uint16_t, num_comps>& v) noexcept
+	{
+		Storage word{};
+		for (size_t i = 0; i < num_comps; ++i) {
+			const Comp& field = comps[i];
+			const Storage mask = (Storage{ 1 } << field.bits) - 1;
+			word |= Storage(v[i] & mask) << field.shift;
+		}
+		return word;
+	}
+
+	// Inverse of pack: extracts every field of `word` into an array
+	// indexed like `comps`.
+	static constexpr std::array<uint16_t, num_comps> unpack(Storage word) noexcept
+	{
+		std::array<uint16_t, num_comps> fields{};
+		for (size_t i = 0; i < num_comps; ++i) {
+			const Comp& field = comps[i];
+			const Storage mask = (Storage{ 1 } << field.bits) - 1;
+			fields[i] = uint16_t((word >> field.shift) & mask);
+		}
+		return fields;
+	}
+};
+
+template <ColorKind Kind, size_t Hsub, size_t Vsub, typename ... Planes>
+struct Layout {
+	// Compile-time description of a pixel format: colour kind, chroma
+	// subsampling factors and the list of Plane types.
+	static constexpr ColorKind kind = Kind;
+	static constexpr size_t h_sub = Hsub;
+	static constexpr size_t v_sub = Vsub;
+	static constexpr size_t num_planes = sizeof...(Planes);
+
+	// N-th plane type.
+	template <size_t N>
+	using plane = std::tuple_element_t<N, std::tuple<Planes...> >;
+
+	// Returns the index of the first plane that contains a component
+	// tagged Tag, or num_planes if none does. This lets planar
+	// sources/sinks find the U and V planes regardless of their order.
+	template <C Tag>
+	static constexpr size_t find_plane()
+	{
+		return []<size_t... I>(std::index_sequence<I...>) {
+			// One flag per plane: does plane I carry Tag?
+			const std::array<bool, sizeof...(I)> has_tag{
+				(plane<I>::template find_pos<Tag>() < plane<I>::num_comps)...
+			};
+			for (size_t i = 0; i < has_tag.size(); ++i)
+				if (has_tag[i])
+					return i;
+			return size_t(num_planes);
+		} (std::make_index_sequence<num_planes>{});
+	}
+};
+
+template <size_t N>
+struct Buffer { // Pointer/stride pairs for the N planes of an image.
+ std::array<uint8_t*, N> data; // data[i]: base pointer of plane i.
+ std::array<size_t, N> stride; // stride[i]: byte offset between consecutive rows of plane i (used as data[i] + row * stride[i]).
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/params.h b/subprojects/pixpat/pixpat-native/src/params.h
new file mode 100644
index 0000000..aa2be67
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/params.h
@@ -0,0 +1,219 @@
+#pragma once
+
+#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "layout.h"
+
+namespace pixpat
+{
+
+// Pattern-specific parameters parsed from pixpat_pattern_opts::params.
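+// The wire format is a comma-separated list of "key=value" items;
+// whitespace around tokens is trimmed, and keys are matched
+// case-insensitively (ASCII). Each item is split at its first '=', so
+// keys may contain neither ',' nor '=' while values may not contain
+// ','. An empty item or an item without a key or '=' is malformed; a
+// single trailing ',' is tolerated. Malformed input leaves ok()
+// returning false — the pattern dispatcher fails the call when that
+// happens.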
+//
+// Patterns query keys by name via get() / get_int() / get_hex_color().
+// Unknown keys are ignored: each pattern handles forward compatibility,
+// not the parser.
+class Params
+{
+public:
+ // Parses `csv` into key/value pairs. A null or empty string yields
+ // an empty, valid parameter set.
+ explicit Params(const char* csv);
+
+ // False once the constructor has hit a malformed item.
+ bool ok() const noexcept {
+ return ok_;
+ }
+
+ // Value of the first entry whose key matches `key`
+ // (ASCII case-insensitive), or nullopt when no entry matches.
+ std::optional<std::string_view> get(std::string_view key) const noexcept;
+ // Value of `key` parsed as an optionally signed decimal integer;
+ // nullopt when the key is absent or the value is not such a number.
+ std::optional<int> get_int(std::string_view key) const noexcept;
+ // Value of `key` parsed as a hex color; nullopt when the key is
+ // absent or the value is not a supported hex color form.
+ std::optional<RGB16> get_hex_color(std::string_view key) const noexcept;
+
+private:
+ // Parsed (key, value) pairs in input order; duplicates are kept.
+ std::vector<std::pair<std::string, std::string> > kv_;
+ // Cleared by the constructor on the first malformed item.
+ bool ok_{ true };
+};
+
+namespace detail
+{
+
+// Lower-cases a single ASCII capital letter; any other byte passes
+// through untouched. Does not consult the C locale.
+inline char ascii_tolower(char c) noexcept
+{
+	if (c < 'A' || c > 'Z')
+		return c;
+	return char(c + ('a' - 'A'));
+}
+
+// Returns the sub-view of `s` with leading and trailing whitespace
+// (as classified by std::isspace) removed. An all-whitespace input
+// produces an empty view.
+inline std::string_view trim(std::string_view s) noexcept
+{
+	const auto blank = [](char ch) {
+		return std::isspace(static_cast<unsigned char>(ch)) != 0;
+	};
+
+	size_t first = 0;
+	size_t last = s.size();
+	while (first < last && blank(s[first]))
+		++first;
+	while (last > first && blank(s[last - 1]))
+		--last;
+	return s.substr(first, last - first);
+}
+
+// ASCII case-insensitive equality of two views.
+inline bool ieq(std::string_view a, std::string_view b) noexcept
+{
+	size_t remaining = a.size();
+	if (remaining != b.size())
+		return false;
+	while (remaining-- > 0) {
+		if (ascii_tolower(a[remaining]) != ascii_tolower(b[remaining]))
+			return false;
+	}
+	return true;
+}
+
+} // namespace detail
+
+
+// Splits `csv` on ',' and each item on its first '='. Parsing stops at
+// the first malformed item (empty item, missing '=', or empty key):
+// ok_ is cleared and the pairs collected so far are left in place.
+// A null or empty `csv` is a valid, empty parameter set.
+inline Params::Params(const char* csv)
+{
+	if (csv == nullptr || csv[0] == '\0')
+		return;
+
+	constexpr size_t npos = std::string_view::npos;
+	std::string_view rest(csv);
+
+	// `rest` is non-empty on entry, so a do/while visits exactly the
+	// same items as a pre-checked loop. Nothing remains after a final
+	// ',' and the loop simply ends there.
+	do {
+		const size_t cut = rest.find(',');
+		// substr(0, npos) is the whole remainder.
+		const std::string_view item = detail::trim(rest.substr(0, cut));
+		rest = (cut == npos) ? std::string_view{} : rest.substr(cut + 1);
+
+		// An empty item has no '=' either, so this one check rejects
+		// both the empty-item and the missing-'=' cases.
+		const size_t eq = item.find('=');
+		if (eq == npos) {
+			ok_ = false;
+			return;
+		}
+
+		const std::string_view key = detail::trim(item.substr(0, eq));
+		if (key.empty()) {
+			ok_ = false;
+			return;
+		}
+		const std::string_view value = detail::trim(item.substr(eq + 1));
+		kv_.emplace_back(std::string(key), std::string(value));
+	} while (!rest.empty());
+}
+
+// Linear search in insertion order; the first key that matches
+// case-insensitively wins. The returned view refers to the string
+// stored in kv_.
+inline std::optional<std::string_view>
+Params::get(std::string_view key) const noexcept
+{
+	for (size_t idx = 0; idx < kv_.size(); ++idx) {
+		const auto& entry = kv_[idx];
+		if (!detail::ieq(entry.first, key))
+			continue;
+		return std::string_view(entry.second);
+	}
+	return std::nullopt;
+}
+
+// Parses the value stored under `key` as a decimal integer with an
+// optional single leading '+' or '-'.
+//
+// Returns std::nullopt when:
+//   - the key is absent or its value is empty,
+//   - the value is only a sign,
+//   - any character after the sign is not a decimal digit,
+//   - the number does not fit in `int`.
+//
+// The range check matters because the text comes straight from the
+// caller-supplied parameter string: accumulating directly into an
+// `int` would be signed overflow (undefined behaviour) for long digit
+// strings such as "99999999999".
+inline std::optional<int>
+Params::get_int(std::string_view key) const noexcept
+{
+	const auto v = get(key);
+	if (!v || v->empty())
+		return std::nullopt;
+
+	const bool negative = ((*v)[0] == '-');
+	size_t i = (negative || (*v)[0] == '+') ? 1 : 0;
+	if (i == v->size())
+		return std::nullopt;
+
+	// Largest magnitude that is still representable: INT_MAX for
+	// non-negative values, INT_MAX + 1 (== -INT_MIN) for negative ones.
+	const long long limit =
+		static_cast<long long>(std::numeric_limits<int>::max())
+		+ (negative ? 1 : 0);
+
+	// The magnitude is checked after every digit, so it never exceeds
+	// `limit` before the next multiply and cannot overflow long long.
+	long long magnitude = 0;
+	for (; i < v->size(); ++i) {
+		const char c = (*v)[i];
+		if (c < '0' || c > '9')
+			return std::nullopt;
+		magnitude = magnitude * 10 + (c - '0');
+		if (magnitude > limit)
+			return std::nullopt;
+	}
+	return static_cast<int>(negative ? -magnitude : magnitude);
+}
+
+// Parses a hex color string. The optional `0x`/`0X` prefix is allowed.
+// The number of hex digits after the prefix selects the layout:
+// 6 digits — 8-bit RRGGBB (alpha defaults to opaque)
+// 8 digits — 8-bit AARRGGBB (alpha-first)
+// 12 digits — 16-bit RRRRGGGGBBBB (alpha defaults to opaque)
+// 16 digits — 16-bit AAAARRRRGGGGBBBB (alpha-first)
+// 8-bit components are byte-replicated to the normalized 16-bit form
+// (0xFF → 0xFFFF); 16-bit components are stored directly. Any other
+// length, malformed digits, or stray separators yield std::nullopt.
+inline std::optional<RGB16>
+Params::get_hex_color(std::string_view key) const noexcept
+{
+	const auto raw = get(key);
+	if (!raw)
+		return std::nullopt;
+
+	std::string_view hex = *raw;
+	if (hex.size() >= 2 && hex[0] == '0' && (hex[1] == 'x' || hex[1] == 'X'))
+		hex.remove_prefix(2);
+
+	// The digit count alone selects component width and alpha presence.
+	size_t per = 0;            // hex digits per component
+	bool with_alpha = false;
+	switch (hex.size()) {
+	case 6:  per = 2; break;
+	case 8:  per = 2; with_alpha = true; break;
+	case 12: per = 4; break;
+	case 16: per = 4; with_alpha = true; break;
+	default: return std::nullopt;
+	}
+	const bool wide = (per == 4);
+
+	// Slot 0 is alpha (pre-set to fully opaque at the source width),
+	// slots 1..3 are R, G, B. The text stores alpha first when present,
+	// so parsing simply starts at slot 0 or slot 1.
+	unsigned chan[4] = { wide ? 0xFFFFu : 0xFFu, 0u, 0u, 0u };
+	size_t cursor = 0;
+	for (size_t slot = with_alpha ? 0 : 1; slot < 4; ++slot) {
+		unsigned value = 0;
+		for (size_t k = 0; k < per; ++k, ++cursor) {
+			const char ch = hex[cursor];
+			unsigned nibble = 0;
+			if (ch >= '0' && ch <= '9')
+				nibble = unsigned(ch - '0');
+			else if (ch >= 'a' && ch <= 'f')
+				nibble = unsigned(ch - 'a' + 10);
+			else if (ch >= 'A' && ch <= 'F')
+				nibble = unsigned(ch - 'A' + 10);
+			else
+				return std::nullopt;
+			value = (value << 4) | nibble;
+		}
+		chan[slot] = value;
+	}
+
+	// 8-bit components are widened by byte replication (0xAB -> 0xABAB).
+	if (!wide) {
+		for (unsigned& c : chan)
+			c = (c << 8) | c;
+	}
+
+	return RGB16{ uint16_t(chan[1]), uint16_t(chan[2]),
+	              uint16_t(chan[3]), uint16_t(chan[0]) };
+}
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/pattern.h b/subprojects/pixpat/pixpat-native/src/pattern.h
new file mode 100644
index 0000000..fbee683
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/pattern.h
@@ -0,0 +1,597 @@
+#pragma once
+
+#include <cmath>
+#include <cstdint>
+
+#include "color.h"
+#include "layout.h"
+#include "params.h"
+
+namespace pixpat::patterns
+{
+
+// Patterns emit opaque pixels (a=kNormMax) unless they encode their
+// own alpha (e.g. `plain`'s ARGB form). Alpha-bearing sinks
+// (ARGB8888 etc) therefore see the pattern's chosen alpha; convert
+// paths propagate the source's actual `a` instead (a=0 for X-only
+// sources).
+//
+// A pattern is an instance with:
+// using Pixel = RGB16 | YUV16;
+// explicit Pat(const Params&) noexcept;
+// Pixel sample(size_t x, size_t y, size_t W, size_t H) const noexcept;
+// bool ready() const noexcept; // optional, default true
+// Patterns that don't read params ignore the constructor argument.
+
+namespace detail
+{
+// Builds an opaque RGB16 from 8-bit components. Each byte is widened
+// by replication, so 255 -> 0xFFFF and 1 -> 0x0101.
+constexpr RGB16 rgb8(uint8_t r, uint8_t g, uint8_t b) noexcept
+{
+	// c * 0x0101 == (c << 8) | c for any 8-bit c.
+	const auto widen = [](uint8_t c) {
+		return uint16_t(c * 0x0101u);
+	};
+	return RGB16{ widen(r), widen(g), widen(b), kNormMax };
+}
+
+// Builds an opaque YUV16 from 12-bit components. The top four bits
+// are replicated into the low nibble to fill the 16-bit range.
+constexpr YUV16 yuv12(uint16_t y, uint16_t u, uint16_t v) noexcept
+{
+	const auto widen = [](uint16_t c) {
+		return uint16_t((c << 4) | (c >> 8));
+	};
+	return YUV16{ widen(y), widen(u), widen(v), kNormMax };
+}
+} // namespace detail
+
+// "kmstest" default pattern: white border + diagonals; blue rails on
+// the top/left edges; red rails on the bottom/right; an 8-step color
+// gradient block in the center.
+struct Kmstest {
+ using Pixel = RGB16;
+
+ // The pattern takes no parameters; the argument is ignored.
+ explicit Kmstest(const Params&) noexcept {
+ }
+
+ // Returns the color of pixel (x, y) in a W x H frame. The tests
+ // below are evaluated in order and the first match wins, so their
+ // sequence is part of the pattern definition.
+ RGB16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept
+ {
+ using detail::rgb8;
+ // Margin width in pixels; xm1/xm2 and ym1/ym2 are the positions
+ // of the inner frame lines.
+ // NOTE(review): W - mw - 1 / H - mw - 1 are unsigned and wrap
+ // around for frames narrower/shorter than mw + 1 pixels.
+ const size_t mw = 20;
+ const size_t xm1 = mw;
+ const size_t xm2 = W - mw - 1;
+ const size_t ym1 = mw;
+ const size_t ym2 = H - mw - 1;
+
+ // Inner frame lines (these extend across the full frame).
+ if (x == xm1 || x == xm2 || y == ym1 || y == ym2)
+ return rgb8(255, 255, 255);
+ // Solid white block in the top-left corner.
+ if (x < xm1 && y < ym1)
+ return rgb8(255, 255, 255);
+ // Outermost column/row pixels inside the corner areas.
+ if ((x == 0 || x == W - 1) && (y < ym1 || y > ym2))
+ return rgb8(255, 255, 255);
+ if ((y == 0 || y == H - 1) && (x < xm1 || x > xm2))
+ return rgb8(255, 255, 255);
+ // Blue rails: left and top margins.
+ if (x < xm1 && (y > ym1 && y < ym2))
+ return rgb8(0, 0, 255);
+ if (y < ym1 && (x > xm1 && x < xm2))
+ return rgb8(0, 0, 255);
+ // Red rails: right and bottom margins.
+ if (x > xm2 && (y > ym1 && y < ym2))
+ return rgb8(255, 0, 0);
+ if (y > ym2 && (x > xm1 && x < xm2))
+ return rgb8(255, 0, 0);
+ // Interior of the frame.
+ if (x > xm1 && x < xm2 && y > ym1 && y < ym2) {
+ // White diagonal lines, anchored at the frame corners.
+ if (x == y || W - x == H - y)
+ return rgb8(255, 255, 255);
+ if (W - x - 1 == y || x == H - y - 1)
+ return rgb8(255, 255, 255);
+ // t: which of 8 equal-width columns x falls in (0..7). The
+ // divisor is at least 1 here because xm1 < x < xm2.
+ const int t = int((x - xm1 - 1) * 8 / (xm2 - xm1 - 1));
+ // c: 8-bit intensity, growing with y and wrapping every 256 rows.
+ const unsigned c = unsigned((y - ym1 - 1) % 256);
+ unsigned r = 0, g = 0, b = 0;
+ // Column order: R, G, B, cyan, magenta, yellow, gray, black.
+ switch (t) {
+ case 0: r = c; break;
+ case 1: g = c; break;
+ case 2: b = c; break;
+ case 3: g = b = c; break;
+ case 4: r = b = c; break;
+ case 5: r = g = c; break;
+ case 6: r = g = b = c; break;
+ case 7: break;
+ }
+ return rgb8(uint8_t(r), uint8_t(g), uint8_t(b));
+ }
+ // Anything not matched above (the remaining corner areas).
+ return rgb8(0, 0, 0);
+ }
+};
+
+// SMPTE RP 219-1:2014 color bar pattern. Emits YUV directly with
+// pixel values defined by the spec in BT.709 / Limited range. Pass
+// `rec=BT709, range=Limited` for spec-correct output; other ColorSpec
+// settings produce visibly-wrong colors when the sink crosses to RGB
+// (the matrix the caller picked is applied to BT.709-encoded values).
+// Callers are trusted — pixpat does not override the spec for them.
+struct Smpte {
+	using Pixel = YUV16;
+
+	// The pattern takes no parameters; the argument is ignored.
+	explicit Smpte(const Params&) noexcept {
+	}
+
+	// Returns the color of pixel (x, y) in a W x H frame.
+	//
+	// Horizontal positions are compared in 1/1024-pixel fixed point
+	// (`xs`, `a`, `c`, `d`) so that bar edges derived from fractions
+	// of the width do not accumulate rounding error. The frame is
+	// split vertically into four bands: the 75% bars, two thin strips,
+	// and the PLUGE row at the bottom.
+	YUV16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept
+	{
+		using detail::yuv12;
+		// 12-bit Y/Cb/Cr code values for each named patch.
+		constexpr YUV16 gray40 = yuv12(1658, 2048, 2048);
+		constexpr YUV16 white75 = yuv12(2884, 2048, 2048);
+		constexpr YUV16 yellow75 = yuv12(2694, 704, 2171);
+		constexpr YUV16 cyan75 = yuv12(2325, 2356, 704);
+		constexpr YUV16 green75 = yuv12(2136, 1012, 827);
+		constexpr YUV16 magenta75 = yuv12(1004, 3084, 3269);
+		constexpr YUV16 red75 = yuv12( 815, 1740, 3392);
+		constexpr YUV16 blue75 = yuv12( 446, 3392, 1925);
+		constexpr YUV16 cyan100 = yuv12(3015, 2459, 256);
+		constexpr YUV16 blue100 = yuv12( 509, 3840, 1884);
+		constexpr YUV16 yellow100 = yuv12(3507, 256, 2212);
+		constexpr YUV16 black = yuv12( 256, 2048, 2048);
+		constexpr YUV16 white100 = yuv12(3760, 2048, 2048);
+		constexpr YUV16 red100 = yuv12(1001, 1637, 3840);
+		constexpr YUV16 gray15 = yuv12( 782, 2048, 2048);
+
+		constexpr YUV16 black_m2 = yuv12( 186, 2048, 2048);
+		constexpr YUV16 black_p2 = yuv12( 326, 2048, 2048);
+		constexpr YUV16 black_p4 = yuv12( 396, 2048, 2048);
+
+		constexpr size_t M = 1024;           // fixed-point scale
+		const size_t xs = x * M;             // pixel position
+		const size_t a = W * M;              // full width
+		const size_t c = (a * 3 / 4) / 7;    // width of one of the 7 bars
+		const size_t d = a / 8;              // width of each side column
+
+		// Cumulative bottom edges of the first three bands.
+		const size_t pattern1_height = (H * 7) / 12;
+		const size_t pattern2_height = pattern1_height + (H / 12);
+		const size_t pattern3_height = pattern2_height + (H / 12);
+
+		// Band 1: gray side columns around seven 75% bars.
+		if (y < pattern1_height) {
+			if (xs < d || xs >= (a - d))
+				return gray40;
+			const size_t bar = (xs - d) / c;
+			switch (bar) {
+			case 0: return white75;
+			case 1: return yellow75;
+			case 2: return cyan75;
+			case 3: return green75;
+			case 4: return magenta75;
+			case 5: return red75;
+			default: return blue75;
+			}
+		}
+
+		// Band 2: cyan | 75% white | blue.
+		if (y < pattern2_height) {
+			if (xs < d) return cyan100;
+			if (xs >= (a - d)) return blue100;
+			return white75;
+		}
+
+		// Band 3: yellow | luma ramp | red.
+		if (y < pattern3_height) {
+			if (xs < d) return yellow100;
+			if (xs >= (a - d)) return red100;
+			// Do the interpolation in 64 bits. ramp_x can reach about
+			// 768 * W, so the product with the 3504-step luma range
+			// exceeds 32 bits once W passes roughly 1600 pixels and
+			// would wrap where size_t is 32 bits wide.
+			const uint64_t ramp_w = a - 2 * d;
+			const uint64_t ramp_x = xs - d;
+			const uint16_t y_val =
+				uint16_t(256 + uint64_t(3760 - 256) * ramp_x / ramp_w);
+			return yuv12(y_val, 2048, 2048);
+		}
+
+		// pattern4 (PLUGE)
+		const size_t c0 = d;
+		const size_t c1 = c0 + c * 3 / 2;
+		const size_t c2 = c1 + 2 * c;
+		const size_t c3 = c2 + c * 5 / 6;
+
+		if (xs < c0) return gray15;
+		if (xs < c1) return black;
+		if (xs < c2) return white100;
+		if (xs < c3) return black;
+		if (xs >= a - d) return gray15;
+		if (xs >= a - d - c) return black;
+
+		// Five c/3-wide steps: -2%, 0, +2%, 0, +4% around black.
+		const size_t step = (xs - c3) / (c / 3);
+		switch (step) {
+		case 0: return black_m2;
+		case 1: return black;
+		case 2: return black_p2;
+		case 3: return black;
+		default: return black_p4;
+		}
+	}
+};
+
+// Solid fill from a hex color string. Reads `color=<hex>` from
+// params; the value is parsed by Params::get_hex_color (8/16-bpc,
+// alpha-first if present, optional `0x` prefix). Missing or
+// malformed `color` leaves ready()=false and the dispatcher fails
+// the call.
+struct Plain {
+	using Pixel = RGB16;
+
+	// The pattern is usable only when `color` is present and parses
+	// as a hex color; otherwise ready() stays false.
+	explicit Plain(const Params& p) noexcept
+	{
+		const auto parsed = p.get_hex_color("color");
+		if (!parsed)
+			return;
+		ready_ = true;
+		color_ = *parsed;
+	}
+
+	bool ready() const noexcept {
+		return ready_;
+	}
+
+	// Every pixel gets the same color; position and size are unused.
+	RGB16 sample(size_t, size_t, size_t, size_t) const noexcept
+	{
+		return color_;
+	}
+
+private:
+	RGB16 color_{};
+	bool ready_{ false };
+};
+
+namespace detail
+{
+// Maps pos in [0, span-1] linearly onto 0..kNormMax. A span of 0 or 1
+// has no room for a ramp and yields kNormMax.
+constexpr uint16_t ramp16(size_t pos, size_t span) noexcept
+{
+	if (span > 1) {
+		// 64-bit product so pos * kNormMax cannot wrap.
+		const uint64_t scaled = uint64_t(pos) * kNormMax;
+		return uint16_t(scaled / (span - 1));
+	}
+	return kNormMax;
+}
+} // namespace detail
+
+// Black/white checkerboard. Reads optional `cell=<N>` (positive
+// integer; default 8) for cell size in pixels.
+struct Checker {
+	using Pixel = RGB16;
+
+	// `cell` is optional. When given it must parse as an integer
+	// greater than zero, otherwise the pattern reports not-ready.
+	explicit Checker(const Params& p) noexcept
+	{
+		if (!p.get("cell"))
+			return;
+
+		const auto requested = p.get_int("cell");
+		ready_ = requested.has_value() && *requested > 0;
+		if (ready_)
+			cell_ = size_t(*requested);
+	}
+
+	bool ready() const noexcept {
+		return ready_;
+	}
+
+	RGB16 sample(size_t x, size_t y, size_t, size_t) const noexcept
+	{
+		// A cell is dark when its column and row indices differ in
+		// parity; the low bit of the sum equals the low bit of the XOR.
+		const size_t cell_col = x / cell_;
+		const size_t cell_row = y / cell_;
+		if (((cell_col + cell_row) & 1u) != 0)
+			return RGB16{ 0, 0, 0, kNormMax };
+		return RGB16{ kNormMax, kNormMax, kNormMax, kNormMax };
+	}
+
+private:
+	size_t cell_{ 8 };
+	bool ready_{ true };
+};
+
+namespace detail
+{
+// Places the ramp value `v` into the channel(s) selected by `stripe`:
+// 0 -> red, 1 -> green, 2 -> blue, anything else -> all three (gray).
+// Shared by hramp and vramp.
+constexpr RGB16 rgb_gray_stripe(size_t stripe, uint16_t v) noexcept
+{
+	const bool gray = stripe > 2;
+	const uint16_t r = (gray || stripe == 0) ? v : uint16_t(0);
+	const uint16_t g = (gray || stripe == 1) ? v : uint16_t(0);
+	const uint16_t b = (gray || stripe == 2) ? v : uint16_t(0);
+	return RGB16{ r, g, b, kNormMax };
+}
+} // namespace detail
+
+// Four horizontal stripes — R, G, B, gray — each a 0..max ramp
+// along x. Per-channel and luma quantization in one pattern.
+struct Hramp {
+	using Pixel = RGB16;
+
+	// No parameters are read.
+	explicit Hramp(const Params&) noexcept {
+	}
+
+	RGB16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept
+	{
+		// Row selects the stripe (quarter of the height), column sets
+		// the ramp level. A zero height falls back to stripe 0.
+		size_t stripe = 0;
+		if (H != 0)
+			stripe = (y * 4) / H;
+		const uint16_t level = detail::ramp16(x, W);
+		return detail::rgb_gray_stripe(stripe, level);
+	}
+};
+
+// Four vertical columns — R, G, B, gray — each a 0..max ramp
+// along y. Same coverage as hramp, rotated 90°.
+struct Vramp {
+	using Pixel = RGB16;
+
+	// No parameters are read.
+	explicit Vramp(const Params&) noexcept {
+	}
+
+	RGB16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept
+	{
+		// Column selects the stripe (quarter of the width), row sets
+		// the ramp level. A zero width falls back to stripe 0.
+		size_t stripe = 0;
+		if (W != 0)
+			stripe = (x * 4) / W;
+		const uint16_t level = detail::ramp16(y, H);
+		return detail::rgb_gray_stripe(stripe, level);
+	}
+};
+
+// Diagonal RGB ramp: R sweeps with x, G with y, B with x+y.
+struct Dramp {
+	using Pixel = RGB16;
+
+	// No parameters are read.
+	explicit Dramp(const Params&) noexcept {
+	}
+
+	RGB16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept
+	{
+		// x + y peaks at (W - 1) + (H - 1), so the diagonal ramp has
+		// W + H - 1 steps; degenerate sizes collapse to a span of 1.
+		size_t diag_span = 1;
+		if (W + H >= 2)
+			diag_span = W + H - 1;
+
+		return RGB16{
+			detail::ramp16(x, W),
+			detail::ramp16(y, H),
+			detail::ramp16(x + y, diag_span),
+			kNormMax,
+		};
+	}
+};
+
+namespace detail
+{
+// Color of one of the seven bands used by hbar/vbar. Odd bands 1, 3
+// and 5 are pure red, green and blue; every other index is white, so
+// each primary sits between white separators that make per-channel
+// offsets at the band edges easy to spot.
+constexpr RGB16 bar_color7(size_t band) noexcept
+{
+	if (band == 1)
+		return rgb8(255, 0, 0);
+	if (band == 3)
+		return rgb8( 0, 255, 0);
+	if (band == 5)
+		return rgb8( 0, 0, 255);
+	return rgb8(255, 255, 255);
+}
+} // namespace detail
+
+// Vertical bar (full image height, narrow along x) over a black
+// background. `pos` is the left edge in pixels (signed; negative
+// values clip at the left edge); `width` is the bar thickness in
+// pixels (default 32). The bar is split into 7 equal-height regions
+// colored white/red/white/green/white/blue/white.
+struct VBarRGB {
+	using Pixel = RGB16;
+
+	// `pos` is mandatory; `width` is optional but must be a positive
+	// integer when given. Any violation leaves the pattern not-ready.
+	explicit VBarRGB(const Params& p) noexcept
+	{
+		const auto left_edge = p.get_int("pos");
+		ready_ = left_edge.has_value();
+		if (!ready_)
+			return;
+		pos_ = *left_edge;
+
+		if (!p.get("width"))
+			return;
+		const auto thickness = p.get_int("width");
+		ready_ = thickness.has_value() && *thickness > 0;
+		if (ready_)
+			width_ = size_t(*thickness);
+	}
+
+	bool ready() const noexcept {
+		return ready_;
+	}
+
+	RGB16 sample(size_t x, size_t y, size_t, size_t H) const noexcept
+	{
+		// Signed arithmetic so a negative `pos` clips at the left edge.
+		const long long col = static_cast<long long>(x);
+		const long long left = pos_;
+		const long long right = left + static_cast<long long>(width_);
+		const bool inside = (col >= left) && (col < right);
+		if (!inside)
+			return detail::rgb8(0, 0, 0);
+
+		// Seven equal-height bands down the bar.
+		size_t band = 0;
+		if (H != 0)
+			band = (y * 7) / H;
+		return detail::bar_color7(band);
+	}
+
+private:
+	int pos_{};
+	size_t width_{ 32 };
+	bool ready_{ true };
+};
+
+// Horizontal bar: vbar rotated 90°. `pos` is the top edge in pixels;
+// `width` is the bar thickness in pixels (default 32). The bar spans
+// the full image width and is split into 7 equal-width regions
+// colored white/red/white/green/white/blue/white.
+struct HBarRGB {
+	using Pixel = RGB16;
+
+	// `pos` is mandatory; `width` is optional but must be a positive
+	// integer when given. Any violation leaves the pattern not-ready.
+	explicit HBarRGB(const Params& p) noexcept
+	{
+		const auto top_edge = p.get_int("pos");
+		ready_ = top_edge.has_value();
+		if (!ready_)
+			return;
+		pos_ = *top_edge;
+
+		if (!p.get("width"))
+			return;
+		const auto thickness = p.get_int("width");
+		ready_ = thickness.has_value() && *thickness > 0;
+		if (ready_)
+			width_ = size_t(*thickness);
+	}
+
+	bool ready() const noexcept {
+		return ready_;
+	}
+
+	RGB16 sample(size_t x, size_t y, size_t W, size_t) const noexcept
+	{
+		// Signed arithmetic so a negative `pos` clips at the top edge.
+		const long long row = static_cast<long long>(y);
+		const long long top = pos_;
+		const long long bottom = top + static_cast<long long>(width_);
+		const bool inside = (row >= top) && (row < bottom);
+		if (!inside)
+			return detail::rgb8(0, 0, 0);
+
+		// Seven equal-width bands along the bar.
+		size_t band = 0;
+		if (W != 0)
+			band = (x * 7) / W;
+		return detail::bar_color7(band);
+	}
+
+private:
+	int pos_{};
+	size_t width_{ 32 };
+	bool ready_{ true };
+};
+
+// Same shape as VBarRGB but emits YUV16 directly. The five unique colors
+// (black bg + white/red/green/blue bar regions) are precomputed from
+// `spec` at construction so the cross-kind pass is a no-op when the
+// sink is YUV. Use the RGB-native `VBarRGB` for RGB sinks instead — it
+// avoids the YUV→RGB pass that this variant would incur there.
+struct VBarYUV {
+	using Pixel = YUV16;
+
+	// Reads `pos` (mandatory) and `width` (optional, positive), then
+	// converts the background and band colors from RGB to YUV once,
+	// using the coefficients selected by `spec`. On a parameter error
+	// the pattern is left not-ready and the color table stays
+	// zero-initialized.
+	explicit VBarYUV(const Params& p, ColorSpec spec) noexcept
+	{
+		auto pp = p.get_int("pos");
+		if (!pp) {
+			ready_ = false;
+			return;
+		}
+		pos_ = *pp;
+		if (p.get("width")) {
+			auto w = p.get_int("width");
+			if (!w || *w <= 0) {
+				ready_ = false;
+				return;
+			}
+			width_ = size_t(*w);
+		}
+		const ColorCoeffs c = coeffs_for(spec);
+		using X = ColorXfm<RGB16, YUV16>;
+		bg_ = X::apply(detail::rgb8( 0, 0, 0), c);
+		// Band order: white, red, white, green, white, blue, white.
+		bands_[0] = X::apply(detail::rgb8(255, 255, 255), c);
+		bands_[1] = X::apply(detail::rgb8(255, 0, 0), c);
+		bands_[2] = bands_[0];
+		bands_[3] = X::apply(detail::rgb8( 0, 255, 0), c);
+		bands_[4] = bands_[0];
+		bands_[5] = X::apply(detail::rgb8( 0, 0, 255), c);
+		bands_[6] = bands_[0];
+	}
+
+	bool ready() const noexcept {
+		return ready_;
+	}
+
+	// Returns the bar color when x lies in [pos, pos + width), the
+	// background otherwise.
+	YUV16 sample(size_t x, size_t y, size_t, size_t H) const noexcept
+	{
+		// Signed arithmetic so a negative `pos` clips at the left edge.
+		const long long sx = static_cast<long long>(x);
+		const long long lo = pos_;
+		const long long hi = lo + static_cast<long long>(width_);
+		if (sx < lo || sx >= hi)
+			return bg_;
+		const size_t band = (H == 0) ? 0 : (y * 7) / H;
+		// A row at or past the bottom edge (y >= H) gives band >= 7.
+		// Clamp to the last (white) entry instead of reading past the
+		// table; this matches VBarRGB, where bar_color7() returns
+		// white for any out-of-range band.
+		return bands_[band < kNumBands ? band : kNumBands - 1];
+	}
+
+private:
+	static constexpr size_t kNumBands = 7;
+
+	YUV16 bg_{};
+	YUV16 bands_[kNumBands]{};
+	int pos_{};
+	size_t width_{ 32 };
+	bool ready_{ true };
+};
+
+// YUV-native counterpart to HBarRGB. See VBarYUV.
+struct HBarYUV {
+	using Pixel = YUV16;
+
+	// Reads `pos` (mandatory) and `width` (optional, positive), then
+	// converts the background and band colors from RGB to YUV once,
+	// using the coefficients selected by `spec`. On a parameter error
+	// the pattern is left not-ready and the color table stays
+	// zero-initialized.
+	explicit HBarYUV(const Params& p, ColorSpec spec) noexcept
+	{
+		auto pp = p.get_int("pos");
+		if (!pp) {
+			ready_ = false;
+			return;
+		}
+		pos_ = *pp;
+		if (p.get("width")) {
+			auto w = p.get_int("width");
+			if (!w || *w <= 0) {
+				ready_ = false;
+				return;
+			}
+			width_ = size_t(*w);
+		}
+		const ColorCoeffs c = coeffs_for(spec);
+		using X = ColorXfm<RGB16, YUV16>;
+		bg_ = X::apply(detail::rgb8( 0, 0, 0), c);
+		// Band order: white, red, white, green, white, blue, white.
+		bands_[0] = X::apply(detail::rgb8(255, 255, 255), c);
+		bands_[1] = X::apply(detail::rgb8(255, 0, 0), c);
+		bands_[2] = bands_[0];
+		bands_[3] = X::apply(detail::rgb8( 0, 255, 0), c);
+		bands_[4] = bands_[0];
+		bands_[5] = X::apply(detail::rgb8( 0, 0, 255), c);
+		bands_[6] = bands_[0];
+	}
+
+	bool ready() const noexcept {
+		return ready_;
+	}
+
+	// Returns the bar color when y lies in [pos, pos + width), the
+	// background otherwise.
+	YUV16 sample(size_t x, size_t y, size_t W, size_t) const noexcept
+	{
+		// Signed arithmetic so a negative `pos` clips at the top edge.
+		const long long sy = static_cast<long long>(y);
+		const long long lo = pos_;
+		const long long hi = lo + static_cast<long long>(width_);
+		if (sy < lo || sy >= hi)
+			return bg_;
+		const size_t band = (W == 0) ? 0 : (x * 7) / W;
+		// A column at or past the right edge (x >= W) gives band >= 7.
+		// Clamp to the last (white) entry instead of reading past the
+		// table; this matches HBarRGB, where bar_color7() returns
+		// white for any out-of-range band.
+		return bands_[band < kNumBands ? band : kNumBands - 1];
+	}
+
+private:
+	static constexpr size_t kNumBands = 7;
+
+	YUV16 bg_{};
+	YUV16 bands_[kNumBands]{};
+	int pos_{};
+	size_t width_{ 32 };
+	bool ready_{ true };
+};
+
+// Centered radial cosine zone plate: 0.5 + 0.5 * cos(k * (cx² + cy²))
+// with cx, cy measured from the image center and k chosen so the
+// local frequency hits Nyquist at the longer edge — i.e. the pattern
+// uses every spatial frequency the grid can resolve.
+struct Zoneplate {
+	using Pixel = RGB16;
+
+	// No parameters are read.
+	explicit Zoneplate(const Params&) noexcept {
+	}
+
+	RGB16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept
+	{
+		const double max_dim = double(W > H ? W : H);
+		// The local frequency of cos(k r²) is d(k r²)/dr = 2 k r.
+		// Requiring π per pixel (Nyquist) at r = max_dim / 2 gives
+		// k = π / max_dim. A zero-sized frame divides by 1 instead.
+		const double k = 3.14159265358979323846 / (max_dim > 0 ? max_dim : 1.0);
+
+		// Offsets from the image center.
+		const double cx = double(x) - 0.5 * double(W);
+		const double cy = double(y) - 0.5 * double(H);
+
+		const double phase = k * (cx * cx + cy * cy);
+		const double v = 0.5 + 0.5 * std::cos(phase);
+		const double scaled = v * 65535.0;
+
+		// Clamp to the 16-bit range, rounding to nearest in between.
+		uint16_t g = uint16_t(0);
+		if (scaled > 65535.0)
+			g = kNormMax;
+		else if (!(scaled < 0.0))
+			g = uint16_t(scaled + 0.5);
+		return RGB16{ g, g, g, kNormMax };
+	}
+};
+
+} // namespace pixpat::patterns
diff --git a/subprojects/pixpat/pixpat-native/src/pattern_catalog.h b/subprojects/pixpat/pixpat-native/src/pattern_catalog.h
new file mode 100644
index 0000000..6576b2b
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/pattern_catalog.h
@@ -0,0 +1,64 @@
+#pragma once
+
+// Catalog of every named pattern the C++ side knows. Mirrors the
+// shape of format_catalog.h. The X-macro is a list of
+// (Label, RgbType, YuvType, "name") rows:
+//
+// X(Label, RgbType, YuvType, "name")
+//
+// `Label` is the C++ identifier doubling as the PatternId enum value
+// and the s_pattern_caps[] index. `RgbType` and `YuvType` resolve to
+// classes in `pixpat::patterns::` (defined in pattern.h) that satisfy
+// the pattern interface (sample(), Pixel) — one per color kind. Use
+// `void` if the pattern has no variant in that kind. At least one
+// must be non-void. When both are present, dispatch_draw_pattern
+// picks the variant matching the sink's color kind so the cross-kind
+// pass is a no-op; when only one is present, the pipeline runs the
+// cross-kind pass for the opposite-kind sinks. `name` is the
+// lowercase identifier exposed via the C ABI.
+//
+// Adding a pattern = a row here AND its class(es) in pattern.h. The
+// codegen (pixpat-native/codegen/gen_pixpat.py) parses this X-macro
+// to learn the pattern set; pixpat_pattern.cpp re-expands it to build
+// the dispatch arms and the default-pattern fallback.
+
+#include <cstddef>
+#include <cstdint>
+
+namespace pixpat
+{
+
+// One X(Label, RgbType, YuvType, "name") row per pattern. Row order is
+// significant: the PatternId enumerators and the s_pattern_table
+// entries are both generated by expanding these rows in sequence.
+#define PIXPAT_PATTERN_LIST(X) \
+ X(Kmstest, Kmstest, void, "kmstest") \
+ X(Smpte, void, Smpte, "smpte") \
+ X(Plain, Plain, void, "plain") \
+ X(Checker, Checker, void, "checker") \
+ X(Hramp, Hramp, void, "hramp") \
+ X(Vramp, Vramp, void, "vramp") \
+ X(HBar, HBarRGB, HBarYUV, "hbar") \
+ X(VBar, VBarRGB, VBarYUV, "vbar") \
+ X(Dramp, Dramp, void, "dramp") \
+ X(Zoneplate, Zoneplate, void, "zoneplate")
+
+// One enumerator per catalog row, in row order (so the first row is 0),
+// followed by a trailing `Unknown` enumerator.
+enum class PatternId : uint8_t {
+#define X(label, rgb, yuv, name) label,
+ PIXPAT_PATTERN_LIST(X)
+#undef X
+ Unknown,
+};
+
+// Pairs a pattern's catalog name string with its PatternId.
+struct PatternEntry {
+ const char* name;
+ PatternId id;
+};
+
+// Name/id table with one entry per catalog row, in row order; the
+// `Unknown` enumerator has no entry.
+inline constexpr PatternEntry s_pattern_table[] = {
+#define X(label, rgb, yuv, name) { name, PatternId::label },
+ PIXPAT_PATTERN_LIST(X)
+#undef X
+};
+
+// Number of entries in s_pattern_table.
+inline constexpr size_t s_pattern_catalog_count =
+	sizeof(s_pattern_table) / sizeof(PatternEntry);
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/pipeline.h b/subprojects/pixpat/pixpat-native/src/pipeline.h
new file mode 100644
index 0000000..09e13bc
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/pipeline.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <cstddef>
+
+#include "color.h"
+#include "layout.h"
+
+// Inlined source → color → sink composition. The intermediate Pixel
+// values stay in registers across stages; there is no normalized RGB16
+// or YUV16 buffer between source and sink. Block size is dictated by
+// the sink: 1x1 for non-subsampled formats, h_sub × v_sub for chroma-
+// subsampled ones.
+
+namespace pixpat
+{
+
+template <typename Source, typename Sink>
+struct Converter {
+	using Xfm = ColorXfm<typename Source::Pixel, typename Sink::Pixel>;
+	// Block dimensions are taken from the sink.
+	static constexpr size_t bh = Sink::block_h;
+	static constexpr size_t bw = Sink::block_w;
+
+	// Converts the rows [by_start, by_end) of a W x H image from `src`
+	// to `dst`, one bh x bw block at a time: each pixel of the block
+	// is read from the source, passed through the color transform for
+	// `spec`, and the finished block is handed to the sink.
+	static void run(const Buffer<Source::Layout::num_planes>& src,
+	                Buffer<Sink::Layout::num_planes>& dst,
+	                size_t W, size_t H,
+	                size_t by_start, size_t by_end,
+	                ColorSpec spec) noexcept
+	{
+		using OutPixel = typename Sink::Pixel;
+		// Coefficients depend only on `spec`; look them up once.
+		const ColorCoeffs coeffs = coeffs_for(spec);
+
+		size_t row = by_start;
+		while (row < by_end) {
+			size_t col = 0;
+			while (col < W) {
+				OutPixel tile[bh][bw];
+				for (size_t j = 0; j < bh; ++j) {
+					for (size_t i = 0; i < bw; ++i) {
+						tile[j][i] = Xfm::apply(
+							Source::read(src, col + i, row + j,
+							             W, H), coeffs);
+					}
+				}
+				Sink::write_block(dst, col, row, tile);
+				col += bw;
+			}
+			row += bh;
+		}
+	}
+};
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/pixpat.cpp b/subprojects/pixpat/pixpat-native/src/pixpat.cpp
new file mode 100644
index 0000000..ac21fac
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/pixpat.cpp
@@ -0,0 +1,355 @@
+// pixpat: extern "C" entry points + runtime format dispatch.
+//
+// The format catalog (X-macro PIXPAT_FORMAT_LIST + FormatId enum +
+// s_format_table) is hand-written in format_catalog.h. The generator
+// (pixpat-native/codegen/gen_pixpat.py) reads the same X-macro and
+// the user TOML and emits the per-config bits:
+//
+// pixpat_config.h — PIXPAT_FEATURE_PATTERN / _CONVERT
+// pixpat_caps.inc — s_format_caps[] (per-format readable / writable /
+// hot_src / hot_dst, indexed by FormatId) and
+// s_pattern_caps[] (per-pattern enabled flag).
+//
+// The convert and pattern dispatch (dispatch_convert in
+// pixpat_convert.cpp, dispatch_draw_pattern in pixpat_pattern.cpp) is
+// hand-written and consumes s_format_caps / s_pattern_caps via
+// `if constexpr` on the per-row constexpr fields.
+//
+// s_format_info is built here, once, by re-expanding the catalog
+// X-macro through unpack_for / pack_for. Those constexpr helpers use
+// `if constexpr` on the per-format readable / writable flags from
+// s_format_caps to either take the address of unpack_to_norm /
+// pack_from_norm or fall back to nullptr. Because they're function
+// templates, the discarded branch is never instantiated, so
+// disabled-direction templates produce no code. The sink block
+// dimensions are plain constexpr scalars and are read directly.
+//
+// Feature gating is meson-side: pixpat_pattern.cpp / pixpat_convert.cpp
+// are added to the source list only when their feature is enabled. This
+// file's entry points always exist; they call the bridge functions
+// dispatch_draw_pattern / dispatch_convert under `if constexpr
+// (kFeatureXxx)`. The discarded if-constexpr branch produces no symbol
+// reference, so when the matching TU is absent the link still succeeds
+// and the entry point returns -1 instead.
+
+#include <pixpat/pixpat.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+
+#include "pixpat_config.h"
+
+#include "color.h"
+#include "error.h"
+#include "format_catalog.h"
+#include "formats.h"
+#include "io.h"
+#include "layout.h"
+#include "params.h"
+#include "pattern.h"
+#include "pixpat_internal.h"
+#include "threading.h"
+
+namespace pixpat
+{
+
+inline constexpr bool kFeaturePattern = PIXPAT_FEATURE_PATTERN;
+inline constexpr bool kFeatureConvert = PIXPAT_FEATURE_CONVERT;
+
+// Resolve a format name to its FormatId by a linear scan of
+// s_format_table. A null name or a name with no exact match yields
+// FormatId::Unknown.
+static FormatId lookup_format(const char* name) noexcept
+{
+    if (name != nullptr) {
+        for (const auto& entry : s_format_table) {
+            if (std::strcmp(name, entry.name) != 0)
+                continue;
+            return entry.id;
+        }
+    }
+    return FormatId::Unknown;
+}
+
+// Per-source: fill `bh` rows of normalized pixels by calling Src::read.
+// Address is taken in s_format_info[] for every readable format. When
+// no format is readable (convert disabled) no specialization is
+// instantiated, so this template emits no code.
+// Reads `bh` rows of width W, starting at source row `by`, through
+// Src::read and stores them contiguously (row-major) in `norm`, which
+// is reinterpreted as an array of Src::Pixel.
+template <typename Src>
+static void unpack_to_norm(uint8_t* norm, const pixpat_buffer* src,
+                           size_t by, size_t bh, size_t W) noexcept
+{
+    using Pixel = typename Src::Pixel;
+    auto planes = make_buffer<typename Src::Layout>(src);
+    const size_t H = src->height;
+
+    // Output is written sequentially: row 0 first, then row 1, ...
+    Pixel* out = reinterpret_cast<Pixel*>(norm);
+    for (size_t row = 0; row < bh; ++row) {
+        const size_t y = by + row;
+        for (size_t col = 0; col < W; ++col)
+            *out++ = Src::read(planes, col, y, W, H);
+    }
+}
+
+// Per-sink: re-block `Snk::block_h × W` of normalized pixels and call
+// Sink::write_block. Snk's block_h dictates how many normalized rows
+// the caller has to have prepared. Used by the normalized pivot for
+// both convert (cold path) and pattern.
+// Takes Snk::block_h rows of W normalized pixels from `norm`
+// (row-major, reinterpreted as Snk::Pixel), regroups them into
+// block_h x block_w tiles and writes each tile at (bx, by) through
+// Snk::write_block.
+template <typename Snk>
+static void pack_from_norm(const pixpat_buffer* dst,
+                           const uint8_t* norm,
+                           size_t by, size_t W) noexcept
+{
+    using Pixel = typename Snk::Pixel;
+    constexpr size_t tile_rows = Snk::block_h;
+    constexpr size_t tile_cols = Snk::block_w;
+
+    auto planes = make_buffer<typename Snk::Layout>(dst);
+    const Pixel* pixels = reinterpret_cast<const Pixel*>(norm);
+
+    for (size_t x0 = 0; x0 < W; x0 += tile_cols) {
+        Pixel tile[tile_rows][tile_cols];
+        for (size_t r = 0; r < tile_rows; ++r) {
+            const Pixel* in_row = pixels + r * W + x0;
+            for (size_t k = 0; k < tile_cols; ++k)
+                tile[r][k] = in_row[k];
+        }
+        Snk::write_block(planes, x0, by, tile);
+    }
+}
+
+// Generated: s_format_caps[] indexed by FormatId, plus s_pattern_* /
+// DefaultPattern (used only by pixpat_pattern.cpp; harmless here).
+#include "pixpat_caps.inc"
+
+static_assert(sizeof(s_format_caps) / sizeof(s_format_caps[0]) == s_format_catalog_count,
+ "s_format_caps must cover the full catalog");
+
+// `if constexpr` keeps disabled-direction function-template bodies
+// uninstantiated. Taking `&unpack_to_norm<Src>` / `&pack_from_norm<Snk>`
+// forces the function body to be emitted; without the gate every
+// catalog format would carry unpack and pack code regardless of its
+// readable / writable bit. Snk::block_h / Snk::block_w are constexpr
+// scalars — no body, no emission — so they're inlined directly in the
+// initializer below, without a helper.
+// Returns the unpack function for Src when Read is true, nullptr
+// otherwise. The `if constexpr` keeps unpack_to_norm<Src> from being
+// instantiated when Read is false.
+template <bool Read, typename Src>
+static constexpr UnpackFn unpack_for() noexcept
+{
+    if constexpr (!Read) {
+        return nullptr;
+    } else {
+        return &unpack_to_norm<Src>;
+    }
+}
+
+// Returns the pack function for Snk when Write is true, nullptr
+// otherwise. The `if constexpr` keeps pack_from_norm<Snk> from being
+// instantiated when Write is false.
+template <bool Write, typename Snk>
+static constexpr PackFn pack_for() noexcept
+{
+    if constexpr (!Write) {
+        return nullptr;
+    } else {
+        return &pack_from_norm<Snk>;
+    }
+}
+
+// Runtime per-format table, one row per PIXPAT_FORMAT_LIST entry;
+// callers index it with size_t(FormatId). Each row holds:
+//   unpack / pack     - function pointers from unpack_for / pack_for,
+//                       nullptr when s_format_caps marks the format as
+//                       not readable / not writable
+//   kind              - formats::<name>::kind
+//   h_sub / v_sub     - formats::<name>::h_sub / v_sub, cast to uint8_t
+//   snk_block_h / _w  - formats::<name>::Sink::block_h / block_w,
+//                       cast to uint8_t
+const FormatInfo s_format_info[] = {
+// CAPS(name) selects the generated capability row for one format.
+#define CAPS(name) s_format_caps[size_t(FormatId::name)]
+// X(name) emits one FormatInfo initializer; field order must match
+// the FormatInfo declaration.
+#define X(name) \
+ { \
+ unpack_for<CAPS(name).readable, formats::name::Source>(), \
+ pack_for<CAPS(name).writable, formats::name::Sink>(), \
+ formats::name::kind, \
+ uint8_t(formats::name::h_sub), \
+ uint8_t(formats::name::v_sub), \
+ uint8_t(formats::name::Sink::block_h), \
+ uint8_t(formats::name::Sink::block_w), \
+ },
+ PIXPAT_FORMAT_LIST(X)
+#undef X
+#undef CAPS
+};
+// Guards against the table and the catalog drifting apart.
+static_assert(sizeof(s_format_info) / sizeof(s_format_info[0]) == s_format_catalog_count,
+ "s_format_info must cover the full catalog");
+
+// validate_* / parse_spec are only reached from inside the entry points'
+// `if constexpr (kFeatureXxx)` true branches. With a feature disabled,
+// its caller's branch is discarded and the helper becomes unreferenced;
+// require_readable is convert-only. [[maybe_unused]] keeps
+// -Wunused-function (and clang's -Wunneeded-internal-declaration) quiet.
+// Rejects a null buffer pointer and buffers whose width or height is
+// zero by throwing invalid_argument. Plane pointers and strides are
+// not inspected here.
+[[maybe_unused]] static void validate_buffer(const pixpat_buffer* b)
+{
+    if (b == nullptr)
+        throw invalid_argument("null buffer");
+
+    const bool empty = (b->width == 0) || (b->height == 0);
+    if (empty)
+        throw invalid_argument("zero-sized buffer");
+}
+
+// Looks up `name` in the format catalog and returns its FormatId.
+// Throws invalid_argument when lookup_format reports
+// FormatId::Unknown (which includes a null name).
+[[maybe_unused]] static FormatId validate_format(const char* name)
+{
+    const FormatId found = lookup_format(name);
+    if (found != FormatId::Unknown)
+        return found;
+    throw invalid_argument("unknown format");
+}
+
+// Throws invalid_argument unless the format has a pack function,
+// i.e. it was enabled as a sink when s_format_info was built.
+[[maybe_unused]] static void require_writable(FormatId id)
+{
+    const FormatInfo& info = s_format_info[size_t(id)];
+    if (!info.pack)
+        throw invalid_argument("format not enabled as a sink in this build");
+}
+
+// Throws invalid_argument unless the format has an unpack function,
+// i.e. it was enabled as a source when s_format_info was built.
+[[maybe_unused]] static void require_readable(FormatId id)
+{
+    const FormatInfo& info = s_format_info[size_t(id)];
+    if (!info.unpack)
+        throw invalid_argument("format not enabled as a source in this build");
+}
+
+// Converts the caller-supplied thread count to the value used
+// internally: negative values throw invalid_argument, zero selects
+// default_thread_count(), positive values are returned as-is.
+[[maybe_unused]] static unsigned validate_thread_count(int n)
+{
+    if (n < 0)
+        throw invalid_argument("negative num_threads");
+    if (n == 0)
+        return default_thread_count();
+    return static_cast<unsigned>(n);
+}
+
+// Map the C-side pixpat_rec / pixpat_range enums (defined in
+// pixpat.h with explicit values 0/1/2 for rec, 0/1 for range) onto
+// the internal pixpat::Rec / pixpat::Range. Out-of-range values fall
+// back to BT.601 / Limited — matching the zero-initialised opts
+// struct and kDefaultColorSpec.
+// Builds a ColorSpec from the C-side enum values. Anything other
+// than PIXPAT_REC_BT709 / PIXPAT_REC_BT2020 maps to Rec::BT601, and
+// anything other than PIXPAT_RANGE_FULL maps to Range::Limited.
+[[maybe_unused]] static ColorSpec parse_spec(int rec_in, int range_in) noexcept
+{
+    Rec rec = Rec::BT601;
+    if (rec_in == PIXPAT_REC_BT709)
+        rec = Rec::BT709;
+    else if (rec_in == PIXPAT_REC_BT2020)
+        rec = Rec::BT2020;
+
+    Range range = Range::Limited;
+    if (range_in == PIXPAT_RANGE_FULL)
+        range = Range::Full;
+
+    return ColorSpec{ rec, range };
+}
+
+} // namespace pixpat
+
+// Marks the C entry points as part of the public ABI: restores default
+// visibility against the build-wide -fvisibility=hidden, so they are
+// exported from libpixpat.so.
+#define PIXPAT_API __attribute__((visibility("default")))
+
+extern "C" {
+
+// Public C entry point: draw `pattern` into `dst`.
+//
+//   dst     - destination buffer; must be non-null, non-zero-sized, in
+//             a catalog format that is enabled as a sink, with width
+//             and height multiples of the sink block dimensions.
+//   pattern - pattern name, forwarded unchanged to
+//             dispatch_draw_pattern.
+//   opts    - optional. When null, default_thread_count() and
+//             kDefaultColorSpec are used and Params is built from
+//             nullptr.
+//
+// Returns 0 on success and -1 on any failure (validation error, error
+// raised while drawing, or the pattern feature being compiled out).
+//
+// The function is called through a C ABI, so no exception may leave
+// it. run_stripes captures worker exceptions with `catch (...)` and
+// rethrows them, so the handler here is a catch-all rather than only
+// std::exception.
+PIXPAT_API int pixpat_draw_pattern(const pixpat_buffer* dst,
+                                   const char* pattern,
+                                   const pixpat_pattern_opts* opts)
+{
+    if constexpr (pixpat::kFeaturePattern) {
+        try {
+            pixpat::validate_buffer(dst);
+            auto id = pixpat::validate_format(dst->format);
+            pixpat::require_writable(id);
+            const auto& di = pixpat::s_format_info[size_t(id)];
+            if (dst->width % di.snk_block_w != 0 ||
+                dst->height % di.snk_block_h != 0)
+                throw pixpat::invalid_argument(
+                    "dimensions not aligned to format block");
+            const unsigned n_threads = opts
+                ? pixpat::validate_thread_count(opts->num_threads)
+                : pixpat::default_thread_count();
+            const pixpat::ColorSpec spec = opts
+                ? pixpat::parse_spec(opts->rec, opts->range)
+                : pixpat::kDefaultColorSpec;
+            const pixpat::Params params(opts ? opts->params : nullptr);
+            if (!params.ok())
+                throw pixpat::invalid_argument("malformed opts->params");
+
+            // Stripes are aligned to the sink block height so that no
+            // block is split between two workers.
+            pixpat::run_stripes(dst->height, di.snk_block_h, n_threads,
+                                [&](size_t y0, size_t y1) {
+                                    pixpat::dispatch_draw_pattern(
+                                        id, pattern, params, dst,
+                                        dst->width, dst->height, y0, y1, spec);
+                                });
+            return 0;
+        } catch (...) {
+            // Every exception type maps to the generic error code;
+            // nothing may unwind into the C caller.
+            return -1;
+        }
+    } else {
+        (void)dst;
+        (void)pattern;
+        (void)opts;
+        return -1;
+    }
+}
+
+// Public C entry point: convert `src` into `dst`.
+//
+//   dst / src - both must be non-null, non-zero-sized and have equal
+//               width and height. src's format must be enabled as a
+//               source and dst's as a sink. Width / height must be
+//               multiples of the source subsampling, the destination
+//               subsampling and the destination sink block size.
+//   opts      - optional. When null, default_thread_count() and
+//               kDefaultColorSpec are used.
+//
+// Returns 0 on success and -1 on any failure (validation error, error
+// raised while converting, or the convert feature being compiled out).
+//
+// The function is called through a C ABI, so no exception may leave
+// it. run_stripes captures worker exceptions with `catch (...)` and
+// rethrows them, so the handler here is a catch-all rather than only
+// std::exception.
+PIXPAT_API int pixpat_convert(const pixpat_buffer* dst,
+                              const pixpat_buffer* src,
+                              const pixpat_convert_opts* opts)
+{
+    if constexpr (pixpat::kFeatureConvert) {
+        try {
+            pixpat::validate_buffer(dst);
+            pixpat::validate_buffer(src);
+            if (src->width != dst->width || src->height != dst->height)
+                throw pixpat::invalid_argument("src/dst dimensions differ");
+
+            auto src_id = pixpat::validate_format(src->format);
+            auto dst_id = pixpat::validate_format(dst->format);
+            pixpat::require_readable(src_id);
+            pixpat::require_writable(dst_id);
+
+            const auto& si = pixpat::s_format_info[size_t(src_id)];
+            const auto& di = pixpat::s_format_info[size_t(dst_id)];
+            // Each constraint must hold independently — checking only
+            // max() would miss e.g. h_sub=2 vs snk_block_w=3 with W=3.
+            if (src->width % si.h_sub != 0 || src->height % si.v_sub != 0 ||
+                src->width % di.h_sub != 0 || src->height % di.v_sub != 0 ||
+                src->width % di.snk_block_w != 0 || src->height % di.snk_block_h != 0)
+                throw pixpat::invalid_argument(
+                    "dimensions not aligned to format subsampling");
+            // run_stripes only needs the v dimension. Stripes must align
+            // to si.v_sub (source reads) and di.snk_block_h (sink block
+            // loop); for pixpat's catalog these are powers-of-two and
+            // max == LCM.
+            const unsigned vs = std::max({ unsigned(si.v_sub),
+                                           unsigned(di.v_sub),
+                                           unsigned(di.snk_block_h) });
+            const unsigned n_threads = opts
+                ? pixpat::validate_thread_count(opts->num_threads)
+                : pixpat::default_thread_count();
+            const pixpat::ColorSpec spec = opts
+                ? pixpat::parse_spec(opts->rec, opts->range)
+                : pixpat::kDefaultColorSpec;
+
+            pixpat::run_stripes(src->height, vs, n_threads,
+                                [&](size_t y0, size_t y1) {
+                                    pixpat::dispatch_convert(src_id, dst_id, src, dst,
+                                                             src->width, src->height,
+                                                             y0, y1, spec);
+                                });
+            return 0;
+        } catch (...) {
+            // Every exception type maps to the generic error code;
+            // nothing may unwind into the C caller.
+            return -1;
+        }
+    } else {
+        (void)dst;
+        (void)src;
+        (void)opts;
+        return -1;
+    }
+}
+
+// Returns 1 when `format` names a catalog format that is enabled
+// (readable or writable) in this build, 0 otherwise. Unknown and
+// null names yield 0.
+PIXPAT_API int pixpat_format_supported(const char* format)
+{
+    const auto id = pixpat::lookup_format(format);
+    const bool known = (id != pixpat::FormatId::Unknown);
+    return (known && pixpat::s_format_caps[size_t(id)].enabled()) ? 1 : 0;
+}
+
+// Returns how many catalog formats are enabled (readable or
+// writable) in this build.
+PIXPAT_API size_t pixpat_format_count(void)
+{
+    size_t enabled = 0;
+    for (size_t i = 0; i < pixpat::s_format_catalog_count; ++i)
+        enabled += pixpat::s_format_caps[i].enabled() ? 1u : 0u;
+    return enabled;
+}
+
+// Returns the name of the idx-th enabled format, counting enabled
+// formats in catalog order from zero, or nullptr when idx is past
+// the last enabled format.
+PIXPAT_API const char* pixpat_format_name(size_t idx)
+{
+    // Number of enabled formats still to skip before the match.
+    size_t remaining = idx;
+    for (size_t i = 0; i < pixpat::s_format_catalog_count; ++i) {
+        if (pixpat::s_format_caps[i].enabled()) {
+            if (remaining == 0)
+                return pixpat::s_format_table[i].name;
+            --remaining;
+        }
+    }
+    return nullptr;
+}
+
+} // extern "C"
diff --git a/subprojects/pixpat/pixpat-native/src/pixpat_convert.cpp b/subprojects/pixpat/pixpat-native/src/pixpat_convert.cpp
new file mode 100644
index 0000000..63461d8
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/pixpat_convert.cpp
@@ -0,0 +1,201 @@
+// Convert-feature TU: built only when PIXPAT_FEATURE_CONVERT is on
+// (controlled by the meson source list). pixpat.cpp's pixpat_convert
+// entry calls into dispatch_convert() below via if-constexpr; when the
+// feature is off this file isn't compiled, the discarded if-constexpr
+// branch emits no symbol reference, and the .so simply lacks these
+// symbols.
+
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+#include "color.h"
+#include "error.h"
+#include "format_catalog.h"
+#include "formats.h"
+#include "io.h"
+#include "layout.h"
+#include "pattern.h"
+#include "pipeline.h"
+#include "pixpat_internal.h"
+
+namespace pixpat
+{
+
+// Direct (fused) conversion for one concrete Src / Snk pair: wraps
+// both C buffers in typed plane views and runs Converter over rows
+// [by_start, by_end).
+template <typename Src, typename Snk>
+static void run_convert_impl(const pixpat_buffer* src, const pixpat_buffer* dst,
+                             size_t W, size_t H,
+                             size_t by_start, size_t by_end,
+                             ColorSpec spec)
+{
+    using SL = typename Src::Layout;
+    using DL = typename Snk::Layout;
+    using Pipeline = Converter<Src, Snk>;
+
+    // The public entry point has already checked W/H against each
+    // layout's subsampling and the sink block size; these only
+    // document that precondition.
+    assert(W % SL::h_sub == 0 && W % DL::h_sub == 0);
+    assert(H % SL::v_sub == 0 && H % DL::v_sub == 0);
+
+    const auto source_planes = make_buffer<SL>(src);
+    auto sink_planes = make_buffer<DL>(dst);
+    Pipeline::run(source_planes, sink_planes, W, H, by_start, by_end, spec);
+}
+
+// Generic conversion path through a normalized line buffer: for every
+// group of `snk_block_h` rows, unpack the source into the buffer,
+// convert between RGB and YUV in place when the two formats differ in
+// color kind, then pack into the destination. Function pointers come
+// from s_format_info.
+static void run_norm(FormatId src_id, FormatId dst_id,
+                     const pixpat_buffer* src, const pixpat_buffer* dst,
+                     size_t W, size_t H,
+                     size_t by_start, size_t by_end,
+                     ColorSpec spec)
+{
+    const FormatInfo& si = s_format_info[size_t(src_id)];
+    const FormatInfo& di = s_format_info[size_t(dst_id)];
+    const size_t bh = di.snk_block_h;
+
+    // Alignment is guaranteed by the public entry point.
+    assert(W % si.h_sub == 0 && W % di.snk_block_w == 0);
+    assert(H % si.v_sub == 0 && H % bh == 0);
+
+    // One buffer per thread, sized for `bh` rows. The size is computed
+    // from RGB16; the buffer is also used for YUV pixels.
+    const size_t pixels_per_pass = bh * W;
+    thread_local std::vector<uint8_t> norm;
+    norm.resize(pixels_per_pass * sizeof(RGB16));
+
+    const ColorCoeffs coeffs = coeffs_for(spec);
+    const bool cross_kind = (si.kind != di.kind);
+    const bool src_is_rgb = (si.kind == ColorKind::RGB);
+
+    for (size_t by = by_start; by < by_end; by += bh) {
+        uint8_t* const line = norm.data();
+        si.unpack(line, src, by, bh, W);
+        if (cross_kind) {
+            if (src_is_rgb)
+                norm_rgb_to_yuv(line, pixels_per_pass, coeffs);
+            else
+                norm_yuv_to_rgb(line, pixels_per_pass, coeffs);
+        }
+        di.pack(dst, line, by, W);
+    }
+}
+
+// Generated: FormatCaps + s_format_caps[] (per-format readable/writable
+// + hot_src/hot_dst), plus s_pattern_* / DefaultPattern.
+#include "pixpat_caps.inc"
+
+// Per-Src dispatch: pick the right Sink for `dst_id` and call
+// run_convert_impl. The X-macro emits one case per catalog format;
+// `if constexpr (...writable)` discards the body for non-writable
+// formats — those cases fall to the trailing throw.
+// Source type is fixed (Src); selects the Sink type matching `dst_id`
+// at run time and runs the fused conversion for that pair.
+//
+// Throws invalid_argument when `dst_id` is not a catalog format or the
+// format is not writable in this build.
+template <typename Src>
+static void dispatch_dst_convert(FormatId dst_id,
+ const pixpat_buffer* src, const pixpat_buffer* dst,
+ size_t W, size_t H,
+ size_t by_start, size_t by_end,
+ ColorSpec spec)
+{
+ switch (dst_id) {
+// CAPS(name) selects the generated capability row for one format.
+#define CAPS(name) s_format_caps[size_t(FormatId::name)]
+// One case per catalog format. For writable formats the case converts
+// and returns; for the others the `if constexpr` body is discarded and
+// the case only breaks out to the throw below.
+#define X(name) \
+ case FormatId::name: \
+ if constexpr (CAPS(name).writable) { \
+ run_convert_impl<Src, formats::name::Sink>( \
+ src, dst, W, H, by_start, by_end, spec); \
+ return; \
+ } \
+ break;
+ PIXPAT_FORMAT_LIST(X)
+#undef X
+#undef CAPS
+ default:
+ break;
+ }
+ // Reached only when no writable case handled dst_id.
+ throw invalid_argument("destination format not enabled in this build");
+}
+
+// Per-Snk dispatch: mirror of dispatch_dst_convert.
+// Sink type is fixed (Snk); selects the Source type matching `src_id`
+// at run time and runs the fused conversion for that pair.
+//
+// Throws invalid_argument when `src_id` is not a catalog format or the
+// format is not readable in this build.
+template <typename Snk>
+static void dispatch_src_convert(FormatId src_id,
+ const pixpat_buffer* src, const pixpat_buffer* dst,
+ size_t W, size_t H,
+ size_t by_start, size_t by_end,
+ ColorSpec spec)
+{
+ switch (src_id) {
+// CAPS(name) selects the generated capability row for one format.
+#define CAPS(name) s_format_caps[size_t(FormatId::name)]
+// One case per catalog format. For readable formats the case converts
+// and returns; for the others the `if constexpr` body is discarded and
+// the case only breaks out to the throw below.
+#define X(name) \
+ case FormatId::name: \
+ if constexpr (CAPS(name).readable) { \
+ run_convert_impl<formats::name::Source, Snk>( \
+ src, dst, W, H, by_start, by_end, spec); \
+ return; \
+ } \
+ break;
+ PIXPAT_FORMAT_LIST(X)
+#undef X
+#undef CAPS
+ default:
+ break;
+ }
+ // Reached only when no readable case handled src_id.
+ throw invalid_argument("source format not enabled in this build");
+}
+
+// Hot-pivot probes. The wrapper has to be a template so that the
+// discarded `if constexpr` branch is not instantiated — otherwise
+// dispatch_dst_convert<formats::X::Source> would be instantiated for
+// every catalog format, not just hot pivots.
+// Hot-source probe for one catalog format. When HotSrc is true and
+// `src_id` equals Id, converts via dispatch_dst_convert<Source> and
+// returns true; otherwise returns false without doing any work. With
+// HotSrc false the dispatch call is discarded and never instantiated.
+template <bool HotSrc, FormatId Id, typename Source>
+static bool try_hot_src(FormatId src_id, FormatId dst_id,
+                        const pixpat_buffer* src, const pixpat_buffer* dst,
+                        size_t W, size_t H,
+                        size_t by_start, size_t by_end,
+                        ColorSpec spec)
+{
+    if constexpr (!HotSrc) {
+        return false;
+    } else {
+        if (src_id != Id)
+            return false;
+        dispatch_dst_convert<Source>(
+            dst_id, src, dst, W, H, by_start, by_end, spec);
+        return true;
+    }
+}
+
+// Hot-destination probe for one catalog format. When HotDst is true
+// and `dst_id` equals Id, converts via dispatch_src_convert<Sink> and
+// returns true; otherwise returns false without doing any work. With
+// HotDst false the dispatch call is discarded and never instantiated.
+template <bool HotDst, FormatId Id, typename Sink>
+static bool try_hot_dst(FormatId src_id, FormatId dst_id,
+                        const pixpat_buffer* src, const pixpat_buffer* dst,
+                        size_t W, size_t H,
+                        size_t by_start, size_t by_end,
+                        ColorSpec spec)
+{
+    if constexpr (!HotDst) {
+        return false;
+    } else {
+        if (dst_id != Id)
+            return false;
+        dispatch_src_convert<Sink>(
+            src_id, src, dst, W, H, by_start, by_end, spec);
+        return true;
+    }
+}
+
+// Convert rows [by_start, by_end) of `src` into `dst`.
+//
+// Walks the catalog in order and, for each format, first tries it as a
+// hot source and then as a hot destination; the first probe that
+// matches performs a fused conversion and ends the call. When no hot
+// probe matches, falls back to run_norm, which goes through the
+// normalized line buffer.
+void dispatch_convert(FormatId src_id, FormatId dst_id,
+ const pixpat_buffer* src, const pixpat_buffer* dst,
+ size_t W, size_t H,
+ size_t by_start, size_t by_end,
+ ColorSpec spec)
+{
+// CAPS(name) selects the generated capability row for one format.
+#define CAPS(name) s_format_caps[size_t(FormatId::name)]
+// Two probes per catalog format; probes whose hot flag is false
+// compile down to `return false`.
+#define X(name) \
+ if (try_hot_src<CAPS(name).hot_src, FormatId::name, \
+ formats::name::Source>( \
+ src_id, dst_id, src, dst, W, H, by_start, by_end, spec)) \
+ return; \
+ if (try_hot_dst<CAPS(name).hot_dst, FormatId::name, \
+ formats::name::Sink>( \
+ src_id, dst_id, src, dst, W, H, by_start, by_end, spec)) \
+ return;
+ PIXPAT_FORMAT_LIST(X)
+#undef X
+#undef CAPS
+
+ // Cold path: neither side is a hot pivot.
+ run_norm(src_id, dst_id, src, dst, W, H, by_start, by_end, spec);
+}
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/pixpat_internal.h b/subprojects/pixpat/pixpat-native/src/pixpat_internal.h
new file mode 100644
index 0000000..50d3405
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/pixpat_internal.h
@@ -0,0 +1,89 @@
+#pragma once
+
+// Internal interface shared between the always-built pixpat.cpp and the
+// optional pixpat_pattern.cpp / pixpat_convert.cpp TUs. The feature
+// gate is meson-side: pixpat_pattern.cpp is in the source list iff
+// PIXPAT_FEATURE_PATTERN, and likewise for convert. The bridge
+// declarations below are unconditional; pixpat.cpp's entry points call
+// them inside `if constexpr (kFeatureXxx)`, and the discarded branch
+// emits no symbol reference, so absent definitions don't cause link
+// failures.
+
+#include <cstddef>
+#include <cstdint>
+
+#include <pixpat/pixpat.h>
+
+#include "color.h"
+#include "format_catalog.h"
+#include "layout.h"
+#include "pattern_catalog.h"
+
+namespace pixpat
+{
+
+// Builds the typed plane view used by the format readers / writers
+// from a C-side pixpat_buffer: copies the first Layout::num_planes
+// plane pointers (as uint8_t*) and strides. `b` is dereferenced
+// without a null check.
+template <typename Layout>
+inline Buffer<Layout::num_planes> make_buffer(const pixpat_buffer* b) noexcept
+{
+    Buffer<Layout::num_planes> view{};
+    for (size_t plane = 0; plane != Layout::num_planes; ++plane) {
+        view.stride[plane] = b->strides[plane];
+        view.data[plane] = static_cast<uint8_t*>(b->planes[plane]);
+    }
+    return view;
+}
+
+// Signature of unpack_to_norm<Src>: (norm, src, by, bh, W).
+using UnpackFn = void (*)(uint8_t*, const pixpat_buffer*, size_t, size_t, size_t);
+// Signature of pack_from_norm<Snk>: (dst, norm, by, W).
+using PackFn = void (*)(const pixpat_buffer*, const uint8_t*, size_t, size_t);
+
+// Per-format runtime descriptor; s_format_info[] holds one per catalog
+// format. Field order is relied on by the aggregate initializers that
+// build that table.
+struct FormatInfo {
+ // Source -> normalized pixels; nullptr when the format is not
+ // readable in this build.
+ UnpackFn unpack;
+ // Normalized pixels -> sink; nullptr when the format is not writable
+ // in this build.
+ PackFn pack;
+ // Color kind of the format (compared against ColorKind::RGB / YUV).
+ ColorKind kind;
+ // Subsampling factors; width / height are validated as multiples of
+ // these.
+ uint8_t h_sub;
+ uint8_t v_sub;
+ // Sink block dimensions (Sink::block_h / Sink::block_w).
+ uint8_t snk_block_h;
+ uint8_t snk_block_w;
+};
+
+extern const FormatInfo s_format_info[];
+
+// Per-format build capabilities. Defined once per build by the
+// generator into s_format_caps[] (in pixpat_caps.inc); the schema is
+// here so that file is pure data.
+// Build-time capability flags for one catalog format.
+struct FormatCaps {
+ // Format can be used as a conversion source.
+ bool readable;
+ // Format can be used as a sink (conversion destination or pattern
+ // target).
+ bool writable;
+ // Format gets a fused fast path when it is the conversion source.
+ bool hot_src;
+ // Format gets a fused fast path when it is the conversion destination.
+ bool hot_dst;
+
+ // True when the format is usable in at least one direction.
+ constexpr bool enabled() const noexcept
+ {
+ return readable || writable;
+ }
+};
+
+// Per-pattern build capabilities. Generator emits s_pattern_caps[]
+// indexed by PatternId, plus a separate s_default_pattern_id singleton
+// (the fallback when pattern_name doesn't match any enabled arm).
+// Used only when PIXPAT_FEATURE_PATTERN — pixpat_pattern.cpp consumes
+// both.
+struct PatternCaps {
+ // Whether the pattern's dispatch arm is compiled into this build.
+ bool enabled;
+};
+
+class Params;
+
+// Bridge into pixpat_pattern.cpp (defined there iff PIXPAT_FEATURE_PATTERN).
+void dispatch_draw_pattern(FormatId id, const char* pattern_name,
+ const Params& params,
+ const pixpat_buffer* dst, size_t W, size_t H,
+ size_t by_start, size_t by_end, ColorSpec spec);
+
+// Bridge into pixpat_convert.cpp (defined there iff PIXPAT_FEATURE_CONVERT).
+void dispatch_convert(FormatId src_id, FormatId dst_id,
+ const pixpat_buffer* src, const pixpat_buffer* dst,
+ size_t W, size_t H,
+ size_t by_start, size_t by_end, ColorSpec spec);
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/pixpat_pattern.cpp b/subprojects/pixpat/pixpat-native/src/pixpat_pattern.cpp
new file mode 100644
index 0000000..e8ac780
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/pixpat_pattern.cpp
@@ -0,0 +1,168 @@
+// Pattern-feature TU: built only when PIXPAT_FEATURE_PATTERN is on
+// (controlled by the meson source list). pixpat.cpp's pixpat_draw_pattern
+// entry calls into dispatch_draw_pattern() below via if-constexpr; when
+// the feature is off this file isn't compiled, the discarded if-constexpr
+// branch emits no symbol reference, and the .so simply lacks these
+// symbols.
+
+#include <cassert>
+#include <cstdint>
+#include <string_view>
+#include <type_traits>
+#include <vector>
+
+#include "color.h"
+#include "error.h"
+#include "params.h"
+#include "pattern.h"
+#include "pattern_catalog.h"
+#include "pipeline.h"
+#include "pixpat_internal.h"
+
+namespace pixpat
+{
+
+// Generated: s_pattern_* enable flags + DefaultPattern alias. Included
+// inside namespace pixpat so the unqualified FormatId / s_format_catalog_count
+// references resolve.
+#include "pixpat_caps.inc"
+
+// Cold pattern path: fill a per-thread normalized line buffer with
+// Pattern samples in the pattern's native color kind, run a cross-
+// color-kind pass over the buffer if the sink wants the other kind,
+// then hand the buffer to the destination's per-format pack via
+// s_format_info. Same shape as run_norm in pixpat_convert.cpp.
+// Renders rows [by_start, by_end) of `pat` into `dst` through a
+// per-thread normalized line buffer: sample the pattern for
+// `snk_block_h` rows, convert between RGB and YUV in place when the
+// sink's color kind is the opposite of the pattern's, then pack via
+// the destination format's pack function.
+template <typename Pattern>
+static void run_pattern_norm(const Pattern& pat,
+                             FormatId dst_id, const pixpat_buffer* dst,
+                             size_t W, size_t H,
+                             size_t by_start, size_t by_end,
+                             ColorSpec spec)
+{
+    using P = typename Pattern::Pixel;
+    constexpr bool pat_is_rgb = std::is_same_v<P, RGB16>;
+
+    const auto& di = s_format_info[size_t(dst_id)];
+    const size_t bh = di.snk_block_h;
+    // Alignment is validated by the public entry point.
+    assert(W % di.snk_block_w == 0 && H % bh == 0);
+
+    // Pixels produced per pass; the buffer size is computed from RGB16
+    // and the buffer is used for either pixel kind.
+    const size_t pixels_per_pass = bh * W;
+    thread_local std::vector<uint8_t> norm;
+    norm.resize(pixels_per_pass * sizeof(RGB16));
+
+    const ColorCoeffs coeffs = coeffs_for(spec);
+    const bool need_xfm = pat_is_rgb ? (di.kind == ColorKind::YUV)
+                                     : (di.kind == ColorKind::RGB);
+
+    for (size_t by = by_start; by < by_end; by += bh) {
+        // Samples are stored sequentially, row after row.
+        P* out = reinterpret_cast<P*>(norm.data());
+        for (size_t row = 0; row < bh; ++row) {
+            const size_t y = by + row;
+            for (size_t x = 0; x < W; ++x)
+                *out++ = pat.sample(x, y, W, H);
+        }
+
+        if (need_xfm) {
+            if constexpr (pat_is_rgb)
+                norm_rgb_to_yuv(norm.data(), pixels_per_pass, coeffs);
+            else
+                norm_yuv_to_rgb(norm.data(), pixels_per_pass, coeffs);
+        }
+
+        di.pack(dst, norm.data(), by, W);
+    }
+}
+
+// Construct, ready-check, and run a pattern. Patterns whose colors
+// depend on the call's ColorSpec (e.g. native-YUV bar variants) opt
+// in by exposing a (Params, ColorSpec) constructor; the rest take
+// Params only and stay unchanged.
+// Builds a Pattern instance, rejects it if it reports not-ready, and
+// renders it. Patterns constructible from (Params, ColorSpec) receive
+// the spec; all others are constructed from Params alone. The ready()
+// check only applies to patterns that provide that member.
+template <typename Pattern>
+static void run_one_pattern(const Params& params,
+                            FormatId id, const pixpat_buffer* dst,
+                            size_t W, size_t H,
+                            size_t by_start, size_t by_end,
+                            ColorSpec spec)
+{
+    constexpr bool takes_spec =
+        std::is_constructible_v<Pattern, const Params&, ColorSpec>;
+
+    const auto build = [&] {
+        if constexpr (!takes_spec)
+            return Pattern(params);
+        else
+            return Pattern(params, spec);
+    };
+    auto pat = build();
+
+    if constexpr (requires { pat.ready(); }) {
+        if (!pat.ready())
+            throw invalid_argument("pattern parameters not accepted");
+    }
+
+    run_pattern_norm(pat, id, dst, W, H, by_start, by_end, spec);
+}
+
+// Per-pattern dispatch arm. Templated on the catalog row's RGB and
+// YUV variants (either may be `void` if the pattern has no variant
+// in that kind). When both are present, the sink kind picks the
+// matching variant so the cross-kind pass is a no-op; when only one
+// is present, the pipeline runs the cross-kind pass for opposite-
+// kind sinks.
+//
+// Wrapping in a templated helper is what keeps the binary size down:
+// with `Enabled == false`, `if constexpr (Enabled)` discards the
+// run_one_pattern reference, and because try_pattern is itself a
+// template, the discarded branch is *not instantiated* — so disabled
+// patterns emit no code, and the `void` arms of partial patterns never
+// instantiate `Pattern::Pixel` or run_pattern_norm<void>.
+// One dispatch arm. Returns false when the pattern is disabled or
+// `name` differs from `want`; otherwise renders the pattern and
+// returns true. With both variants present the sink kind picks the
+// variant (YUV sink -> Yuv, anything else -> Rgb); with a single
+// variant that one is always used.
+template <bool Enabled, typename Rgb, typename Yuv>
+static bool try_pattern(std::string_view name, std::string_view want,
+                        const Params& params,
+                        FormatId id, ColorKind sink_kind,
+                        const pixpat_buffer* dst,
+                        size_t W, size_t H,
+                        size_t by_start, size_t by_end,
+                        ColorSpec spec)
+{
+    if constexpr (!Enabled) {
+        return false;
+    } else {
+        constexpr bool has_rgb = !std::is_void_v<Rgb>;
+        constexpr bool has_yuv = !std::is_void_v<Yuv>;
+        static_assert(has_rgb || has_yuv,
+                      "pattern needs at least one variant");
+
+        if (name != want)
+            return false;
+
+        if constexpr (!has_yuv) {
+            run_one_pattern<Rgb>(params, id, dst, W, H,
+                                 by_start, by_end, spec);
+        } else if constexpr (!has_rgb) {
+            run_one_pattern<Yuv>(params, id, dst, W, H,
+                                 by_start, by_end, spec);
+        } else {
+            if (sink_kind != ColorKind::YUV)
+                run_one_pattern<Rgb>(params, id, dst, W, H,
+                                     by_start, by_end, spec);
+            else
+                run_one_pattern<Yuv>(params, id, dst, W, H,
+                                     by_start, by_end, spec);
+        }
+        return true;
+    }
+}
+
+// Draw rows [by_start, by_end) of the named pattern into `dst`.
+//
+// A null `pattern_name` is treated as "kmstest". The name is compared
+// against every catalog pattern in order; the first enabled arm whose
+// name matches renders and ends the call. There is no fallback in this
+// function: a name that matches no enabled arm (including the
+// substituted "kmstest" when that pattern is disabled) throws
+// invalid_argument.
+void dispatch_draw_pattern(FormatId id, const char* pattern_name,
+ const Params& params,
+ const pixpat_buffer* dst,
+ size_t W, size_t H,
+ size_t by_start, size_t by_end,
+ ColorSpec spec)
+{
+ // The rgb / yuv type names in the pattern list are resolved in the
+ // patterns namespace.
+ using namespace patterns;
+ // NULL pattern_name selects the default ("kmstest"); see pixpat.h.
+ const std::string_view name = pattern_name ? pattern_name : "kmstest";
+ // Sink color kind; used by try_pattern to choose between a pattern's
+ // RGB and YUV variants.
+ const ColorKind kind = s_format_info[size_t(id)].kind;
+
+// One probe per catalog pattern; disabled patterns compile down to
+// `return false`.
+#define X(label, rgb, yuv, str) \
+ if (try_pattern<s_pattern_caps[size_t(PatternId::label)].enabled, rgb, yuv>( \
+ name, str, params, id, kind, dst, W, H, by_start, by_end, spec)) \
+ return;
+ PIXPAT_PATTERN_LIST(X)
+#undef X
+
+ throw invalid_argument("unknown or disabled pattern name");
+}
+
+} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/threading.h b/subprojects/pixpat/pixpat-native/src/threading.h
new file mode 100644
index 0000000..5e7fc01
--- /dev/null
+++ b/subprojects/pixpat/pixpat-native/src/threading.h
@@ -0,0 +1,95 @@
+#pragma once
+
+#include <unistd.h>
+
+#include <cassert>
+#include <cstddef>
+#include <exception>
+#include <thread>
+#include <vector>
+
+namespace pixpat
+{
+
+// Default worker count: the number of online processors reported by
+// sysconf, clamped to [1, 16]. A failed or non-positive query yields 1.
+// The upper cap keeps per-stripe work meaningful and avoids heavy
+// oversubscription on large NUMA hosts.
+inline unsigned default_thread_count()
+{
+    constexpr long kMaxThreads = 16;
+
+    const long online = sysconf(_SC_NPROCESSORS_ONLN);
+    if (online < 1)
+        return 1;
+    return static_cast<unsigned>(online > kMaxThreads ? kMaxThreads : online);
+}
+
+/*
+ * Run `fn(start_y, end_y)` over `[0, height)` partitioned into stripes
+ * aligned to `v_sub`. Half-open ranges, matching the `for (by = 0;
+ * by < H; by += bh)` block-loop style.
+ *
+ * `fn` must be callable as `void(size_t start_y, size_t end_y)` and is
+ * invoked concurrently from multiple threads — it must be safe to call
+ * with disjoint Y-ranges in parallel. Exceptions thrown from a worker
+ * are captured and the first (by stripe index) is rethrown after all
+ * workers join.
+ *
+ * If starting a worker thread fails, no further stripes are started,
+ * the workers that are already running are joined, and the
+ * thread-creation error is rethrown (it takes precedence over worker
+ * errors, because the job is incomplete in any case).
+ *
+ * When `n_threads <= 1`, `fn` is called inline on the calling thread —
+ * no `std::thread` is spawned, no allocation occurs.
+ *
+ * Nothing is done when `height` or `v_sub` is zero.
+ */
+template<typename F>
+void run_stripes(size_t height, unsigned v_sub, unsigned n_threads, F&& fn)
+{
+    if (height == 0 || v_sub == 0)
+        return;
+
+    // Callers (pixpat_convert / pixpat_draw_pattern) validate divisibility
+    // at the entry point.
+    assert(height % v_sub == 0);
+
+    // Never use more threads than there are v_sub-sized row groups.
+    const size_t max_useful = height / v_sub;
+    if (n_threads == 0)
+        n_threads = 1;
+    if (static_cast<size_t>(n_threads) > max_useful)
+        n_threads = static_cast<unsigned>(max_useful);
+
+    if (n_threads <= 1) {
+        fn(size_t{ 0 }, height);
+        return;
+    }
+
+    // Stripe height rounded up to v_sub; last stripe absorbs the
+    // remainder.
+    size_t part_height = (height + n_threads - 1) / n_threads;
+    part_height = (part_height + v_sub - 1) / v_sub * v_sub;
+
+    std::vector<std::exception_ptr> errors(n_threads);
+    std::vector<std::thread> workers;
+    workers.reserve(n_threads);
+
+    // Set when constructing a std::thread throws. Without this, the
+    // exception would destroy `workers` while it still holds joinable
+    // threads, which calls std::terminate.
+    std::exception_ptr spawn_error;
+
+    for (unsigned i = 0; i < n_threads; i++) {
+        size_t start = i * part_height;
+        // Rounding up can leave fewer stripes than threads.
+        if (start >= height)
+            break;
+        size_t end = start + part_height;
+        if (i == n_threads - 1 || end > height)
+            end = height;
+
+        try {
+            workers.emplace_back([&, i, start, end] {
+                try {
+                    fn(start, end);
+                } catch (...) {
+                    // Each worker writes only its own slot.
+                    errors[i] = std::current_exception();
+                }
+            });
+        } catch (...) {
+            spawn_error = std::current_exception();
+            break;
+        }
+    }
+
+    for (auto& t : workers)
+        t.join();
+
+    if (spawn_error)
+        std::rethrow_exception(spawn_error);
+
+    for (auto& e : errors)
+        if (e)
+            std::rethrow_exception(e);
+}
+
+} // namespace pixpat