diff options
Diffstat (limited to 'subprojects/pixpat/pixpat-native/src')
31 files changed, 4544 insertions, 0 deletions
diff --git a/subprojects/pixpat/pixpat-native/src/color.h b/subprojects/pixpat/pixpat-native/src/color.h new file mode 100644 index 0000000..16dfb7d --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/color.h @@ -0,0 +1,199 @@ +#pragma once + +#include <algorithm> +#include <cstdint> +#include <cstring> + +#include "layout.h" + +namespace pixpat +{ + +// BT.601 / BT.709 / BT.2020 × Limited / Full range, dispatched at +// runtime via a small `ColorCoeffs` struct that the caller hoists out +// of the per-pixel loop. The convert and pattern entry points compute +// `coeffs_for(spec)` once before the stripe loop, then pass the +// resulting struct into every `ColorXfm::apply()` in the inner loop. +// This avoids per-pixel matrix branching and also the alternative of +// a 6×-instantiated template (would push the hot pivot from 121 to +// 726 `Converter` bodies). The coefficient values are loop-invariant +// broadcast scalars, so the compiler vectorizes the inner loop with +// vbroadcastss + vmulps in place of constant folds. +// +// Math runs in float. + +enum class Rec : uint8_t { BT601, BT709, BT2020 }; +enum class Range : uint8_t { Limited, Full }; + +struct ColorSpec { + Rec rec; + Range range; + constexpr bool operator==(const ColorSpec&) const = default; +}; + +inline constexpr ColorSpec kDefaultColorSpec{ Rec::BT601, Range::Limited }; + +struct ColorCoeffs { + // RGB->YUV + float kr, kg, kb; + float y_scale, y_offset; + float c_scale, c_offset; + float u_factor, v_factor; + // YUV->RGB + float y_inv, c_inv; + float gu, gv, ru, bv; + // normalized 16-bit scale (kNormMax in float, plus its inverse) + float norm_scale, norm_inv_scale; +}; + +namespace detail +{ +constexpr ColorCoeffs make_coeffs(float kr, float kg, float kb, bool full) noexcept +{ + const float y_min = full ? 0.0f : 16.0f / 255.0f; + const float y_max = full ? 1.0f : 235.0f / 255.0f; + const float c_min = full ? 0.0f : 16.0f / 255.0f; + const float c_max = full ? 
1.0f : 240.0f / 255.0f; + + const float y_scale = y_max - y_min; + const float y_offset = y_min; + const float c_scale = c_max - c_min; + const float c_offset = (c_max + c_min) * 0.5f; + + const float u_factor = 1.0f / (2.0f * (1.0f - kb)); + const float v_factor = 1.0f / (2.0f * (1.0f - kr)); + const float y_inv = 1.0f / y_scale; + const float c_inv = 1.0f / c_scale; + const float gu = -2.0f * (1.0f - kb) * kb / kg; + const float gv = -2.0f * (1.0f - kr) * kr / kg; + const float ru = 2.0f * (1.0f - kr); + const float bv = 2.0f * (1.0f - kb); + + const float norm_scale = float(kNormMax); + const float norm_inv_scale = 1.0f / norm_scale; + + return ColorCoeffs{ + kr, kg, kb, + y_scale, y_offset, + c_scale, c_offset, + u_factor, v_factor, + y_inv, c_inv, + gu, gv, ru, bv, + norm_scale, norm_inv_scale, + }; +} +} // namespace detail + +constexpr ColorCoeffs coeffs_for(ColorSpec spec) noexcept +{ + const bool full = spec.range == Range::Full; + switch (spec.rec) { + case Rec::BT601: return detail::make_coeffs(0.299f, 0.587f, 0.114f, full); + case Rec::BT2020: return detail::make_coeffs(0.2627f, 0.6780f, 0.0593f, full); + default: return detail::make_coeffs(0.2126f, 0.7152f, 0.0722f, full); + } +} + +template <typename SrcPix, typename DstPix> +struct ColorXfm; + +template <> +struct ColorXfm<RGB16, RGB16> { + static constexpr RGB16 apply(RGB16 p) noexcept { + return p; + } + static constexpr RGB16 apply(RGB16 p, const ColorCoeffs&) noexcept { + return p; + } +}; + +template <> +struct ColorXfm<YUV16, YUV16> { + static constexpr YUV16 apply(YUV16 p) noexcept { + return p; + } + static constexpr YUV16 apply(YUV16 p, const ColorCoeffs&) noexcept { + return p; + } +}; + +// Cross-color-kind conversions reset `a` to kNormMax (sinks with X +// write 0; sinks with A see fully opaque pixels). Within the same +// color kind, identity ColorXfm propagates `a` unchanged. 
+template <> +struct ColorXfm<RGB16, YUV16> { + static YUV16 apply(RGB16 rgb, const ColorCoeffs& c) noexcept + { + const float r = float(rgb.r) * c.norm_inv_scale; + const float g = float(rgb.g) * c.norm_inv_scale; + const float b = float(rgb.b) * c.norm_inv_scale; + + const float yp = c.kr * r + c.kg * g + c.kb * b; + const float u = (b - yp) * c.u_factor; + const float v = (r - yp) * c.v_factor; + + // No clamp on RGB→YUV: for any uint16_t (RGB) input the + // output Y/U/V is structurally in [0, 1] (limited-range + // chroma stays within [c_min, c_max] ⊂ [0, 1]). The +0.5 + // rounds half-up before the integer cast. + return YUV16{ + uint16_t((yp * c.y_scale + c.y_offset) * c.norm_scale + 0.5f), + uint16_t((u * c.c_scale + c.c_offset) * c.norm_scale + 0.5f), + uint16_t((v * c.c_scale + c.c_offset) * c.norm_scale + 0.5f), + kNormMax, + }; + } +}; + +template <> +struct ColorXfm<YUV16, RGB16> { + static RGB16 apply(YUV16 yuv, const ColorCoeffs& c) noexcept + { + const float yp = (float(yuv.y) * c.norm_inv_scale - c.y_offset) * c.y_inv; + const float u = (float(yuv.u) * c.norm_inv_scale - c.c_offset) * c.c_inv; + const float v = (float(yuv.v) * c.norm_inv_scale - c.c_offset) * c.c_inv; + + const float r = yp + c.ru * v; + const float g = yp + c.gu * u + c.gv * v; + const float b = yp + c.bv * u; + + // Clamp on YUV→RGB: the inverse matrix produces out-of-range + // RGB for some valid YUV inputs. Written as min/max so it + // vectorizes to vminps/vmaxps; std::clamp can defeat that. + auto pack = [&](float x) -> uint16_t { + x = x * c.norm_scale + 0.5f; + x = std::min(std::max(x, 0.0f), c.norm_scale); + return uint16_t(x); + }; + + return RGB16{ + pack(r), pack(g), pack(b), + kNormMax, + }; + } +}; + +// In-place cross-color-kind passes over a normalized line buffer. +// RGB16 and YUV16 are both 4 uint16_t with identical layout, so we +// can memcpy through the same buffer pixel-by-pixel without aliasing. 
+inline void norm_rgb_to_yuv(uint8_t* buf, size_t n, const ColorCoeffs& c) noexcept +{ + for (size_t i = 0; i < n; ++i) { + RGB16 rgb; + std::memcpy(&rgb, buf + i * sizeof(RGB16), sizeof(RGB16)); + YUV16 yuv = ColorXfm<RGB16, YUV16>::apply(rgb, c); + std::memcpy(buf + i * sizeof(YUV16), &yuv, sizeof(YUV16)); + } +} + +inline void norm_yuv_to_rgb(uint8_t* buf, size_t n, const ColorCoeffs& c) noexcept +{ + for (size_t i = 0; i < n; ++i) { + YUV16 yuv; + std::memcpy(&yuv, buf + i * sizeof(YUV16), sizeof(YUV16)); + RGB16 rgb = ColorXfm<YUV16, RGB16>::apply(yuv, c); + std::memcpy(buf + i * sizeof(RGB16), &rgb, sizeof(RGB16)); + } +} + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/error.h b/subprojects/pixpat/pixpat-native/src/error.h new file mode 100644 index 0000000..83a3596 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/error.h @@ -0,0 +1,16 @@ +#pragma once + +#include <stdexcept> + +namespace pixpat +{ + +struct error : std::runtime_error { + using std::runtime_error::runtime_error; +}; + +struct invalid_argument : error { + using error::error; +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/format_catalog.h b/subprojects/pixpat/pixpat-native/src/format_catalog.h new file mode 100644 index 0000000..287d773 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/format_catalog.h @@ -0,0 +1,140 @@ +#pragma once + +// Catalog of every pixel format the C++ side handles. The X-macro is a +// flat list of names: +// +// X(name) +// +// `name` is the canonical format identifier — both an internal FormatId +// enum entry and the public string accepted by pixpat_buffer::format. 
+// Each name resolves to a struct in `pixpat::formats::` (defined under +// pixpat-native/src/formats/) that carries: +// +// - the layout (subsampling, planes, components) +// - nested `Source` / `Sink` aliases for the matching I/O templates +// +// Adding a format = a row here AND a struct in the right +// pixpat-native/src/formats/*.h. The codegen +// (pixpat-native/codegen/gen_pixpat.py) parses this X-macro to learn +// the format set; pixpat.cpp re-expands it to build s_format_info via +// `formats::name::Source` / `formats::name::Sink`. +// +// FormatId is internal — the public C ABI deals in format names only. + +#include <cstddef> + +namespace pixpat +{ + +#define PIXPAT_FORMAT_LIST(X) \ + X(XRGB8888) \ + X(ARGB8888) \ + X(XBGR8888) \ + X(ABGR8888) \ + X(RGBX8888) \ + X(RGBA8888) \ + X(BGRX8888) \ + X(BGRA8888) \ + X(RGB888) \ + X(BGR888) \ + X(RGB332) \ + X(RGB565) \ + X(BGR565) \ + X(XRGB1555) \ + X(ARGB1555) \ + X(XBGR1555) \ + X(ABGR1555) \ + X(XRGB4444) \ + X(ARGB4444) \ + X(XBGR4444) \ + X(ABGR4444) \ + X(RGBX4444) \ + X(RGBA4444) \ + X(XRGB2101010) \ + X(ARGB2101010) \ + X(XBGR2101010) \ + X(ABGR2101010) \ + X(RGBX1010102) \ + X(RGBA1010102) \ + X(BGRX1010102) \ + X(BGRA1010102) \ + X(ABGR16161616) \ + X(NV12) \ + X(NV21) \ + X(NV16) \ + X(NV61) \ + X(P030) \ + X(P230) \ + X(YUV420) \ + X(YVU420) \ + X(YUV422) \ + X(YVU422) \ + X(YUV444) \ + X(YVU444) \ + X(T430) \ + X(VUY888) \ + X(XVUY8888) \ + X(XVUY2101010) \ + X(AVUY16161616) \ + X(YUYV) \ + X(YVYU) \ + X(UYVY) \ + X(VYUY) \ + X(Y210) \ + X(Y212) \ + X(Y216) \ + X(Y8) \ + X(Y10) \ + X(Y12) \ + X(Y16) \ + X(R8) \ + X(XYYY2101010) \ + X(Y10P) \ + X(Y12P) \ + X(SRGGB8) \ + X(SBGGR8) \ + X(SGRBG8) \ + X(SGBRG8) \ + X(SRGGB10) \ + X(SBGGR10) \ + X(SGRBG10) \ + X(SGBRG10) \ + X(SRGGB12) \ + X(SBGGR12) \ + X(SGRBG12) \ + X(SGBRG12) \ + X(SRGGB16) \ + X(SBGGR16) \ + X(SGRBG16) \ + X(SGBRG16) \ + X(SRGGB10P) \ + X(SBGGR10P) \ + X(SGRBG10P) \ + X(SGBRG10P) \ + X(SRGGB12P) \ + X(SBGGR12P) \ + X(SGRBG12P) 
\ + X(SGBRG12P) + +enum class FormatId { +#define X(name) name, + PIXPAT_FORMAT_LIST(X) +#undef X + Unknown, +}; + +struct FormatEntry { + const char* name; + FormatId id; +}; + +inline constexpr FormatEntry s_format_table[] = { +#define X(name) { #name, FormatId::name }, + PIXPAT_FORMAT_LIST(X) +#undef X +}; + +inline constexpr size_t s_format_catalog_count = + sizeof(s_format_table) / sizeof(s_format_table[0]); + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/formats.h b/subprojects/pixpat/pixpat-native/src/formats.h new file mode 100644 index 0000000..68bdeec --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/formats.h @@ -0,0 +1,13 @@ +#pragma once + +// Aggregator: every named layout the X-macro registers lives in one of +// the headers under formats/, organized by color kind. Format names +// follow the kms++/pixutils convention (see formats/rgb.h for the +// longer note; the YUYV group is an exception, see formats/yuv_packed.h). + +#include "formats/rgb.h" +#include "formats/yuv_semiplanar.h" +#include "formats/yuv_planar.h" +#include "formats/yuv_packed.h" +#include "formats/grayscale.h" +#include "formats/bayer.h" diff --git a/subprojects/pixpat/pixpat-native/src/formats/bayer.h b/subprojects/pixpat/pixpat-native/src/formats/bayer.h new file mode 100644 index 0000000..057c342 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/formats/bayer.h @@ -0,0 +1,97 @@ +#pragma once + +// Bayer raw layouts. Each pixel carries one of R/G/B selected by +// (x mod 2, y mod 2) and BayerOrder; the pattern is on the +// BayerSource/BayerSink template, not the layout itself. Storage shape +// is single-component (C::Y reused as the storage tag) so the same +// 8/10/12/16-bit shapes apply across all four phase patterns. +// +// Each format is a distinct struct (rather than a type alias of one +// another) so each format type can carry its own pattern-specific +// Source/Sink aliases. 
The shared bit layout lives in a base struct per +// (depth,packing) combination. +// +// ColorKind is RGB because the normalized pixel passed through ColorXfm +// is RGB16 — the sink picks one of r/g/b at write time, and the +// source nearest-neighbor demosaics into RGB16 at read time. + +#include "../layout.h" +#include "../io/bayer.h" + +namespace pixpat::formats +{ + +namespace bayer_detail +{ + +// Per-(depth,packing) base layouts. Every Bayer format derives from +// one of these and pins its own pattern-specific I/O templates. +using Bayer8 = Layout<ColorKind::RGB, 1, 1, + Plane<uint8_t, Comp { C::Y, 8, 0 }> >; +using Bayer10 = Layout<ColorKind::RGB, 1, 1, + Plane<uint16_t, Comp { C::Y, 10, 0 }, Comp { C::X, 6, 10 }> >; +using Bayer12 = Layout<ColorKind::RGB, 1, 1, + Plane<uint16_t, Comp { C::Y, 12, 0 }, Comp { C::X, 4, 12 }> >; +using Bayer16 = Layout<ColorKind::RGB, 1, 1, + Plane<uint16_t, Comp { C::Y, 16, 0 }> >; +// MIPI CSI-2 packed Bayer (10P: 4 pix in 5 bytes; 12P: 2 pix in 3 +// bytes). The Layout doesn't capture the packed bit layout — the +// BayerPackedSink hand-rolls the byte writes. uint8_t plane shape is +// a placeholder so the dispatch plumbing is uniform. +using Bayer10P = Layout<ColorKind::RGB, 1, 1, + Plane<uint8_t, Comp { C::Y, 8, 0 }> >; +using Bayer12P = Layout<ColorKind::RGB, 1, 1, + Plane<uint8_t, Comp { C::Y, 8, 0 }> >; + +} // namespace bayer_detail + +// Unpacked Bayer (4 patterns × 4 bit depths). 
+#define PIXPAT_BAYER(name, base, pat) \ + struct name : bayer_detail::base { \ + using Source = BayerSource_ ## pat<name>; \ + using Sink = BayerSink_ ## pat<name>; \ + } + +PIXPAT_BAYER(SRGGB8, Bayer8, RGGB); +PIXPAT_BAYER(SBGGR8, Bayer8, BGGR); +PIXPAT_BAYER(SGRBG8, Bayer8, GRBG); +PIXPAT_BAYER(SGBRG8, Bayer8, GBRG); + +PIXPAT_BAYER(SRGGB10, Bayer10, RGGB); +PIXPAT_BAYER(SBGGR10, Bayer10, BGGR); +PIXPAT_BAYER(SGRBG10, Bayer10, GRBG); +PIXPAT_BAYER(SGBRG10, Bayer10, GBRG); + +PIXPAT_BAYER(SRGGB12, Bayer12, RGGB); +PIXPAT_BAYER(SBGGR12, Bayer12, BGGR); +PIXPAT_BAYER(SGRBG12, Bayer12, GRBG); +PIXPAT_BAYER(SGBRG12, Bayer12, GBRG); + +PIXPAT_BAYER(SRGGB16, Bayer16, RGGB); +PIXPAT_BAYER(SBGGR16, Bayer16, BGGR); +PIXPAT_BAYER(SGRBG16, Bayer16, GRBG); +PIXPAT_BAYER(SGBRG16, Bayer16, GBRG); + +#undef PIXPAT_BAYER + +// MIPI-packed Bayer: pattern + bit depth both encoded in the I/O +// template name (BayerPackedSource_RGGB10, ...). +#define PIXPAT_BAYER_PACKED(name, base, pat_depth) \ + struct name : bayer_detail::base { \ + using Source = BayerPackedSource_ ## pat_depth<name>; \ + using Sink = BayerPackedSink_ ## pat_depth<name>; \ + } + +PIXPAT_BAYER_PACKED(SRGGB10P, Bayer10P, RGGB10); +PIXPAT_BAYER_PACKED(SBGGR10P, Bayer10P, BGGR10); +PIXPAT_BAYER_PACKED(SGRBG10P, Bayer10P, GRBG10); +PIXPAT_BAYER_PACKED(SGBRG10P, Bayer10P, GBRG10); + +PIXPAT_BAYER_PACKED(SRGGB12P, Bayer12P, RGGB12); +PIXPAT_BAYER_PACKED(SBGGR12P, Bayer12P, BGGR12); +PIXPAT_BAYER_PACKED(SGRBG12P, Bayer12P, GRBG12); +PIXPAT_BAYER_PACKED(SGBRG12P, Bayer12P, GBRG12); + +#undef PIXPAT_BAYER_PACKED + +} // namespace pixpat::formats diff --git a/subprojects/pixpat/pixpat-native/src/formats/grayscale.h b/subprojects/pixpat/pixpat-native/src/formats/grayscale.h new file mode 100644 index 0000000..b1cd294 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/formats/grayscale.h @@ -0,0 +1,78 @@ +#pragma once + +// Single-component-per-pixel formats. 
Most are grayscale (Y) modeled as +// a YUV format with synthesized neutral chroma; R8 is the RGB-kind +// counterpart, modeled grey-style with G=B=R on read. Y10/Y12 carry an +// explicit X padding bitfield. XYYY2101010 is multi-pixel-per-word: 3 Y +// samples in 32 bits. + +#include "../layout.h" +#include "../io/gray.h" +#include "../io/gray_packed.h" +#include "../io/mono_rgb.h" + +namespace pixpat::formats +{ + +#define PIXPAT_GRAY(name, ...) \ + struct name : Layout<ColorKind::YUV, 1, 1, __VA_ARGS__> { \ + using Source = GraySource<name>; \ + using Sink = GraySink<name>; \ + } + +PIXPAT_GRAY(Y8, + Plane<uint8_t, Comp{ C::Y, 8, 0 }>); + +PIXPAT_GRAY(Y10, + Plane<uint16_t, Comp{ C::Y, 10, 0 }, Comp{ C::X, 6, 10 }>); + +PIXPAT_GRAY(Y12, + Plane<uint16_t, Comp{ C::Y, 12, 0 }, Comp{ C::X, 4, 12 }>); + +PIXPAT_GRAY(Y16, + Plane<uint16_t, Comp{ C::Y, 16, 0 }>); + +#undef PIXPAT_GRAY + +// R8: single 8-bit R channel. Read synthesizes G=B=R; write encodes R +// and drops G/B/A. Symmetric to Y8 but ColorKind::RGB so cross-pipeline +// conversions go through the RGB->YUV ColorXfm direction. +struct R8 : Layout<ColorKind::RGB, 1, 1, + Plane<uint8_t, Comp{ C::R, 8, 0 }> > { + using Source = MonoRGBSource<R8>; + using Sink = MonoRGBSink<R8>; +}; + +struct XYYY2101010 : Layout<ColorKind::YUV, 1, 1, + Plane<uint32_t, + Comp{ C::Y, 10, 0 }, + Comp{ C::Y, 10, 10 }, + Comp{ C::Y, 10, 20 }, + Comp{ C::X, 2, 30 }> > { + using Source = MultiPixelGraySource<XYYY2101010>; + using Sink = MultiPixelGraySink<XYYY2101010>; +}; + +// MIPI CSI-2 packed grayscale (Y10P / Y12P). The Layout doesn't capture +// the packed bit layout — GrayPackedSource/Sink delegate to the shared +// CSI-2 helper (io/csi2.h). uint8_t plane shape is a placeholder so +// dispatch plumbing is uniform (mirrors bayer_detail::Bayer10P/12P). 
+namespace gray_csi2_detail +{ +using Gray10P = Layout<ColorKind::YUV, 1, 1, + Plane<uint8_t, Comp { C::Y, 8, 0 }> >; +using Gray12P = Layout<ColorKind::YUV, 1, 1, + Plane<uint8_t, Comp { C::Y, 8, 0 }> >; +} // namespace gray_csi2_detail + +struct Y10P : gray_csi2_detail::Gray10P { + using Source = GrayPackedSource<Y10P, 10>; + using Sink = GrayPackedSink<Y10P, 10>; +}; + +struct Y12P : gray_csi2_detail::Gray12P { + using Source = GrayPackedSource<Y12P, 12>; + using Sink = GrayPackedSink<Y12P, 12>; +}; + +} // namespace pixpat::formats diff --git a/subprojects/pixpat/pixpat-native/src/formats/rgb.h b/subprojects/pixpat/pixpat-native/src/formats/rgb.h new file mode 100644 index 0000000..19d007a --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/formats/rgb.h @@ -0,0 +1,267 @@ +#pragma once + +// RGB packed layouts: 8-bit / 16-bit (sub-byte) / 32-bit (10-bit) / +// 64-bit-normalized, all single-plane single-pixel-per-storage-word. +// Names follow the kms++/pixutils register-order convention (MSB-first +// in the storage word), so XRGB8888 has X at bits 31..24 and B at 7..0. + +#include "../layout.h" +#include "../io/packed.h" + +namespace pixpat::formats +{ + +// Helper: every format in this file pairs with PackedSource/PackedSink. +// Each format struct exposes Source / Sink aliases so the catalog row +// in format_catalog.h can stay name-only. +#define PIXPAT_RGB_PACKED(name, ...) \ + struct name : Layout<ColorKind::RGB, 1, 1, __VA_ARGS__> { \ + using Source = PackedSource<name>; \ + using Sink = PackedSink<name>; \ + } + +// --------------------------------------------------------------------- +// 32-bit packed RGB, 8-bit components. 
+// --------------------------------------------------------------------- + +PIXPAT_RGB_PACKED(XRGB8888, + Plane<uint32_t, + Comp{ C::B, 8, 0 }, + Comp{ C::G, 8, 8 }, + Comp{ C::R, 8, 16 }, + Comp{ C::X, 8, 24 }>); + +PIXPAT_RGB_PACKED(ARGB8888, + Plane<uint32_t, + Comp{ C::B, 8, 0 }, + Comp{ C::G, 8, 8 }, + Comp{ C::R, 8, 16 }, + Comp{ C::A, 8, 24 }>); + +PIXPAT_RGB_PACKED(XBGR8888, + Plane<uint32_t, + Comp{ C::R, 8, 0 }, + Comp{ C::G, 8, 8 }, + Comp{ C::B, 8, 16 }, + Comp{ C::X, 8, 24 }>); + +PIXPAT_RGB_PACKED(ABGR8888, + Plane<uint32_t, + Comp{ C::R, 8, 0 }, + Comp{ C::G, 8, 8 }, + Comp{ C::B, 8, 16 }, + Comp{ C::A, 8, 24 }>); + +PIXPAT_RGB_PACKED(RGBX8888, + Plane<uint32_t, + Comp{ C::X, 8, 0 }, + Comp{ C::B, 8, 8 }, + Comp{ C::G, 8, 16 }, + Comp{ C::R, 8, 24 }>); + +PIXPAT_RGB_PACKED(RGBA8888, + Plane<uint32_t, + Comp{ C::A, 8, 0 }, + Comp{ C::B, 8, 8 }, + Comp{ C::G, 8, 16 }, + Comp{ C::R, 8, 24 }>); + +PIXPAT_RGB_PACKED(BGRX8888, + Plane<uint32_t, + Comp{ C::X, 8, 0 }, + Comp{ C::R, 8, 8 }, + Comp{ C::G, 8, 16 }, + Comp{ C::B, 8, 24 }>); + +PIXPAT_RGB_PACKED(BGRA8888, + Plane<uint32_t, + Comp{ C::A, 8, 0 }, + Comp{ C::R, 8, 8 }, + Comp{ C::G, 8, 16 }, + Comp{ C::B, 8, 24 }>); + +// --------------------------------------------------------------------- +// 24-bit packed RGB, three bytes per pixel. storage_t is uint32_t but +// only bytes_per_pixel = 3 are read/written via memcpy. +// --------------------------------------------------------------------- + +PIXPAT_RGB_PACKED(RGB888, + Plane<uint32_t, + Comp{ C::B, 8, 0 }, + Comp{ C::G, 8, 8 }, + Comp{ C::R, 8, 16 }>); + +PIXPAT_RGB_PACKED(BGR888, + Plane<uint32_t, + Comp{ C::R, 8, 0 }, + Comp{ C::G, 8, 8 }, + Comp{ C::B, 8, 16 }>); + +// --------------------------------------------------------------------- +// 16-bit packed RGB, sub-byte components. 
+// --------------------------------------------------------------------- + +PIXPAT_RGB_PACKED(RGB565, + Plane<uint16_t, + Comp{ C::B, 5, 0 }, + Comp{ C::G, 6, 5 }, + Comp{ C::R, 5, 11 }>); + +PIXPAT_RGB_PACKED(BGR565, + Plane<uint16_t, + Comp{ C::R, 5, 0 }, + Comp{ C::G, 6, 5 }, + Comp{ C::B, 5, 11 }>); + +// 8-bit packed RGB: 3-bit R / 3-bit G / 2-bit B in a single byte. + +PIXPAT_RGB_PACKED(RGB332, + Plane<uint8_t, + Comp{ C::B, 2, 0 }, + Comp{ C::G, 3, 2 }, + Comp{ C::R, 3, 5 }>); + +PIXPAT_RGB_PACKED(XRGB1555, + Plane<uint16_t, + Comp{ C::B, 5, 0 }, + Comp{ C::G, 5, 5 }, + Comp{ C::R, 5, 10 }, + Comp{ C::X, 1, 15 }>); + +PIXPAT_RGB_PACKED(ARGB1555, + Plane<uint16_t, + Comp{ C::B, 5, 0 }, + Comp{ C::G, 5, 5 }, + Comp{ C::R, 5, 10 }, + Comp{ C::A, 1, 15 }>); + +PIXPAT_RGB_PACKED(XBGR1555, + Plane<uint16_t, + Comp{ C::R, 5, 0 }, + Comp{ C::G, 5, 5 }, + Comp{ C::B, 5, 10 }, + Comp{ C::X, 1, 15 }>); + +PIXPAT_RGB_PACKED(ABGR1555, + Plane<uint16_t, + Comp{ C::R, 5, 0 }, + Comp{ C::G, 5, 5 }, + Comp{ C::B, 5, 10 }, + Comp{ C::A, 1, 15 }>); + +PIXPAT_RGB_PACKED(XRGB4444, + Plane<uint16_t, + Comp{ C::B, 4, 0 }, + Comp{ C::G, 4, 4 }, + Comp{ C::R, 4, 8 }, + Comp{ C::X, 4, 12 }>); + +PIXPAT_RGB_PACKED(ARGB4444, + Plane<uint16_t, + Comp{ C::B, 4, 0 }, + Comp{ C::G, 4, 4 }, + Comp{ C::R, 4, 8 }, + Comp{ C::A, 4, 12 }>); + +PIXPAT_RGB_PACKED(XBGR4444, + Plane<uint16_t, + Comp{ C::R, 4, 0 }, + Comp{ C::G, 4, 4 }, + Comp{ C::B, 4, 8 }, + Comp{ C::X, 4, 12 }>); + +PIXPAT_RGB_PACKED(ABGR4444, + Plane<uint16_t, + Comp{ C::R, 4, 0 }, + Comp{ C::G, 4, 4 }, + Comp{ C::B, 4, 8 }, + Comp{ C::A, 4, 12 }>); + +PIXPAT_RGB_PACKED(RGBX4444, + Plane<uint16_t, + Comp{ C::X, 4, 0 }, + Comp{ C::B, 4, 4 }, + Comp{ C::G, 4, 8 }, + Comp{ C::R, 4, 12 }>); + +PIXPAT_RGB_PACKED(RGBA4444, + Plane<uint16_t, + Comp{ C::A, 4, 0 }, + Comp{ C::B, 4, 4 }, + Comp{ C::G, 4, 8 }, + Comp{ C::R, 4, 12 }>); + +// --------------------------------------------------------------------- +// 32-bit packed RGB, 10-bit 
components. +// --------------------------------------------------------------------- + +PIXPAT_RGB_PACKED(XRGB2101010, + Plane<uint32_t, + Comp{ C::B, 10, 0 }, + Comp{ C::G, 10, 10 }, + Comp{ C::R, 10, 20 }, + Comp{ C::X, 2, 30 }>); + +PIXPAT_RGB_PACKED(ARGB2101010, + Plane<uint32_t, + Comp{ C::B, 10, 0 }, + Comp{ C::G, 10, 10 }, + Comp{ C::R, 10, 20 }, + Comp{ C::A, 2, 30 }>); + +PIXPAT_RGB_PACKED(XBGR2101010, + Plane<uint32_t, + Comp{ C::R, 10, 0 }, + Comp{ C::G, 10, 10 }, + Comp{ C::B, 10, 20 }, + Comp{ C::X, 2, 30 }>); + +PIXPAT_RGB_PACKED(ABGR2101010, + Plane<uint32_t, + Comp{ C::R, 10, 0 }, + Comp{ C::G, 10, 10 }, + Comp{ C::B, 10, 20 }, + Comp{ C::A, 2, 30 }>); + +PIXPAT_RGB_PACKED(RGBX1010102, + Plane<uint32_t, + Comp{ C::X, 2, 0 }, + Comp{ C::B, 10, 2 }, + Comp{ C::G, 10, 12 }, + Comp{ C::R, 10, 22 }>); + +PIXPAT_RGB_PACKED(RGBA1010102, + Plane<uint32_t, + Comp{ C::A, 2, 0 }, + Comp{ C::B, 10, 2 }, + Comp{ C::G, 10, 12 }, + Comp{ C::R, 10, 22 }>); + +PIXPAT_RGB_PACKED(BGRX1010102, + Plane<uint32_t, + Comp{ C::X, 2, 0 }, + Comp{ C::R, 10, 2 }, + Comp{ C::G, 10, 12 }, + Comp{ C::B, 10, 22 }>); + +PIXPAT_RGB_PACKED(BGRA1010102, + Plane<uint32_t, + Comp{ C::A, 2, 0 }, + Comp{ C::R, 10, 2 }, + Comp{ C::G, 10, 12 }, + Comp{ C::B, 10, 22 }>); + +// --------------------------------------------------------------------- +// 64-bit normalized wide RGB (16 bits per component). 
+// --------------------------------------------------------------------- + +PIXPAT_RGB_PACKED(ABGR16161616, + Plane<uint64_t, + Comp{ C::R, 16, 0 }, + Comp{ C::G, 16, 16 }, + Comp{ C::B, 16, 32 }, + Comp{ C::A, 16, 48 }>); + +#undef PIXPAT_RGB_PACKED + +} // namespace pixpat::formats diff --git a/subprojects/pixpat/pixpat-native/src/formats/yuv_packed.h b/subprojects/pixpat/pixpat-native/src/formats/yuv_packed.h new file mode 100644 index 0000000..8e88f10 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/formats/yuv_packed.h @@ -0,0 +1,136 @@ +#pragma once + +// Packed YUV layouts: +// VUY888 — 1 pixel / 24-bit, 8-bit Y/U/V (storage uint32_t, +// bytes_per_pixel = 3; parallels BGR888 in the YUV +// register order) +// XVUY8888 — 1 pixel / 32-bit word, 8-bit Y/U/V + 8-bit padding +// XVUY2101010 — 1 pixel / 32-bit word, 10-bit Y/U/V + 2-bit padding +// AVUY16161616 — 1 pixel / 64-bit word, 16-bit Y/U/V/A (normalized) +// YUYV / YVYU / UYVY / VYUY — 4:2:2, 2 pixels / 32-bit word +// Y210 / Y212 / Y216 — 4:2:2, 2 pixels / 64-bit word, with +// each component MSB-aligned in a 16-bit slot +// +// XVUY/AVUY name is register MSB-first (X/A in the top bits). The +// YUYV names follow V4L2 / pixpat memory-byte order (Y0 in byte 0), +// so shifts ascend in name order — opposite of XRGB-style. + +#include "../layout.h" +#include "../io/packed.h" +#include "../io/packed_yuv.h" + +namespace pixpat::formats +{ + +// 1-pixel-per-word packed (single Pixel/Word; uses PackedSource/Sink). 
+ +struct VUY888 : Layout<ColorKind::YUV, 1, 1, + Plane<uint32_t, + Comp{ C::Y, 8, 0 }, + Comp{ C::U, 8, 8 }, + Comp{ C::V, 8, 16 }> > { + using Source = PackedSource<VUY888>; + using Sink = PackedSink<VUY888>; +}; + +struct XVUY8888 : Layout<ColorKind::YUV, 1, 1, + Plane<uint32_t, + Comp{ C::Y, 8, 0 }, + Comp{ C::U, 8, 8 }, + Comp{ C::V, 8, 16 }, + Comp{ C::X, 8, 24 }> > { + using Source = PackedSource<XVUY8888>; + using Sink = PackedSink<XVUY8888>; +}; + +struct XVUY2101010 : Layout<ColorKind::YUV, 1, 1, + Plane<uint32_t, + Comp{ C::Y, 10, 0 }, + Comp{ C::U, 10, 10 }, + Comp{ C::V, 10, 20 }, + Comp{ C::X, 2, 30 }> > { + using Source = PackedSource<XVUY2101010>; + using Sink = PackedSink<XVUY2101010>; +}; + +struct AVUY16161616 : Layout<ColorKind::YUV, 1, 1, + Plane<uint64_t, + Comp{ C::Y, 16, 0 }, + Comp{ C::U, 16, 16 }, + Comp{ C::V, 16, 32 }, + Comp{ C::A, 16, 48 }> > { + using Source = PackedSource<AVUY16161616>; + using Sink = PackedSink<AVUY16161616>; +}; + +// 2-pixel-per-word 4:2:2 (uses PackedYUVSource/Sink). + +#define PIXPAT_PACKED_YUV422(name, ...) \ + struct name : Layout<ColorKind::YUV, 2, 1, \ + Plane<uint32_t, __VA_ARGS__> > { \ + using Source = PackedYUVSource<name>; \ + using Sink = PackedYUVSink<name>; \ + } + +PIXPAT_PACKED_YUV422(YUYV, + Comp{ C::Y, 8, 0 }, Comp{ C::U, 8, 8 }, + Comp{ C::Y, 8, 16 }, Comp{ C::V, 8, 24 }); + +PIXPAT_PACKED_YUV422(YVYU, + Comp{ C::Y, 8, 0 }, Comp{ C::V, 8, 8 }, + Comp{ C::Y, 8, 16 }, Comp{ C::U, 8, 24 }); + +PIXPAT_PACKED_YUV422(UYVY, + Comp{ C::U, 8, 0 }, Comp{ C::Y, 8, 8 }, + Comp{ C::V, 8, 16 }, Comp{ C::Y, 8, 24 }); + +PIXPAT_PACKED_YUV422(VYUY, + Comp{ C::V, 8, 0 }, Comp{ C::Y, 8, 8 }, + Comp{ C::U, 8, 16 }, Comp{ C::Y, 8, 24 }); + +#undef PIXPAT_PACKED_YUV422 + +// Y210 / Y212 / Y216: 4:2:2, 2 pixels per 64-bit word, MSB-aligned in +// 16-bit slots. Y210 has 6 unused LSBs per slot, Y212 has 4, Y216 has +// none. 
The X padding entries pad total_bits to 64 so bytes_per_pixel +// resolves to 8; PackedYUVSink leaves their slots zero via the +// value-array zero-init (see io/packed_yuv.h). +struct Y210 : Layout<ColorKind::YUV, 2, 1, + Plane<uint64_t, + Comp{ C::X, 6, 0 }, + Comp{ C::Y, 10, 6 }, + Comp{ C::X, 6, 16 }, + Comp{ C::U, 10, 22 }, + Comp{ C::X, 6, 32 }, + Comp{ C::Y, 10, 38 }, + Comp{ C::X, 6, 48 }, + Comp{ C::V, 10, 54 }> > { + using Source = PackedYUVSource<Y210>; + using Sink = PackedYUVSink<Y210>; +}; + +struct Y212 : Layout<ColorKind::YUV, 2, 1, + Plane<uint64_t, + Comp{ C::X, 4, 0 }, + Comp{ C::Y, 12, 4 }, + Comp{ C::X, 4, 16 }, + Comp{ C::U, 12, 20 }, + Comp{ C::X, 4, 32 }, + Comp{ C::Y, 12, 36 }, + Comp{ C::X, 4, 48 }, + Comp{ C::V, 12, 52 }> > { + using Source = PackedYUVSource<Y212>; + using Sink = PackedYUVSink<Y212>; +}; + +struct Y216 : Layout<ColorKind::YUV, 2, 1, + Plane<uint64_t, + Comp{ C::Y, 16, 0 }, + Comp{ C::U, 16, 16 }, + Comp{ C::Y, 16, 32 }, + Comp{ C::V, 16, 48 }> > { + using Source = PackedYUVSource<Y216>; + using Sink = PackedYUVSink<Y216>; +}; + +} // namespace pixpat::formats diff --git a/subprojects/pixpat/pixpat-native/src/formats/yuv_planar.h b/subprojects/pixpat/pixpat-native/src/formats/yuv_planar.h new file mode 100644 index 0000000..bb6a415 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/formats/yuv_planar.h @@ -0,0 +1,76 @@ +#pragma once + +// YUV planar layouts: 3 separate planes (Y, then U/V or V/U), 8-bit +// components. +// YUV420/YVU420 — h_sub=2, v_sub=2 (a.k.a. I420 / YV12) +// YUV422/YVU422 — h_sub=2, v_sub=1 +// YUV444/YVU444 — h_sub=1, v_sub=1 +// T430 — multi-pixel-per-word planar 4:4:4. + +#include "../layout.h" +#include "../io/planar.h" + +namespace pixpat::formats +{ + +#define PIXPAT_PLANAR(name, ...) 
\ + struct name : Layout<ColorKind::YUV, __VA_ARGS__> { \ + using Source = PlanarSource<name>; \ + using Sink = PlanarSink<name>; \ + } + +PIXPAT_PLANAR(YUV420, 2, 2, + Plane<uint8_t, Comp{ C::Y, 8, 0 }>, + Plane<uint8_t, Comp{ C::U, 8, 0 }>, + Plane<uint8_t, Comp{ C::V, 8, 0 }>); + +PIXPAT_PLANAR(YVU420, 2, 2, + Plane<uint8_t, Comp{ C::Y, 8, 0 }>, + Plane<uint8_t, Comp{ C::V, 8, 0 }>, + Plane<uint8_t, Comp{ C::U, 8, 0 }>); + +PIXPAT_PLANAR(YUV422, 2, 1, + Plane<uint8_t, Comp{ C::Y, 8, 0 }>, + Plane<uint8_t, Comp{ C::U, 8, 0 }>, + Plane<uint8_t, Comp{ C::V, 8, 0 }>); + +PIXPAT_PLANAR(YVU422, 2, 1, + Plane<uint8_t, Comp{ C::Y, 8, 0 }>, + Plane<uint8_t, Comp{ C::V, 8, 0 }>, + Plane<uint8_t, Comp{ C::U, 8, 0 }>); + +PIXPAT_PLANAR(YUV444, 1, 1, + Plane<uint8_t, Comp{ C::Y, 8, 0 }>, + Plane<uint8_t, Comp{ C::U, 8, 0 }>, + Plane<uint8_t, Comp{ C::V, 8, 0 }>); + +PIXPAT_PLANAR(YVU444, 1, 1, + Plane<uint8_t, Comp{ C::Y, 8, 0 }>, + Plane<uint8_t, Comp{ C::V, 8, 0 }>, + Plane<uint8_t, Comp{ C::U, 8, 0 }>); + +#undef PIXPAT_PLANAR + +// T430: 3-plane multi-pixel-per-word planar 4:4:4. Each plane carries +// 3 × 10-bit samples per uint32_t plus a 2-bit X padding bit-field. 
+struct T430 : Layout<ColorKind::YUV, 1, 1, + Plane<uint32_t, + Comp{ C::Y, 10, 0 }, + Comp{ C::Y, 10, 10 }, + Comp{ C::Y, 10, 20 }, + Comp{ C::X, 2, 30 }>, + Plane<uint32_t, + Comp{ C::U, 10, 0 }, + Comp{ C::U, 10, 10 }, + Comp{ C::U, 10, 20 }, + Comp{ C::X, 2, 30 }>, + Plane<uint32_t, + Comp{ C::V, 10, 0 }, + Comp{ C::V, 10, 10 }, + Comp{ C::V, 10, 20 }, + Comp{ C::X, 2, 30 }> > { + using Source = MultiPixelPlanarSource<T430>; + using Sink = MultiPixelPlanarSink<T430>; +}; + +} // namespace pixpat::formats diff --git a/subprojects/pixpat/pixpat-native/src/formats/yuv_semiplanar.h b/subprojects/pixpat/pixpat-native/src/formats/yuv_semiplanar.h new file mode 100644 index 0000000..34aea22 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/formats/yuv_semiplanar.h @@ -0,0 +1,79 @@ +#pragma once + +// YUV semiplanar layouts: Y plane + interleaved UV plane. +// NV12/NV21 — 4:2:0 (h_sub=2, v_sub=2) +// NV16/NV61 — 4:2:2 (h_sub=2, v_sub=1) +// P030/P230 — multi-pixel-per-word semiplanar (10-bit Y triplets). 
+ +#include "../layout.h" +#include "../io/semiplanar.h" + +namespace pixpat::formats +{ + +struct NV12 : Layout<ColorKind::YUV, 2, 2, + Plane<uint8_t, Comp{ C::Y, 8, 0 }>, + Plane<uint16_t, Comp{ C::U, 8, 0 }, Comp{ C::V, 8, 8 }> > { + using Source = SemiplanarSource<NV12>; + using Sink = SemiplanarSink<NV12>; +}; + +struct NV21 : Layout<ColorKind::YUV, 2, 2, + Plane<uint8_t, Comp{ C::Y, 8, 0 }>, + Plane<uint16_t, Comp{ C::V, 8, 0 }, Comp{ C::U, 8, 8 }> > { + using Source = SemiplanarSource<NV21>; + using Sink = SemiplanarSink<NV21>; +}; + +struct NV16 : Layout<ColorKind::YUV, 2, 1, + Plane<uint8_t, Comp{ C::Y, 8, 0 }>, + Plane<uint16_t, Comp{ C::U, 8, 0 }, Comp{ C::V, 8, 8 }> > { + using Source = SemiplanarSource<NV16>; + using Sink = SemiplanarSink<NV16>; +}; + +struct NV61 : Layout<ColorKind::YUV, 2, 1, + Plane<uint8_t, Comp{ C::Y, 8, 0 }>, + Plane<uint16_t, Comp{ C::V, 8, 0 }, Comp{ C::U, 8, 8 }> > { + using Source = SemiplanarSource<NV61>; + using Sink = SemiplanarSink<NV61>; +}; + +// Multi-pixel-per-word semiplanar (P030: 4:2:0, P230: 4:2:2). Y plane +// holds 3 × 10-bit Y samples per uint32_t (top 2 bits unused). UV plane +// holds 3 × (Cb,Cr) pairs per uint64_t (10 bits each, with 2-bit gaps +// at bits 30-31 and 62-63 — left implicit, no X declared). 
+ +struct P030 : Layout<ColorKind::YUV, 2, 2, + Plane<uint32_t, + Comp{ C::Y, 10, 0 }, + Comp{ C::Y, 10, 10 }, + Comp{ C::Y, 10, 20 }>, + Plane<uint64_t, + Comp{ C::U, 10, 0 }, + Comp{ C::V, 10, 10 }, + Comp{ C::U, 10, 20 }, + Comp{ C::V, 10, 32 }, + Comp{ C::U, 10, 42 }, + Comp{ C::V, 10, 52 }> > { + using Source = MultiPixelSemiplanarSource<P030>; + using Sink = MultiPixelSemiplanarSink<P030>; +}; + +struct P230 : Layout<ColorKind::YUV, 2, 1, + Plane<uint32_t, + Comp{ C::Y, 10, 0 }, + Comp{ C::Y, 10, 10 }, + Comp{ C::Y, 10, 20 }>, + Plane<uint64_t, + Comp{ C::U, 10, 0 }, + Comp{ C::V, 10, 10 }, + Comp{ C::U, 10, 20 }, + Comp{ C::V, 10, 32 }, + Comp{ C::U, 10, 42 }, + Comp{ C::V, 10, 52 }> > { + using Source = MultiPixelSemiplanarSource<P230>; + using Sink = MultiPixelSemiplanarSink<P230>; +}; + +} // namespace pixpat::formats diff --git a/subprojects/pixpat/pixpat-native/src/io.h b/subprojects/pixpat/pixpat-native/src/io.h new file mode 100644 index 0000000..af24232 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io.h @@ -0,0 +1,13 @@ +#pragma once + +// Aggregator: every Source / Sink template lives in one of the +// per-iteration-shape headers under io/. Encode/decode helpers and +// load_word/store_word are in io/detail.h, used by all the others. + +#include "io/detail.h" +#include "io/packed.h" +#include "io/semiplanar.h" +#include "io/planar.h" +#include "io/packed_yuv.h" +#include "io/gray.h" +#include "io/bayer.h" diff --git a/subprojects/pixpat/pixpat-native/src/io/bayer.h b/subprojects/pixpat/pixpat-native/src/io/bayer.h new file mode 100644 index 0000000..6b30c0e --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/bayer.h @@ -0,0 +1,318 @@ +#pragma once + +// Bayer raw read/write support. +// +// Write side: each pixel carries one of R/G/B selected by (x mod 2, +// y mod 2) and a fixed BayerOrder. Two missing channels per pixel are +// dropped on encode. +// +// Read side: bilinear demosaic over a 3x3 window. 
The pixel's own +// channel comes from self; missing channels are averaged from the +// same-channel neighbours that the Bayer phase guarantees to exist: +// +// * At an R or B pixel, all four cardinal (N, E, S, W) neighbours +// carry G and all four diagonal (NE, NW, SE, SW) neighbours carry +// the other colour, so each missing channel averages four samples. +// * At a G pixel, one missing colour sits in the row neighbours +// (W, E) and the other in the column neighbours (N, S), so each +// missing channel averages two samples. +// +// Sampled coordinates are clamped to the image bounds. +// +// The Layout shape is the same as a Y-only single-plane format +// (storage carries one component plus optional X padding); the +// BayerOrder is a separate template parameter on the Source / Sink. + +#include <array> +#include <cstdint> + +#include "../layout.h" +#include "csi2.h" +#include "detail.h" + +namespace pixpat +{ + +enum class BayerOrder { RGGB, BGGR, GRBG, GBRG }; + +namespace detail +{ +constexpr C bayer_pick(BayerOrder o, bool x_even, bool y_even) noexcept +{ + switch (o) { + case BayerOrder::RGGB: + return y_even ? (x_even ? C::R : C::G) + : (x_even ? C::G : C::B); + case BayerOrder::BGGR: + return y_even ? (x_even ? C::B : C::G) + : (x_even ? C::G : C::R); + case BayerOrder::GRBG: + return y_even ? (x_even ? C::G : C::R) + : (x_even ? C::B : C::G); + case BayerOrder::GBRG: + return y_even ? (x_even ? C::G : C::B) + : (x_even ? 
C::R : C::G); + } + return C::G; +} + +constexpr size_t clamp_coord(int v, size_t max_excl) noexcept +{ + if (v < 0) + return 0; + if (size_t(v) >= max_excl) + return max_excl - 1; + return size_t(v); +} +} // namespace detail + +template <typename L, BayerOrder Order> +struct BayerSource { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y_idx = P::template find_pos<C::Y>(); + static_assert(y_idx < P::num_comps); + + static uint16_t read_sample(const Buffer<1>& buf, size_t x, size_t y) noexcept + { + const uint8_t* p = buf.data[0] + y * buf.stride[0] + + x * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + return detail::decode_norm(P::comps[y_idx].bits, vals[y_idx]); + } + + static RGB16 read(const Buffer<1>& buf, size_t x, size_t y, + size_t W, size_t H) noexcept + { + const bool x_even = (x & 1) == 0; + const bool y_even = (y & 1) == 0; + const C self = detail::bayer_pick(Order, x_even, y_even); + + const size_t xL = detail::clamp_coord(int(x) - 1, W); + const size_t xR = detail::clamp_coord(int(x) + 1, W); + const size_t yT = detail::clamp_coord(int(y) - 1, H); + const size_t yB = detail::clamp_coord(int(y) + 1, H); + + const uint16_t s = read_sample(buf, x, y); + + uint16_t r = 0, g = 0, b = 0; + + if (self == C::G) { + const C h_color = detail::bayer_pick(Order, !x_even, y_even); + const uint16_t h_avg = uint16_t( + (uint32_t(read_sample(buf, xL, y)) + + read_sample(buf, xR, y) + 1u) >> 1); + const uint16_t v_avg = uint16_t( + (uint32_t(read_sample(buf, x, yT)) + + read_sample(buf, x, yB) + 1u) >> 1); + g = s; + if (h_color == C::R) { r = h_avg; b = v_avg; } + else { b = h_avg; r = v_avg; } + } else { + const uint16_t g_avg = uint16_t( + (uint32_t(read_sample(buf, x, yT)) + + read_sample(buf, x, yB) + + read_sample(buf, xL, y) + + read_sample(buf, xR, y) + 2u) >> 2); + const uint16_t 
o_avg = uint16_t( + (uint32_t(read_sample(buf, xL, yT)) + + read_sample(buf, xR, yT) + + read_sample(buf, xL, yB) + + read_sample(buf, xR, yB) + 2u) >> 2); + g = g_avg; + if (self == C::R) { r = s; b = o_avg; } + else { b = s; r = o_avg; } + } + + return RGB16{ r, g, b, uint16_t(0) }; + } +}; + +template <typename L, BayerOrder Order> +struct BayerSink { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y_idx = P::template find_pos<C::Y>(); + static constexpr size_t x_idx = P::template find_pos<C::X>(); + static constexpr bool has_x = (x_idx < P::num_comps); + static_assert(y_idx < P::num_comps); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = 1; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const RGB16 (&block)[1][1]) noexcept + { + const C pick = detail::bayer_pick(Order, (bx & 1) == 0, + (by & 1) == 0); + const uint16_t val = pick == C::R ? block[0][0].r + : pick == C::G ? block[0][0].g + : block[0][0].b; + + std::array<uint16_t, P::num_comps> v{}; + v[y_idx] = detail::encode_norm(P::comps[y_idx].bits, val); + if constexpr (has_x) + v[x_idx] = 0; + + uint8_t* p = buf.data[0] + by * buf.stride[0] + + bx * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +// Aliases so X-macro can register without nested template-template params. 
+template <typename L> using BayerSource_RGGB = BayerSource<L, BayerOrder::RGGB>; +template <typename L> using BayerSource_BGGR = BayerSource<L, BayerOrder::BGGR>; +template <typename L> using BayerSource_GRBG = BayerSource<L, BayerOrder::GRBG>; +template <typename L> using BayerSource_GBRG = BayerSource<L, BayerOrder::GBRG>; + +template <typename L> using BayerSink_RGGB = BayerSink<L, BayerOrder::RGGB>; +template <typename L> using BayerSink_BGGR = BayerSink<L, BayerOrder::BGGR>; +template <typename L> using BayerSink_GRBG = BayerSink<L, BayerOrder::GRBG>; +template <typename L> using BayerSink_GBRG = BayerSink<L, BayerOrder::GBRG>; + +// MIPI CSI-2 packed Bayer. The bit layout doesn't fit +// `Plane<Storage, Comp...>` because each pixel's bits span two +// non-contiguous bytes, so we use the shared CSI-2 helper (io/csi2.h) +// to (un)pack samples. +// +// The Layout slot is a placeholder (matches the unpacked Bayer of the +// same bit-depth so the user-facing API can pick the right buffer +// shape); bytes_per_pixel from the Plane is unused. +template <typename L, BayerOrder Order, size_t BitDepth> +struct BayerPackedSource { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + static_assert(BitDepth == 10 || BitDepth == 12); + + using Traits = detail::csi2::packed_traits<BitDepth>; + static constexpr size_t ppg = Traits::ppg; + static constexpr size_t bpg = Traits::bpg; + + // Stored N-bit value upshifts to normalized-16 by `<< (16-N)`, + // matching the unpacked Bayer source. 
+ static constexpr unsigned shift = 16 - BitDepth; + + static uint16_t read_sample(const Buffer<1>& buf, size_t x, size_t y) noexcept + { + const uint8_t* src = buf.data[0] + y * buf.stride[0] + + (x / ppg) * bpg; + const uint16_t val = detail::csi2::unpack_sample<BitDepth>(src, x % ppg); + return uint16_t(val << shift); + } + + static RGB16 read(const Buffer<1>& buf, size_t x, size_t y, + size_t W, size_t H) noexcept + { + const bool x_even = (x & 1) == 0; + const bool y_even = (y & 1) == 0; + const C self = detail::bayer_pick(Order, x_even, y_even); + + const size_t xL = detail::clamp_coord(int(x) - 1, W); + const size_t xR = detail::clamp_coord(int(x) + 1, W); + const size_t yT = detail::clamp_coord(int(y) - 1, H); + const size_t yB = detail::clamp_coord(int(y) + 1, H); + + const uint16_t s = read_sample(buf, x, y); + + uint16_t r = 0, g = 0, b = 0; + + if (self == C::G) { + const C h_color = detail::bayer_pick(Order, !x_even, y_even); + const uint16_t h_avg = uint16_t( + (uint32_t(read_sample(buf, xL, y)) + + read_sample(buf, xR, y) + 1u) >> 1); + const uint16_t v_avg = uint16_t( + (uint32_t(read_sample(buf, x, yT)) + + read_sample(buf, x, yB) + 1u) >> 1); + g = s; + if (h_color == C::R) { r = h_avg; b = v_avg; } + else { b = h_avg; r = v_avg; } + } else { + const uint16_t g_avg = uint16_t( + (uint32_t(read_sample(buf, x, yT)) + + read_sample(buf, x, yB) + + read_sample(buf, xL, y) + + read_sample(buf, xR, y) + 2u) >> 2); + const uint16_t o_avg = uint16_t( + (uint32_t(read_sample(buf, xL, yT)) + + read_sample(buf, xR, yT) + + read_sample(buf, xL, yB) + + read_sample(buf, xR, yB) + 2u) >> 2); + g = g_avg; + if (self == C::R) { r = s; b = o_avg; } + else { b = s; r = o_avg; } + } + + return RGB16{ r, g, b, uint16_t(0) }; + } +}; + +template <typename L, BayerOrder Order, size_t BitDepth> +struct BayerPackedSink { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + static_assert(BitDepth 
== 10 || BitDepth == 12); + + using Traits = detail::csi2::packed_traits<BitDepth>; + static constexpr size_t ppg = Traits::ppg; + static constexpr size_t bpg = Traits::bpg; + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = ppg; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const RGB16 (&block)[1][ppg]) noexcept + { + std::array<uint16_t, ppg> vals{}; + for (size_t i = 0; i < ppg; ++i) { + const C pick = detail::bayer_pick( + Order, ((bx + i) & 1) == 0, (by & 1) == 0); + const uint16_t norm = + pick == C::R ? block[0][i].r + : pick == C::G ? block[0][i].g + : block[0][i].b; + vals[i] = uint16_t(norm >> (16 - BitDepth)); + } + + uint8_t* dst = buf.data[0] + by * buf.stride[0] + + (bx / ppg) * bpg; + detail::csi2::pack_group<BitDepth>(dst, vals); + } +}; + +template <typename L> using BayerPackedSource_RGGB10 = BayerPackedSource<L, BayerOrder::RGGB, 10>; +template <typename L> using BayerPackedSource_BGGR10 = BayerPackedSource<L, BayerOrder::BGGR, 10>; +template <typename L> using BayerPackedSource_GRBG10 = BayerPackedSource<L, BayerOrder::GRBG, 10>; +template <typename L> using BayerPackedSource_GBRG10 = BayerPackedSource<L, BayerOrder::GBRG, 10>; +template <typename L> using BayerPackedSource_RGGB12 = BayerPackedSource<L, BayerOrder::RGGB, 12>; +template <typename L> using BayerPackedSource_BGGR12 = BayerPackedSource<L, BayerOrder::BGGR, 12>; +template <typename L> using BayerPackedSource_GRBG12 = BayerPackedSource<L, BayerOrder::GRBG, 12>; +template <typename L> using BayerPackedSource_GBRG12 = BayerPackedSource<L, BayerOrder::GBRG, 12>; + +template <typename L> using BayerPackedSink_RGGB10 = BayerPackedSink<L, BayerOrder::RGGB, 10>; +template <typename L> using BayerPackedSink_BGGR10 = BayerPackedSink<L, BayerOrder::BGGR, 10>; +template <typename L> using BayerPackedSink_GRBG10 = BayerPackedSink<L, BayerOrder::GRBG, 10>; +template <typename L> using BayerPackedSink_GBRG10 = BayerPackedSink<L, BayerOrder::GBRG, 10>; 
+template <typename L> using BayerPackedSink_RGGB12 = BayerPackedSink<L, BayerOrder::RGGB, 12>; +template <typename L> using BayerPackedSink_BGGR12 = BayerPackedSink<L, BayerOrder::BGGR, 12>; +template <typename L> using BayerPackedSink_GRBG12 = BayerPackedSink<L, BayerOrder::GRBG, 12>; +template <typename L> using BayerPackedSink_GBRG12 = BayerPackedSink<L, BayerOrder::GBRG, 12>; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/csi2.h b/subprojects/pixpat/pixpat-native/src/io/csi2.h new file mode 100644 index 0000000..59a8f8d --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/csi2.h @@ -0,0 +1,80 @@ +#pragma once + +// Shared MIPI CSI-2 byte (un)packing for the 10P / 12P forms used by +// Bayer raw and Y-only grayscale. +// +// 10P: 4 samples in 5 bytes — bytes 0..3 hold the high 8 bits of +// samples 0..3; byte 4 holds 4 x 2 LSBs (sample 0 in bits 6..7, +// sample 1 in bits 4..5, ...). +// 12P: 2 samples in 3 bytes — bytes 0..1 hold the high 8 bits of +// samples 0..1; byte 2 holds 2 x 4 LSBs (sample 0 in bits 4..7, +// sample 1 in bits 0..3). +// +// Helpers deal in the stored integer (low BitDepth bits set); +// normalization to/from the 16-bit pivot stays in the caller. + +#include <array> +#include <cstddef> +#include <cstdint> + +namespace pixpat::detail::csi2 +{ + +template <size_t BitDepth> +struct packed_traits; + +template <> +struct packed_traits<10> { + static constexpr size_t ppg = 4; + static constexpr size_t bpg = 5; +}; + +template <> +struct packed_traits<12> { + static constexpr size_t ppg = 2; + static constexpr size_t bpg = 3; +}; + +// Extract one BitDepth-bit sample from a packed group, where `i` is the +// in-group index (0..ppg-1). The returned value occupies the low +// BitDepth bits. 
+template <size_t BitDepth> +inline uint16_t unpack_sample(const uint8_t* src, size_t i) noexcept +{ + if constexpr (BitDepth == 10) { + const uint8_t hi = src[i]; + const uint8_t lsb = (src[4] >> ((3 - i) * 2)) & 0x03; + return uint16_t((hi << 2) | lsb); + } else { // 12 + const uint8_t hi = src[i]; + const uint8_t lsb = (i == 0) ? ((src[2] >> 4) & 0x0F) + : (src[2] & 0x0F); + return uint16_t((hi << 4) | lsb); + } +} + +// Write `ppg` BitDepth-bit samples (low BitDepth bits significant) into +// a packed group of `bpg` bytes. +template <size_t BitDepth> +inline void pack_group( + uint8_t* dst, + const std::array<uint16_t, packed_traits<BitDepth>::ppg>& vals) noexcept +{ + if constexpr (BitDepth == 10) { + dst[0] = (vals[0] >> 2) & 0xFF; + dst[1] = (vals[1] >> 2) & 0xFF; + dst[2] = (vals[2] >> 2) & 0xFF; + dst[3] = (vals[3] >> 2) & 0xFF; + dst[4] = ((vals[0] & 0x03) << 6) + | ((vals[1] & 0x03) << 4) + | ((vals[2] & 0x03) << 2) + | ((vals[3] & 0x03) << 0); + } else { // 12 + dst[0] = (vals[0] >> 4) & 0xFF; + dst[1] = (vals[1] >> 4) & 0xFF; + dst[2] = ((vals[0] & 0x0F) << 4) + | ((vals[1] & 0x0F) << 0); + } +} + +} // namespace pixpat::detail::csi2 diff --git a/subprojects/pixpat/pixpat-native/src/io/detail.h b/subprojects/pixpat/pixpat-native/src/io/detail.h new file mode 100644 index 0000000..cb2b9fb --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/detail.h @@ -0,0 +1,62 @@ +#pragma once + +// Per-component encode/decode against the descriptor + memcpy-based +// load/store_word helpers. Shared by every Source / Sink template. + +#include <cstdint> +#include <cstring> + +#include "../layout.h" + +namespace pixpat::detail +{ + +// Decode an N-bit stored value into the 16-bit normalized space and +// encode it back. Decode bit-replicates the stored value across the 16 +// bits so that N-bit max maps to normalized max (e.g. 8-bit 0xFF → +// 0xFFFF, not 0xFF00). 
Encode is a plain truncating right-shift: the +// replicated bits land in the low (16-N) bits and get dropped, so +// stored→norm→stored is exact for any N in [1, 16]. +// +// `bits` is taken at runtime; in every call site it traces back to a +// constexpr Plane::comps[I].bits read, which the optimizer constant- +// folds after inlining. + +constexpr uint16_t decode_norm(unsigned bits, uint16_t stored) noexcept +{ + const int N = int(bits); + // Loop, not a single OR: one replication only covers 2N bits, so + // N < 8 (RGB565, RGBA4444, 1-bit alpha, ...) needs multiple tiles. + uint32_t result = 0; + for (int s = 16 - N; s > -N; s -= N) { + if (s >= 0) + result |= uint32_t(stored) << s; + else + result |= uint32_t(stored) >> -s; + } + return uint16_t(result); +} + +constexpr uint16_t encode_norm(unsigned bits, uint16_t norm) noexcept +{ + return uint16_t(norm >> (16u - bits)); +} + +// Read one storage word from `p`. memcpy is uniform for tight and +// non-tight (e.g. BGR888 24-bit) layouts; the optimizer folds it to a +// single load when the size is constant. +template <typename Plane> +inline typename Plane::storage_t load_word(const uint8_t* p) noexcept +{ + typename Plane::storage_t word{}; + std::memcpy(&word, p, Plane::bytes_per_pixel); + return word; +} + +template <typename Plane> +inline void store_word(uint8_t* p, typename Plane::storage_t word) noexcept +{ + std::memcpy(p, &word, Plane::bytes_per_pixel); +} + +} // namespace pixpat::detail diff --git a/subprojects/pixpat/pixpat-native/src/io/gray.h b/subprojects/pixpat/pixpat-native/src/io/gray.h new file mode 100644 index 0000000..d175b68 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/gray.h @@ -0,0 +1,153 @@ +#pragma once + +// Grayscale (Y8 / Y10 / Y12 / Y16) and multi-pixel-per-word grayscale +// (XYYY2101010: 3 Y components in one uint32_t). Modeled as a YUV format +// with neutral chroma synthesized on read so cross-color-kind ColorXfm +// produces R=G=B=Y'. 
The sink encodes Y from YUV16 and ignores U/V. +// Y10/Y12 carry an X padding bitfield which we zero out on write. +// Neutral chroma in normalized-16 is 0x8000 (the midpoint of [0, 0xFFFF]). + +#include <array> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct GraySource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y_idx = P::template find_pos<C::Y>(); + static_assert(y_idx < P::num_comps); + + static YUV16 read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* p = buf.data[0] + y * buf.stride[0] + + x * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + return YUV16{ + detail::decode_norm(P::comps[y_idx].bits, vals[y_idx]), + 0x8000, 0x8000, uint16_t(0), + }; + } +}; + +template <typename L> +struct GraySink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y_idx = P::template find_pos<C::Y>(); + static constexpr size_t x_idx = P::template find_pos<C::X>(); + static constexpr bool has_x = (x_idx < P::num_comps); + static_assert(y_idx < P::num_comps); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = 1; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const YUV16 (&block)[1][1]) noexcept + { + std::array<uint16_t, P::num_comps> v{}; + v[y_idx] = detail::encode_norm(P::comps[y_idx].bits, block[0][0].y); + if constexpr (has_x) + v[x_idx] = 0; + + uint8_t* p = buf.data[0] + by * buf.stride[0] + + bx * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +// Multi-pixel-per-word grayscale. 
The Layout carries one C::Y entry per +// pixel in the group; pixels_per_word is derived from how many C::Y +// entries the layout has. All Y components must share the same bit width +// (so the encode/decode shift is shared). block_w = ppw so the sink +// writes one storage word per block. +template <typename L> +struct MultiPixelGraySource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t ppw = P::template component_count<C::Y>(); + static_assert(ppw >= 1); + + // All Y positions share the same bit width. + static constexpr unsigned y_bits = P::comps[P::template find_pos<C::Y>(0)].bits; + + static YUV16 read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const size_t gx = x / ppw; + const size_t off = x % ppw; + const uint8_t* p = buf.data[0] + y * buf.stride[0] + + gx * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + + // find_pos walks the comps array at runtime; comps is constexpr + // and num_comps is small (≤4 for these formats), so it inlines. 
+ const size_t y_pos = P::template find_pos<C::Y>(off); + + return YUV16{ + detail::decode_norm(y_bits, vals[y_pos]), + 0x8000, 0x8000, uint16_t(0), + }; + } +}; + +template <typename L> +struct MultiPixelGraySink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t ppw = P::template component_count<C::Y>(); + static constexpr size_t x_idx = P::template find_pos<C::X>(); + static constexpr bool has_x = (x_idx < P::num_comps); + static_assert(ppw >= 1); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = ppw; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const YUV16 (&block)[1][ppw]) noexcept + { + std::array<uint16_t, P::num_comps> v{}; + // All Y slots share the same bit width. + constexpr unsigned y_bits = P::comps[P::template find_pos<C::Y>(0)].bits; + for (size_t i = 0; i < ppw; ++i) { + const size_t pos = P::template find_pos<C::Y>(i); + v[pos] = detail::encode_norm(y_bits, block[0][i].y); + } + + if constexpr (has_x) + v[x_idx] = 0; + + uint8_t* p = buf.data[0] + by * buf.stride[0] + + (bx / ppw) * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/gray_packed.h b/subprojects/pixpat/pixpat-native/src/io/gray_packed.h new file mode 100644 index 0000000..dc1fa68 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/gray_packed.h @@ -0,0 +1,78 @@ +#pragma once + +// MIPI CSI-2 packed grayscale (Y10P / Y12P). Same byte packing as +// Bayer10P/Bayer12P (see io/csi2.h) but every sample is Y; the source +// emits neutral chroma to keep cross-color-kind ColorXfm consistent +// with GraySource. +// +// The Layout slot is a placeholder (matches the unpacked Y8 storage +// shape so dispatch plumbing is uniform); bytes_per_pixel from the +// Plane is unused. 
+ +#include <array> +#include <cstdint> + +#include "../layout.h" +#include "csi2.h" + +namespace pixpat +{ + +template <typename L, size_t BitDepth> +struct GrayPackedSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + static_assert(BitDepth == 10 || BitDepth == 12); + + using Traits = detail::csi2::packed_traits<BitDepth>; + static constexpr size_t ppg = Traits::ppg; + static constexpr size_t bpg = Traits::bpg; + static constexpr unsigned shift = 16 - BitDepth; + + static YUV16 read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* src = buf.data[0] + y * buf.stride[0] + + (x / ppg) * bpg; + const uint16_t val = detail::csi2::unpack_sample<BitDepth>(src, x % ppg); + return YUV16{ + uint16_t(val << shift), + 0x8000, 0x8000, uint16_t(0), + }; + } +}; + +template <typename L, size_t BitDepth> +struct GrayPackedSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + static_assert(BitDepth == 10 || BitDepth == 12); + + using Traits = detail::csi2::packed_traits<BitDepth>; + static constexpr size_t ppg = Traits::ppg; + static constexpr size_t bpg = Traits::bpg; + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = ppg; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const YUV16 (&block)[1][ppg]) noexcept + { + std::array<uint16_t, ppg> vals{}; + for (size_t i = 0; i < ppg; ++i) + vals[i] = uint16_t(block[0][i].y >> (16 - BitDepth)); + + uint8_t* dst = buf.data[0] + by * buf.stride[0] + + (bx / ppg) * bpg; + detail::csi2::pack_group<BitDepth>(dst, vals); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h b/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h new file mode 100644 index 0000000..f2f8206 --- /dev/null +++ 
b/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h @@ -0,0 +1,72 @@ +#pragma once + +// Single-channel RGB formats (R8). Storage carries one R component; +// MonoRGBSource synthesizes G=B=R on read so cross-color-kind ColorXfm +// produces sensible Y from R alone. MonoRGBSink encodes R and ignores +// G/B/A (and zeroes any X padding). Symmetric to GraySource/GraySink +// (io/gray.h) but for ColorKind::RGB on C::R. + +#include <array> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct MonoRGBSource { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t r_idx = P::template find_pos<C::R>(); + static_assert(r_idx < P::num_comps); + + static RGB16 read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* p = buf.data[0] + y * buf.stride[0] + + x * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + const uint16_t r = detail::decode_norm(P::comps[r_idx].bits, vals[r_idx]); + return RGB16{ r, r, r, uint16_t(0) }; + } +}; + +template <typename L> +struct MonoRGBSink { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t r_idx = P::template find_pos<C::R>(); + static constexpr size_t x_idx = P::template find_pos<C::X>(); + static constexpr bool has_x = (x_idx < P::num_comps); + static_assert(r_idx < P::num_comps); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = 1; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const RGB16 (&block)[1][1]) noexcept + { + std::array<uint16_t, P::num_comps> v{}; + v[r_idx] = detail::encode_norm(P::comps[r_idx].bits, block[0][0].r); + if constexpr (has_x) + 
v[x_idx] = 0; + + uint8_t* p = buf.data[0] + by * buf.stride[0] + + bx * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/packed.h b/subprojects/pixpat/pixpat-native/src/io/packed.h new file mode 100644 index 0000000..9d953bc --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/packed.h @@ -0,0 +1,106 @@ +#pragma once + +// Single-plane, single-pixel-per-storage-word formats. Works for both +// RGB layouts (XRGB8888, RGB565, ABGR16161616, ...) and YUV +// single-pixel layouts (XVUY2101010, AVUY16161616). Pixel type follows +// L::kind; the three mandatory components are R/G/B for RGB or Y/U/V +// for YUV. Both `RGB16` and `YUV16` are 4 uint16_t with the alpha last, +// so aggregate-init by position works for either. + +#include <array> +#include <type_traits> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct PackedSource { + using Layout = L; + using Pixel = std::conditional_t<L::kind == ColorKind::RGB, RGB16, YUV16>; + + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr C c0 = (L::kind == ColorKind::RGB) ? C::R : C::Y; + static constexpr C c1 = (L::kind == ColorKind::RGB) ? C::G : C::U; + static constexpr C c2 = (L::kind == ColorKind::RGB) ? 
C::B : C::V; + + static constexpr size_t i0 = P::template find_pos<c0>(); + static constexpr size_t i1 = P::template find_pos<c1>(); + static constexpr size_t i2 = P::template find_pos<c2>(); + static constexpr size_t a_idx = P::template find_pos<C::A>(); + static constexpr bool has_a = (a_idx < P::num_comps); + static_assert(i0 < P::num_comps && i1 < P::num_comps && i2 < P::num_comps); + + static Pixel read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* p = buf.data[0] + y * buf.stride[0] + x * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + Pixel out{ + detail::decode_norm(P::comps[i0].bits, vals[i0]), + detail::decode_norm(P::comps[i1].bits, vals[i1]), + detail::decode_norm(P::comps[i2].bits, vals[i2]), + uint16_t(0), + }; + if constexpr (has_a) + out.a = detail::decode_norm(P::comps[a_idx].bits, vals[a_idx]); + return out; + } +}; + +template <typename L> +struct PackedSink { + using Layout = L; + using Pixel = std::conditional_t<L::kind == ColorKind::RGB, RGB16, YUV16>; + + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr C c0 = (L::kind == ColorKind::RGB) ? C::R : C::Y; + static constexpr C c1 = (L::kind == ColorKind::RGB) ? C::G : C::U; + static constexpr C c2 = (L::kind == ColorKind::RGB) ? 
C::B : C::V; + + static constexpr size_t i0 = P::template find_pos<c0>(); + static constexpr size_t i1 = P::template find_pos<c1>(); + static constexpr size_t i2 = P::template find_pos<c2>(); + static constexpr size_t x_idx = P::template find_pos<C::X>(); + static constexpr size_t a_idx = P::template find_pos<C::A>(); + static constexpr bool has_x = (x_idx < P::num_comps); + static constexpr bool has_a = (a_idx < P::num_comps); + static_assert(i0 < P::num_comps && i1 < P::num_comps && i2 < P::num_comps); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = 1; + + // Aggregate-init access to RGB16/YUV16 by position: .r/.y, .g/.u, .b/.v. + // We use the field names corresponding to L::kind. + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const Pixel (&block)[1][1]) noexcept + { + const Pixel& pix = block[0][0]; + std::array<uint16_t, P::num_comps> v{}; + if constexpr (L::kind == ColorKind::RGB) { + v[i0] = detail::encode_norm(P::comps[i0].bits, pix.r); + v[i1] = detail::encode_norm(P::comps[i1].bits, pix.g); + v[i2] = detail::encode_norm(P::comps[i2].bits, pix.b); + } else { + v[i0] = detail::encode_norm(P::comps[i0].bits, pix.y); + v[i1] = detail::encode_norm(P::comps[i1].bits, pix.u); + v[i2] = detail::encode_norm(P::comps[i2].bits, pix.v); + } + if constexpr (has_x) + v[x_idx] = 0; + if constexpr (has_a) + v[a_idx] = detail::encode_norm(P::comps[a_idx].bits, pix.a); + + uint8_t* p = buf.data[0] + by * buf.stride[0] + bx * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h b/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h new file mode 100644 index 0000000..90c8b2f --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h @@ -0,0 +1,89 @@ +#pragma once + +// Packed YUV 4:2:2 (YUYV / YVYU / UYVY / VYUY): two pixels per 32-bit +// word, one shared chroma pair. 
The Layout uses two C::Y entries plus +// one each of C::U / C::V; we resolve the duplicate Y via +// find_pos<C::Y>(n). + +#include <array> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct PackedYUVSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + static_assert(L::h_sub == 2 && L::v_sub == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y0_idx = P::template find_pos<C::Y>(0); + static constexpr size_t y1_idx = P::template find_pos<C::Y>(1); + static constexpr size_t u_idx = P::template find_pos<C::U>(); + static constexpr size_t v_idx = P::template find_pos<C::V>(); + + static YUV16 read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* p = buf.data[0] + y * buf.stride[0] + + (x / 2) * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + const size_t y_pick = (x & 1) ? y1_idx : y0_idx; + // Both Y components share the same bit width, so the bit-width + // for y0 and y1 is identical — pick either. 
+ return YUV16{ + detail::decode_norm(P::comps[y0_idx].bits, vals[y_pick]), + detail::decode_norm(P::comps[u_idx].bits, vals[u_idx]), + detail::decode_norm(P::comps[v_idx].bits, vals[v_idx]), + uint16_t(0), + }; + } +}; + +template <typename L> +struct PackedYUVSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + static_assert(L::h_sub == 2 && L::v_sub == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y0_idx = P::template find_pos<C::Y>(0); + static constexpr size_t y1_idx = P::template find_pos<C::Y>(1); + static constexpr size_t u_idx = P::template find_pos<C::U>(); + static constexpr size_t v_idx = P::template find_pos<C::V>(); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = 2; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const YUV16 (&block)[1][2]) noexcept + { + std::array<uint16_t, P::num_comps> v{}; + v[y0_idx] = detail::encode_norm(P::comps[y0_idx].bits, block[0][0].y); + v[y1_idx] = detail::encode_norm(P::comps[y1_idx].bits, block[0][1].y); + // Integer chroma averaging in normalized-16 space. Truncates + // (no round-half-up). + v[u_idx] = detail::encode_norm(P::comps[u_idx].bits, uint16_t( + (uint32_t(block[0][0].u) + + uint32_t(block[0][1].u)) / 2)); + v[v_idx] = detail::encode_norm(P::comps[v_idx].bits, uint16_t( + (uint32_t(block[0][0].v) + + uint32_t(block[0][1].v)) / 2)); + + uint8_t* p = buf.data[0] + by * buf.stride[0] + + (bx / 2) * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/planar.h b/subprojects/pixpat/pixpat-native/src/io/planar.h new file mode 100644 index 0000000..0dab685 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/planar.h @@ -0,0 +1,257 @@ +#pragma once + +// 3-plane planar YUV. 
Two flavours: +// +// PlanarSource / PlanarSink — YUV/YVU 420/422/444, single Y per word, +// single chroma per word. Chroma is averaged over h_sub × v_sub +// on write. +// +// MultiPixelPlanarSource / MultiPixelPlanarSink — T430, multi-pixel- +// per-word planar 4:4:4 (3 samples per uint32_t in each of 3 +// planes, plus 2-bit X padding). block_w = ppw, block_h = 1. +// +// Plane indices for Y / U / V are looked up via Layout::find_plane<C>(), +// so swap_uv layouts (YVU vs YUV) work without separate templates. + +#include <array> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct PlanarSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 3); + + static constexpr size_t y_plane = L::template find_plane<C::Y>(); + static constexpr size_t u_plane = L::template find_plane<C::U>(); + static constexpr size_t v_plane = L::template find_plane<C::V>(); + + using YP = typename L::template plane<y_plane>; + using UP = typename L::template plane<u_plane>; + using VP = typename L::template plane<v_plane>; + + static YUV16 read(const Buffer<3>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* yp = buf.data[y_plane] + y * buf.stride[y_plane] + + x * YP::bytes_per_pixel; + const auto y_vals = YP::unpack(detail::load_word<YP>(yp)); + + const size_t cx = x / L::h_sub; + const size_t cy = y / L::v_sub; + const uint8_t* up = buf.data[u_plane] + cy * buf.stride[u_plane] + + cx * UP::bytes_per_pixel; + const uint8_t* vp = buf.data[v_plane] + cy * buf.stride[v_plane] + + cx * VP::bytes_per_pixel; + const auto u_vals = UP::unpack(detail::load_word<UP>(up)); + const auto v_vals = VP::unpack(detail::load_word<VP>(vp)); + + return YUV16{ + detail::decode_norm(YP::comps[0].bits, y_vals[0]), + detail::decode_norm(UP::comps[0].bits, u_vals[0]), + detail::decode_norm(VP::comps[0].bits, v_vals[0]), + 
uint16_t(0), + }; + } +}; + +template <typename L> +struct PlanarSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 3); + + static constexpr size_t y_plane = L::template find_plane<C::Y>(); + static constexpr size_t u_plane = L::template find_plane<C::U>(); + static constexpr size_t v_plane = L::template find_plane<C::V>(); + + using YP = typename L::template plane<y_plane>; + using UP = typename L::template plane<u_plane>; + using VP = typename L::template plane<v_plane>; + + static constexpr size_t block_h = L::v_sub; + static constexpr size_t block_w = L::h_sub; + + static void write_block(Buffer<3>& buf, size_t bx, size_t by, + const YUV16 (&block)[block_h][block_w]) noexcept + { + // Y per pixel. + for (size_t dy = 0; dy < block_h; ++dy) { + uint8_t* y_row = buf.data[y_plane] + + (by + dy) * buf.stride[y_plane]; + for (size_t dx = 0; dx < block_w; ++dx) { + std::array<uint16_t, YP::num_comps> v{}; + v[0] = detail::encode_norm(YP::comps[0].bits, block[dy][dx].y); + detail::store_word<YP>( + y_row + (bx + dx) * YP::bytes_per_pixel, + YP::pack(v)); + } + } + + // One averaged U and V sample per block. Integer truncation + // (no round-half-up). 
+ uint32_t u_sum = 0, v_sum = 0; + for (size_t dy = 0; dy < block_h; ++dy) { + for (size_t dx = 0; dx < block_w; ++dx) { + u_sum += block[dy][dx].u; + v_sum += block[dy][dx].v; + } + } + constexpr uint32_t n = block_h * block_w; + + const size_t cx = bx / L::h_sub; + const size_t cy = by / L::v_sub; + + std::array<uint16_t, UP::num_comps> uw{}; + uw[0] = detail::encode_norm(UP::comps[0].bits, uint16_t(u_sum / n)); + detail::store_word<UP>( + buf.data[u_plane] + cy * buf.stride[u_plane] + + cx * UP::bytes_per_pixel, + UP::pack(uw)); + + std::array<uint16_t, VP::num_comps> vw{}; + vw[0] = detail::encode_norm(VP::comps[0].bits, uint16_t(v_sum / n)); + detail::store_word<VP>( + buf.data[v_plane] + cy * buf.stride[v_plane] + + cx * VP::bytes_per_pixel, + VP::pack(vw)); + } +}; + +// T430-style 3-plane multi-pixel-per-word planar 4:4:4. Each plane has +// `ppw` samples of the same component (Y in plane 0, U in 1, V in 2 — +// or whichever ordering find_plane resolves) packed into a single +// storage word. block_w = ppw, block_h = 1. No chroma subsampling. +template <typename L> +struct MultiPixelPlanarSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 3); + static_assert(L::h_sub == 1 && L::v_sub == 1); + + static constexpr size_t y_plane = L::template find_plane<C::Y>(); + static constexpr size_t u_plane = L::template find_plane<C::U>(); + static constexpr size_t v_plane = L::template find_plane<C::V>(); + + using YP = typename L::template plane<y_plane>; + using UP = typename L::template plane<u_plane>; + using VP = typename L::template plane<v_plane>; + + static constexpr size_t ppw = YP::template component_count<C::Y>(); + static_assert(ppw == UP::template component_count<C::U>()); + static_assert(ppw == VP::template component_count<C::V>()); + + // All same-tag positions share the same bit width. 
+ static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits; + static constexpr unsigned u_bits = UP::comps[UP::template find_pos<C::U>(0)].bits; + static constexpr unsigned v_bits = VP::comps[VP::template find_pos<C::V>(0)].bits; + + static YUV16 read(const Buffer<3>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const size_t gx = x / ppw; + const size_t off = x % ppw; + + const uint8_t* yp = buf.data[y_plane] + y * buf.stride[y_plane] + + gx * YP::bytes_per_pixel; + const uint8_t* up = buf.data[u_plane] + y * buf.stride[u_plane] + + gx * UP::bytes_per_pixel; + const uint8_t* vp = buf.data[v_plane] + y * buf.stride[v_plane] + + gx * VP::bytes_per_pixel; + + const auto y_vals = YP::unpack(detail::load_word<YP>(yp)); + const auto u_vals = UP::unpack(detail::load_word<UP>(up)); + const auto v_vals = VP::unpack(detail::load_word<VP>(vp)); + + return YUV16{ + detail::decode_norm(y_bits, y_vals[YP::template find_pos<C::Y>(off)]), + detail::decode_norm(u_bits, u_vals[UP::template find_pos<C::U>(off)]), + detail::decode_norm(v_bits, v_vals[VP::template find_pos<C::V>(off)]), + uint16_t(0), + }; + } +}; + +template <typename L> +struct MultiPixelPlanarSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 3); + static_assert(L::h_sub == 1 && L::v_sub == 1); + + static constexpr size_t y_plane = L::template find_plane<C::Y>(); + static constexpr size_t u_plane = L::template find_plane<C::U>(); + static constexpr size_t v_plane = L::template find_plane<C::V>(); + + using YP = typename L::template plane<y_plane>; + using UP = typename L::template plane<u_plane>; + using VP = typename L::template plane<v_plane>; + + static constexpr size_t ppw = YP::template component_count<C::Y>(); + + static constexpr size_t y_x_idx = YP::template find_pos<C::X>(); + static constexpr size_t u_x_idx = UP::template find_pos<C::X>(); + static 
constexpr size_t v_x_idx = VP::template find_pos<C::X>(); + static constexpr bool y_has_x = (y_x_idx < YP::num_comps); + static constexpr bool u_has_x = (u_x_idx < UP::num_comps); + static constexpr bool v_has_x = (v_x_idx < VP::num_comps); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = ppw; + + static void write_block(Buffer<3>& buf, size_t bx, size_t by, + const YUV16 (&block)[1][ppw]) noexcept + { + std::array<uint16_t, YP::num_comps> yv{}; + std::array<uint16_t, UP::num_comps> uv{}; + std::array<uint16_t, VP::num_comps> vv{}; + + // All same-tag positions share the same bit width. + constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits; + constexpr unsigned u_bits = UP::comps[UP::template find_pos<C::U>(0)].bits; + constexpr unsigned v_bits = VP::comps[VP::template find_pos<C::V>(0)].bits; + for (size_t i = 0; i < ppw; ++i) { + yv[YP::template find_pos<C::Y>(i)] = + detail::encode_norm(y_bits, block[0][i].y); + uv[UP::template find_pos<C::U>(i)] = + detail::encode_norm(u_bits, block[0][i].u); + vv[VP::template find_pos<C::V>(i)] = + detail::encode_norm(v_bits, block[0][i].v); + } + + if constexpr (y_has_x) yv[y_x_idx] = 0; + if constexpr (u_has_x) uv[u_x_idx] = 0; + if constexpr (v_has_x) vv[v_x_idx] = 0; + + const size_t gx = bx / ppw; + detail::store_word<YP>( + buf.data[y_plane] + by * buf.stride[y_plane] + + gx * YP::bytes_per_pixel, + YP::pack(yv)); + detail::store_word<UP>( + buf.data[u_plane] + by * buf.stride[u_plane] + + gx * UP::bytes_per_pixel, + UP::pack(uv)); + detail::store_word<VP>( + buf.data[v_plane] + by * buf.stride[v_plane] + + gx * VP::bytes_per_pixel, + VP::pack(vv)); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/semiplanar.h b/subprojects/pixpat/pixpat-native/src/io/semiplanar.h new file mode 100644 index 0000000..00e7731 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/semiplanar.h @@ -0,0 +1,242 @@ +#pragma once + +// 2-plane semiplanar 
YUV. Two flavours: +// +// SemiplanarSource / SemiplanarSink — NV12/NV21/NV16/NV61, single +// pixel per Y storage word, single chroma pair per chroma word. +// +// MultiPixelSemiplanarSource / MultiPixelSemiplanarSink — P030/P230, +// multiple Y pixels per Y word and multiple chroma pairs per +// chroma word. The Y plane has `ppw_y = component_count<Y>()` Y +// samples per storage word; the chroma plane has `pairs = +// component_count<U>()` U/V pairs per storage word. block_w = +// pairs × h_sub, block_h = v_sub — each block exactly fills one +// chroma word. + +#include <array> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct SemiplanarSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 2); + + using YP = typename L::template plane<0>; + using CP = typename L::template plane<1>; + static constexpr size_t y_idx = YP::template find_pos<C::Y>(); + static constexpr size_t u_idx = CP::template find_pos<C::U>(); + static constexpr size_t v_idx = CP::template find_pos<C::V>(); + + static YUV16 read(const Buffer<2>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* yp = buf.data[0] + y * buf.stride[0] + x * YP::bytes_per_pixel; + const auto y_vals = YP::unpack(detail::load_word<YP>(yp)); + + const size_t cx = x / L::h_sub; + const size_t cy = y / L::v_sub; + const uint8_t* cp = buf.data[1] + cy * buf.stride[1] + cx * CP::bytes_per_pixel; + const auto c_vals = CP::unpack(detail::load_word<CP>(cp)); + + return YUV16{ + detail::decode_norm(YP::comps[y_idx].bits, y_vals[y_idx]), + detail::decode_norm(CP::comps[u_idx].bits, c_vals[u_idx]), + detail::decode_norm(CP::comps[v_idx].bits, c_vals[v_idx]), + uint16_t(0), + }; + } +}; + +template <typename L> +struct SemiplanarSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + 
static_assert(L::num_planes == 2); + + using YP = typename L::template plane<0>; + using CP = typename L::template plane<1>; + static constexpr size_t y_idx = YP::template find_pos<C::Y>(); + static constexpr size_t u_idx = CP::template find_pos<C::U>(); + static constexpr size_t v_idx = CP::template find_pos<C::V>(); + + static constexpr size_t block_h = L::v_sub; + static constexpr size_t block_w = L::h_sub; + + static void write_block(Buffer<2>& buf, size_t bx, size_t by, + const YUV16 (&block)[block_h][block_w]) noexcept + { + // Y per pixel. + for (size_t dy = 0; dy < block_h; ++dy) { + uint8_t* y_row = buf.data[0] + (by + dy) * buf.stride[0]; + for (size_t dx = 0; dx < block_w; ++dx) { + std::array<uint16_t, YP::num_comps> v{}; + v[y_idx] = detail::encode_norm(YP::comps[y_idx].bits, + block[dy][dx].y); + detail::store_word<YP>( + y_row + (bx + dx) * YP::bytes_per_pixel, + YP::pack(v)); + } + } + + // One averaged UV pair for the whole block. Integer truncation + // (no round-half-up). + uint32_t u_sum = 0, v_sum = 0; + for (size_t dy = 0; dy < block_h; ++dy) { + for (size_t dx = 0; dx < block_w; ++dx) { + u_sum += block[dy][dx].u; + v_sum += block[dy][dx].v; + } + } + constexpr uint32_t n = block_h * block_w; + const uint16_t u_avg = uint16_t(u_sum / n); + const uint16_t v_avg = uint16_t(v_sum / n); + + std::array<uint16_t, CP::num_comps> uv{}; + uv[u_idx] = detail::encode_norm(CP::comps[u_idx].bits, u_avg); + uv[v_idx] = detail::encode_norm(CP::comps[v_idx].bits, v_avg); + + const size_t cx = bx / L::h_sub; + const size_t cy = by / L::v_sub; + uint8_t* cp = buf.data[1] + cy * buf.stride[1] + cx * CP::bytes_per_pixel; + detail::store_word<CP>(cp, CP::pack(uv)); + } +}; + +// Multi-pixel-per-word semiplanar (P030: 4:2:0, P230: 4:2:2). All Y +// components share the same bit width; same for U and V. 
+template <typename L> +struct MultiPixelSemiplanarSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 2); + + using YP = typename L::template plane<0>; + using CP = typename L::template plane<1>; + static constexpr size_t ppw_y = YP::template component_count<C::Y>(); + static constexpr size_t pairs = CP::template component_count<C::U>(); + static_assert(ppw_y >= 1 && pairs >= 1); + static_assert(pairs == CP::template component_count<C::V>()); + + // All same-tag positions share the same bit width. + static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits; + static constexpr unsigned u_bits = CP::comps[CP::template find_pos<C::U>(0)].bits; + static constexpr unsigned v_bits = CP::comps[CP::template find_pos<C::V>(0)].bits; + + static YUV16 read(const Buffer<2>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + // Y read. + const size_t y_gx = x / ppw_y; + const size_t y_off = x % ppw_y; + const uint8_t* yp = buf.data[0] + y * buf.stride[0] + + y_gx * YP::bytes_per_pixel; + const auto y_vals = YP::unpack(detail::load_word<YP>(yp)); + + // Chroma read. 
+ const size_t cx = x / L::h_sub; + const size_t cy = y / L::v_sub; + const size_t c_gx = cx / pairs; + const size_t c_off = cx % pairs; + const uint8_t* cp = buf.data[1] + cy * buf.stride[1] + + c_gx * CP::bytes_per_pixel; + const auto c_vals = CP::unpack(detail::load_word<CP>(cp)); + + return YUV16{ + detail::decode_norm(y_bits, y_vals[YP::template find_pos<C::Y>(y_off)]), + detail::decode_norm(u_bits, c_vals[CP::template find_pos<C::U>(c_off)]), + detail::decode_norm(v_bits, c_vals[CP::template find_pos<C::V>(c_off)]), + uint16_t(0), + }; + } +}; + +template <typename L> +struct MultiPixelSemiplanarSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 2); + + using YP = typename L::template plane<0>; + using CP = typename L::template plane<1>; + static constexpr size_t ppw_y = YP::template component_count<C::Y>(); + static constexpr size_t pairs = CP::template component_count<C::U>(); + static_assert(ppw_y >= 1 && pairs >= 1); + + // One block exactly fills one chroma word: `pairs` chroma pairs, + // each covering h_sub luma columns × v_sub rows. + static constexpr size_t block_w = pairs * L::h_sub; + static constexpr size_t block_h = L::v_sub; + static_assert(block_w % ppw_y == 0, + "block width must be a multiple of Y-pixels-per-word"); + static constexpr size_t y_words_per_row = block_w / ppw_y; + + // All same-tag positions share the same bit width. + static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits; + static constexpr unsigned u_bits = CP::comps[CP::template find_pos<C::U>(0)].bits; + static constexpr unsigned v_bits = CP::comps[CP::template find_pos<C::V>(0)].bits; + + static void write_block(Buffer<2>& buf, size_t bx, size_t by, + const YUV16 (&block)[block_h][block_w]) noexcept + { + // Y plane: y_words_per_row Y-words per row, block_h rows. 
+ for (size_t dy = 0; dy < block_h; ++dy) { + uint8_t* y_row = buf.data[0] + + (by + dy) * buf.stride[0]; + for (size_t w = 0; w < y_words_per_row; ++w) { + std::array<uint16_t, YP::num_comps> v{}; + for (size_t i = 0; i < ppw_y; ++i) { + const size_t pos = YP::template find_pos<C::Y>(i); + v[pos] = detail::encode_norm( + y_bits, block[dy][w * ppw_y + i].y); + } + detail::store_word<YP>( + y_row + (bx / ppw_y + w) + * YP::bytes_per_pixel, + YP::pack(v)); + } + } + + // One UV-word: `pairs` chroma pairs. Each pair averages h_sub + // horizontally × v_sub vertically luma values. + std::array<uint16_t, CP::num_comps> uv{}; + constexpr uint32_t n = L::h_sub * L::v_sub; + for (size_t p = 0; p < pairs; ++p) { + uint32_t u_sum = 0, v_sum = 0; + for (size_t dy = 0; dy < block_h; ++dy) { + for (size_t dx = 0; dx < L::h_sub; ++dx) { + u_sum += block[dy][p * L::h_sub + dx].u; + v_sum += block[dy][p * L::h_sub + dx].v; + } + } + uv[CP::template find_pos<C::U>(p)] = + detail::encode_norm(u_bits, uint16_t(u_sum / n)); + uv[CP::template find_pos<C::V>(p)] = + detail::encode_norm(v_bits, uint16_t(v_sum / n)); + } + + const size_t cy = by / L::v_sub; + const size_t uv_word_idx = bx / block_w; + detail::store_word<CP>( + buf.data[1] + cy * buf.stride[1] + + uv_word_idx * CP::bytes_per_pixel, + CP::pack(uv)); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/layout.h b/subprojects/pixpat/pixpat-native/src/layout.h new file mode 100644 index 0000000..d092bb1 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/layout.h @@ -0,0 +1,141 @@ +#pragma once + +#include <array> +#include <cstddef> +#include <cstdint> +#include <tuple> + +namespace pixpat +{ + +enum class ColorKind { RGB, YUV }; + +// Normalized inter-stage pixel types. 16-bit-per-component integer. +// N-bit storage values bit-replicate up to normalized 16-bit (so +// N-bit max maps to 0xFFFF); encoding back is a truncating +// `normalized >> (16 - N)`. 
See io/detail.h for the round-trip +// argument. Sources without an A component emit a=0; cross-color-kind +// ColorXfm resets a=0xFFFF; sinks with X write 0, sinks with A +// encode `a`. +struct RGB16 { + static constexpr ColorKind kind = ColorKind::RGB; + uint16_t r, g, b, a; +}; + +struct YUV16 { + static constexpr ColorKind kind = ColorKind::YUV; + uint16_t y, u, v, a; +}; + +inline constexpr uint16_t kNormMax = 0xFFFF; + +enum class C : uint8_t { X, A, R, G, B, Y, U, V }; + +struct Comp { + C c; + uint8_t bits; + uint8_t shift; +}; + +template <typename Storage, Comp... Cs> +struct Plane { + using storage_t = Storage; + + static constexpr size_t num_comps = sizeof...(Cs); + static constexpr std::array<Comp, num_comps> comps{ Cs ... }; + static constexpr size_t total_bits = (size_t(Cs.bits) + ... + 0); + static constexpr size_t storage_bits = sizeof(Storage) * 8; + static constexpr size_t bytes_per_pixel = (total_bits + 7) / 8; + + static_assert(total_bits <= storage_bits, "components overflow storage word"); + + // Index of the n-th component matching Tag, or num_comps if absent. + template <C Tag> + static constexpr size_t find_pos(size_t n = 0) + { + for (size_t i = 0; i < num_comps; ++i) { + if (comps[i].c == Tag) { + if (n == 0) + return i; + --n; + } + } + return num_comps; + } + + // Count of components matching Tag. Used to derive + // pixels_per_word for multi-pixel-per-storage formats (XYYY2101010, + // P030, ...). + template <C Tag> + static constexpr size_t component_count() + { + size_t cnt = 0; + for (size_t i = 0; i < num_comps; ++i) + if (comps[i].c == Tag) + ++cnt; + return cnt; + } + + // Mask each input value to its bit-width and OR-shift it into the + // storage word. The loop trip count and the comps[i] reads are + // compile-time constant, so the optimizer unrolls and folds. 
+ static constexpr Storage pack(const std::array<uint16_t, num_comps>& v) noexcept + { + Storage out{}; + for (size_t i = 0; i < num_comps; ++i) { + const Storage mask = (Storage{ 1 } << comps[i].bits) - 1; + out |= Storage(v[i] & mask) << comps[i].shift; + } + return out; + } + + // Mirror of `pack`. + static constexpr std::array<uint16_t, num_comps> unpack(Storage word) noexcept + { + std::array<uint16_t, num_comps> out{}; + for (size_t i = 0; i < num_comps; ++i) { + const Storage mask = (Storage{ 1 } << comps[i].bits) - 1; + out[i] = uint16_t((word >> comps[i].shift) & mask); + } + return out; + } +}; + +template <ColorKind Kind, size_t Hsub, size_t Vsub, typename ... Planes> +struct Layout { + static constexpr ColorKind kind = Kind; + static constexpr size_t h_sub = Hsub; + static constexpr size_t v_sub = Vsub; + static constexpr size_t num_planes = sizeof...(Planes); + + template <size_t N> + using plane = std::tuple_element_t<N, std::tuple<Planes...> >; + + // Index of the first plane containing component Tag, or num_planes + // if no plane has it. Lets PlanarSource/Sink map C::U / C::V to a + // plane regardless of YUV vs YVU ordering. + // Comma-fold over plane indices: for each plane I check if it has + // Tag, and on the first hit assign `found = I`. Subsequent hits are + // suppressed by the `found == num_planes` guard. The whole fold + // evaluates to a discarded list of int 0s; the `found` capture + // carries the result out. + template <C Tag> + static constexpr size_t find_plane() + { + return [&]<size_t... I>(std::index_sequence<I...>) { + size_t found = num_planes; + ((plane<I>::template find_pos<Tag>() < plane<I>::num_comps + ? (found == num_planes ? 
(found = I, 0) : 0) + : 0), ...); + return found; + } (std::make_index_sequence<num_planes>{}); + } +}; + +template <size_t N> +struct Buffer { + std::array<uint8_t*, N> data; + std::array<size_t, N> stride; +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/params.h b/subprojects/pixpat/pixpat-native/src/params.h new file mode 100644 index 0000000..aa2be67 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/params.h @@ -0,0 +1,219 @@ +#pragma once + +#include <cctype> +#include <cstddef> +#include <cstdint> +#include <optional> +#include <string> +#include <string_view> +#include <utility> +#include <vector> + +#include "layout.h" + +namespace pixpat +{ + +// Pattern-specific parameters parsed from pixpat_pattern_opts::params. +// The wire format is a comma-separated list of "key=value" items; +// whitespace around tokens is trimmed, keys and values are +// case-insensitive ASCII, and neither may contain ',' or '='. +// Malformed input leaves ok() returning false — the pattern dispatcher +// fails the call when that happens. +// +// Patterns query keys by name via get() / get_int() / get_hex_color(). +// Unknown keys are ignored: each pattern handles forward compatibility, +// not the parser. +class Params +{ +public: + explicit Params(const char* csv); + + bool ok() const noexcept { + return ok_; + } + + std::optional<std::string_view> get(std::string_view key) const noexcept; + std::optional<int> get_int(std::string_view key) const noexcept; + std::optional<RGB16> get_hex_color(std::string_view key) const noexcept; + +private: + std::vector<std::pair<std::string, std::string> > kv_; + bool ok_{ true }; +}; + +namespace detail +{ + +inline char ascii_tolower(char c) noexcept +{ + return (c >= 'A' && c <= 'Z') ? 
char(c + ('a' - 'A')) : c; +} + +inline std::string_view trim(std::string_view s) noexcept +{ + while (!s.empty() && std::isspace(static_cast<unsigned char>(s.front()))) + s.remove_prefix(1); + while (!s.empty() && std::isspace(static_cast<unsigned char>(s.back()))) + s.remove_suffix(1); + return s; +} + +inline bool ieq(std::string_view a, std::string_view b) noexcept +{ + if (a.size() != b.size()) + return false; + for (size_t i = 0; i < a.size(); ++i) + if (ascii_tolower(a[i]) != ascii_tolower(b[i])) + return false; + return true; +} + +} // namespace detail + + +inline Params::Params(const char* csv) +{ + if (!csv || *csv == '\0') + return; + + std::string_view s(csv); + while (!s.empty()) { + const size_t comma = s.find(','); + std::string_view item = (comma == std::string_view::npos) + ? s : s.substr(0, comma); + s = (comma == std::string_view::npos) + ? std::string_view{} : s.substr(comma + 1); + + item = detail::trim(item); + if (item.empty()) { + ok_ = false; + return; + } + + const size_t eq = item.find('='); + if (eq == std::string_view::npos) { + ok_ = false; + return; + } + const std::string_view k = detail::trim(item.substr(0, eq)); + const std::string_view v = detail::trim(item.substr(eq + 1)); + if (k.empty()) { + ok_ = false; + return; + } + kv_.emplace_back(std::string(k), std::string(v)); + } +} + +inline std::optional<std::string_view> +Params::get(std::string_view key) const noexcept +{ + for (const auto& [k, v] : kv_) + if (detail::ieq(k, key)) + return std::string_view(v); + return std::nullopt; +} + +inline std::optional<int> +Params::get_int(std::string_view key) const noexcept +{ + auto v = get(key); + if (!v || v->empty()) + return std::nullopt; + int sign = 1; + size_t i = 0; + if ((*v)[0] == '-') { sign = -1; ++i; } + else if ((*v)[0] == '+') { ++i; } + if (i == v->size()) + return std::nullopt; + int out = 0; + for (; i < v->size(); ++i) { + const char c = (*v)[i]; + if (c < '0' || c > '9') + return std::nullopt; + out = out * 10 + (c 
- '0'); + } + return sign * out; +} + +// Parses a hex color string. The optional `0x`/`0X` prefix is allowed. +// The number of hex digits after the prefix selects the layout: +// 6 digits — 8-bit RRGGBB (alpha defaults to opaque) +// 8 digits — 8-bit AARRGGBB (alpha-first) +// 12 digits — 16-bit RRRRGGGGBBBB (alpha defaults to opaque) +// 16 digits — 16-bit AAAARRRRGGGGBBBB (alpha-first) +// 8-bit components are byte-replicated to the normalized 16-bit form +// (0xFF → 0xFFFF); 16-bit components are stored directly. Any other +// length, malformed digits, or stray separators yield std::nullopt. +inline std::optional<RGB16> +Params::get_hex_color(std::string_view key) const noexcept +{ + auto v = get(key); + if (!v) + return std::nullopt; + + std::string_view s = *v; + if (s.size() >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) + s.remove_prefix(2); + + const auto digit = [](char c) -> int { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + return -1; + }; + const auto parse_n = [&](size_t off, size_t n) -> std::optional<unsigned> { + unsigned out = 0; + for (size_t i = 0; i < n; ++i) { + const int d = digit(s[off + i]); + if (d < 0) + return std::nullopt; + out = (out << 4) | unsigned(d); + } + return out; + }; + + bool has_alpha; + bool is_16bpc; + switch (s.size()) { + case 6: has_alpha = false; is_16bpc = false; break; + case 8: has_alpha = true; is_16bpc = false; break; + case 12: has_alpha = false; is_16bpc = true; break; + case 16: has_alpha = true; is_16bpc = true; break; + default: return std::nullopt; + } + + const size_t per = is_16bpc ? 4 : 2; + const unsigned full = is_16bpc ? 
0xFFFFu : 0xFFu; + unsigned a = full, r, g, b; + size_t off = 0; + if (has_alpha) { + auto av = parse_n(off, per); + if (!av) return std::nullopt; + a = *av; + off += per; + } + auto rv = parse_n(off, per); + if (!rv) return std::nullopt; + r = *rv; + off += per; + auto gv = parse_n(off, per); + if (!gv) return std::nullopt; + g = *gv; + off += per; + auto bv = parse_n(off, per); + if (!bv) return std::nullopt; + b = *bv; + + if (is_16bpc) { + return RGB16{ uint16_t(r), uint16_t(g), uint16_t(b), uint16_t(a) }; + } else { + const auto rep = [](unsigned x) noexcept { + return uint16_t((x << 8) | x); + }; + return RGB16{ rep(r), rep(g), rep(b), rep(a) }; + } +} + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/pattern.h b/subprojects/pixpat/pixpat-native/src/pattern.h new file mode 100644 index 0000000..fbee683 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/pattern.h @@ -0,0 +1,597 @@ +#pragma once + +#include <cmath> +#include <cstdint> + +#include "color.h" +#include "layout.h" +#include "params.h" + +namespace pixpat::patterns +{ + +// Patterns emit opaque pixels (a=kNormMax) unless they encode their +// own alpha (e.g. `plain`'s ARGB form). Alpha-bearing sinks +// (ARGB8888 etc) therefore see the pattern's chosen alpha; convert +// paths propagate the source's actual `a` instead (a=0 for X-only +// sources). +// +// A pattern is an instance with: +// using Pixel = RGB16 | YUV16; +// explicit Pat(const Params&) noexcept; +// Pixel sample(size_t x, size_t y, size_t W, size_t H) const noexcept; +// bool ready() const noexcept; // optional, default true +// Patterns that don't read params ignore the constructor argument. + +namespace detail +{ +// 8-bit -> normalized 16 byte-replication. e.g. 255 -> 0xFFFF, +// 1 -> 0x0101. 
+constexpr RGB16 rgb8(uint8_t r, uint8_t g, uint8_t b) noexcept +{ + return RGB16{ + uint16_t((uint16_t(r) << 8) | r), + uint16_t((uint16_t(g) << 8) | g), + uint16_t((uint16_t(b) << 8) | b), + kNormMax, + }; +} + +// 12-bit -> normalized 16 bit-replication. +constexpr YUV16 yuv12(uint16_t y, uint16_t u, uint16_t v) noexcept +{ + return YUV16{ + uint16_t((y << 4) | (y >> 8)), + uint16_t((u << 4) | (u >> 8)), + uint16_t((v << 4) | (v >> 8)), + kNormMax, + }; +} +} // namespace detail + +// "kmstest" default pattern: white border + diagonals; blue rails on +// the top/left edges; red rails on the bottom/right; an 8-step color +// gradient block in the center. +struct Kmstest { + using Pixel = RGB16; + + explicit Kmstest(const Params&) noexcept { + } + + RGB16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept + { + using detail::rgb8; + const size_t mw = 20; + const size_t xm1 = mw; + const size_t xm2 = W - mw - 1; + const size_t ym1 = mw; + const size_t ym2 = H - mw - 1; + + if (x == xm1 || x == xm2 || y == ym1 || y == ym2) + return rgb8(255, 255, 255); + if (x < xm1 && y < ym1) + return rgb8(255, 255, 255); + if ((x == 0 || x == W - 1) && (y < ym1 || y > ym2)) + return rgb8(255, 255, 255); + if ((y == 0 || y == H - 1) && (x < xm1 || x > xm2)) + return rgb8(255, 255, 255); + if (x < xm1 && (y > ym1 && y < ym2)) + return rgb8(0, 0, 255); + if (y < ym1 && (x > xm1 && x < xm2)) + return rgb8(0, 0, 255); + if (x > xm2 && (y > ym1 && y < ym2)) + return rgb8(255, 0, 0); + if (y > ym2 && (x > xm1 && x < xm2)) + return rgb8(255, 0, 0); + if (x > xm1 && x < xm2 && y > ym1 && y < ym2) { + if (x == y || W - x == H - y) + return rgb8(255, 255, 255); + if (W - x - 1 == y || x == H - y - 1) + return rgb8(255, 255, 255); + const int t = int((x - xm1 - 1) * 8 / (xm2 - xm1 - 1)); + const unsigned c = unsigned((y - ym1 - 1) % 256); + unsigned r = 0, g = 0, b = 0; + switch (t) { + case 0: r = c; break; + case 1: g = c; break; + case 2: b = c; break; + case 3: g = b = c; 
break; + case 4: r = b = c; break; + case 5: r = g = c; break; + case 6: r = g = b = c; break; + case 7: break; + } + return rgb8(uint8_t(r), uint8_t(g), uint8_t(b)); + } + return rgb8(0, 0, 0); + } +}; + +// SMPTE RP 219-1:2014 color bar pattern. Emits YUV directly with +// pixel values defined by the spec in BT.709 / Limited range. Pass +// `rec=BT709, range=Limited` for spec-correct output; other ColorSpec +// settings produce visibly-wrong colors when the sink crosses to RGB +// (the matrix the caller picked is applied to BT.709-encoded values). +// Callers are trusted — pixpat does not override the spec for them. +struct Smpte { + using Pixel = YUV16; + + explicit Smpte(const Params&) noexcept { + } + + YUV16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept + { + using detail::yuv12; + constexpr YUV16 gray40 = yuv12(1658, 2048, 2048); + constexpr YUV16 white75 = yuv12(2884, 2048, 2048); + constexpr YUV16 yellow75 = yuv12(2694, 704, 2171); + constexpr YUV16 cyan75 = yuv12(2325, 2356, 704); + constexpr YUV16 green75 = yuv12(2136, 1012, 827); + constexpr YUV16 magenta75 = yuv12(1004, 3084, 3269); + constexpr YUV16 red75 = yuv12( 815, 1740, 3392); + constexpr YUV16 blue75 = yuv12( 446, 3392, 1925); + constexpr YUV16 cyan100 = yuv12(3015, 2459, 256); + constexpr YUV16 blue100 = yuv12( 509, 3840, 1884); + constexpr YUV16 yellow100 = yuv12(3507, 256, 2212); + constexpr YUV16 black = yuv12( 256, 2048, 2048); + constexpr YUV16 white100 = yuv12(3760, 2048, 2048); + constexpr YUV16 red100 = yuv12(1001, 1637, 3840); + constexpr YUV16 gray15 = yuv12( 782, 2048, 2048); + + constexpr YUV16 black_m2 = yuv12( 186, 2048, 2048); + constexpr YUV16 black_p2 = yuv12( 326, 2048, 2048); + constexpr YUV16 black_p4 = yuv12( 396, 2048, 2048); + + constexpr size_t M = 1024; + const size_t xs = x * M; + const size_t a = W * M; + const size_t c = (a * 3 / 4) / 7; + const size_t d = a / 8; + + const size_t pattern1_height = (H * 7) / 12; + const size_t pattern2_height = 
pattern1_height + (H / 12); + const size_t pattern3_height = pattern2_height + (H / 12); + + if (y < pattern1_height) { + if (xs < d || xs >= (a - d)) + return gray40; + const size_t bar = (xs - d) / c; + switch (bar) { + case 0: return white75; + case 1: return yellow75; + case 2: return cyan75; + case 3: return green75; + case 4: return magenta75; + case 5: return red75; + default: return blue75; + } + } + + if (y < pattern2_height) { + if (xs < d) return cyan100; + if (xs >= (a - d)) return blue100; + return white75; + } + + if (y < pattern3_height) { + if (xs < d) return yellow100; + if (xs >= (a - d)) return red100; + const size_t ramp_w = a - 2 * d; + const size_t ramp_x = xs - d; + const uint16_t y_val = uint16_t(256 + (3760 - 256) * ramp_x / ramp_w); + return yuv12(y_val, 2048, 2048); + } + + // pattern4 (PLUGE) + const size_t c0 = d; + const size_t c1 = c0 + c * 3 / 2; + const size_t c2 = c1 + 2 * c; + const size_t c3 = c2 + c * 5 / 6; + + if (xs < c0) return gray15; + if (xs < c1) return black; + if (xs < c2) return white100; + if (xs < c3) return black; + if (xs >= a - d) return gray15; + if (xs >= a - d - c) return black; + + const size_t step = (xs - c3) / (c / 3); + switch (step) { + case 0: return black_m2; + case 1: return black; + case 2: return black_p2; + case 3: return black; + default: return black_p4; + } + } +}; + +// Solid fill from a hex color string. Reads `color=<hex>` from +// params; the value is parsed by Params::get_hex_color (8/16-bpc, +// alpha-first if present, optional `0x` prefix). Missing or +// malformed `color` leaves ready()=false and the dispatcher fails +// the call. 
+struct Plain { + using Pixel = RGB16; + + explicit Plain(const Params& p) noexcept + { + if (auto c = p.get_hex_color("color")) { + color_ = *c; + ready_ = true; + } + } + + bool ready() const noexcept { + return ready_; + } + + RGB16 sample(size_t, size_t, size_t, size_t) const noexcept + { + return color_; + } + +private: + RGB16 color_{}; + bool ready_{ false }; +}; + +namespace detail +{ +// Linear ramp 0..kNormMax across [0, span-1]. span<=1 returns kNormMax. +constexpr uint16_t ramp16(size_t pos, size_t span) noexcept +{ + if (span <= 1) + return kNormMax; + return uint16_t((uint64_t(pos) * kNormMax) / (span - 1)); +} +} // namespace detail + +// Black/white checkerboard. Reads optional `cell=<N>` (positive +// integer; default 8) for cell size in pixels. +struct Checker { + using Pixel = RGB16; + + explicit Checker(const Params& p) noexcept + { + if (p.get("cell")) { + auto n = p.get_int("cell"); + if (!n || *n <= 0) { + ready_ = false; + return; + } + cell_ = size_t(*n); + } + } + + bool ready() const noexcept { + return ready_; + } + + RGB16 sample(size_t x, size_t y, size_t, size_t) const noexcept + { + const bool dark = (((x / cell_) ^ (y / cell_)) & 1u) != 0; + return dark ? RGB16{ 0, 0, 0, kNormMax } + : RGB16{ kNormMax, kNormMax, kNormMax, kNormMax }; + } + +private: + size_t cell_{ 8 }; + bool ready_{ true }; +}; + +namespace detail +{ +// Pick one of (R, G, B, gray) given a stripe index in [0, 4) and a +// scalar ramp value. Used by hramp/vramp. +constexpr RGB16 rgb_gray_stripe(size_t stripe, uint16_t v) noexcept +{ + switch (stripe) { + case 0: return RGB16{ v, 0, 0, kNormMax }; + case 1: return RGB16{ 0, v, 0, kNormMax }; + case 2: return RGB16{ 0, 0, v, kNormMax }; + default: return RGB16{ v, v, v, kNormMax }; + } +} +} // namespace detail + +// Four horizontal stripes — R, G, B, gray — each a 0..max ramp +// along x. Per-channel and luma quantization in one pattern. 
+struct Hramp { + using Pixel = RGB16; + + explicit Hramp(const Params&) noexcept { + } + + RGB16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept + { + const size_t stripe = (H == 0) ? 0 : (y * 4) / H; + return detail::rgb_gray_stripe(stripe, detail::ramp16(x, W)); + } +}; + +// Four vertical columns — R, G, B, gray — each a 0..max ramp +// along y. Same coverage as hramp, rotated 90°. +struct Vramp { + using Pixel = RGB16; + + explicit Vramp(const Params&) noexcept { + } + + RGB16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept + { + const size_t col = (W == 0) ? 0 : (x * 4) / W; + return detail::rgb_gray_stripe(col, detail::ramp16(y, H)); + } +}; + +// Diagonal RGB ramp: R sweeps with x, G with y, B with x+y. +struct Dramp { + using Pixel = RGB16; + + explicit Dramp(const Params&) noexcept { + } + + RGB16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept + { + const uint16_t r = detail::ramp16(x, W); + const uint16_t g = detail::ramp16(y, H); + const size_t span = (W + H >= 2) ? (W + H - 1) : 1; + const uint16_t b = detail::ramp16(x + y, span); + return RGB16{ r, g, b, kNormMax }; + } +}; + +namespace detail +{ +// Seven-region color sequence used by hbar/vbar: +// white, red, white, green, white, blue, white. The white separators +// between R/G/B make per-channel offsets at the band boundaries +// visible. +constexpr RGB16 bar_color7(size_t band) noexcept +{ + switch (band) { + case 1: return rgb8(255, 0, 0); + case 3: return rgb8( 0, 255, 0); + case 5: return rgb8( 0, 0, 255); + default: return rgb8(255, 255, 255); + } +} +} // namespace detail + +// Vertical bar (full image height, narrow along x) over a black +// background. `pos` is the left edge in pixels (signed; negative +// values clip at the left edge); `width` is the bar thickness in +// pixels (default 32). The bar is split into 7 equal-height regions +// colored white/red/white/green/white/blue/white. 
+struct VBarRGB { + using Pixel = RGB16; + + explicit VBarRGB(const Params& p) noexcept + { + auto pp = p.get_int("pos"); + if (!pp) { + ready_ = false; + return; + } + pos_ = *pp; + if (p.get("width")) { + auto w = p.get_int("width"); + if (!w || *w <= 0) { + ready_ = false; + return; + } + width_ = size_t(*w); + } + } + + bool ready() const noexcept { + return ready_; + } + + RGB16 sample(size_t x, size_t y, size_t, size_t H) const noexcept + { + const long long sx = static_cast<long long>(x); + const long long lo = pos_; + const long long hi = lo + static_cast<long long>(width_); + if (sx < lo || sx >= hi) + return detail::rgb8(0, 0, 0); + const size_t band = (H == 0) ? 0 : (y * 7) / H; + return detail::bar_color7(band); + } + +private: + int pos_{}; + size_t width_{ 32 }; + bool ready_{ true }; +}; + +// Horizontal bar: vbar rotated 90°. `pos` is the top edge in pixels; +// `width` is the bar thickness in pixels (default 32). The bar spans +// the full image width and is split into 7 equal-width regions +// colored white/red/white/green/white/blue/white. +struct HBarRGB { + using Pixel = RGB16; + + explicit HBarRGB(const Params& p) noexcept + { + auto pp = p.get_int("pos"); + if (!pp) { + ready_ = false; + return; + } + pos_ = *pp; + if (p.get("width")) { + auto w = p.get_int("width"); + if (!w || *w <= 0) { + ready_ = false; + return; + } + width_ = size_t(*w); + } + } + + bool ready() const noexcept { + return ready_; + } + + RGB16 sample(size_t x, size_t y, size_t W, size_t) const noexcept + { + const long long sy = static_cast<long long>(y); + const long long lo = pos_; + const long long hi = lo + static_cast<long long>(width_); + if (sy < lo || sy >= hi) + return detail::rgb8(0, 0, 0); + const size_t band = (W == 0) ? 0 : (x * 7) / W; + return detail::bar_color7(band); + } + +private: + int pos_{}; + size_t width_{ 32 }; + bool ready_{ true }; +}; + +// Same shape as VBarRGB but emits YUV16 directly. 
The five unique colors +// (black bg + white/red/green/blue bar regions) are precomputed from +// `spec` at construction so the cross-kind pass is a no-op when the +// sink is YUV. Use the RGB-native `VBarRGB` for RGB sinks instead — it +// avoids the YUV→RGB pass that this variant would incur there. +struct VBarYUV { + using Pixel = YUV16; + + explicit VBarYUV(const Params& p, ColorSpec spec) noexcept + { + auto pp = p.get_int("pos"); + if (!pp) { + ready_ = false; + return; + } + pos_ = *pp; + if (p.get("width")) { + auto w = p.get_int("width"); + if (!w || *w <= 0) { + ready_ = false; + return; + } + width_ = size_t(*w); + } + const ColorCoeffs c = coeffs_for(spec); + using X = ColorXfm<RGB16, YUV16>; + bg_ = X::apply(detail::rgb8( 0, 0, 0), c); + bands_[0] = X::apply(detail::rgb8(255, 255, 255), c); + bands_[1] = X::apply(detail::rgb8(255, 0, 0), c); + bands_[2] = bands_[0]; + bands_[3] = X::apply(detail::rgb8( 0, 255, 0), c); + bands_[4] = bands_[0]; + bands_[5] = X::apply(detail::rgb8( 0, 0, 255), c); + bands_[6] = bands_[0]; + } + + bool ready() const noexcept { + return ready_; + } + + YUV16 sample(size_t x, size_t y, size_t, size_t H) const noexcept + { + const long long sx = static_cast<long long>(x); + const long long lo = pos_; + const long long hi = lo + static_cast<long long>(width_); + if (sx < lo || sx >= hi) + return bg_; + const size_t band = (H == 0) ? 0 : (y * 7) / H; + return bands_[band]; + } + +private: + YUV16 bg_{}; + YUV16 bands_[7]{}; + int pos_{}; + size_t width_{ 32 }; + bool ready_{ true }; +}; + +// YUV-native counterpart to HBarRGB. See VBarYUV. 
+struct HBarYUV { + using Pixel = YUV16; + + explicit HBarYUV(const Params& p, ColorSpec spec) noexcept + { + auto pp = p.get_int("pos"); + if (!pp) { + ready_ = false; + return; + } + pos_ = *pp; + if (p.get("width")) { + auto w = p.get_int("width"); + if (!w || *w <= 0) { + ready_ = false; + return; + } + width_ = size_t(*w); + } + const ColorCoeffs c = coeffs_for(spec); + using X = ColorXfm<RGB16, YUV16>; + bg_ = X::apply(detail::rgb8( 0, 0, 0), c); + bands_[0] = X::apply(detail::rgb8(255, 255, 255), c); + bands_[1] = X::apply(detail::rgb8(255, 0, 0), c); + bands_[2] = bands_[0]; + bands_[3] = X::apply(detail::rgb8( 0, 255, 0), c); + bands_[4] = bands_[0]; + bands_[5] = X::apply(detail::rgb8( 0, 0, 255), c); + bands_[6] = bands_[0]; + } + + bool ready() const noexcept { + return ready_; + } + + YUV16 sample(size_t x, size_t y, size_t W, size_t) const noexcept + { + const long long sy = static_cast<long long>(y); + const long long lo = pos_; + const long long hi = lo + static_cast<long long>(width_); + if (sy < lo || sy >= hi) + return bg_; + const size_t band = (W == 0) ? 0 : (x * 7) / W; + return bands_[band]; + } + +private: + YUV16 bg_{}; + YUV16 bands_[7]{}; + int pos_{}; + size_t width_{ 32 }; + bool ready_{ true }; +}; + +// Centered radial cosine zone plate: 0.5 + 0.5 * cos(k * (cx² + cy²)) +// with cx, cy measured from the image center and k chosen so the +// local frequency hits Nyquist at the longer edge — i.e. the pattern +// uses every spatial frequency the grid can resolve. +struct Zoneplate { + using Pixel = RGB16; + + explicit Zoneplate(const Params&) noexcept { + } + + RGB16 sample(size_t x, size_t y, size_t W, size_t H) const noexcept + { + const double max_dim = double(W > H ? W : H); + // Local frequency d(k r²)/dr = 2 k r. At r = max_dim/2 the + // frequency reaches π/pixel (Nyquist), giving k = π / max_dim. + const double k = 3.14159265358979323846 / (max_dim > 0 ? 
max_dim : 1.0); + const double cx = double(x) - 0.5 * double(W); + const double cy = double(y) - 0.5 * double(H); + const double phase = k * (cx * cx + cy * cy); + const double v = 0.5 + 0.5 * std::cos(phase); + const double scaled = v * 65535.0; + const uint16_t g = (scaled < 0.0) ? uint16_t(0) + : (scaled > 65535.0) ? kNormMax + : uint16_t(scaled + 0.5); + return RGB16{ g, g, g, kNormMax }; + } +}; + +} // namespace pixpat::patterns diff --git a/subprojects/pixpat/pixpat-native/src/pattern_catalog.h b/subprojects/pixpat/pixpat-native/src/pattern_catalog.h new file mode 100644 index 0000000..6576b2b --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/pattern_catalog.h @@ -0,0 +1,64 @@ +#pragma once + +// Catalog of every named pattern the C++ side knows. Mirrors the +// shape of format_catalog.h. The X-macro is a list of +// (Label, RgbType, YuvType, "name") rows: +// +// X(Label, RgbType, YuvType, "name") +// +// `Label` is the C++ identifier doubling as the PatternId enum value +// and the s_pattern_caps[] index. `RgbType` and `YuvType` resolve to +// classes in `pixpat::patterns::` (defined in pattern.h) that satisfy +// the pattern interface (sample(), Pixel) — one per color kind. Use +// `void` if the pattern has no variant in that kind. At least one +// must be non-void. When both are present, dispatch_draw_pattern +// picks the variant matching the sink's color kind so the cross-kind +// pass is a no-op; when only one is present, the pipeline runs the +// cross-kind pass for the opposite-kind sinks. `name` is the +// lowercase identifier exposed via the C ABI. +// +// Adding a pattern = a row here AND its class(es) in pattern.h. The +// codegen (pixpat-native/codegen/gen_pixpat.py) parses this X-macro +// to learn the pattern set; pixpat_pattern.cpp re-expands it to build +// the dispatch arms and the default-pattern fallback. 

#include <cstddef>
#include <cstdint>

namespace pixpat
{

// One row per pattern: (Label, RgbType, YuvType, "name"). Row order
// determines the PatternId enumerator values and the s_pattern_table
// order, since both are generated by expanding this list.
#define PIXPAT_PATTERN_LIST(X) \
    X(Kmstest, Kmstest, void, "kmstest") \
    X(Smpte, void, Smpte, "smpte") \
    X(Plain, Plain, void, "plain") \
    X(Checker, Checker, void, "checker") \
    X(Hramp, Hramp, void, "hramp") \
    X(Vramp, Vramp, void, "vramp") \
    X(HBar, HBarRGB, HBarYUV, "hbar") \
    X(VBar, VBarRGB, VBarYUV, "vbar") \
    X(Dramp, Dramp, void, "dramp") \
    X(Zoneplate, Zoneplate, void, "zoneplate")

// One enumerator per catalog row, in row order, followed by the
// `Unknown` sentinel.
enum class PatternId : uint8_t {
#define X(label, rgb, yuv, name) label,
    PIXPAT_PATTERN_LIST(X)
#undef X
    Unknown,
};

// Pairs a pattern's lowercase name with its PatternId.
struct PatternEntry {
    const char* name;
    PatternId id;
};

// Name -> id table with one entry per catalog row, in row order.
inline constexpr PatternEntry s_pattern_table[] = {
#define X(label, rgb, yuv, name) { name, PatternId::label },
    PIXPAT_PATTERN_LIST(X)
#undef X
};

// Number of rows in the catalog (excludes PatternId::Unknown).
inline constexpr size_t s_pattern_catalog_count =
    sizeof(s_pattern_table) / sizeof(s_pattern_table[0]);

} // namespace pixpat
diff --git a/subprojects/pixpat/pixpat-native/src/pipeline.h b/subprojects/pixpat/pixpat-native/src/pipeline.h new file mode 100644 index 0000000..09e13bc --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/pipeline.h @@ -0,0 +1,44 @@
#pragma once

#include <cstddef>

#include "color.h"
#include "layout.h"

// Inlined source → color → sink composition. The intermediate Pixel
// values stay in registers across stages; there is no normalized RGB16
// or YUV16 buffer between source and sink. Block size is dictated by
// the sink: 1x1 for non-subsampled formats, h_sub × v_sub for chroma-
// subsampled ones.
+ +namespace pixpat +{ + +template <typename Source, typename Sink> +struct Converter { + using Xfm = ColorXfm<typename Source::Pixel, typename Sink::Pixel>; + static constexpr size_t bh = Sink::block_h; + static constexpr size_t bw = Sink::block_w; + + static void run(const Buffer<Source::Layout::num_planes>& src, + Buffer<Sink::Layout::num_planes>& dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) noexcept + { + const ColorCoeffs c = coeffs_for(spec); + for (size_t by = by_start; by < by_end; by += bh) { + for (size_t bx = 0; bx < W; bx += bw) { + typename Sink::Pixel block[bh][bw]; + for (size_t dy = 0; dy < bh; ++dy) + for (size_t dx = 0; dx < bw; ++dx) + block[dy][dx] = Xfm::apply( + Source::read(src, bx + dx, by + dy, + W, H), c); + Sink::write_block(dst, bx, by, block); + } + } + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/pixpat.cpp b/subprojects/pixpat/pixpat-native/src/pixpat.cpp new file mode 100644 index 0000000..ac21fac --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/pixpat.cpp @@ -0,0 +1,355 @@ +// pixpat: extern "C" entry points + runtime format dispatch. +// +// The format catalog (X-macro PIXPAT_FORMAT_LIST + FormatId enum + +// s_format_table) is hand-written in format_catalog.h. The generator +// (pixpat-native/codegen/gen_pixpat.py) reads the same X-macro and +// the user TOML and emits the per-config bits: +// +// pixpat_config.h — PIXPAT_FEATURE_PATTERN / _CONVERT +// pixpat_caps.inc — s_format_caps[] (per-format readable / writable / +// hot_src / hot_dst, indexed by FormatId) and +// s_pattern_caps[] (per-pattern enabled flag). +// +// The convert and pattern dispatch (dispatch_convert in +// pixpat_convert.cpp, dispatch_draw_pattern in pixpat_pattern.cpp) is +// hand-written and consumes s_format_caps / s_pattern_caps via +// `if constexpr` on the per-row constexpr fields. 
+// +// s_format_info is built here, once, by re-expanding the catalog +// X-macro through unpack_for / pack_for / snk_block_h_for / +// snk_block_w_for. Those constexpr helpers use `if constexpr` on the +// per-format readable / writable flags from s_format_caps to either +// take the address of unpack_to_norm / pack_from_norm or fall back to +// nullptr (or 0). Because they're function templates, the discarded +// branch is never instantiated, so disabled-direction templates +// produce no code. +// +// Feature gating is meson-side: pixpat_pattern.cpp / pixpat_convert.cpp +// are added to the source list only when their feature is enabled. This +// file's entry points always exist; they call the bridge functions +// dispatch_draw_pattern / dispatch_convert under `if constexpr +// (kFeatureXxx)`. The discarded if-constexpr branch produces no symbol +// reference, so when the matching TU is absent the link still succeeds +// and the entry point returns -1 instead. + +#include <pixpat/pixpat.h> + +#include <algorithm> +#include <cstdint> +#include <cstring> + +#include "pixpat_config.h" + +#include "color.h" +#include "error.h" +#include "format_catalog.h" +#include "formats.h" +#include "io.h" +#include "layout.h" +#include "params.h" +#include "pattern.h" +#include "pixpat_internal.h" +#include "threading.h" + +namespace pixpat +{ + +inline constexpr bool kFeaturePattern = PIXPAT_FEATURE_PATTERN; +inline constexpr bool kFeatureConvert = PIXPAT_FEATURE_CONVERT; + +static FormatId lookup_format(const char* name) noexcept +{ + if (!name) + return FormatId::Unknown; + for (const auto& e : s_format_table) + if (std::strcmp(e.name, name) == 0) + return e.id; + return FormatId::Unknown; +} + +// Per-source: fill `bh` rows of normalized pixels by calling Src::read. +// Address is taken in s_format_info[] for every readable format. When +// no format is readable (convert disabled) no specialization is +// instantiated, so this template emits no code. 
+template <typename Src> +static void unpack_to_norm(uint8_t* norm, const pixpat_buffer* src, + size_t by, size_t bh, size_t W) noexcept +{ + using P = typename Src::Pixel; + auto sb = make_buffer<typename Src::Layout>(src); + auto* dst = reinterpret_cast<P*>(norm); + const size_t H = src->height; + for (size_t dy = 0; dy < bh; ++dy) + for (size_t x = 0; x < W; ++x) + dst[dy * W + x] = Src::read(sb, x, by + dy, W, H); +} + +// Per-sink: re-block `Snk::block_h × W` of normalized pixels and call +// Sink::write_block. Snk's block_h dictates how many normalized rows +// the caller has to have prepared. Used by the normalized pivot for +// both convert (cold path) and pattern. +template <typename Snk> +static void pack_from_norm(const pixpat_buffer* dst, + const uint8_t* norm, + size_t by, size_t W) noexcept +{ + using P = typename Snk::Pixel; + constexpr size_t bh = Snk::block_h; + constexpr size_t bw = Snk::block_w; + auto db = make_buffer<typename Snk::Layout>(dst); + auto* src = reinterpret_cast<const P*>(norm); + for (size_t bx = 0; bx < W; bx += bw) { + P block[bh][bw]; + for (size_t dy = 0; dy < bh; ++dy) + for (size_t dx = 0; dx < bw; ++dx) + block[dy][dx] = src[dy * W + bx + dx]; + Snk::write_block(db, bx, by, block); + } +} + +// Generated: s_format_caps[] indexed by FormatId, plus s_pattern_* / +// DefaultPattern (used only by pixpat_pattern.cpp; harmless here). +#include "pixpat_caps.inc" + +static_assert(sizeof(s_format_caps) / sizeof(s_format_caps[0]) == s_format_catalog_count, + "s_format_caps must cover the full catalog"); + +// `if constexpr` keeps disabled-direction function-template bodies +// uninstantiated. Taking `&unpack_to_norm<Src>` / `&pack_from_norm<Snk>` +// forces the function body to be emitted; without the gate every +// catalog format would carry unpack and pack code regardless of its +// readable / writable bit. 
Snk::block_h / Snk::block_w are constexpr +// scalars — no body, no emission — so they're inlined directly in the +// initializer below, without a helper. +template <bool Read, typename Src> +static constexpr UnpackFn unpack_for() noexcept +{ + if constexpr (Read) + return &unpack_to_norm<Src>; + else + return nullptr; +} + +template <bool Write, typename Snk> +static constexpr PackFn pack_for() noexcept +{ + if constexpr (Write) + return &pack_from_norm<Snk>; + else + return nullptr; +} + +const FormatInfo s_format_info[] = { +#define CAPS(name) s_format_caps[size_t(FormatId::name)] +#define X(name) \ + { \ + unpack_for<CAPS(name).readable, formats::name::Source>(), \ + pack_for<CAPS(name).writable, formats::name::Sink>(), \ + formats::name::kind, \ + uint8_t(formats::name::h_sub), \ + uint8_t(formats::name::v_sub), \ + uint8_t(formats::name::Sink::block_h), \ + uint8_t(formats::name::Sink::block_w), \ + }, + PIXPAT_FORMAT_LIST(X) +#undef X +#undef CAPS +}; +static_assert(sizeof(s_format_info) / sizeof(s_format_info[0]) == s_format_catalog_count, + "s_format_info must cover the full catalog"); + +// validate_* / parse_spec are only reached from inside the entry points' +// `if constexpr (kFeatureXxx)` true branches. With a feature disabled, +// its caller's branch is discarded and the helper becomes unreferenced; +// require_readable is convert-only. [[maybe_unused]] keeps +// -Wunused-function (and clang's -Wunneeded-internal-declaration) quiet. 
+[[maybe_unused]] static void validate_buffer(const pixpat_buffer* b) +{ + if (!b) + throw invalid_argument("null buffer"); + if (b->width == 0 || b->height == 0) + throw invalid_argument("zero-sized buffer"); +} + +[[maybe_unused]] static FormatId validate_format(const char* name) +{ + auto id = lookup_format(name); + if (id == FormatId::Unknown) + throw invalid_argument("unknown format"); + return id; +} + +[[maybe_unused]] static void require_writable(FormatId id) +{ + if (s_format_info[size_t(id)].pack == nullptr) + throw invalid_argument("format not enabled as a sink in this build"); +} + +[[maybe_unused]] static void require_readable(FormatId id) +{ + if (s_format_info[size_t(id)].unpack == nullptr) + throw invalid_argument("format not enabled as a source in this build"); +} + +[[maybe_unused]] static unsigned validate_thread_count(int n) +{ + if (n < 0) + throw invalid_argument("negative num_threads"); + return n > 0 ? static_cast<unsigned>(n) : default_thread_count(); +} + +// Map the C-side pixpat_rec / pixpat_range enums (defined in +// pixpat.h with explicit values 0/1/2 for rec, 0/1 for range) onto +// the internal pixpat::Rec / pixpat::Range. Out-of-range values fall +// back to BT.601 / Limited — matching the zero-initialised opts +// struct and kDefaultColorSpec. +[[maybe_unused]] static ColorSpec parse_spec(int rec_in, int range_in) noexcept +{ + Rec rec; + switch (rec_in) { + case PIXPAT_REC_BT709: rec = Rec::BT709; break; + case PIXPAT_REC_BT2020: rec = Rec::BT2020; break; + default: rec = Rec::BT601; break; + } + Range range = (range_in == PIXPAT_RANGE_FULL) ? Range::Full : Range::Limited; + return ColorSpec{ rec, range }; +} + +} // namespace pixpat + +// Marks the C entry points as part of the public ABI: restores default +// visibility against the build-wide -fvisibility=hidden, so they are +// exported from libpixpat.so. 
+#define PIXPAT_API __attribute__((visibility("default"))) + +extern "C" { + +PIXPAT_API int pixpat_draw_pattern(const pixpat_buffer* dst, + const char* pattern, + const pixpat_pattern_opts* opts) +{ + if constexpr (pixpat::kFeaturePattern) { + try { + pixpat::validate_buffer(dst); + auto id = pixpat::validate_format(dst->format); + pixpat::require_writable(id); + const auto& di = pixpat::s_format_info[size_t(id)]; + if (dst->width % di.snk_block_w != 0 || + dst->height % di.snk_block_h != 0) + throw pixpat::invalid_argument( + "dimensions not aligned to format block"); + const unsigned n_threads = opts + ? pixpat::validate_thread_count(opts->num_threads) + : pixpat::default_thread_count(); + const pixpat::ColorSpec spec = opts + ? pixpat::parse_spec(opts->rec, opts->range) + : pixpat::kDefaultColorSpec; + const pixpat::Params params(opts ? opts->params : nullptr); + if (!params.ok()) + throw pixpat::invalid_argument("malformed opts->params"); + + pixpat::run_stripes(dst->height, di.snk_block_h, n_threads, + [&](size_t y0, size_t y1) { + pixpat::dispatch_draw_pattern( + id, pattern, params, dst, + dst->width, dst->height, y0, y1, spec); + }); + return 0; + } catch (const std::exception&) { + return -1; + } + } else { + (void)dst; + (void)pattern; + (void)opts; + return -1; + } +} + +PIXPAT_API int pixpat_convert(const pixpat_buffer* dst, + const pixpat_buffer* src, + const pixpat_convert_opts* opts) +{ + if constexpr (pixpat::kFeatureConvert) { + try { + pixpat::validate_buffer(dst); + pixpat::validate_buffer(src); + if (src->width != dst->width || src->height != dst->height) + throw pixpat::invalid_argument("src/dst dimensions differ"); + + auto src_id = pixpat::validate_format(src->format); + auto dst_id = pixpat::validate_format(dst->format); + pixpat::require_readable(src_id); + pixpat::require_writable(dst_id); + + const auto& si = pixpat::s_format_info[size_t(src_id)]; + const auto& di = pixpat::s_format_info[size_t(dst_id)]; + // Each constraint must hold 
independently — checking only + // max() would miss e.g. h_sub=2 vs snk_block_w=3 with W=3. + if (src->width % si.h_sub != 0 || src->height % si.v_sub != 0 || + src->width % di.h_sub != 0 || src->height % di.v_sub != 0 || + src->width % di.snk_block_w != 0 || src->height % di.snk_block_h != 0) + throw pixpat::invalid_argument( + "dimensions not aligned to format subsampling"); + // run_stripes only needs the v dimension. Stripes must align + // to si.v_sub (source reads) and di.snk_block_h (sink block + // loop); for pixpat's catalog these are powers-of-two and + // max == LCM. + const unsigned vs = std::max({ unsigned(si.v_sub), + unsigned(di.v_sub), + unsigned(di.snk_block_h) }); + const unsigned n_threads = opts + ? pixpat::validate_thread_count(opts->num_threads) + : pixpat::default_thread_count(); + const pixpat::ColorSpec spec = opts + ? pixpat::parse_spec(opts->rec, opts->range) + : pixpat::kDefaultColorSpec; + + pixpat::run_stripes(src->height, vs, n_threads, + [&](size_t y0, size_t y1) { + pixpat::dispatch_convert(src_id, dst_id, src, dst, + src->width, src->height, + y0, y1, spec); + }); + return 0; + } catch (const std::exception&) { + return -1; + } + } else { + (void)dst; + (void)src; + (void)opts; + return -1; + } +} + +PIXPAT_API int pixpat_format_supported(const char* format) +{ + auto id = pixpat::lookup_format(format); + if (id == pixpat::FormatId::Unknown) + return 0; + return pixpat::s_format_caps[size_t(id)].enabled() ? 
1 : 0; +} + +PIXPAT_API size_t pixpat_format_count(void) +{ + size_t n = 0; + for (const auto& c : pixpat::s_format_caps) + if (c.enabled()) + ++n; + return n; +} + +PIXPAT_API const char* pixpat_format_name(size_t idx) +{ + size_t n = 0; + for (size_t i = 0; i < pixpat::s_format_catalog_count; ++i) { + if (!pixpat::s_format_caps[i].enabled()) + continue; + if (n++ == idx) + return pixpat::s_format_table[i].name; + } + return nullptr; +} + +} // extern "C" diff --git a/subprojects/pixpat/pixpat-native/src/pixpat_convert.cpp b/subprojects/pixpat/pixpat-native/src/pixpat_convert.cpp new file mode 100644 index 0000000..63461d8 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/pixpat_convert.cpp @@ -0,0 +1,201 @@ +// Convert-feature TU: built only when PIXPAT_FEATURE_CONVERT is on +// (controlled by the meson source list). pixpat.cpp's pixpat_convert +// entry calls into dispatch_convert() below via if-constexpr; when the +// feature is off this file isn't compiled, the discarded if-constexpr +// branch emits no symbol reference, and the .so simply lacks these +// symbols. + +#include <cassert> +#include <cstdint> +#include <cstring> +#include <vector> + +#include "color.h" +#include "error.h" +#include "format_catalog.h" +#include "formats.h" +#include "io.h" +#include "layout.h" +#include "pattern.h" +#include "pipeline.h" +#include "pixpat_internal.h" + +namespace pixpat +{ + +template <typename Src, typename Snk> +static void run_convert_impl(const pixpat_buffer* src, const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) +{ + using SL = typename Src::Layout; + using DL = typename Snk::Layout; + // Entry point (pixpat_convert) validates W/H against each layout's + // h_sub / v_sub, plus the sink's block dims. 
+ assert(W % SL::h_sub == 0 && W % DL::h_sub == 0); + assert(H % SL::v_sub == 0 && H % DL::v_sub == 0); + + auto sb = make_buffer<SL>(src); + auto db = make_buffer<DL>(dst); + Converter<Src, Snk>::run(sb, db, W, H, by_start, by_end, spec); +} + +static void run_norm(FormatId src_id, FormatId dst_id, + const pixpat_buffer* src, const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) +{ + const auto& si = s_format_info[size_t(src_id)]; + const auto& di = s_format_info[size_t(dst_id)]; + + const size_t bh = di.snk_block_h; + // Entry point (pixpat_convert) guarantees W/H alignment to each + // of si.h_sub / si.v_sub and di.snk_block_w / di.snk_block_h. + assert(W % si.h_sub == 0 && W % di.snk_block_w == 0); + assert(H % si.v_sub == 0 && H % bh == 0); + + // Per-thread normalized line buffer. RGB16 and YUV16 are both 8 + // bytes, so one allocation works for both. thread_local gives each + // worker its own buffer when called from run_stripes. + thread_local std::vector<uint8_t> norm; + norm.resize(bh * W * sizeof(RGB16)); + + const ColorCoeffs c = coeffs_for(spec); + for (size_t by = by_start; by < by_end; by += bh) { + si.unpack(norm.data(), src, by, bh, W); + if (si.kind != di.kind) { + const size_t n = bh * W; + if (si.kind == ColorKind::RGB) + norm_rgb_to_yuv(norm.data(), n, c); + else + norm_yuv_to_rgb(norm.data(), n, c); + } + di.pack(dst, norm.data(), by, W); + } +} + +// Generated: FormatCaps + s_format_caps[] (per-format readable/writable +// + hot_src/hot_dst), plus s_pattern_* / DefaultPattern. +#include "pixpat_caps.inc" + +// Per-Src dispatch: pick the right Sink for `dst_id` and call +// run_convert_impl. The X-macro emits one case per catalog format; +// `if constexpr (...writable)` discards the body for non-writable +// formats — those cases fall to the trailing throw. 
+template <typename Src> +static void dispatch_dst_convert(FormatId dst_id, + const pixpat_buffer* src, const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) +{ + switch (dst_id) { +#define CAPS(name) s_format_caps[size_t(FormatId::name)] +#define X(name) \ + case FormatId::name: \ + if constexpr (CAPS(name).writable) { \ + run_convert_impl<Src, formats::name::Sink>( \ + src, dst, W, H, by_start, by_end, spec); \ + return; \ + } \ + break; + PIXPAT_FORMAT_LIST(X) +#undef X +#undef CAPS + default: + break; + } + throw invalid_argument("destination format not enabled in this build"); +} + +// Per-Snk dispatch: mirror of dispatch_dst_convert. +template <typename Snk> +static void dispatch_src_convert(FormatId src_id, + const pixpat_buffer* src, const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) +{ + switch (src_id) { +#define CAPS(name) s_format_caps[size_t(FormatId::name)] +#define X(name) \ + case FormatId::name: \ + if constexpr (CAPS(name).readable) { \ + run_convert_impl<formats::name::Source, Snk>( \ + src, dst, W, H, by_start, by_end, spec); \ + return; \ + } \ + break; + PIXPAT_FORMAT_LIST(X) +#undef X +#undef CAPS + default: + break; + } + throw invalid_argument("source format not enabled in this build"); +} + +// Hot-pivot probes. The wrapper has to be a template so that the +// discarded `if constexpr` branch is not instantiated — otherwise +// dispatch_dst_convert<formats::X::Source> would be instantiated for +// every catalog format, not just hot pivots. 
+template <bool HotSrc, FormatId Id, typename Source> +static bool try_hot_src(FormatId src_id, FormatId dst_id, + const pixpat_buffer* src, const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) +{ + if constexpr (HotSrc) { + if (src_id == Id) { + dispatch_dst_convert<Source>( + dst_id, src, dst, W, H, by_start, by_end, spec); + return true; + } + } + return false; +} + +template <bool HotDst, FormatId Id, typename Sink> +static bool try_hot_dst(FormatId src_id, FormatId dst_id, + const pixpat_buffer* src, const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) +{ + if constexpr (HotDst) { + if (dst_id == Id) { + dispatch_src_convert<Sink>( + src_id, src, dst, W, H, by_start, by_end, spec); + return true; + } + } + return false; +} + +void dispatch_convert(FormatId src_id, FormatId dst_id, + const pixpat_buffer* src, const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) +{ +#define CAPS(name) s_format_caps[size_t(FormatId::name)] +#define X(name) \ + if (try_hot_src<CAPS(name).hot_src, FormatId::name, \ + formats::name::Source>( \ + src_id, dst_id, src, dst, W, H, by_start, by_end, spec)) \ + return; \ + if (try_hot_dst<CAPS(name).hot_dst, FormatId::name, \ + formats::name::Sink>( \ + src_id, dst_id, src, dst, W, H, by_start, by_end, spec)) \ + return; + PIXPAT_FORMAT_LIST(X) +#undef X +#undef CAPS + + run_norm(src_id, dst_id, src, dst, W, H, by_start, by_end, spec); +} + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/pixpat_internal.h b/subprojects/pixpat/pixpat-native/src/pixpat_internal.h new file mode 100644 index 0000000..50d3405 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/pixpat_internal.h @@ -0,0 +1,89 @@ +#pragma once + +// Internal interface shared between the always-built pixpat.cpp and the +// optional pixpat_pattern.cpp / pixpat_convert.cpp TUs. 
The feature +// gate is meson-side: pixpat_pattern.cpp is in the source list iff +// PIXPAT_FEATURE_PATTERN, and likewise for convert. The bridge +// declarations below are unconditional; pixpat.cpp's entry points call +// them inside `if constexpr (kFeatureXxx)`, and the discarded branch +// emits no symbol reference, so absent definitions don't cause link +// failures. + +#include <cstddef> +#include <cstdint> + +#include <pixpat/pixpat.h> + +#include "color.h" +#include "format_catalog.h" +#include "layout.h" +#include "pattern_catalog.h" + +namespace pixpat +{ + +template <typename Layout> +inline Buffer<Layout::num_planes> make_buffer(const pixpat_buffer* b) noexcept +{ + Buffer<Layout::num_planes> out{}; + for (size_t i = 0; i < Layout::num_planes; ++i) { + out.data[i] = static_cast<uint8_t*>(b->planes[i]); + out.stride[i] = b->strides[i]; + } + return out; +} + +using UnpackFn = void (*)(uint8_t*, const pixpat_buffer*, size_t, size_t, size_t); +using PackFn = void (*)(const pixpat_buffer*, const uint8_t*, size_t, size_t); + +struct FormatInfo { + UnpackFn unpack; + PackFn pack; + ColorKind kind; + uint8_t h_sub; + uint8_t v_sub; + uint8_t snk_block_h; + uint8_t snk_block_w; +}; + +extern const FormatInfo s_format_info[]; + +// Per-format build capabilities. Defined once per build by the +// generator into s_format_caps[] (in pixpat_caps.inc); the schema is +// here so that file is pure data. +struct FormatCaps { + bool readable; + bool writable; + bool hot_src; + bool hot_dst; + + constexpr bool enabled() const noexcept + { + return readable || writable; + } +}; + +// Per-pattern build capabilities. Generator emits s_pattern_caps[] +// indexed by PatternId, plus a separate s_default_pattern_id singleton +// (the fallback when pattern_name doesn't match any enabled arm). +// Used only when PIXPAT_FEATURE_PATTERN — pixpat_pattern.cpp consumes +// both. 
+struct PatternCaps { + bool enabled; +}; + +class Params; + +// Bridge into pixpat_pattern.cpp (defined there iff PIXPAT_FEATURE_PATTERN). +void dispatch_draw_pattern(FormatId id, const char* pattern_name, + const Params& params, + const pixpat_buffer* dst, size_t W, size_t H, + size_t by_start, size_t by_end, ColorSpec spec); + +// Bridge into pixpat_convert.cpp (defined there iff PIXPAT_FEATURE_CONVERT). +void dispatch_convert(FormatId src_id, FormatId dst_id, + const pixpat_buffer* src, const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, ColorSpec spec); + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/pixpat_pattern.cpp b/subprojects/pixpat/pixpat-native/src/pixpat_pattern.cpp new file mode 100644 index 0000000..e8ac780 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/pixpat_pattern.cpp @@ -0,0 +1,168 @@ +// Pattern-feature TU: built only when PIXPAT_FEATURE_PATTERN is on +// (controlled by the meson source list). pixpat.cpp's pixpat_draw_pattern +// entry calls into dispatch_draw_pattern() below via if-constexpr; when +// the feature is off this file isn't compiled, the discarded if-constexpr +// branch emits no symbol reference, and the .so simply lacks these +// symbols. + +#include <cassert> +#include <cstdint> +#include <string_view> +#include <type_traits> +#include <vector> + +#include "color.h" +#include "error.h" +#include "params.h" +#include "pattern.h" +#include "pattern_catalog.h" +#include "pipeline.h" +#include "pixpat_internal.h" + +namespace pixpat +{ + +// Generated: s_pattern_* enable flags + DefaultPattern alias. Included +// inside namespace pixpat so the unqualified FormatId / s_format_catalog_count +// references resolve. 
+#include "pixpat_caps.inc" + +// Cold pattern path: fill a per-thread normalized line buffer with +// Pattern samples in the pattern's native color kind, run a cross- +// color-kind pass over the buffer if the sink wants the other kind, +// then hand the buffer to the destination's per-format pack via +// s_format_info. Same shape as run_norm in pixpat_convert.cpp. +template <typename Pattern> +static void run_pattern_norm(const Pattern& pat, + FormatId dst_id, const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) +{ + using P = typename Pattern::Pixel; + constexpr bool pat_is_rgb = std::is_same_v<P, RGB16>; + + const auto& di = s_format_info[size_t(dst_id)]; + const size_t bh = di.snk_block_h; + // Entry point (pixpat_draw_pattern) validates W%bw / H%bh. + assert(W % di.snk_block_w == 0 && H % bh == 0); + + thread_local std::vector<uint8_t> norm; + norm.resize(bh * W * sizeof(RGB16)); // RGB16 / YUV16 same size + + const ColorCoeffs c = coeffs_for(spec); + const bool need_xfm = (pat_is_rgb && di.kind == ColorKind::YUV) || + (!pat_is_rgb && di.kind == ColorKind::RGB); + + for (size_t by = by_start; by < by_end; by += bh) { + auto* px = reinterpret_cast<P*>(norm.data()); + for (size_t dy = 0; dy < bh; ++dy) + for (size_t x = 0; x < W; ++x) + px[dy * W + x] = pat.sample(x, by + dy, W, H); + if (need_xfm) { + const size_t n = bh * W; + if constexpr (pat_is_rgb) + norm_rgb_to_yuv(norm.data(), n, c); + else + norm_yuv_to_rgb(norm.data(), n, c); + } + di.pack(dst, norm.data(), by, W); + } +} + +// Construct, ready-check, and run a pattern. Patterns whose colors +// depend on the call's ColorSpec (e.g. native-YUV bar variants) opt +// in by exposing a (Params, ColorSpec) constructor; the rest take +// Params only and stay unchanged. 
+template <typename Pattern> +static void run_one_pattern(const Params& params, + FormatId id, const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) +{ + auto pat = [&] { + if constexpr (std::is_constructible_v< + Pattern, const Params&, ColorSpec>) + return Pattern(params, spec); + else + return Pattern(params); + }(); + if constexpr (requires { pat.ready(); }) { + if (!pat.ready()) + throw invalid_argument("pattern parameters not accepted"); + } + run_pattern_norm(pat, id, dst, W, H, by_start, by_end, spec); +} + +// Per-pattern dispatch arm. Templated on the catalog row's RGB and +// YUV variants (either may be `void` if the pattern has no variant +// in that kind). When both are present, the sink kind picks the +// matching variant so the cross-kind pass is a no-op; when only one +// is present, the pipeline runs the cross-kind pass for opposite- +// kind sinks. +// +// Wrapping in a templated helper is what keeps the binary size down: +// `if constexpr (Enabled = false)` discards the run_pattern_norm +// reference, and because try_pattern is itself a template, the +// discarded branch is *not instantiated* — so disabled patterns +// emit no code, and the `void` arms of partial patterns never +// instantiate `Pattern::Pixel` or run_pattern_norm<void>. 
+template <bool Enabled, typename Rgb, typename Yuv> +static bool try_pattern(std::string_view name, std::string_view want, + const Params& params, + FormatId id, ColorKind sink_kind, + const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) +{ + if constexpr (Enabled) { + if (name == want) { + constexpr bool has_rgb = !std::is_void_v<Rgb>; + constexpr bool has_yuv = !std::is_void_v<Yuv>; + static_assert(has_rgb || has_yuv, + "pattern needs at least one variant"); + if constexpr (has_rgb && has_yuv) { + if (sink_kind == ColorKind::YUV) + run_one_pattern<Yuv>(params, id, dst, W, H, + by_start, by_end, spec); + else + run_one_pattern<Rgb>(params, id, dst, W, H, + by_start, by_end, spec); + } else if constexpr (has_rgb) { + run_one_pattern<Rgb>(params, id, dst, W, H, + by_start, by_end, spec); + } else { + run_one_pattern<Yuv>(params, id, dst, W, H, + by_start, by_end, spec); + } + return true; + } + } + return false; +} + +void dispatch_draw_pattern(FormatId id, const char* pattern_name, + const Params& params, + const pixpat_buffer* dst, + size_t W, size_t H, + size_t by_start, size_t by_end, + ColorSpec spec) +{ + using namespace patterns; + // NULL pattern_name selects the default ("kmstest"); see pixpat.h. + const std::string_view name = pattern_name ? 
pattern_name : "kmstest"; + const ColorKind kind = s_format_info[size_t(id)].kind; + +#define X(label, rgb, yuv, str) \ + if (try_pattern<s_pattern_caps[size_t(PatternId::label)].enabled, rgb, yuv>( \ + name, str, params, id, kind, dst, W, H, by_start, by_end, spec)) \ + return; + PIXPAT_PATTERN_LIST(X) +#undef X + + throw invalid_argument("unknown or disabled pattern name"); +} + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/threading.h b/subprojects/pixpat/pixpat-native/src/threading.h new file mode 100644 index 0000000..5e7fc01 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/threading.h @@ -0,0 +1,95 @@ +#pragma once + +#include <unistd.h> + +#include <cassert> +#include <cstddef> +#include <exception> +#include <thread> +#include <vector> + +namespace pixpat +{ + +inline unsigned default_thread_count() +{ + long n = sysconf(_SC_NPROCESSORS_ONLN); + if (n < 1) + return 1; + // Cap to keep per-stripe work meaningful and avoid heavy + // oversubscription on large NUMA hosts. + if (n > 16) + n = 16; + return static_cast<unsigned>(n); +} + +/* + * Run `fn(start_y, end_y)` over `[0, height)` partitioned into stripes + * aligned to `v_sub`. Half-open ranges, matching the `for (by = 0; + * by < H; by += bh)` block-loop style. + * + * `fn` must be callable as `void(size_t start_y, size_t end_y)` and is + * invoked concurrently from multiple threads — it must be safe to call + * with disjoint Y-ranges in parallel. Exceptions thrown from a worker + * are captured and the first (by stripe index) is rethrown after all + * workers join. + * + * When `n_threads <= 1`, `fn` is called inline on the calling thread — + * no `std::thread` is spawned, no allocation occurs. + */ +template<typename F> +void run_stripes(size_t height, unsigned v_sub, unsigned n_threads, F&& fn) +{ + if (height == 0 || v_sub == 0) + return; + + // Callers (pixpat_convert / pixpat_draw_pattern) validate divisibility + // at the entry point. 
+ assert(height % v_sub == 0); + + const size_t max_useful = height / v_sub; + if (n_threads == 0) + n_threads = 1; + if (static_cast<size_t>(n_threads) > max_useful) + n_threads = static_cast<unsigned>(max_useful); + + if (n_threads <= 1) { + fn(size_t{ 0 }, height); + return; + } + + // Stripe height rounded up to v_sub; last stripe absorbs the + // remainder. + size_t part_height = (height + n_threads - 1) / n_threads; + part_height = (part_height + v_sub - 1) / v_sub * v_sub; + + std::vector<std::exception_ptr> errors(n_threads); + std::vector<std::thread> workers; + workers.reserve(n_threads); + + for (unsigned i = 0; i < n_threads; i++) { + size_t start = i * part_height; + if (start >= height) + break; + size_t end = start + part_height; + if (i == n_threads - 1 || end > height) + end = height; + + workers.emplace_back([&, i, start, end] { + try { + fn(start, end); + } catch (...) { + errors[i] = std::current_exception(); + } + }); + } + + for (auto& t : workers) + t.join(); + + for (auto& e : errors) + if (e) + std::rethrow_exception(e); +} + +} // namespace pixpat |
