diff options
| author | Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> | 2026-05-08 17:22:58 +0300 |
|---|---|---|
| committer | Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> | 2026-05-08 17:22:58 +0300 |
| commit | 4e2b291a4acdc2cbd39f005c88bda363bc06bd34 (patch) | |
| tree | e90048d5973ad1164b109d575cf577af7daf50be /subprojects/pixpat/pixpat-native/src/io | |
| parent | 8f94b39040e79eccd9312ed1e467fe8ebfab8860 (diff) | |
| parent | e0b7d30fd437292c88141fb08d60681870b86c6e (diff) | |
Merge commit 'e0b7d30fd437292c88141fb08d60681870b86c6e' as 'subprojects/pixpat'
Diffstat (limited to 'subprojects/pixpat/pixpat-native/src/io')
| -rw-r--r-- | subprojects/pixpat/pixpat-native/src/io/bayer.h | 318 | ||||
| -rw-r--r-- | subprojects/pixpat/pixpat-native/src/io/csi2.h | 80 | ||||
| -rw-r--r-- | subprojects/pixpat/pixpat-native/src/io/detail.h | 62 | ||||
| -rw-r--r-- | subprojects/pixpat/pixpat-native/src/io/gray.h | 153 | ||||
| -rw-r--r-- | subprojects/pixpat/pixpat-native/src/io/gray_packed.h | 78 | ||||
| -rw-r--r-- | subprojects/pixpat/pixpat-native/src/io/mono_rgb.h | 72 | ||||
| -rw-r--r-- | subprojects/pixpat/pixpat-native/src/io/packed.h | 106 | ||||
| -rw-r--r-- | subprojects/pixpat/pixpat-native/src/io/packed_yuv.h | 89 | ||||
| -rw-r--r-- | subprojects/pixpat/pixpat-native/src/io/planar.h | 257 | ||||
| -rw-r--r-- | subprojects/pixpat/pixpat-native/src/io/semiplanar.h | 242 |
10 files changed, 1457 insertions, 0 deletions
diff --git a/subprojects/pixpat/pixpat-native/src/io/bayer.h b/subprojects/pixpat/pixpat-native/src/io/bayer.h new file mode 100644 index 0000000..6b30c0e --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/bayer.h @@ -0,0 +1,318 @@ +#pragma once + +// Bayer raw read/write support. +// +// Write side: each pixel carries one of R/G/B selected by (x mod 2, +// y mod 2) and a fixed BayerOrder. Two missing channels per pixel are +// dropped on encode. +// +// Read side: bilinear demosaic over a 3x3 window. The pixel's own +// channel comes from self; missing channels are averaged from the +// same-channel neighbours that the Bayer phase guarantees to exist: +// +// * At an R or B pixel, all four cardinal (N, E, S, W) neighbours +// carry G and all four diagonal (NE, NW, SE, SW) neighbours carry +// the other colour, so each missing channel averages four samples. +// * At a G pixel, one missing colour sits in the row neighbours +// (W, E) and the other in the column neighbours (N, S), so each +// missing channel averages two samples. +// +// Sampled coordinates are clamped to the image bounds. +// +// The Layout shape is the same as a Y-only single-plane format +// (storage carries one component plus optional X padding); the +// BayerOrder is a separate template parameter on the Source / Sink. + +#include <array> +#include <cstdint> + +#include "../layout.h" +#include "csi2.h" +#include "detail.h" + +namespace pixpat +{ + +enum class BayerOrder { RGGB, BGGR, GRBG, GBRG }; + +namespace detail +{ +constexpr C bayer_pick(BayerOrder o, bool x_even, bool y_even) noexcept +{ + switch (o) { + case BayerOrder::RGGB: + return y_even ? (x_even ? C::R : C::G) + : (x_even ? C::G : C::B); + case BayerOrder::BGGR: + return y_even ? (x_even ? C::B : C::G) + : (x_even ? C::G : C::R); + case BayerOrder::GRBG: + return y_even ? (x_even ? C::G : C::R) + : (x_even ? C::B : C::G); + case BayerOrder::GBRG: + return y_even ? (x_even ? C::G : C::B) + : (x_even ? C::R : C::G); + } + return C::G; +} + +constexpr size_t clamp_coord(int v, size_t max_excl) noexcept +{ + if (v < 0) + return 0; + if (size_t(v) >= max_excl) + return max_excl - 1; + return size_t(v); +} +} // namespace detail + +template <typename L, BayerOrder Order> +struct BayerSource { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y_idx = P::template find_pos<C::Y>(); + static_assert(y_idx < P::num_comps); + + static uint16_t read_sample(const Buffer<1>& buf, size_t x, size_t y) noexcept + { + const uint8_t* p = buf.data[0] + y * buf.stride[0] + + x * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + return detail::decode_norm(P::comps[y_idx].bits, vals[y_idx]); + } + + static RGB16 read(const Buffer<1>& buf, size_t x, size_t y, + size_t W, size_t H) noexcept + { + const bool x_even = (x & 1) == 0; + const bool y_even = (y & 1) == 0; + const C self = detail::bayer_pick(Order, x_even, y_even); + + const size_t xL = detail::clamp_coord(int(x) - 1, W); + const size_t xR = detail::clamp_coord(int(x) + 1, W); + const size_t yT = detail::clamp_coord(int(y) - 1, H); + const size_t yB = detail::clamp_coord(int(y) + 1, H); + + const uint16_t s = read_sample(buf, x, y); + + uint16_t r = 0, g = 0, b = 0; + + if (self == C::G) { + const C h_color = detail::bayer_pick(Order, !x_even, y_even); + const uint16_t h_avg = uint16_t( + (uint32_t(read_sample(buf, xL, y)) + + read_sample(buf, xR, y) + 1u) >> 1); + const uint16_t v_avg = uint16_t( + (uint32_t(read_sample(buf, x, yT)) + + read_sample(buf, x, yB) + 1u) >> 1); + g = s; + if (h_color == C::R) { r = h_avg; b = v_avg; } + else { b = h_avg; r = v_avg; } + } else { + const uint16_t g_avg = uint16_t( + (uint32_t(read_sample(buf, x, yT)) + + read_sample(buf, x, yB) + + read_sample(buf, xL, y) + + read_sample(buf, xR, y) + 2u) >> 2); + const uint16_t o_avg = uint16_t( + (uint32_t(read_sample(buf, xL, yT)) + + read_sample(buf, xR, yT) + + read_sample(buf, xL, yB) + + read_sample(buf, xR, yB) + 2u) >> 2); + g = g_avg; + if (self == C::R) { r = s; b = o_avg; } + else { b = s; r = o_avg; } + } + + return RGB16{ r, g, b, uint16_t(0) }; + } +}; + +template <typename L, BayerOrder Order> +struct BayerSink { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y_idx = P::template find_pos<C::Y>(); + static constexpr size_t x_idx = P::template find_pos<C::X>(); + static constexpr bool has_x = (x_idx < P::num_comps); + static_assert(y_idx < P::num_comps); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = 1; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const RGB16 (&block)[1][1]) noexcept + { + const C pick = detail::bayer_pick(Order, (bx & 1) == 0, + (by & 1) == 0); + const uint16_t val = pick == C::R ? block[0][0].r + : pick == C::G ? block[0][0].g + : block[0][0].b; + + std::array<uint16_t, P::num_comps> v{}; + v[y_idx] = detail::encode_norm(P::comps[y_idx].bits, val); + if constexpr (has_x) + v[x_idx] = 0; + + uint8_t* p = buf.data[0] + by * buf.stride[0] + + bx * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +// Aliases so X-macro can register without nested template-template params. +template <typename L> using BayerSource_RGGB = BayerSource<L, BayerOrder::RGGB>; +template <typename L> using BayerSource_BGGR = BayerSource<L, BayerOrder::BGGR>; +template <typename L> using BayerSource_GRBG = BayerSource<L, BayerOrder::GRBG>; +template <typename L> using BayerSource_GBRG = BayerSource<L, BayerOrder::GBRG>; + +template <typename L> using BayerSink_RGGB = BayerSink<L, BayerOrder::RGGB>; +template <typename L> using BayerSink_BGGR = BayerSink<L, BayerOrder::BGGR>; +template <typename L> using BayerSink_GRBG = BayerSink<L, BayerOrder::GRBG>; +template <typename L> using BayerSink_GBRG = BayerSink<L, BayerOrder::GBRG>; + +// MIPI CSI-2 packed Bayer. The bit layout doesn't fit +// `Plane<Storage, Comp...>` because each pixel's bits span two +// non-contiguous bytes, so we use the shared CSI-2 helper (io/csi2.h) +// to (un)pack samples. +// +// The Layout slot is a placeholder (matches the unpacked Bayer of the +// same bit-depth so the user-facing API can pick the right buffer +// shape); bytes_per_pixel from the Plane is unused. +template <typename L, BayerOrder Order, size_t BitDepth> +struct BayerPackedSource { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + static_assert(BitDepth == 10 || BitDepth == 12); + + using Traits = detail::csi2::packed_traits<BitDepth>; + static constexpr size_t ppg = Traits::ppg; + static constexpr size_t bpg = Traits::bpg; + + // Stored N-bit value upshifts to normalized-16 by `<< (16-N)`, + // matching the unpacked Bayer source. + static constexpr unsigned shift = 16 - BitDepth; + + static uint16_t read_sample(const Buffer<1>& buf, size_t x, size_t y) noexcept + { + const uint8_t* src = buf.data[0] + y * buf.stride[0] + + (x / ppg) * bpg; + const uint16_t val = detail::csi2::unpack_sample<BitDepth>(src, x % ppg); + return uint16_t(val << shift); + } + + static RGB16 read(const Buffer<1>& buf, size_t x, size_t y, + size_t W, size_t H) noexcept + { + const bool x_even = (x & 1) == 0; + const bool y_even = (y & 1) == 0; + const C self = detail::bayer_pick(Order, x_even, y_even); + + const size_t xL = detail::clamp_coord(int(x) - 1, W); + const size_t xR = detail::clamp_coord(int(x) + 1, W); + const size_t yT = detail::clamp_coord(int(y) - 1, H); + const size_t yB = detail::clamp_coord(int(y) + 1, H); + + const uint16_t s = read_sample(buf, x, y); + + uint16_t r = 0, g = 0, b = 0; + + if (self == C::G) { + const C h_color = detail::bayer_pick(Order, !x_even, y_even); + const uint16_t h_avg = uint16_t( + (uint32_t(read_sample(buf, xL, y)) + + read_sample(buf, xR, y) + 1u) >> 1); + const uint16_t v_avg = uint16_t( + (uint32_t(read_sample(buf, x, yT)) + + read_sample(buf, x, yB) + 1u) >> 1); + g = s; + if (h_color == C::R) { r = h_avg; b = v_avg; } + else { b = h_avg; r = v_avg; } + } else { + const uint16_t g_avg = uint16_t( + (uint32_t(read_sample(buf, x, yT)) + + read_sample(buf, x, yB) + + read_sample(buf, xL, y) + + read_sample(buf, xR, y) + 2u) >> 2); + const uint16_t o_avg = uint16_t( + (uint32_t(read_sample(buf, xL, yT)) + + read_sample(buf, xR, yT) + + read_sample(buf, xL, yB) + + read_sample(buf, xR, yB) + 2u) >> 2); + g = g_avg; + if (self == C::R) { r = s; b = o_avg; } + else { b = s; r = o_avg; } + } + + return RGB16{ r, g, b, uint16_t(0) }; + } +}; + +template <typename L, BayerOrder Order, size_t BitDepth> +struct BayerPackedSink { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + static_assert(BitDepth == 10 || BitDepth == 12); + + using Traits = detail::csi2::packed_traits<BitDepth>; + static constexpr size_t ppg = Traits::ppg; + static constexpr size_t bpg = Traits::bpg; + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = ppg; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const RGB16 (&block)[1][ppg]) noexcept + { + std::array<uint16_t, ppg> vals{}; + for (size_t i = 0; i < ppg; ++i) { + const C pick = detail::bayer_pick( + Order, ((bx + i) & 1) == 0, (by & 1) == 0); + const uint16_t norm = + pick == C::R ? block[0][i].r + : pick == C::G ? block[0][i].g + : block[0][i].b; + vals[i] = uint16_t(norm >> (16 - BitDepth)); + } + + uint8_t* dst = buf.data[0] + by * buf.stride[0] + + (bx / ppg) * bpg; + detail::csi2::pack_group<BitDepth>(dst, vals); + } +}; + +template <typename L> using BayerPackedSource_RGGB10 = BayerPackedSource<L, BayerOrder::RGGB, 10>; +template <typename L> using BayerPackedSource_BGGR10 = BayerPackedSource<L, BayerOrder::BGGR, 10>; +template <typename L> using BayerPackedSource_GRBG10 = BayerPackedSource<L, BayerOrder::GRBG, 10>; +template <typename L> using BayerPackedSource_GBRG10 = BayerPackedSource<L, BayerOrder::GBRG, 10>; +template <typename L> using BayerPackedSource_RGGB12 = BayerPackedSource<L, BayerOrder::RGGB, 12>; +template <typename L> using BayerPackedSource_BGGR12 = BayerPackedSource<L, BayerOrder::BGGR, 12>; +template <typename L> using BayerPackedSource_GRBG12 = BayerPackedSource<L, BayerOrder::GRBG, 12>; +template <typename L> using BayerPackedSource_GBRG12 = BayerPackedSource<L, BayerOrder::GBRG, 12>; + +template <typename L> using BayerPackedSink_RGGB10 = BayerPackedSink<L, BayerOrder::RGGB, 10>; +template <typename L> using BayerPackedSink_BGGR10 = BayerPackedSink<L, BayerOrder::BGGR, 10>; +template <typename L> using BayerPackedSink_GRBG10 = BayerPackedSink<L, BayerOrder::GRBG, 10>; +template <typename L> using BayerPackedSink_GBRG10 = BayerPackedSink<L, BayerOrder::GBRG, 10>; +template <typename L> using BayerPackedSink_RGGB12 = BayerPackedSink<L, BayerOrder::RGGB, 12>; +template <typename L> using BayerPackedSink_BGGR12 = BayerPackedSink<L, BayerOrder::BGGR, 12>; +template <typename L> using BayerPackedSink_GRBG12 = BayerPackedSink<L, BayerOrder::GRBG, 12>; +template <typename L> using BayerPackedSink_GBRG12 = BayerPackedSink<L, BayerOrder::GBRG, 12>; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/csi2.h b/subprojects/pixpat/pixpat-native/src/io/csi2.h new file mode 100644 index 0000000..59a8f8d --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/csi2.h @@ -0,0 +1,80 @@ +#pragma once + +// Shared MIPI CSI-2 byte (un)packing for the 10P / 12P forms used by +// Bayer raw and Y-only grayscale. +// +// 10P: 4 samples in 5 bytes — bytes 0..3 hold the high 8 bits of +// samples 0..3; byte 4 holds 4 x 2 LSBs (sample 0 in bits 6..7, +// sample 1 in bits 4..5, ...). +// 12P: 2 samples in 3 bytes — bytes 0..1 hold the high 8 bits of +// samples 0..1; byte 2 holds 2 x 4 LSBs (sample 0 in bits 4..7, +// sample 1 in bits 0..3). +// +// Helpers deal in the stored integer (low BitDepth bits set); +// normalization to/from the 16-bit pivot stays in the caller. + +#include <array> +#include <cstddef> +#include <cstdint> + +namespace pixpat::detail::csi2 +{ + +template <size_t BitDepth> +struct packed_traits; + +template <> +struct packed_traits<10> { + static constexpr size_t ppg = 4; + static constexpr size_t bpg = 5; +}; + +template <> +struct packed_traits<12> { + static constexpr size_t ppg = 2; + static constexpr size_t bpg = 3; +}; + +// Extract one BitDepth-bit sample from a packed group, where `i` is the +// in-group index (0..ppg-1). The returned value occupies the low +// BitDepth bits. +template <size_t BitDepth> +inline uint16_t unpack_sample(const uint8_t* src, size_t i) noexcept +{ + if constexpr (BitDepth == 10) { + const uint8_t hi = src[i]; + const uint8_t lsb = (src[4] >> ((3 - i) * 2)) & 0x03; + return uint16_t((hi << 2) | lsb); + } else { // 12 + const uint8_t hi = src[i]; + const uint8_t lsb = (i == 0) ? ((src[2] >> 4) & 0x0F) + : (src[2] & 0x0F); + return uint16_t((hi << 4) | lsb); + } +} + +// Write `ppg` BitDepth-bit samples (low BitDepth bits significant) into +// a packed group of `bpg` bytes. +template <size_t BitDepth> +inline void pack_group( + uint8_t* dst, + const std::array<uint16_t, packed_traits<BitDepth>::ppg>& vals) noexcept +{ + if constexpr (BitDepth == 10) { + dst[0] = (vals[0] >> 2) & 0xFF; + dst[1] = (vals[1] >> 2) & 0xFF; + dst[2] = (vals[2] >> 2) & 0xFF; + dst[3] = (vals[3] >> 2) & 0xFF; + dst[4] = ((vals[0] & 0x03) << 6) + | ((vals[1] & 0x03) << 4) + | ((vals[2] & 0x03) << 2) + | ((vals[3] & 0x03) << 0); + } else { // 12 + dst[0] = (vals[0] >> 4) & 0xFF; + dst[1] = (vals[1] >> 4) & 0xFF; + dst[2] = ((vals[0] & 0x0F) << 4) + | ((vals[1] & 0x0F) << 0); + } +} + +} // namespace pixpat::detail::csi2 diff --git a/subprojects/pixpat/pixpat-native/src/io/detail.h b/subprojects/pixpat/pixpat-native/src/io/detail.h new file mode 100644 index 0000000..cb2b9fb --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/detail.h @@ -0,0 +1,62 @@ +#pragma once + +// Per-component encode/decode against the descriptor + memcpy-based +// load/store_word helpers. Shared by every Source / Sink template. + +#include <cstdint> +#include <cstring> + +#include "../layout.h" + +namespace pixpat::detail +{ + +// Decode an N-bit stored value into the 16-bit normalized space and +// encode it back. Decode bit-replicates the stored value across the 16 +// bits so that N-bit max maps to normalized max (e.g. 8-bit 0xFF → +// 0xFFFF, not 0xFF00). Encode is a plain truncating right-shift: the +// replicated bits land in the low (16-N) bits and get dropped, so +// stored→norm→stored is exact for any N in [1, 16]. +// +// `bits` is taken at runtime; in every call site it traces back to a +// constexpr Plane::comps[I].bits read, which the optimizer constant- +// folds after inlining. + +constexpr uint16_t decode_norm(unsigned bits, uint16_t stored) noexcept +{ + const int N = int(bits); + // Loop, not a single OR: one replication only covers 2N bits, so + // N < 8 (RGB565, RGBA4444, 1-bit alpha, ...) needs multiple tiles. + uint32_t result = 0; + for (int s = 16 - N; s > -N; s -= N) { + if (s >= 0) + result |= uint32_t(stored) << s; + else + result |= uint32_t(stored) >> -s; + } + return uint16_t(result); +} + +constexpr uint16_t encode_norm(unsigned bits, uint16_t norm) noexcept +{ + return uint16_t(norm >> (16u - bits)); +} + +// Read one storage word from `p`. memcpy is uniform for tight and +// non-tight (e.g. BGR888 24-bit) layouts; the optimizer folds it to a +// single load when the size is constant. +template <typename Plane> +inline typename Plane::storage_t load_word(const uint8_t* p) noexcept +{ + typename Plane::storage_t word{}; + std::memcpy(&word, p, Plane::bytes_per_pixel); + return word; +} + +template <typename Plane> +inline void store_word(uint8_t* p, typename Plane::storage_t word) noexcept +{ + std::memcpy(p, &word, Plane::bytes_per_pixel); +} + +} // namespace pixpat::detail diff --git a/subprojects/pixpat/pixpat-native/src/io/gray.h b/subprojects/pixpat/pixpat-native/src/io/gray.h new file mode 100644 index 0000000..d175b68 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/gray.h @@ -0,0 +1,153 @@ +#pragma once + +// Grayscale (Y8 / Y10 / Y12 / Y16) and multi-pixel-per-word grayscale +// (XYYY2101010: 3 Y components in one uint32_t). Modeled as a YUV format +// with neutral chroma synthesized on read so cross-color-kind ColorXfm +// produces R=G=B=Y'. The sink encodes Y from YUV16 and ignores U/V. +// Y10/Y12 carry an X padding bitfield which we zero out on write. +// Neutral chroma in normalized-16 is 0x8000 (the midpoint of [0, 0xFFFF]). + +#include <array> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct GraySource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y_idx = P::template find_pos<C::Y>(); + static_assert(y_idx < P::num_comps); + + static YUV16 read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* p = buf.data[0] + y * buf.stride[0] + + x * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + return YUV16{ + detail::decode_norm(P::comps[y_idx].bits, vals[y_idx]), + 0x8000, 0x8000, uint16_t(0), + }; + } +}; + +template <typename L> +struct GraySink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y_idx = P::template find_pos<C::Y>(); + static constexpr size_t x_idx = P::template find_pos<C::X>(); + static constexpr bool has_x = (x_idx < P::num_comps); + static_assert(y_idx < P::num_comps); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = 1; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const YUV16 (&block)[1][1]) noexcept + { + std::array<uint16_t, P::num_comps> v{}; + v[y_idx] = detail::encode_norm(P::comps[y_idx].bits, block[0][0].y); + if constexpr (has_x) + v[x_idx] = 0; + + uint8_t* p = buf.data[0] + by * buf.stride[0] + + bx * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +// Multi-pixel-per-word grayscale. The Layout carries one C::Y entry per +// pixel in the group; pixels_per_word is derived from how many C::Y +// entries the layout has. All Y components must share the same bit width +// (so the encode/decode shift is shared). block_w = ppw so the sink +// writes one storage word per block. +template <typename L> +struct MultiPixelGraySource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t ppw = P::template component_count<C::Y>(); + static_assert(ppw >= 1); + + // All Y positions share the same bit width. + static constexpr unsigned y_bits = P::comps[P::template find_pos<C::Y>(0)].bits; + + static YUV16 read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const size_t gx = x / ppw; + const size_t off = x % ppw; + const uint8_t* p = buf.data[0] + y * buf.stride[0] + + gx * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + + // find_pos walks the comps array at runtime; comps is constexpr + // and num_comps is small (≤4 for these formats), so it inlines. + const size_t y_pos = P::template find_pos<C::Y>(off); + + return YUV16{ + detail::decode_norm(y_bits, vals[y_pos]), + 0x8000, 0x8000, uint16_t(0), + }; + } +}; + +template <typename L> +struct MultiPixelGraySink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t ppw = P::template component_count<C::Y>(); + static constexpr size_t x_idx = P::template find_pos<C::X>(); + static constexpr bool has_x = (x_idx < P::num_comps); + static_assert(ppw >= 1); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = ppw; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const YUV16 (&block)[1][ppw]) noexcept + { + std::array<uint16_t, P::num_comps> v{}; + // All Y slots share the same bit width. + constexpr unsigned y_bits = P::comps[P::template find_pos<C::Y>(0)].bits; + for (size_t i = 0; i < ppw; ++i) { + const size_t pos = P::template find_pos<C::Y>(i); + v[pos] = detail::encode_norm(y_bits, block[0][i].y); + } + + if constexpr (has_x) + v[x_idx] = 0; + + uint8_t* p = buf.data[0] + by * buf.stride[0] + + (bx / ppw) * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/gray_packed.h b/subprojects/pixpat/pixpat-native/src/io/gray_packed.h new file mode 100644 index 0000000..dc1fa68 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/gray_packed.h @@ -0,0 +1,78 @@ +#pragma once + +// MIPI CSI-2 packed grayscale (Y10P / Y12P). Same byte packing as +// Bayer10P/Bayer12P (see io/csi2.h) but every sample is Y; the source +// emits neutral chroma to keep cross-color-kind ColorXfm consistent +// with GraySource. +// +// The Layout slot is a placeholder (matches the unpacked Y8 storage +// shape so dispatch plumbing is uniform); bytes_per_pixel from the +// Plane is unused. + +#include <array> +#include <cstdint> + +#include "../layout.h" +#include "csi2.h" + +namespace pixpat +{ + +template <typename L, size_t BitDepth> +struct GrayPackedSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + static_assert(BitDepth == 10 || BitDepth == 12); + + using Traits = detail::csi2::packed_traits<BitDepth>; + static constexpr size_t ppg = Traits::ppg; + static constexpr size_t bpg = Traits::bpg; + static constexpr unsigned shift = 16 - BitDepth; + + static YUV16 read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* src = buf.data[0] + y * buf.stride[0] + + (x / ppg) * bpg; + const uint16_t val = detail::csi2::unpack_sample<BitDepth>(src, x % ppg); + return YUV16{ + uint16_t(val << shift), + 0x8000, 0x8000, uint16_t(0), + }; + } +}; + +template <typename L, size_t BitDepth> +struct GrayPackedSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + static_assert(BitDepth == 10 || BitDepth == 12); + + using Traits = detail::csi2::packed_traits<BitDepth>; + static constexpr size_t ppg = Traits::ppg; + static constexpr size_t bpg = Traits::bpg; + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = ppg; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const YUV16 (&block)[1][ppg]) noexcept + { + std::array<uint16_t, ppg> vals{}; + for (size_t i = 0; i < ppg; ++i) + vals[i] = uint16_t(block[0][i].y >> (16 - BitDepth)); + + uint8_t* dst = buf.data[0] + by * buf.stride[0] + + (bx / ppg) * bpg; + detail::csi2::pack_group<BitDepth>(dst, vals); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h b/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h new file mode 100644 index 0000000..f2f8206 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/mono_rgb.h @@ -0,0 +1,72 @@ +#pragma once + +// Single-channel RGB formats (R8). Storage carries one R component; +// MonoRGBSource synthesizes G=B=R on read so cross-color-kind ColorXfm +// produces sensible Y from R alone. MonoRGBSink encodes R and ignores +// G/B/A (and zeroes any X padding). Symmetric to GraySource/GraySink +// (io/gray.h) but for ColorKind::RGB on C::R. + +#include <array> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct MonoRGBSource { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t r_idx = P::template find_pos<C::R>(); + static_assert(r_idx < P::num_comps); + + static RGB16 read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* p = buf.data[0] + y * buf.stride[0] + + x * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + const uint16_t r = detail::decode_norm(P::comps[r_idx].bits, vals[r_idx]); + return RGB16{ r, r, r, uint16_t(0) }; + } +}; + +template <typename L> +struct MonoRGBSink { + using Layout = L; + using Pixel = RGB16; + + static_assert(L::kind == ColorKind::RGB); + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr size_t r_idx = P::template find_pos<C::R>(); + static constexpr size_t x_idx = P::template find_pos<C::X>(); + static constexpr bool has_x = (x_idx < P::num_comps); + static_assert(r_idx < P::num_comps); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = 1; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const RGB16 (&block)[1][1]) noexcept + { + std::array<uint16_t, P::num_comps> v{}; + v[r_idx] = detail::encode_norm(P::comps[r_idx].bits, block[0][0].r); + if constexpr (has_x) + v[x_idx] = 0; + + uint8_t* p = buf.data[0] + by * buf.stride[0] + + bx * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/packed.h b/subprojects/pixpat/pixpat-native/src/io/packed.h new file mode 100644 index 0000000..9d953bc --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/packed.h @@ -0,0 +1,106 @@ +#pragma once + +// Single-plane, single-pixel-per-storage-word formats. Works for both +// RGB layouts (XRGB8888, RGB565, ABGR16161616, ...) and YUV +// single-pixel layouts (XVUY2101010, AVUY16161616). Pixel type follows +// L::kind; the three mandatory components are R/G/B for RGB or Y/U/V +// for YUV. Both `RGB16` and `YUV16` are 4 uint16_t with the alpha last, +// so aggregate-init by position works for either. + +#include <array> +#include <type_traits> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct PackedSource { + using Layout = L; + using Pixel = std::conditional_t<L::kind == ColorKind::RGB, RGB16, YUV16>; + + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr C c0 = (L::kind == ColorKind::RGB) ? C::R : C::Y; + static constexpr C c1 = (L::kind == ColorKind::RGB) ? C::G : C::U; + static constexpr C c2 = (L::kind == ColorKind::RGB) ? C::B : C::V; + + static constexpr size_t i0 = P::template find_pos<c0>(); + static constexpr size_t i1 = P::template find_pos<c1>(); + static constexpr size_t i2 = P::template find_pos<c2>(); + static constexpr size_t a_idx = P::template find_pos<C::A>(); + static constexpr bool has_a = (a_idx < P::num_comps); + static_assert(i0 < P::num_comps && i1 < P::num_comps && i2 < P::num_comps); + + static Pixel read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* p = buf.data[0] + y * buf.stride[0] + x * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + Pixel out{ + detail::decode_norm(P::comps[i0].bits, vals[i0]), + detail::decode_norm(P::comps[i1].bits, vals[i1]), + detail::decode_norm(P::comps[i2].bits, vals[i2]), + uint16_t(0), + }; + if constexpr (has_a) + out.a = detail::decode_norm(P::comps[a_idx].bits, vals[a_idx]); + return out; + } +}; + +template <typename L> +struct PackedSink { + using Layout = L; + using Pixel = std::conditional_t<L::kind == ColorKind::RGB, RGB16, YUV16>; + + static_assert(L::num_planes == 1); + + using P = typename L::template plane<0>; + static constexpr C c0 = (L::kind == ColorKind::RGB) ? C::R : C::Y; + static constexpr C c1 = (L::kind == ColorKind::RGB) ? C::G : C::U; + static constexpr C c2 = (L::kind == ColorKind::RGB) ? C::B : C::V; + + static constexpr size_t i0 = P::template find_pos<c0>(); + static constexpr size_t i1 = P::template find_pos<c1>(); + static constexpr size_t i2 = P::template find_pos<c2>(); + static constexpr size_t x_idx = P::template find_pos<C::X>(); + static constexpr size_t a_idx = P::template find_pos<C::A>(); + static constexpr bool has_x = (x_idx < P::num_comps); + static constexpr bool has_a = (a_idx < P::num_comps); + static_assert(i0 < P::num_comps && i1 < P::num_comps && i2 < P::num_comps); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = 1; + + // Aggregate-init access to RGB16/YUV16 by position: .r/.y, .g/.u, .b/.v. + // We use the field names corresponding to L::kind. + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const Pixel (&block)[1][1]) noexcept + { + const Pixel& pix = block[0][0]; + std::array<uint16_t, P::num_comps> v{}; + if constexpr (L::kind == ColorKind::RGB) { + v[i0] = detail::encode_norm(P::comps[i0].bits, pix.r); + v[i1] = detail::encode_norm(P::comps[i1].bits, pix.g); + v[i2] = detail::encode_norm(P::comps[i2].bits, pix.b); + } else { + v[i0] = detail::encode_norm(P::comps[i0].bits, pix.y); + v[i1] = detail::encode_norm(P::comps[i1].bits, pix.u); + v[i2] = detail::encode_norm(P::comps[i2].bits, pix.v); + } + if constexpr (has_x) + v[x_idx] = 0; + if constexpr (has_a) + v[a_idx] = detail::encode_norm(P::comps[a_idx].bits, pix.a); + + uint8_t* p = buf.data[0] + by * buf.stride[0] + bx * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h b/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h new file mode 100644 index 0000000..90c8b2f --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/packed_yuv.h @@ -0,0 +1,89 @@ +#pragma once + +// Packed YUV 4:2:2 (YUYV / YVYU / UYVY / VYUY): two pixels per 32-bit +// word, one shared chroma pair. The Layout uses two C::Y entries plus +// one each of C::U / C::V; we resolve the duplicate Y via +// find_pos<C::Y>(n). + +#include <array> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct PackedYUVSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + static_assert(L::h_sub == 2 && L::v_sub == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y0_idx = P::template find_pos<C::Y>(0); + static constexpr size_t y1_idx = P::template find_pos<C::Y>(1); + static constexpr size_t u_idx = P::template find_pos<C::U>(); + static constexpr size_t v_idx = P::template find_pos<C::V>(); + + static YUV16 read(const Buffer<1>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* p = buf.data[0] + y * buf.stride[0] + + (x / 2) * P::bytes_per_pixel; + const auto vals = P::unpack(detail::load_word<P>(p)); + const size_t y_pick = (x & 1) ? y1_idx : y0_idx; + // Both Y components share the same bit width, so the bit-width + // for y0 and y1 is identical — pick either. + return YUV16{ + detail::decode_norm(P::comps[y0_idx].bits, vals[y_pick]), + detail::decode_norm(P::comps[u_idx].bits, vals[u_idx]), + detail::decode_norm(P::comps[v_idx].bits, vals[v_idx]), + uint16_t(0), + }; + } +}; + +template <typename L> +struct PackedYUVSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 1); + static_assert(L::h_sub == 2 && L::v_sub == 1); + + using P = typename L::template plane<0>; + static constexpr size_t y0_idx = P::template find_pos<C::Y>(0); + static constexpr size_t y1_idx = P::template find_pos<C::Y>(1); + static constexpr size_t u_idx = P::template find_pos<C::U>(); + static constexpr size_t v_idx = P::template find_pos<C::V>(); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = 2; + + static void write_block(Buffer<1>& buf, size_t bx, size_t by, + const YUV16 (&block)[1][2]) noexcept + { + std::array<uint16_t, P::num_comps> v{}; + v[y0_idx] = detail::encode_norm(P::comps[y0_idx].bits, block[0][0].y); + v[y1_idx] = detail::encode_norm(P::comps[y1_idx].bits, block[0][1].y); + // Integer chroma averaging in normalized-16 space. Truncates + // (no round-half-up). + v[u_idx] = detail::encode_norm(P::comps[u_idx].bits, uint16_t( + (uint32_t(block[0][0].u) + + uint32_t(block[0][1].u)) / 2)); + v[v_idx] = detail::encode_norm(P::comps[v_idx].bits, uint16_t( + (uint32_t(block[0][0].v) + + uint32_t(block[0][1].v)) / 2)); + + uint8_t* p = buf.data[0] + by * buf.stride[0] + + (bx / 2) * P::bytes_per_pixel; + detail::store_word<P>(p, P::pack(v)); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/planar.h b/subprojects/pixpat/pixpat-native/src/io/planar.h new file mode 100644 index 0000000..0dab685 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/planar.h @@ -0,0 +1,257 @@ +#pragma once + +// 3-plane planar YUV. Two flavours: +// +// PlanarSource / PlanarSink — YUV/YVU 420/422/444, single Y per word, +// single chroma per word. Chroma is averaged over h_sub × v_sub +// on write. +// +// MultiPixelPlanarSource / MultiPixelPlanarSink — T430, multi-pixel- +// per-word planar 4:4:4 (3 samples per uint32_t in each of 3 +// planes, plus 2-bit X padding). block_w = ppw, block_h = 1. +// +// Plane indices for Y / U / V are looked up via Layout::find_plane<C>(), +// so swap_uv layouts (YVU vs YUV) work without separate templates. + +#include <array> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct PlanarSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 3); + + static constexpr size_t y_plane = L::template find_plane<C::Y>(); + static constexpr size_t u_plane = L::template find_plane<C::U>(); + static constexpr size_t v_plane = L::template find_plane<C::V>(); + + using YP = typename L::template plane<y_plane>; + using UP = typename L::template plane<u_plane>; + using VP = typename L::template plane<v_plane>; + + static YUV16 read(const Buffer<3>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* yp = buf.data[y_plane] + y * buf.stride[y_plane] + + x * YP::bytes_per_pixel; + const auto y_vals = YP::unpack(detail::load_word<YP>(yp)); + + const size_t cx = x / L::h_sub; + const size_t cy = y / L::v_sub; + const uint8_t* up = buf.data[u_plane] + cy * buf.stride[u_plane] + + cx * UP::bytes_per_pixel; + const uint8_t* vp = buf.data[v_plane] + cy * buf.stride[v_plane] + + cx * VP::bytes_per_pixel; + const auto u_vals = UP::unpack(detail::load_word<UP>(up)); + const auto v_vals = VP::unpack(detail::load_word<VP>(vp)); + + return YUV16{ + detail::decode_norm(YP::comps[0].bits, y_vals[0]), + detail::decode_norm(UP::comps[0].bits, u_vals[0]), + detail::decode_norm(VP::comps[0].bits, v_vals[0]), + uint16_t(0), + }; + } +}; + +template <typename L> +struct PlanarSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 3); + + static constexpr size_t y_plane = L::template find_plane<C::Y>(); + static constexpr size_t u_plane = L::template find_plane<C::U>(); + static constexpr size_t v_plane = L::template find_plane<C::V>(); + + using YP = typename L::template plane<y_plane>; + using UP = typename L::template plane<u_plane>; + using VP = typename L::template plane<v_plane>; + + static constexpr size_t block_h = L::v_sub; + static constexpr size_t block_w = L::h_sub; + + static void write_block(Buffer<3>& buf, size_t bx, size_t by, + const YUV16 (&block)[block_h][block_w]) noexcept + { + // Y per pixel. + for (size_t dy = 0; dy < block_h; ++dy) { + uint8_t* y_row = buf.data[y_plane] + + (by + dy) * buf.stride[y_plane]; + for (size_t dx = 0; dx < block_w; ++dx) { + std::array<uint16_t, YP::num_comps> v{}; + v[0] = detail::encode_norm(YP::comps[0].bits, block[dy][dx].y); + detail::store_word<YP>( + y_row + (bx + dx) * YP::bytes_per_pixel, + YP::pack(v)); + } + } + + // One averaged U and V sample per block. Integer truncation + // (no round-half-up). + uint32_t u_sum = 0, v_sum = 0; + for (size_t dy = 0; dy < block_h; ++dy) { + for (size_t dx = 0; dx < block_w; ++dx) { + u_sum += block[dy][dx].u; + v_sum += block[dy][dx].v; + } + } + constexpr uint32_t n = block_h * block_w; + + const size_t cx = bx / L::h_sub; + const size_t cy = by / L::v_sub; + + std::array<uint16_t, UP::num_comps> uw{}; + uw[0] = detail::encode_norm(UP::comps[0].bits, uint16_t(u_sum / n)); + detail::store_word<UP>( + buf.data[u_plane] + cy * buf.stride[u_plane] + + cx * UP::bytes_per_pixel, + UP::pack(uw)); + + std::array<uint16_t, VP::num_comps> vw{}; + vw[0] = detail::encode_norm(VP::comps[0].bits, uint16_t(v_sum / n)); + detail::store_word<VP>( + buf.data[v_plane] + cy * buf.stride[v_plane] + + cx * VP::bytes_per_pixel, + VP::pack(vw)); + } +}; + +// T430-style 3-plane multi-pixel-per-word planar 4:4:4. Each plane has +// `ppw` samples of the same component (Y in plane 0, U in 1, V in 2 — +// or whichever ordering find_plane resolves) packed into a single +// storage word. block_w = ppw, block_h = 1. No chroma subsampling. +template <typename L> +struct MultiPixelPlanarSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 3); + static_assert(L::h_sub == 1 && L::v_sub == 1); + + static constexpr size_t y_plane = L::template find_plane<C::Y>(); + static constexpr size_t u_plane = L::template find_plane<C::U>(); + static constexpr size_t v_plane = L::template find_plane<C::V>(); + + using YP = typename L::template plane<y_plane>; + using UP = typename L::template plane<u_plane>; + using VP = typename L::template plane<v_plane>; + + static constexpr size_t ppw = YP::template component_count<C::Y>(); + static_assert(ppw == UP::template component_count<C::U>()); + static_assert(ppw == VP::template component_count<C::V>()); + + // All same-tag positions share the same bit width. + static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits; + static constexpr unsigned u_bits = UP::comps[UP::template find_pos<C::U>(0)].bits; + static constexpr unsigned v_bits = VP::comps[VP::template find_pos<C::V>(0)].bits; + + static YUV16 read(const Buffer<3>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const size_t gx = x / ppw; + const size_t off = x % ppw; + + const uint8_t* yp = buf.data[y_plane] + y * buf.stride[y_plane] + + gx * YP::bytes_per_pixel; + const uint8_t* up = buf.data[u_plane] + y * buf.stride[u_plane] + + gx * UP::bytes_per_pixel; + const uint8_t* vp = buf.data[v_plane] + y * buf.stride[v_plane] + + gx * VP::bytes_per_pixel; + + const auto y_vals = YP::unpack(detail::load_word<YP>(yp)); + const auto u_vals = UP::unpack(detail::load_word<UP>(up)); + const auto v_vals = VP::unpack(detail::load_word<VP>(vp)); + + return YUV16{ + detail::decode_norm(y_bits, y_vals[YP::template find_pos<C::Y>(off)]), + detail::decode_norm(u_bits, u_vals[UP::template find_pos<C::U>(off)]), + detail::decode_norm(v_bits, v_vals[VP::template find_pos<C::V>(off)]), + uint16_t(0), + }; + } +}; + +template <typename L> +struct MultiPixelPlanarSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 3); + static_assert(L::h_sub == 1 && L::v_sub == 1); + + static constexpr size_t y_plane = L::template find_plane<C::Y>(); + static constexpr size_t u_plane = L::template find_plane<C::U>(); + static constexpr size_t v_plane = L::template find_plane<C::V>(); + + using YP = typename L::template plane<y_plane>; + using UP = typename L::template plane<u_plane>; + using VP = typename L::template plane<v_plane>; + + static constexpr size_t ppw = YP::template component_count<C::Y>(); + + static constexpr size_t y_x_idx = YP::template find_pos<C::X>(); + static constexpr size_t u_x_idx = UP::template find_pos<C::X>(); + static constexpr size_t v_x_idx = VP::template find_pos<C::X>(); + static constexpr bool y_has_x = (y_x_idx < YP::num_comps); + static constexpr bool u_has_x = (u_x_idx < UP::num_comps); + static constexpr bool v_has_x = (v_x_idx < VP::num_comps); + + static constexpr size_t block_h = 1; + static constexpr size_t block_w = ppw; + + static void write_block(Buffer<3>& buf, size_t bx, size_t by, + const YUV16 (&block)[1][ppw]) noexcept + { + std::array<uint16_t, YP::num_comps> yv{}; + std::array<uint16_t, UP::num_comps> uv{}; + std::array<uint16_t, VP::num_comps> vv{}; + + // All same-tag positions share the same bit width. + constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits; + constexpr unsigned u_bits = UP::comps[UP::template find_pos<C::U>(0)].bits; + constexpr unsigned v_bits = VP::comps[VP::template find_pos<C::V>(0)].bits; + for (size_t i = 0; i < ppw; ++i) { + yv[YP::template find_pos<C::Y>(i)] = + detail::encode_norm(y_bits, block[0][i].y); + uv[UP::template find_pos<C::U>(i)] = + detail::encode_norm(u_bits, block[0][i].u); + vv[VP::template find_pos<C::V>(i)] = + detail::encode_norm(v_bits, block[0][i].v); + } + + if constexpr (y_has_x) yv[y_x_idx] = 0; + if constexpr (u_has_x) uv[u_x_idx] = 0; + if constexpr (v_has_x) vv[v_x_idx] = 0; + + const size_t gx = bx / ppw; + detail::store_word<YP>( + buf.data[y_plane] + by * buf.stride[y_plane] + + gx * YP::bytes_per_pixel, + YP::pack(yv)); + detail::store_word<UP>( + buf.data[u_plane] + by * buf.stride[u_plane] + + gx * UP::bytes_per_pixel, + UP::pack(uv)); + detail::store_word<VP>( + buf.data[v_plane] + by * buf.stride[v_plane] + + gx * VP::bytes_per_pixel, + VP::pack(vv)); + } +}; + +} // namespace pixpat diff --git a/subprojects/pixpat/pixpat-native/src/io/semiplanar.h b/subprojects/pixpat/pixpat-native/src/io/semiplanar.h new file mode 100644 index 0000000..00e7731 --- /dev/null +++ b/subprojects/pixpat/pixpat-native/src/io/semiplanar.h @@ -0,0 +1,242 @@ +#pragma once + +// 2-plane semiplanar YUV. Two flavours: +// +// SemiplanarSource / SemiplanarSink — NV12/NV21/NV16/NV61, single +// pixel per Y storage word, single chroma pair per chroma word. +// +// MultiPixelSemiplanarSource / MultiPixelSemiplanarSink — P030/P230, +// multiple Y pixels per Y word and multiple chroma pairs per +// chroma word. The Y plane has `ppw_y = component_count<Y>()` Y +// samples per storage word; the chroma plane has `pairs = +// component_count<U>()` U/V pairs per storage word. block_w = +// pairs × h_sub, block_h = v_sub — each block exactly fills one +// chroma word. + +#include <array> + +#include "../layout.h" +#include "detail.h" + +namespace pixpat +{ + +template <typename L> +struct SemiplanarSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 2); + + using YP = typename L::template plane<0>; + using CP = typename L::template plane<1>; + static constexpr size_t y_idx = YP::template find_pos<C::Y>(); + static constexpr size_t u_idx = CP::template find_pos<C::U>(); + static constexpr size_t v_idx = CP::template find_pos<C::V>(); + + static YUV16 read(const Buffer<2>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + const uint8_t* yp = buf.data[0] + y * buf.stride[0] + x * YP::bytes_per_pixel; + const auto y_vals = YP::unpack(detail::load_word<YP>(yp)); + + const size_t cx = x / L::h_sub; + const size_t cy = y / L::v_sub; + const uint8_t* cp = buf.data[1] + cy * buf.stride[1] + cx * CP::bytes_per_pixel; + const auto c_vals = CP::unpack(detail::load_word<CP>(cp)); + + return YUV16{ + detail::decode_norm(YP::comps[y_idx].bits, y_vals[y_idx]), + detail::decode_norm(CP::comps[u_idx].bits, c_vals[u_idx]), + detail::decode_norm(CP::comps[v_idx].bits, c_vals[v_idx]), + uint16_t(0), + }; + } +}; + +template <typename L> +struct SemiplanarSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 2); + + using YP = typename L::template plane<0>; + using CP = typename L::template plane<1>; + static constexpr size_t y_idx = YP::template find_pos<C::Y>(); + static constexpr size_t u_idx = CP::template find_pos<C::U>(); + static constexpr size_t v_idx = CP::template find_pos<C::V>(); + + static constexpr size_t block_h = L::v_sub; + static constexpr size_t block_w = L::h_sub; + + static void write_block(Buffer<2>& buf, size_t bx, size_t by, + const YUV16 (&block)[block_h][block_w]) noexcept + { + // Y per pixel. + for (size_t dy = 0; dy < block_h; ++dy) { + uint8_t* y_row = buf.data[0] + (by + dy) * buf.stride[0]; + for (size_t dx = 0; dx < block_w; ++dx) { + std::array<uint16_t, YP::num_comps> v{}; + v[y_idx] = detail::encode_norm(YP::comps[y_idx].bits, + block[dy][dx].y); + detail::store_word<YP>( + y_row + (bx + dx) * YP::bytes_per_pixel, + YP::pack(v)); + } + } + + // One averaged UV pair for the whole block. Integer truncation + // (no round-half-up). + uint32_t u_sum = 0, v_sum = 0; + for (size_t dy = 0; dy < block_h; ++dy) { + for (size_t dx = 0; dx < block_w; ++dx) { + u_sum += block[dy][dx].u; + v_sum += block[dy][dx].v; + } + } + constexpr uint32_t n = block_h * block_w; + const uint16_t u_avg = uint16_t(u_sum / n); + const uint16_t v_avg = uint16_t(v_sum / n); + + std::array<uint16_t, CP::num_comps> uv{}; + uv[u_idx] = detail::encode_norm(CP::comps[u_idx].bits, u_avg); + uv[v_idx] = detail::encode_norm(CP::comps[v_idx].bits, v_avg); + + const size_t cx = bx / L::h_sub; + const size_t cy = by / L::v_sub; + uint8_t* cp = buf.data[1] + cy * buf.stride[1] + cx * CP::bytes_per_pixel; + detail::store_word<CP>(cp, CP::pack(uv)); + } +}; + +// Multi-pixel-per-word semiplanar (P030: 4:2:0, P230: 4:2:2). All Y +// components share the same bit width; same for U and V. +template <typename L> +struct MultiPixelSemiplanarSource { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 2); + + using YP = typename L::template plane<0>; + using CP = typename L::template plane<1>; + static constexpr size_t ppw_y = YP::template component_count<C::Y>(); + static constexpr size_t pairs = CP::template component_count<C::U>(); + static_assert(ppw_y >= 1 && pairs >= 1); + static_assert(pairs == CP::template component_count<C::V>()); + + // All same-tag positions share the same bit width. + static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits; + static constexpr unsigned u_bits = CP::comps[CP::template find_pos<C::U>(0)].bits; + static constexpr unsigned v_bits = CP::comps[CP::template find_pos<C::V>(0)].bits; + + static YUV16 read(const Buffer<2>& buf, size_t x, size_t y, + [[maybe_unused]] size_t W, + [[maybe_unused]] size_t H) noexcept + { + // Y read. + const size_t y_gx = x / ppw_y; + const size_t y_off = x % ppw_y; + const uint8_t* yp = buf.data[0] + y * buf.stride[0] + + y_gx * YP::bytes_per_pixel; + const auto y_vals = YP::unpack(detail::load_word<YP>(yp)); + + // Chroma read. + const size_t cx = x / L::h_sub; + const size_t cy = y / L::v_sub; + const size_t c_gx = cx / pairs; + const size_t c_off = cx % pairs; + const uint8_t* cp = buf.data[1] + cy * buf.stride[1] + + c_gx * CP::bytes_per_pixel; + const auto c_vals = CP::unpack(detail::load_word<CP>(cp)); + + return YUV16{ + detail::decode_norm(y_bits, y_vals[YP::template find_pos<C::Y>(y_off)]), + detail::decode_norm(u_bits, c_vals[CP::template find_pos<C::U>(c_off)]), + detail::decode_norm(v_bits, c_vals[CP::template find_pos<C::V>(c_off)]), + uint16_t(0), + }; + } +}; + +template <typename L> +struct MultiPixelSemiplanarSink { + using Layout = L; + using Pixel = YUV16; + + static_assert(L::kind == ColorKind::YUV); + static_assert(L::num_planes == 2); + + using YP = typename L::template plane<0>; + using CP = typename L::template plane<1>; + static constexpr size_t ppw_y = YP::template component_count<C::Y>(); + static constexpr size_t pairs = CP::template component_count<C::U>(); + static_assert(ppw_y >= 1 && pairs >= 1); + + // One block exactly fills one chroma word: `pairs` chroma pairs, + // each covering h_sub luma columns × v_sub rows. + static constexpr size_t block_w = pairs * L::h_sub; + static constexpr size_t block_h = L::v_sub; + static_assert(block_w % ppw_y == 0, + "block width must be a multiple of Y-pixels-per-word"); + static constexpr size_t y_words_per_row = block_w / ppw_y; + + // All same-tag positions share the same bit width. + static constexpr unsigned y_bits = YP::comps[YP::template find_pos<C::Y>(0)].bits; + static constexpr unsigned u_bits = CP::comps[CP::template find_pos<C::U>(0)].bits; + static constexpr unsigned v_bits = CP::comps[CP::template find_pos<C::V>(0)].bits; + + static void write_block(Buffer<2>& buf, size_t bx, size_t by, + const YUV16 (&block)[block_h][block_w]) noexcept + { + // Y plane: y_words_per_row Y-words per row, block_h rows. + for (size_t dy = 0; dy < block_h; ++dy) { + uint8_t* y_row = buf.data[0] + + (by + dy) * buf.stride[0]; + for (size_t w = 0; w < y_words_per_row; ++w) { + std::array<uint16_t, YP::num_comps> v{}; + for (size_t i = 0; i < ppw_y; ++i) { + const size_t pos = YP::template find_pos<C::Y>(i); + v[pos] = detail::encode_norm( + y_bits, block[dy][w * ppw_y + i].y); + } + detail::store_word<YP>( + y_row + (bx / ppw_y + w) + * YP::bytes_per_pixel, + YP::pack(v)); + } + } + + // One UV-word: `pairs` chroma pairs. Each pair averages h_sub + // horizontally × v_sub vertically luma values. + std::array<uint16_t, CP::num_comps> uv{}; + constexpr uint32_t n = L::h_sub * L::v_sub; + for (size_t p = 0; p < pairs; ++p) { + uint32_t u_sum = 0, v_sum = 0; + for (size_t dy = 0; dy < block_h; ++dy) { + for (size_t dx = 0; dx < L::h_sub; ++dx) { + u_sum += block[dy][p * L::h_sub + dx].u; + v_sum += block[dy][p * L::h_sub + dx].v; + } + } + uv[CP::template find_pos<C::U>(p)] = + detail::encode_norm(u_bits, uint16_t(u_sum / n)); + uv[CP::template find_pos<C::V>(p)] = + detail::encode_norm(v_bits, uint16_t(v_sum / n)); + } + + const size_t cy = by / L::v_sub; + const size_t uv_word_idx = bx / block_w; + detail::store_word<CP>( + buf.data[1] + cy * buf.stride[1] + + uv_word_idx * CP::bytes_per_pixel, + CP::pack(uv)); + } +}; + +} // namespace pixpat |
