From f758e324e17b52116075bb9175a3dd03d223a424 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Sun, 2 Feb 2025 11:26:23 +0200 Subject: kms++util: New template based conversion & testpat code New templated conversion and test pattern code. Signed-off-by: Tomi Valkeinen --- kms++util/src/conv-common.h | 177 +++++++++++++++++++++ kms++util/src/conv-rgb.h | 300 +++++++++++++++++++++++++++++++++++ kms++util/src/conv-yuv-packed.h | 83 ++++++++++ kms++util/src/conv-yuv-planar.h | 214 +++++++++++++++++++++++++ kms++util/src/conv-yuv-semiplanar.h | 214 +++++++++++++++++++++++++ kms++util/src/conv-yuv.h | 147 ++++++++++++++++++ kms++util/src/conv.h | 7 + kms++util/src/testpat.cpp | 301 +++++++++++++++++++++++------------- 8 files changed, 1339 insertions(+), 104 deletions(-) create mode 100644 kms++util/src/conv-common.h create mode 100644 kms++util/src/conv-rgb.h create mode 100644 kms++util/src/conv-yuv-packed.h create mode 100644 kms++util/src/conv-yuv-planar.h create mode 100644 kms++util/src/conv-yuv-semiplanar.h create mode 100644 kms++util/src/conv-yuv.h create mode 100644 kms++util/src/conv.h (limited to 'kms++util/src') diff --git a/kms++util/src/conv-common.h b/kms++util/src/conv-common.h new file mode 100644 index 0000000..837668f --- /dev/null +++ b/kms++util/src/conv-common.h @@ -0,0 +1,177 @@ +#pragma once + +#include +#include + +#define MDSPAN_IMPL_STANDARD_NAMESPACE md +#define MDSPAN_IMPL_PROPOSED_NAMESPACE exp + +#include + +namespace kms +{ + +/* + * Helpers + */ + +template +constexpr auto make_strided_view(T* data, size_t rows, size_t cols, size_t row_stride_bytes) +{ + assert(row_stride_bytes % sizeof(T) == 0 && "Row stride must be aligned to element size"); + + size_t row_stride = row_stride_bytes / sizeof(T); + std::array strides = { row_stride, 1 }; + + auto layout = md::layout_stride::mapping(md::dextents(rows, cols), strides); + + return md::mdspan, md::layout_stride>(data, layout); +} + +template +constexpr auto make_strided_fb_view(void* data, size_t rows, size_t cols, size_t row_stride_bytes) +{ + return make_strided_view(reinterpret_cast(data), rows, cols, row_stride_bytes); +} + +// Helper for static loop unrolling +template +constexpr void static_for(F&& f) +{ + [&](std::index_sequence) { + (f(std::integral_constant{}), ...); + }(std::make_index_sequence{}); +} + +template +concept HasIndexOperatorReturning = requires(T t) { + { t[0] } -> std::convertible_to; +}; + +template +concept Is2Dspan = std::convertible_to>>; + +/* + * Packing and Layouts + */ + +/* This type must be big enough to hold any of the components' value */ +using component_storage_type = uint16_t; + +enum class ComponentType { + X, A, + R, G, B, + Y, Cb, Cr, + Y0, Y1, Y2, + Cb0, Cb1, Cb2, + Cr0, Cr1, Cr2, +}; + +// Describes a single component's bit layout +template +struct ComponentLayout { + static constexpr size_t size = BitSize; + static constexpr size_t offset = BitOffset; + static constexpr ComponentType type = Type; + + static_assert(sizeof(component_storage_type) * 8 >= size); + + static constexpr auto get_mask() { return ((1ull << BitSize) - 1) << BitOffset; } + + template + static constexpr TStorage pack_value(TStorage value) + { + return (value & ((1ull << BitSize) - 1)) << BitOffset; + } + + template + static constexpr component_storage_type unpack_value(TStorage value) + { + return (value >> BitOffset) & ((1ull << BitSize) - 1); + } +}; + +// Describes N components packed into a storage type +template +struct PlaneLayout { + using components_tuple = std::tuple; + + static constexpr size_t num_components = sizeof...(Components); + + static constexpr size_t total_bits = (Components::size + ...); + static constexpr size_t storage_bits = sizeof(TStorage) * 8; + static_assert(total_bits <= storage_bits, "Components don't fit in storage type"); + + template + static constexpr size_t component_size = std::tuple_element_t::size; + + using storage_type = TStorage; + + static constexpr std::array order = { + Components::type... + }; + + template + static constexpr size_t component_count() + { + return std::ranges::count(order, C); + } + + template + static constexpr size_t find_pos() + { + return std::ranges::find(order, C) - order.begin(); + } + + template + static constexpr size_t find_nth_pos(size_t n = 0) + { + auto it = std::ranges::find_if( + order, [&n](ComponentType type) mutable { + return type == C && n-- == 0; + }); + return it - order.begin(); + } + + // Pack values into storage type (separate parameters version) + template + static constexpr TStorage pack(Values... values) + { + static_assert(sizeof...(values) == num_components, + "Number of values must match number of components"); + return (Components::template pack_value(values) | ...); + } + + // Pack values into storage type (std:array version) + template + static constexpr TStorage pack(const std::array& values) + { + static_assert(N == num_components, + "Number of values must match number of components"); + return [&](std::index_sequence) { + return (Components::template pack_value(values[I]) | ...); + }(std::make_index_sequence{}); + } + + static constexpr std::array + unpack(TStorage value) + { + return [&](std::index_sequence) { + return std::array{ + std::tuple_element_t::template unpack_value< + TStorage>(value)... + }; + }(std::make_index_sequence{}); + } +}; + +template +class FormatLayout +{ +public: + static constexpr size_t num_planes = sizeof...(Planes); + + template using plane = std::tuple_element_t>; +}; + +} // namespace kms diff --git a/kms++util/src/conv-rgb.h b/kms++util/src/conv-rgb.h new file mode 100644 index 0000000..e03ded1 --- /dev/null +++ b/kms++util/src/conv-rgb.h @@ -0,0 +1,300 @@ +#pragma once + +#include + +#include +#include + +#include "conv-common.h" + +namespace kms +{ + +/* + * RGB + */ + +template +struct RGB_16_Layout + : public FormatLayout, + ComponentLayout, + ComponentLayout, + ComponentLayout>> +{ +}; + +using XRGB4444_Layout = RGB_16_Layout< + ComponentType::X, 4, 12, + ComponentType::R, 4, 8, + ComponentType::G, 4, 4, + ComponentType::B, 4, 0 +>; + +using ARGB4444_Layout = RGB_16_Layout< + ComponentType::A, 4, 12, + ComponentType::R, 4, 8, + ComponentType::G, 4, 4, + ComponentType::B, 4, 0 +>; + +using XRGB1555_Layout = RGB_16_Layout< + ComponentType::X, 1, 15, + ComponentType::R, 5, 10, + ComponentType::G, 5, 5, + ComponentType::B, 5, 0 +>; + +using ARGB1555_Layout = RGB_16_Layout< + ComponentType::A, 1, 15, + ComponentType::R, 5, 10, + ComponentType::G, 5, 5, + ComponentType::B, 5, 0 +>; + +using RGB565_Layout = FormatLayout, + ComponentLayout, + ComponentLayout> +>; + +using BGR565_Layout = FormatLayout, + ComponentLayout, + ComponentLayout> +>; + +using RGB888_Layout = FormatLayout, + ComponentLayout, + ComponentLayout> +>; + +using BGR888_Layout = FormatLayout, + ComponentLayout, + ComponentLayout> +>; + +template +struct RGB_32_Layout + : public FormatLayout, + ComponentLayout, + ComponentLayout, + ComponentLayout>> +{ +}; + +template +struct RGB_8_32_Layout + : public RGB_32_Layout< + C0, 8, 0, + C1, 8, 8, + C2, 8, 16, + C3, 8, 24> +{ +}; + +using XRGB8888_Layout = RGB_8_32_Layout; +using ARGB8888_Layout = RGB_8_32_Layout; + +using XBGR8888_Layout = RGB_8_32_Layout; +using ABGR8888_Layout = RGB_8_32_Layout; + +using RGBX8888_Layout = RGB_8_32_Layout; +using RGBA8888_Layout = RGB_8_32_Layout; + +using BGRX8888_Layout = RGB_8_32_Layout; +using BGRA8888_Layout = RGB_8_32_Layout; + +using XRGB2101010_Layout = RGB_32_Layout< + ComponentType::X, 2, 30, + ComponentType::R, 10, 20, + ComponentType::G, 10, 10, + ComponentType::B, 10, 0 +>; + +using ARGB2101010_Layout = RGB_32_Layout< + ComponentType::A, 2, 30, + ComponentType::R, 10, 20, + ComponentType::G, 10, 10, + ComponentType::B, 10, 0 +>; + +using XBGR2101010_Layout = RGB_32_Layout< + ComponentType::X, 2, 30, + ComponentType::B, 10, 20, + ComponentType::G, 10, 10, + ComponentType::R, 10, 0 +>; + +using ABGR2101010_Layout = RGB_32_Layout< + ComponentType::A, 2, 30, + ComponentType::B, 10, 20, + ComponentType::G, 10, 10, + ComponentType::R, 10, 0 +>; + +using RGBX1010102_Layout = RGB_32_Layout< + ComponentType::R, 10, 22, + ComponentType::G, 10, 12, + ComponentType::B, 10, 2, + ComponentType::X, 2, 0 +>; + +using RGBA1010102_Layout = RGB_32_Layout< + ComponentType::R, 10, 22, + ComponentType::G, 10, 12, + ComponentType::B, 10, 2, + ComponentType::A, 2, 0 +>; + +using BGRX1010102_Layout = RGB_32_Layout< + ComponentType::B, 10, 22, + ComponentType::G, 10, 12, + ComponentType::R, 10, 2, + ComponentType::X, 2, 0 +>; + +using BGRA1010102_Layout = RGB_32_Layout< + ComponentType::B, 10, 22, + ComponentType::G, 10, 12, + ComponentType::R, 10, 2, + ComponentType::A, 2, 0 +>; + +template +class ARGB_Writer +{ + using Plane = Layout::template plane<0>; + using TStorage = Plane::storage_type; + + static_assert(Layout::num_planes == 1); + static_assert(Plane::num_components == 3 || Plane::num_components == 4); + + static_assert(Plane::template component_count() == 1); + static_assert(Plane::template component_count() == 1); + static_assert(Plane::template component_count() == 1); + + static constexpr bool has_alpha = Plane::template component_count(); + static constexpr bool has_padding = Plane::template component_count(); + + static constexpr bool needs_packed_access = Plane::total_bits != Plane::storage_bits; + + static constexpr size_t a_idx = Plane::template find_pos(); + static constexpr size_t x_idx = Plane::template find_pos(); + static constexpr size_t r_idx = Plane::template find_pos(); + static constexpr size_t g_idx = Plane::template find_pos(); + static constexpr size_t b_idx = Plane::template find_pos(); + + static constexpr size_t a_shift = has_alpha ? 16 - Plane::template component_size : 0; + static constexpr size_t x_shift = has_padding ? 16 - Plane::template component_size : 0; + static constexpr size_t r_shift = 16 - Plane::template component_size; + static constexpr size_t g_shift = 16 - Plane::template component_size; + static constexpr size_t b_shift = 16 - Plane::template component_size; + + static_assert(Plane::total_bits % 8 == 0); + static constexpr size_t bytes_per_pixel = Plane::total_bits / 8; + +public: + // Pack and write num_pixels pixels from src_line to dst_line + static void pack_line(HasIndexOperatorReturning auto&& dst_line, + HasIndexOperatorReturning auto&& src_line, + size_t num_pixels) + { + for (size_t x = 0; x < num_pixels; x++) { + const RGB16& pix = src_line[x]; + + std::array + components; + + if constexpr (has_alpha) + components[a_idx] = pix.a >> a_shift; + + if constexpr (has_padding) + components[x_idx] = 0; + + components[r_idx] = pix.r >> r_shift; + components[g_idx] = pix.g >> g_shift; + components[b_idx] = pix.b >> b_shift; + + if constexpr (!needs_packed_access) { + dst_line[x] = Plane::pack(components); + } else { + auto dst_bytes = reinterpret_cast(&dst_line[0]); + + TStorage packed = Plane::pack(components); + + memcpy(dst_bytes + x * bytes_per_pixel, &packed, + bytes_per_pixel); + } + } + } + + // Read and unpack num_pixels pixels from src_line to dst_line + static void unpack_line(HasIndexOperatorReturning auto&& dst_line, + HasIndexOperatorReturning auto&& src_line, + size_t num_pixels) + { + for (size_t x = 0; x < num_pixels; x++) { + decltype(Plane::unpack(src_line[x])) components; + + if constexpr (!needs_packed_access) { + components = Plane::unpack(src_line[x]); + } else { + auto src_bytes = + reinterpret_cast(&src_line[0]); + TStorage packed; + + memcpy(&packed, src_bytes + x * bytes_per_pixel, + bytes_per_pixel); + + components = Plane::unpack(packed); + } + + dst_line[x] = RGB16 { + static_cast(components[r_idx] << r_shift), + static_cast(components[g_idx] << g_shift), + static_cast(components[b_idx] << b_shift), + static_cast(has_alpha ? components[a_idx] << a_shift : 0), + }; + } + } + + static void write_pattern(IFramebuffer& fb, size_t start_y, size_t end_y, + auto&& generate_line) + { + std::vector linebuf(fb.width()); + + // View to the plane + auto view = make_strided_fb_view(fb.map(0), fb.height(), fb.width(), + fb.stride(0)); + + for (size_t y_src = start_y; y_src <= end_y; y_src++) { + generate_line(y_src, linebuf); + + auto dst = md::submdspan(view, y_src, md::full_extent); + + pack_line(dst, linebuf, fb.width()); + } + } + + static void get_line(IFramebuffer& fb, size_t w, size_t h, size_t row, std::span linebuf) + { + auto view = make_strided_fb_view(fb.map(0), fb.height(), fb.width(), + fb.stride(0)); + + auto src = md::submdspan(view, row, md::full_extent); + + unpack_line(linebuf, src); + } +}; + +} // namespace kms diff --git a/kms++util/src/conv-yuv-packed.h b/kms++util/src/conv-yuv-packed.h new file mode 100644 index 0000000..3aa01fa --- /dev/null +++ b/kms++util/src/conv-yuv-packed.h @@ -0,0 +1,83 @@ +#pragma once + +#include + +#include +#include + +#include "conv-common.h" + +namespace kms +{ + +/* YUV Packed */ + +template +struct YUV_Packed_Format + : public FormatLayout< + PlaneLayout, + ComponentLayout, + ComponentLayout, + ComponentLayout + > + > +{ +}; + +// Define common packed YUV formats +using YUYV_Layout = YUV_Packed_Format; + +using YVYU_Layout = YUV_Packed_Format; + +using UYVY_Layout = YUV_Packed_Format; + +using VYUY_Layout = YUV_Packed_Format; + +template +class YUVPackedWriter +{ + using Plane = Layout::template plane<0>; + using TStorage = Plane::storage_type; + + static constexpr size_t y0_pos = Plane::template find_pos(); + static constexpr size_t y1_pos = Plane::template find_pos(); + static constexpr size_t cb_pos = Plane::template find_pos(); + static constexpr size_t cr_pos = Plane::template find_pos(); + +public: + static void write_pattern(IFramebuffer& fb, size_t start_y, size_t end_y, + auto&& generate_line) + { + std::vector linebuf(fb.width()); + + auto view = make_strided_fb_view(fb.map(0), fb.height(), + fb.width() / 2, // Two pixels per storage unit + fb.stride(0)); + + for (size_t y = start_y; y <= end_y; y++) { + generate_line(y, linebuf); + + for (size_t x = 0; x < fb.width(); x += 2) { + // Get two pixels + const YUV16& pix0 = linebuf[x]; + const YUV16& pix1 = linebuf[x + 1]; + + std::array components; + + components[y0_pos] = pix0.y >> 8; + components[y1_pos] = pix1.y >> 8; + components[cb_pos] = ((pix0.u + pix1.u) / 2) >> 8; + components[cr_pos] = ((pix0.v + pix1.v) / 2) >> 8; + + view(y, x / 2) = Plane::pack(components); + } + } + } +}; + +} // namespace kms diff --git a/kms++util/src/conv-yuv-planar.h b/kms++util/src/conv-yuv-planar.h new file mode 100644 index 0000000..0ece6f7 --- /dev/null +++ b/kms++util/src/conv-yuv-planar.h @@ -0,0 +1,214 @@ +#pragma once + +#include + +#include +#include + +#include "conv-common.h" + +namespace kms +{ +/* YUV Planar */ + +template +class YUV_Planar_Layout + : public FormatLayout< + PlaneLayout>, + PlaneLayout>, + PlaneLayout> + > +{ +public: + static constexpr std::array uv_order = { P1, P2 }; + static constexpr size_t h_sub = HSubUV; + static constexpr size_t v_sub = VSubUV; + + template + static constexpr size_t find_plane() + { + return 1 + (std::ranges::find(uv_order, P) - uv_order.begin()); + } + + static constexpr size_t y_plane = 0; // Y is always plane 0 + static constexpr size_t cb_plane = find_plane(); + static constexpr size_t cr_plane = find_plane(); +}; + +using YUV444_Layout = YUV_Planar_Layout; +using YVU444_Layout = YUV_Planar_Layout; +using YUV422_Layout = YUV_Planar_Layout; +using YVU422_Layout = YUV_Planar_Layout; +using YUV420_Layout = YUV_Planar_Layout; +using YVU420_Layout = YUV_Planar_Layout; + +template +class YUVPlanarWriter +{ + using YLayout = Format::template plane; + using CbLayout = Format::template plane; + using CrLayout = Format::template plane; + + using TY = typename YLayout::storage_type; + using TCb = typename CbLayout::storage_type; + using TCr = typename CrLayout::storage_type; +public: + static void write_pattern(IFramebuffer& fb, size_t start_y, size_t end_y, + auto&& generate_line) + { + assert(start_y % Format::v_sub == 0); + assert((end_y + 1) % Format::v_sub == 0); + + // Line buffers + std::vector linebuf_storage(fb.width() * Format::v_sub); + auto linebuf = md::mdspan(linebuf_storage.data(), Format::v_sub, fb.width()); + + // Views to all planes + auto y_buf = make_strided_fb_view(fb.map(Format::y_plane), fb.height(), + fb.width(), fb.stride(Format::y_plane)); + + auto cb_buf = make_strided_fb_view(fb.map(Format::cb_plane), + fb.height() / Format::v_sub, + fb.width() / Format::h_sub, + fb.stride(Format::cb_plane)); + + auto cr_buf = make_strided_fb_view(fb.map(Format::cr_plane), + fb.height() / Format::v_sub, + fb.width() / Format::h_sub, + fb.stride(Format::cr_plane)); + + for (size_t y_src = start_y; y_src <= end_y; y_src++) { + size_t y_offset = y_src % Format::v_sub; + + if (y_offset == 0) { + // Fill line buffers + for (size_t buf_y = 0; buf_y < Format::v_sub; buf_y++) { + auto line = md::submdspan(linebuf, buf_y, md::full_extent); + std::span span(line.data_handle(), line.size()); + generate_line(y_src + buf_y, span); + } + } + + // Write Y plane + write_y_line(y_buf, y_src, linebuf, y_offset); + + // Write Cb/Cr planes if we're at a subsampling boundary + if (y_offset == 0) + write_uv_line(cb_buf, cr_buf, linebuf, y_src); + } + } + + static void write_lines(IFramebuffer& fb, size_t start_y, size_t end_y, + Is2Dspan auto&& lines) + { + const size_t height = end_y - start_y + 1; + + if (lines.extent(0) < height || lines.extent(1) < fb.width()) + throw std::invalid_argument("Source line buffer too small"); + + assert(start_y % Format::v_sub == 0); + assert((end_y + 1) % Format::v_sub == 0); + + // Views to all planes + auto y_buf = make_strided_fb_view(fb.map(Format::y_plane), fb.height(), + fb.width(), fb.stride(Format::y_plane)); + + auto cb_buf = make_strided_fb_view(fb.map(Format::cb_plane), + fb.height() / Format::v_sub, + fb.width() / Format::h_sub, + fb.stride(Format::cb_plane)); + + auto cr_buf = make_strided_fb_view(fb.map(Format::cr_plane), + fb.height() / Format::v_sub, + fb.width() / Format::h_sub, + fb.stride(Format::cr_plane)); + + for (size_t y_src = start_y; y_src <= end_y; y_src++) { + size_t y_offset = y_src % Format::v_sub; + + // Write Y plane + write_y_line(y_buf, y_src, lines, y_offset); + + // Write Cb/Cr planes if we're at a subsampling boundary + if (y_offset == 0) + write_uv_line(cb_buf, cr_buf, lines, y_src); + } + } + +private: + template + static void write_y_line(YBuf& y_buf, size_t y_src, auto& linebuf, size_t y_offset) + { + for (size_t x = 0; x < linebuf.extent(1); x++) + y_buf(y_src, x) = YLayout::pack(linebuf(y_offset, x).y >> 8); + } + + template + static void write_uv_line(UVBuf& cb_buf, UVBuf& cr_buf, auto& linebuf, size_t y_src) + { + const size_t uv_y = y_src / Format::v_sub; + + for (size_t x = 0; x < linebuf.extent(1); x += Format::h_sub) { + // Average subsampled region + uint32_t u_sum = 0, v_sum = 0; + for (size_t y = 0; y < Format::v_sub; y++) { + for (size_t x_off = 0; x_off < Format::h_sub; x_off++) { + u_sum += linebuf(y, x + x_off).u; + v_sum += linebuf(y, x + x_off).v; + } + } + + const size_t total_samples = Format::h_sub * Format::v_sub; + const size_t uv_x = x / Format::h_sub; + + cb_buf(uv_y, uv_x) = CbLayout::pack(u_sum / total_samples >> 8); + cr_buf(uv_y, uv_x) = CrLayout::pack(v_sum / total_samples >> 8); + } + } + +public: + static void read_lines(IFramebuffer& fb, size_t start_y, size_t end_y, + Is2Dspan auto&& dest) + { + const size_t height = end_y - start_y + 1; + + if (dest.extent(0) < height || dest.extent(1) < fb.width()) + throw std::invalid_argument("Destination line buffer too small"); + + assert(start_y % Format::v_sub == 0); + assert((end_y + 1) % Format::v_sub == 0); + + auto y_buf = make_strided_fb_view(fb.map(Format::y_plane), fb.height(), + fb.width(), fb.stride(Format::y_plane)); + + auto cb_buf = make_strided_fb_view(fb.map(Format::cb_plane), + fb.height() / Format::v_sub, + fb.width() / Format::h_sub, + fb.stride(Format::cb_plane)); + + auto cr_buf = make_strided_fb_view(fb.map(Format::cr_plane), + fb.height() / Format::v_sub, + fb.width() / Format::h_sub, + fb.stride(Format::cr_plane)); + + for (size_t y = start_y; y <= end_y; y++) { + for (size_t x = 0; x < fb.width(); x++) { + const auto y_val = YLayout::unpack(y_buf(y, x))[0]; + + const size_t uv_y = y / Format::v_sub; + const size_t uv_x = x / Format::h_sub; + + const auto u_val = CbLayout::unpack(cb_buf(uv_y, uv_x))[0]; + const auto v_val = CrLayout::unpack(cr_buf(uv_y, uv_x))[0]; + + dest(y - start_y, x) = YUV16 { + static_cast(y_val << 8), + static_cast(u_val << 8), + static_cast(v_val << 8) + }; + } + } + } +}; + +} // namespace kms diff --git a/kms++util/src/conv-yuv-semiplanar.h b/kms++util/src/conv-yuv-semiplanar.h new file mode 100644 index 0000000..fa7e0cb --- /dev/null +++ b/kms++util/src/conv-yuv-semiplanar.h @@ -0,0 +1,214 @@ +#pragma once + +#include + +#include +#include + +#include "conv-common.h" + +namespace kms +{ + +/* Semiplanar YUV */ + +template +struct NV12_Family_Layout + : public FormatLayout< + PlaneLayout + >, + PlaneLayout, + ComponentLayout + > + > +{ + static constexpr size_t h_sub = HSub; + static constexpr size_t v_sub = VSub; +}; + +using NV12_Layout = NV12_Family_Layout; +using NV21_Layout = NV12_Family_Layout; + +using NV16_Layout = NV12_Family_Layout; +using NV61_Layout = NV12_Family_Layout; + +template +struct XV15_Family_Layout + : public FormatLayout< + PlaneLayout, + ComponentLayout, + ComponentLayout + >, + PlaneLayout, + ComponentLayout, + ComponentLayout, + ComponentLayout, + ComponentLayout, + ComponentLayout + > + > +{ + static constexpr size_t h_sub = HSub; + static constexpr size_t v_sub = VSub; +}; + +using XV15_Layout = XV15_Family_Layout<2, 2>; +using XV20_Layout = XV15_Family_Layout<2, 1>; + +template +struct SubsampleHelper { + template static constexpr auto subsample(const View& view, size_t group_idx) + { + uint32_t sum = 0; + + static_for<0, HSub>([&](auto x) { sum += view(0, group_idx + x); }); + + if constexpr (VSub > 1) { + static_for<1, VSub>([&](auto y) { + static_for<0, HSub>([&](auto x) { sum += view(y, group_idx + x); }); + }); + } + + return sum / (HSub * VSub); + } +}; + +template +class YUVSemiPlanarWriter +{ + static_assert(Layout::num_planes == 2); + + static constexpr size_t h_sub = Layout::h_sub; + static constexpr size_t v_sub = Layout::v_sub; + + using YLayout = Layout::template plane<0>; + using UVLayout = Layout::template plane<1>; + + using TY = YLayout::storage_type; + using TCrCb = UVLayout::storage_type; + + static constexpr size_t pixels_in_group = YLayout::template component_count(); + static_assert(pixels_in_group == UVLayout::template component_count()); + static_assert(pixels_in_group == UVLayout::template component_count()); + +public: + static void write_pattern(IFramebuffer& fb, size_t start_y, size_t end_y, + auto&& generate_line) + { + assert(start_y % v_sub == 0); + assert((end_y + 1) % v_sub == 0); + if (fb.width() % pixels_in_group != 0) + throw std::invalid_argument("FB width doesn't align to pixel format"); + + // Line buffers + std::vector linebuf_storage(fb.width() * v_sub); + auto linebuf = md::mdspan(linebuf_storage.data(), v_sub, fb.width()); + + // Views to the planes + auto y_view = make_strided_fb_view(fb.map(0), fb.height(), + fb.width() / pixels_in_group, fb.stride(0)); + + auto uv_view = make_strided_fb_view(fb.map(1), fb.height() / v_sub, + fb.width() / pixels_in_group / h_sub, + fb.stride(1)); + + for (size_t y_src = start_y; y_src <= end_y; y_src++) { + size_t y_offset = y_src % v_sub; + + if (y_offset == 0) { + // Fill line buffers + for (size_t y_offset = 0; y_offset < v_sub; y_offset++) { + auto line = md::submdspan(linebuf, y_offset, md::full_extent); + std::span span(line.data_handle(), line.size()); + generate_line(y_src + y_offset, span); + } + } + + // Write Y values from the line buffer + write_y_samples(md::submdspan(y_view, y_src, md::full_extent), + md::submdspan(linebuf, y_offset, md::full_extent)); + + if (y_offset == 0) { + // Write UV values from the line buffers + write_uv_samples(uv_view, linebuf, y_src); + } + } + } + +private: + + template + static void write_y_samples(YBuf&& y_view, auto&& linebuf) + { + for (size_t x_src = 0; x_src < linebuf.extent(1); x_src += pixels_in_group) { + auto x_dst = x_src / pixels_in_group; + + write_y_group(y_view, linebuf, x_src, x_dst, + std::make_index_sequence{}); + } + } + + template + static void write_y_group(YBuf&& y_view, auto&& linebuf, size_t x_src, + size_t x_dst, std::index_sequence) + { + std::array y_values{ + static_cast((linebuf(x_src + I).y >> (16 - YLayout::template component_size)))... + }; + + y_view(x_dst) = YLayout::pack(y_values); + } + + template + static void write_uv_samples(UVBuf& uv_view, auto& linebuf, size_t y_src) + { + for (size_t x_src = 0; x_src < linebuf.extent(1); x_src += pixels_in_group * h_sub) { + const size_t y_offset = 0; + auto y_dst = (y_src + y_offset) / v_sub; + auto x_dst = x_src / (pixels_in_group * h_sub); + + auto group_view = md::submdspan(linebuf, std::tuple(y_offset, y_offset + v_sub), + std::tuple(x_src, x_src + h_sub * pixels_in_group)); + + write_uv_group(uv_view, group_view, y_dst, x_dst, + std::make_index_sequence{}); + } + } + + template + static void write_uv_group(UVBuf& uv_view, auto& group_view, size_t y_dst, size_t x_dst, + std::index_sequence) + { + std::array uv_values; + + ( + [&]() { + constexpr size_t group_idx = i * h_sub; + + constexpr size_t u_idx = UVLayout::template find_nth_pos(i); + constexpr size_t v_idx = UVLayout::template find_nth_pos(i); + + auto u = SubsampleHelper::subsample( + [&group_view](size_t y, size_t x) { return group_view(y, x).u; }, + group_idx); + + auto v = SubsampleHelper::subsample( + [&group_view](size_t y, size_t x) { return group_view(y, x).v; }, + group_idx); + + uv_values[u_idx] = + u >> (16 - UVLayout::template component_size); + uv_values[v_idx] = + v >> (16 - UVLayout::template component_size); + }.template operator()(), + ...); + + uv_view(y_dst, x_dst) = UVLayout::pack(uv_values); + } +}; + +} // namespace kms diff --git a/kms++util/src/conv-yuv.h b/kms++util/src/conv-yuv.h new file mode 100644 index 0000000..4478226 --- /dev/null +++ b/kms++util/src/conv-yuv.h @@ -0,0 +1,147 @@ +#pragma once + +#include + +#include +#include + +#include "conv-common.h" + +namespace kms +{ + +using XVUY2101010_Layout = + FormatLayout, + ComponentLayout, + ComponentLayout, + ComponentLayout>>; + +template +class YUV_Writer +{ + using Plane = Layout::template plane<0>; + using TStorage = Plane::storage_type; + + static_assert(Layout::num_planes == 1); + static_assert(Plane::num_components == 3 || Plane::num_components == 4); + + static_assert(Plane::template component_count() == 1); + static_assert(Plane::template component_count() == 1); + static_assert(Plane::template component_count() == 1); + + static constexpr bool has_alpha = Plane::template component_count(); + static constexpr bool has_padding = Plane::template component_count(); + + static constexpr bool needs_packed_access = Plane::total_bits != Plane::storage_bits; + + static constexpr size_t a_idx = Plane::template find_pos(); + static constexpr size_t x_idx = Plane::template find_pos(); + static constexpr size_t y_idx = Plane::template find_pos(); + static constexpr size_t cb_idx = Plane::template find_pos(); + static constexpr size_t cr_idx = Plane::template find_pos(); + + static constexpr size_t a_shift = has_alpha ? 16 - Plane::template component_size : 0; + static constexpr size_t x_shift = has_padding ? 16 - Plane::template component_size : 0; + static constexpr size_t y_shift = 16 - Plane::template component_size; + static constexpr size_t cb_shift = 16 - Plane::template component_size; + static constexpr size_t cr_shift = 16 - Plane::template component_size; + + static_assert(Plane::total_bits % 8 == 0); + static constexpr size_t bytes_per_pixel = Plane::total_bits / 8; + +public: + // Pack and write num_pixels pixels from src_line to dst_line + static void pack_line(HasIndexOperatorReturning auto&& dst_line, + HasIndexOperatorReturning auto&& src_line, + size_t num_pixels) + { + for (size_t x = 0; x < num_pixels; x++) { + const YUV16& pix = src_line[x]; + + std::array + components; + + if constexpr (has_alpha) + components[a_idx] = pix.a >> a_shift; + + if constexpr (has_padding) + components[x_idx] = 0; + + components[y_idx] = pix.y >> y_shift; + components[cb_idx] = pix.u >> cb_shift; + components[cr_idx] = pix.v >> cr_shift; + + if constexpr (!needs_packed_access) { + dst_line[x] = Plane::pack(components); + } else { + auto dst_bytes = reinterpret_cast(&dst_line[0]); + + TStorage packed = Plane::pack(components); + + memcpy(dst_bytes + x * bytes_per_pixel, &packed, + bytes_per_pixel); + } + } + } + + // Read and unpack num_pixels pixels from src_line to dst_line + static void unpack_line(HasIndexOperatorReturning auto&& dst_line, + HasIndexOperatorReturning auto&& src_line, + size_t num_pixels) + { + for (size_t x = 0; x < num_pixels; x++) { + decltype(Plane::unpack(src_line[x])) components; + + if constexpr (!needs_packed_access) { + components = Plane::unpack(src_line[x]); + } else { + auto src_bytes = + reinterpret_cast(&src_line[0]); + TStorage packed; + + memcpy(&packed, src_bytes + x * bytes_per_pixel, + bytes_per_pixel); + + components = Plane::unpack(packed); + } + + dst_line[x] = YUV16 { + static_cast(components[y_idx] << y_shift), + static_cast(components[cb_idx] << cb_shift), + static_cast(components[cr_idx] << cr_shift), + static_cast(has_alpha ? components[a_idx] << a_shift : 0), + }; + } + } + + static void write_pattern(IFramebuffer& fb, size_t start_y, size_t end_y, + auto&& generate_line) + { + std::vector linebuf(fb.width()); + + // View to the plane + auto view = make_strided_fb_view(fb.map(0), fb.height(), fb.width(), + fb.stride(0)); + + for (size_t y_src = start_y; y_src <= end_y; y_src++) { + generate_line(y_src, linebuf); + + auto dst = md::submdspan(view, y_src, md::full_extent); + + pack_line(dst, linebuf, fb.width()); + } + } + + static void get_line(IFramebuffer& fb, size_t w, size_t h, size_t row, std::span linebuf) + { + auto view = make_strided_fb_view(fb.map(0), fb.height(), fb.width(), + fb.stride(0)); + + auto src = md::submdspan(view, row, md::full_extent); + + unpack_line(linebuf, src); + } +}; + +} // namespace kms diff --git a/kms++util/src/conv.h b/kms++util/src/conv.h new file mode 100644 index 0000000..4b2baa5 --- /dev/null +++ b/kms++util/src/conv.h @@ -0,0 +1,7 @@ +#pragma once + +#include "conv-rgb.h" +#include "conv-yuv.h" +#include "conv-yuv-packed.h" +#include "conv-yuv-semiplanar.h" +#include "conv-yuv-planar.h" diff --git a/kms++util/src/testpat.cpp b/kms++util/src/testpat.cpp index 029684a..05efeab 100644 --- a/kms++util/src/testpat.cpp +++ b/kms++util/src/testpat.cpp @@ -1,8 +1,11 @@ -//#define DRAW_PERF_PRINT - #include -#include +#include +#include +#include +#include +#include +#include #ifdef HAS_PTHREAD #include @@ -11,15 +14,14 @@ #include #include +#include "conv.h" + using namespace std; namespace kms { -static RGB get_test_pattern_pixel(IFramebuffer& fb, unsigned x, unsigned y) +static RGB16 get_test_pattern_pixel_16(size_t w, size_t h, size_t x, size_t y) { - const unsigned w = fb.width(); - const unsigned h = fb.height(); - const unsigned mw = 20; const unsigned xm1 = mw; @@ -29,36 +31,36 @@ static RGB get_test_pattern_pixel(IFramebuffer& fb, unsigned x, unsigned y) // white margin lines if (x == xm1 || x == xm2 || y == ym1 || y == ym2) - return RGB(255, 255, 255); + return RGB16::from_8(255, 255, 255); // white box in top left corner else if (x < xm1 && y < ym1) - return RGB(255, 255, 255); + return RGB16::from_8(255, 255, 255); // white box outlines to corners else if ((x == 0 || x == w - 1) && (y < ym1 || y > ym2)) - return RGB(255, 255, 255); + return RGB16::from_8(255, 255, 255); // white box outlines to corners else if ((y == 0 || y == h - 1) && (x < xm1 || x > xm2)) - return RGB(255, 255, 255); + return RGB16::from_8(255, 255, 255); // blue bar on the left else if (x < xm1 && (y > ym1 && y < ym2)) - return RGB(0, 0, 255); + return RGB16::from_8(0, 0, 255); // blue bar on the top else if (y < ym1 && (x > xm1 && x < xm2)) - return RGB(0, 0, 255); + return RGB16::from_8(0, 0, 255); // red bar on the right else if (x > xm2 && (y > ym1 && y < ym2)) - return RGB(255, 0, 0); + return RGB16::from_8(255, 0, 0); // red bar on the bottom else if (y > ym2 && (x > xm1 && x < xm2)) - return RGB(255, 0, 0); + return RGB16::from_8(255, 0, 0); // inside the margins else if (x > xm1 && x < xm2 && y > ym1 && y < ym2) { // diagonal line if (x == y || w - x == h - y) - return RGB(255, 255, 255); + return RGB16::from_8(255, 255, 255); // diagonal line else if (w - x - 1 == y || x == h - y - 1) - return RGB(255, 255, 255); + return RGB16::from_8(255, 255, 255); else { int t = (x - xm1 - 1) * 8 / (xm2 - xm1 - 1); unsigned r = 0, g = 0, b = 0; @@ -91,108 +93,203 @@ static RGB get_test_pattern_pixel(IFramebuffer& fb, unsigned x, unsigned y) break; } - return RGB(r, g, b); + return RGB16::from_8(r, g, b); } } else { // black corners - return RGB(0, 0, 0); + return RGB16::from_8(0, 0, 0); } } -static void draw_test_pattern_part(IFramebuffer& fb, unsigned start_y, unsigned end_y, YUVType yuvt) +static void get_test_pattern_line(size_t w, size_t h, size_t row, std::span buf) { - unsigned x, y; - unsigned w = fb.width(); - - const PixelFormatInfo& format_info = get_pixel_format_info(fb.format()); - const PixelFormatPlaneInfo& plane_info = format_info.planes[format_info.num_planes - 1]; - - switch (format_info.type) { - case PixelColorType::RGB: - for (y = start_y; y < end_y; y++) { - for (x = 0; x < w; x++) { - RGB pixel = get_test_pattern_pixel(fb, x, y); - draw_rgb_pixel(fb, x, y, pixel); - } - } + for (size_t x = 0; x < w; ++x) + buf[x] = get_test_pattern_pixel_16(w, h, x, row); +} + +static void get_test_pattern_line_yuv(size_t w, size_t h, size_t row, + std::span buf, + const TestPatternOptions& options) +{ + for (size_t x = 0; x < w; ++x) + buf[x] = get_test_pattern_pixel_16(w, h, x, row) + .to_yuv(options.rec, options.range); +} + +static void get_plain_line_rgb(size_t w, const RGB16& color, std::span buf) +{ + for (size_t x = 0; x < w; ++x) + buf[x] = color; +} + +static void get_plain_line_yuv(size_t w, const YUV16& color, std::span buf) +{ + for (size_t x = 0; x < w; ++x) + buf[x] = color; +} + +static void draw_test_pattern_part(IFramebuffer& fb, size_t start_y, size_t end_y, + const TestPatternOptions& options) +{ + std::optional solid; + + if (options.pattern == "red") + solid = RGB16(0xffff, 0, 0); + else if (options.pattern == "green") + solid = RGB16(0, 0xffff, 0); + else if (options.pattern == "blue") + solid = RGB16(0, 0, 0xffff); + else if (options.pattern == "white") + solid = RGB16(0xffff, 0xffff, 0xffff); + else if (options.pattern == "black") + solid = RGB16(0, 0, 0); + + std::function span)> generate_line_rgb; + std::function span)> generate_line_yuv; + + if (solid.has_value()) { + generate_line_rgb = [&fb, rgb = solid.value()](size_t y, + std::span span) { + get_plain_line_rgb(fb.width(), rgb, span); + }; + + generate_line_yuv = [&fb, rgb = solid.value(), + &options](size_t y, std::span span) { + get_plain_line_yuv(fb.width(), + rgb.to_yuv(options.rec, options.range), span); + }; + } else { + generate_line_rgb = [&fb](size_t y, std::span span) { + get_test_pattern_line(fb.width(), fb.height(), y, span); + }; + + generate_line_yuv = [&fb, &options](size_t y, std::span span) { + get_test_pattern_line_yuv(fb.width(), fb.height(), y, span, + options); + }; + } + +#define CASE_ARGB(x) \ + case PixelFormat::x: \ + ARGB_Writer::write_pattern(fb, start_y, end_y, \ + generate_line_rgb); \ break; - case PixelColorType::YUV: - switch (plane_info.hsub + plane_info.vsub) { - case 2: - for (y = start_y; y < end_y; y++) { - for (x = 0; x < w; x++) { - RGB pixel = get_test_pattern_pixel(fb, x, y); - draw_yuv444_pixel(fb, x, y, pixel.yuv(yuvt)); - } - } - break; - - case 3: - for (y = start_y; y < end_y; y++) { - for (x = 0; x < w; x += 2) { - RGB pixel1 = get_test_pattern_pixel(fb, x, y); - RGB pixel2 = get_test_pattern_pixel(fb, x + 1, y); - draw_yuv422_macropixel(fb, x, y, pixel1.yuv(yuvt), pixel2.yuv(yuvt)); - } - } - break; - - case 4: - for (y = start_y; y < end_y; y += 2) { - for (x = 0; x < w; x += 2) { - RGB pixel00 = get_test_pattern_pixel(fb, x, y); - RGB pixel10 = get_test_pattern_pixel(fb, x + 1, y); - RGB pixel01 = get_test_pattern_pixel(fb, x, y + 1); - RGB pixel11 = get_test_pattern_pixel(fb, x + 1, y + 1); - draw_yuv420_macropixel(fb, x, y, - pixel00.yuv(yuvt), pixel10.yuv(yuvt), - pixel01.yuv(yuvt), pixel11.yuv(yuvt)); - } - } - break; +#define CASE_YUV(x) \ + case PixelFormat::x: \ + YUV_Writer::write_pattern(fb, start_y, end_y, \ + generate_line_yuv); \ + break; - default: - throw invalid_argument("unsupported number of pixel format planes"); - } +#define CASE_YUV_PACKED(x) \ + case PixelFormat::x: \ + YUVPackedWriter::write_pattern(fb, start_y, end_y, \ + generate_line_yuv); \ + break; +#define CASE_YUV_SEMI(x) \ + case PixelFormat::x: \ + YUVSemiPlanarWriter::write_pattern(fb, start_y, end_y, \ + generate_line_yuv); \ break; +#define CASE_YUV_PLANAR(x) \ + case PixelFormat::x: \ + YUVPlanarWriter::write_pattern(fb, start_y, end_y, \ + generate_line_yuv); \ + break; + + switch (fb.format()) { + CASE_YUV_SEMI(XV20); + CASE_YUV_SEMI(XV15); + CASE_YUV_SEMI(NV12); + CASE_YUV_SEMI(NV21); + CASE_YUV_SEMI(NV16); + CASE_YUV_SEMI(NV61); + + CASE_ARGB(RGB565); + CASE_ARGB(BGR565); + + CASE_ARGB(XRGB1555); + CASE_ARGB(ARGB1555); + CASE_ARGB(XRGB4444); + CASE_ARGB(ARGB4444); + + CASE_ARGB(RGB888); + CASE_ARGB(BGR888); + + CASE_ARGB(XRGB8888); + CASE_ARGB(ARGB8888); + CASE_ARGB(XBGR8888); + CASE_ARGB(ABGR8888); + CASE_ARGB(RGBX8888); + CASE_ARGB(RGBA8888); + CASE_ARGB(BGRX8888); + CASE_ARGB(BGRA8888); + CASE_ARGB(XRGB2101010); + CASE_ARGB(ARGB2101010); + CASE_ARGB(XBGR2101010); + CASE_ARGB(ABGR2101010); + CASE_ARGB(RGBX1010102); + CASE_ARGB(RGBA1010102); + CASE_ARGB(BGRX1010102); + CASE_ARGB(BGRA1010102); + + CASE_YUV_PACKED(YUYV); + CASE_YUV_PACKED(YVYU); + CASE_YUV_PACKED(UYVY); + CASE_YUV_PACKED(VYUY); + + CASE_YUV(XVUY2101010); + + CASE_YUV_PLANAR(YUV444); + CASE_YUV_PLANAR(YVU444); + CASE_YUV_PLANAR(YUV422); + CASE_YUV_PLANAR(YVU422); + CASE_YUV_PLANAR(YUV420); + CASE_YUV_PLANAR(YVU420); + default: - throw invalid_argument("unsupported pixel format"); + break; } } -static void draw_test_pattern_impl(IFramebuffer& fb, YUVType yuvt) +void draw_test_pattern_multi(IFramebuffer& fb, const TestPatternOptions& options) { -#ifdef HAS_PTHREAD - if (fb.height() < 20) { - draw_test_pattern_part(fb, 0, fb.height(), yuvt); - return; - } + auto& info = get_pixel_format_info(fb.format()); + uint8_t v_sub = 0; + for (size_t p = 0; p < info.num_planes; ++p) + v_sub = max(v_sub, info.planes[p].vsub); + + if (fb.height() % v_sub) + throw invalid_argument("FB height must be divisible with vsub"); // Create the mmaps before starting the threads - for (unsigned i = 0; i < fb.num_planes(); ++i) + for (size_t i = 0; i < fb.num_planes(); ++i) fb.map(i); - unsigned num_threads = thread::hardware_concurrency(); - vector workers; + size_t num_threads = thread::hardware_concurrency(); + + size_t part_height = fb.height() / num_threads; - unsigned part = (fb.height() / num_threads) & ~1; + // round up to v_sub + part_height = (part_height + v_sub - 1) / v_sub * v_sub; + + vector workers; std::vector errors(num_threads); - for (unsigned n = 0; n < num_threads; ++n) { - unsigned start = n * part; - unsigned end = start + part; + for (size_t n = 0; n < num_threads; ++n) { + size_t start = n * part_height; + size_t end = start + part_height - 1; if (n == num_threads - 1) - end = fb.height(); + end = fb.height() - 1; - workers.push_back(thread([&fb, start, end, yuvt, &error = errors[n]]() { + workers.push_back(thread([&fb, start, end, &options, &error = errors[n]]() { try { - draw_test_pattern_part(fb, start, end, yuvt); - } catch(...) { + draw_test_pattern_part(fb, start, end, options); + } catch (...) { error = std::current_exception(); } })); @@ -201,27 +298,23 @@ static void draw_test_pattern_impl(IFramebuffer& fb, YUVType yuvt) for (thread& t : workers) t.join(); - auto i = std::find_if(errors.begin(), errors.end(), [](auto& e) { return e != nullptr; }); + auto i = std::find_if(errors.begin(), errors.end(), + [](auto& e) { return e != nullptr; }); if (i != errors.end()) std::rethrow_exception(*i); - -#else - draw_test_pattern_part(fb, 0, fb.height(), yuvt); -#endif } -void draw_test_pattern(IFramebuffer& fb, YUVType yuvt) +void draw_test_pattern_single(IFramebuffer& fb, const TestPatternOptions& options) { -#ifdef DRAW_PERF_PRINT - Stopwatch sw; - sw.start(); -#endif - - draw_test_pattern_impl(fb, yuvt); + draw_test_pattern_part(fb, 0, fb.height() - 1, options); +} -#ifdef DRAW_PERF_PRINT - double us = sw.elapsed_us(); - printf("draw took %u us\n", (unsigned)us); +void draw_test_pattern(IFramebuffer& fb, const TestPatternOptions& options) +{ +#ifdef HAS_PTHREAD + draw_test_pattern_multi(fb, options); +#else + draw_test_pattern_single(fb, options); #endif } -- cgit v1.2.3