From 6c49fe5b811464f59e3a31b869734071da0ec7c1 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 5 Feb 2025 19:09:43 +0200 Subject: Add mdspan includes From https://github.com/kokkos/mdspan Signed-off-by: Tomi Valkeinen --- .../experimental/__p2630_bits/strided_slice.hpp | 48 ++ .../experimental/__p2630_bits/submdspan.hpp | 40 ++ .../__p2630_bits/submdspan_extents.hpp | 418 ++++++++++++++ .../__p2630_bits/submdspan_mapping.hpp | 630 +++++++++++++++++++++ 4 files changed, 1136 insertions(+) create mode 100644 ext/mdspan/include/experimental/__p2630_bits/strided_slice.hpp create mode 100644 ext/mdspan/include/experimental/__p2630_bits/submdspan.hpp create mode 100644 ext/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp create mode 100644 ext/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp (limited to 'ext/mdspan/include/experimental/__p2630_bits') diff --git a/ext/mdspan/include/experimental/__p2630_bits/strided_slice.hpp b/ext/mdspan/include/experimental/__p2630_bits/strided_slice.hpp new file mode 100644 index 0000000..89ba820 --- /dev/null +++ b/ext/mdspan/include/experimental/__p2630_bits/strided_slice.hpp @@ -0,0 +1,48 @@ + +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { + +namespace { + template + struct __mdspan_is_integral_constant: std::false_type {}; + + template + struct __mdspan_is_integral_constant>: std::true_type {}; +} + +// Slice Specifier allowing for strides and compile time extent +template +struct strided_slice { + using offset_type = OffsetType; + using extent_type = ExtentType; + using stride_type = StrideType; + + _MDSPAN_NO_UNIQUE_ADDRESS OffsetType offset{}; + _MDSPAN_NO_UNIQUE_ADDRESS ExtentType extent{}; + _MDSPAN_NO_UNIQUE_ADDRESS StrideType stride{}; + + static_assert(std::is_integral_v || __mdspan_is_integral_constant::value); + static_assert(std::is_integral_v || __mdspan_is_integral_constant::value); + static_assert(std::is_integral_v || __mdspan_is_integral_constant::value); +}; + +} // MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/ext/mdspan/include/experimental/__p2630_bits/submdspan.hpp b/ext/mdspan/include/experimental/__p2630_bits/submdspan.hpp new file mode 100644 index 0000000..abddd0b --- /dev/null +++ b/ext/mdspan/include/experimental/__p2630_bits/submdspan.hpp @@ -0,0 +1,40 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include "submdspan_extents.hpp" +#include "submdspan_mapping.hpp" + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +template +MDSPAN_INLINE_FUNCTION +constexpr auto +submdspan(const mdspan &src, + SliceSpecifiers... slices) { + const auto sub_submdspan_mapping_result = submdspan_mapping(src.mapping(), slices...); + // NVCC has a problem with the deduction so lets figure out the type + using sub_mapping_t = std::remove_cv_t; + using sub_extents_t = typename sub_mapping_t::extents_type; + using sub_layout_t = typename sub_mapping_t::layout_type; + using sub_accessor_t = typename AccessorPolicy::offset_policy; + return mdspan( + src.accessor().offset(src.data_handle(), sub_submdspan_mapping_result.offset), + sub_submdspan_mapping_result.mapping, + sub_accessor_t(src.accessor())); +} +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/ext/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp b/ext/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp new file mode 100644 index 0000000..4fe5dc6 --- /dev/null +++ b/ext/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -0,0 +1,418 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +#include "strided_slice.hpp" +#include "../__p0009_bits/utility.hpp" + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + +// Mapping from submapping ranks to srcmapping ranks +// InvMapRank is an index_sequence, which we build recursively +// to contain the mapped indices. +// end of recursion specialization containing the final index_sequence +template +MDSPAN_INLINE_FUNCTION +constexpr auto inv_map_rank(std::integral_constant, std::index_sequence) { + return std::index_sequence(); +} + +// specialization reducing rank by one (i.e., integral slice specifier) +template +MDSPAN_INLINE_FUNCTION +constexpr auto inv_map_rank(std::integral_constant, std::index_sequence, Slice, + SliceSpecifiers... slices) { + using next_idx_seq_t = std::conditional_t, + std::index_sequence, + std::index_sequence>; + + return inv_map_rank(std::integral_constant(), next_idx_seq_t(), + slices...); +} + +// Helper for identifying strided_slice +template struct is_strided_slice : std::false_type {}; + +template +struct is_strided_slice< + strided_slice> : std::true_type {}; + +// Helper for identifying valid pair like things +template struct index_pair_like : std::false_type {}; + +template +struct index_pair_like, IndexType> { + static constexpr bool value = std::is_convertible_v && + std::is_convertible_v; +}; + +template +struct index_pair_like, IndexType> { + static constexpr bool value = std::is_convertible_v && + std::is_convertible_v; +}; + +template +struct index_pair_like, IndexType> { + static constexpr bool value = std::is_convertible_v && + std::is_convertible_v; +}; + +template +struct index_pair_like, IndexType> { + static constexpr bool value = std::is_convertible_v; +}; + +template +struct index_pair_like, IndexType> { + static constexpr bool value = std::is_convertible_v; +}; + +// first_of(slice): getting begin of slice specifier range +MDSPAN_TEMPLATE_REQUIRES( + class Integral, + /* requires */(std::is_convertible_v) +) +MDSPAN_INLINE_FUNCTION +constexpr Integral first_of(const Integral &i) { + return i; +} + +template +MDSPAN_INLINE_FUNCTION +constexpr Integral first_of(const std::integral_constant&) { + return integral_constant(); +} + +MDSPAN_INLINE_FUNCTION +constexpr integral_constant +first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { + return integral_constant(); +} + +MDSPAN_TEMPLATE_REQUIRES( + class Slice, + /* requires */(index_pair_like::value) +) +MDSPAN_INLINE_FUNCTION +constexpr auto first_of(const Slice &i) { + return get<0>(i); +} + +MDSPAN_TEMPLATE_REQUIRES( + class IdxT1, class IdxT2, + /* requires */ (index_pair_like, size_t>::value) + ) +constexpr auto first_of(const std::tuple& i) { + return get<0>(i); +} + +MDSPAN_TEMPLATE_REQUIRES( + class IdxT1, class IdxT2, + /* requires */ (index_pair_like, size_t>::value) + ) +MDSPAN_INLINE_FUNCTION +constexpr auto first_of(const std::pair& i) { + return i.first; +} + +template +MDSPAN_INLINE_FUNCTION +constexpr auto first_of(const std::complex &i) { + return i.real(); +} + +template +MDSPAN_INLINE_FUNCTION +constexpr OffsetType +first_of(const strided_slice &r) { + return r.offset; +} + +// last_of(slice): getting end of slice specifier range +// We need however not just the slice but also the extents +// of the original view and which rank from the extents. +// This is needed in the case of slice being full_extent_t. +MDSPAN_TEMPLATE_REQUIRES( + size_t k, class Extents, class Integral, + /* requires */(std::is_convertible_v) +) +MDSPAN_INLINE_FUNCTION +constexpr Integral + last_of(std::integral_constant, const Extents &, const Integral &i) { + return i; +} + +MDSPAN_TEMPLATE_REQUIRES( + size_t k, class Extents, class Slice, + /* requires */(index_pair_like::value) +) +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(std::integral_constant, const Extents &, + const Slice &i) { + return get<1>(i); +} + +MDSPAN_TEMPLATE_REQUIRES( + size_t k, class Extents, class IdxT1, class IdxT2, + /* requires */ (index_pair_like, size_t>::value) + ) +constexpr auto last_of(std::integral_constant, const Extents &, const std::tuple& i) { + return get<1>(i); +} + +MDSPAN_TEMPLATE_REQUIRES( + size_t k, class Extents, class IdxT1, class IdxT2, + /* requires */ (index_pair_like, size_t>::value) + ) +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(std::integral_constant, const Extents &, const std::pair& i) { + return i.second; +} + +template +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(std::integral_constant, const Extents &, const std::complex &i) { + return i.imag(); +} + +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic push + #pragma nv_diag_suppress = implicit_return_from_non_void_function + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic push + #pragma diag_suppress implicit_return_from_non_void_function + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic push + #pragma diag_suppress = implicit_return_from_non_void_function +#endif +template +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(std::integral_constant, const Extents &ext, + ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { + if constexpr (Extents::static_extent(k) == dynamic_extent) { + return ext.extent(k); + } else { + return integral_constant(); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + // Even with CUDA_ARCH protection this thing warns about calling host function + __builtin_unreachable(); +#endif +} +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic pop + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic pop + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic pop +#endif + +template +MDSPAN_INLINE_FUNCTION +constexpr OffsetType +last_of(std::integral_constant, const Extents &, + const strided_slice &r) { + return r.extent; +} + +// get stride of slices +template +MDSPAN_INLINE_FUNCTION +constexpr auto stride_of(const T &) { + return integral_constant(); +} + +template +MDSPAN_INLINE_FUNCTION +constexpr auto +stride_of(const strided_slice &r) { + return r.stride; +} + +// divide which can deal with integral constant preservation +template +MDSPAN_INLINE_FUNCTION +constexpr auto divide(const T0 &v0, const T1 &v1) { + return IndexT(v0) / IndexT(v1); +} + +template +MDSPAN_INLINE_FUNCTION +constexpr auto divide(const std::integral_constant &, + const std::integral_constant &) { + // cutting short division by zero + // this is used for strided_slice with zero extent/stride + return integral_constant(); +} + +// multiply which can deal with integral constant preservation +template +MDSPAN_INLINE_FUNCTION +constexpr auto multiply(const T0 &v0, const T1 &v1) { + return IndexT(v0) * IndexT(v1); +} + +template +MDSPAN_INLINE_FUNCTION +constexpr auto multiply(const std::integral_constant &, + const std::integral_constant &) { + return integral_constant(); +} + +// compute new static extent from range, preserving static knowledge +template struct StaticExtentFromRange { + constexpr static size_t value = dynamic_extent; +}; + +template +struct StaticExtentFromRange, + std::integral_constant> { + constexpr static size_t value = val1 - val0; +}; + +template +struct StaticExtentFromRange, + integral_constant> { + constexpr static size_t value = val1 - val0; +}; + +// compute new static extent from strided_slice, preserving static +// knowledge +template struct StaticExtentFromStridedRange { + constexpr static size_t value = dynamic_extent; +}; + +template +struct StaticExtentFromStridedRange, + std::integral_constant> { + constexpr static size_t value = val0 > 0 ? 1 + (val0 - 1) / val1 : 0; +}; + +template +struct StaticExtentFromStridedRange, + integral_constant> { + constexpr static size_t value = val0 > 0 ? 1 + (val0 - 1) / val1 : 0; +}; + +// creates new extents through recursive calls to next_extent member function +// next_extent has different overloads for different types of stride specifiers +template +struct extents_constructor { + MDSPAN_TEMPLATE_REQUIRES( + class Slice, class... SlicesAndExtents, + /* requires */(!std::is_convertible_v && + !is_strided_slice::value) + ) + MDSPAN_INLINE_FUNCTION + constexpr static auto next_extent(const Extents &ext, const Slice &sl, + SlicesAndExtents... slices_and_extents) { + constexpr size_t new_static_extent = StaticExtentFromRange< + decltype(first_of(std::declval())), + decltype(last_of(std::integral_constant(), + std::declval(), + std::declval()))>::value; + + using next_t = + extents_constructor; + using index_t = typename Extents::index_type; + return next_t::next_extent( + ext, slices_and_extents..., + index_t(last_of(std::integral_constant(), ext, + sl)) - + index_t(first_of(sl))); + } + + MDSPAN_TEMPLATE_REQUIRES( + class Slice, class... SlicesAndExtents, + /* requires */ (std::is_convertible_v) + ) + MDSPAN_INLINE_FUNCTION + constexpr static auto next_extent(const Extents &ext, const Slice &, + SlicesAndExtents... slices_and_extents) { + using next_t = extents_constructor; + return next_t::next_extent(ext, slices_and_extents...); + } + + template + MDSPAN_INLINE_FUNCTION + constexpr static auto + next_extent(const Extents &ext, + const strided_slice &r, + SlicesAndExtents... slices_and_extents) { + using index_t = typename Extents::index_type; + using new_static_extent_t = + StaticExtentFromStridedRange; + if constexpr (new_static_extent_t::value == dynamic_extent) { + using next_t = + extents_constructor; + return next_t::next_extent( + ext, slices_and_extents..., + r.extent > 0 ? 1 + divide(r.extent - 1, r.stride) : 0); + } else { + constexpr size_t new_static_extent = new_static_extent_t::value; + using next_t = + extents_constructor; + return next_t::next_extent( + ext, slices_and_extents..., index_t(divide(ExtentType(), StrideType()))); + } + } +}; + +template +struct extents_constructor<0, Extents, NewStaticExtents...> { + + template + MDSPAN_INLINE_FUNCTION + constexpr static auto next_extent(const Extents &, NewExtents... new_exts) { + return extents( + new_exts...); + } +}; + +} // namespace detail + +// submdspan_extents creates new extents given src extents and submdspan slice +// specifiers +template +MDSPAN_INLINE_FUNCTION +constexpr auto submdspan_extents(const extents &src_exts, + SliceSpecifiers... slices) { + + using ext_t = extents; + return detail::extents_constructor::next_extent( + src_exts, slices...); +} +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/ext/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp b/ext/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp new file mode 100644 index 0000000..46ccbaa --- /dev/null +++ b/ext/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -0,0 +1,630 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include +#include +#include // index_sequence +#include "../__p0009_bits/utility.hpp" + +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress = implicit_return_from_non_void_function +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic push +#pragma diag_suppress implicit_return_from_non_void_function +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic push +#pragma diag_suppress = implicit_return_from_non_void_function +#endif + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +//****************************************** +// Return type of submdspan_mapping overloads +//****************************************** +template struct submdspan_mapping_result { + _MDSPAN_NO_UNIQUE_ADDRESS LayoutMapping mapping{}; + size_t offset; +}; + +namespace detail { + +// We use const Slice& and not Slice&& because the various +// submdspan_mapping_impl overloads use their slices arguments +// multiple times. This makes perfect forwarding not useful, but we +// still don't want to pass those (possibly of size 64 x 3 bits) +// objects by value. +template +MDSPAN_INLINE_FUNCTION constexpr bool +one_slice_out_of_bounds(const IndexType &ext, const Slice &slice) { + using common_t = + std::common_type_t; + return static_cast(detail::first_of(slice)) == + static_cast(ext); +} + +template +MDSPAN_INLINE_FUNCTION constexpr bool +any_slice_out_of_bounds_helper(std::index_sequence, + const extents &exts, + const Slices &... slices) { + return _MDSPAN_FOLD_OR( + (one_slice_out_of_bounds(exts.extent(RankIndices), slices))); +} + +template +MDSPAN_INLINE_FUNCTION constexpr bool +any_slice_out_of_bounds(const extents &exts, + const Slices &... slices) { + return any_slice_out_of_bounds_helper( + std::make_index_sequence(), exts, slices...); +} + +// constructs sub strides +template +struct sub_strides +{ + T values[N > 0 ? N : 1]; +}; + +template +MDSPAN_INLINE_FUNCTION constexpr auto construct_sub_strides( + const SrcMapping &src_mapping, std::index_sequence, + const MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple &slices_stride_factor) { + using index_type = typename SrcMapping::index_type; + return sub_strides{{ + (static_cast(src_mapping.stride(InvMapIdxs)) * + static_cast(get(slices_stride_factor)))...}}; +} + +template +struct is_range_slice { + constexpr static bool value = + std::is_same_v || + index_pair_like::value; +}; + +template +constexpr bool is_range_slice_v = is_range_slice::value; + +template +struct is_index_slice { + constexpr static bool value = std::is_convertible_v; +}; + +template +constexpr bool is_index_slice_v = is_index_slice::value; + +} // namespace detail + +//********************************** +// layout_left submdspan_mapping +//********************************* +namespace detail { + +// Figure out whether to preserve layout_left +template +struct deduce_layout_left_submapping; + +template +struct deduce_layout_left_submapping< + IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { + + using count_range = index_sequence_scan_impl< + 0u, (is_index_slice_v ? 0u : 1u)...>; + + constexpr static int gap_len = + (((Idx > 0 && count_range::get(Idx) == 1 && + is_index_slice_v) + ? 1 + : 0) + + ... + 0); + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_value() { + // Use layout_left for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_left for rank 1 result if leftmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx > 0 || is_range_slice_v)&&...); + } else { + // Preserve if leftmost SubRank-1 slices are full_extent_t and + // the slice at idx Subrank - 1 is a range and + // for idx > SubRank the slice is an index + return ((((Idx < SubRank - 1) && std::is_same_v) || + ((Idx == SubRank - 1) && is_range_slice_v) || + ((Idx > SubRank - 1) && is_index_slice_v)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_padded_value() { + // Technically could also keep layout_left_padded for SubRank==0 + // and SubRank==1 with leftmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // leftmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. R I I I F F F R I I for obtaining a rank-5 from a rank-10 + return ((((Idx == 0) && is_range_slice_v) || + ((Idx > 0 && Idx <= gap_len) && is_index_slice_v) || + ((Idx > gap_len && Idx < gap_len + SubRank - 1) && std::is_same_v) || + ((Idx == gap_len + SubRank - 1) && is_range_slice_v) || + ((Idx > gap_len + SubRank - 1) && is_index_slice_v)) && ... ); + } +}; + +// We are reusing the same thing for layout_left and layout_left_padded +// For layout_left as source StaticStride is static_extent(0) +template +struct compute_s_static_layout_left { + // Neither StaticStride nor any of the provided extents can be zero. + // StaticStride can never be zero, the static_extents we are looking at are associated with + // integral slice specifiers - which wouldn't be valid for zero extent + template + MDSPAN_INLINE_FUNCTION + static constexpr size_t value(std::index_sequence) { + size_t val = ((Idx>0 && Idx<=NumGaps ? (Extents::static_extent(Idx) == dynamic_extent?0:Extents::static_extent(Idx)) : 1) * ... * (StaticStride == dynamic_extent?0:StaticStride)); + return val == 0?dynamic_extent:val; + } +}; + +} // namespace detail + +// Actual submdspan mapping call +template +template +MDSPAN_INLINE_FUNCTION constexpr auto +layout_left::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + // figure out sub layout type + using deduce_layout = detail::deduce_layout_left_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + std::make_index_sequence, + SliceSpecifiers...>; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + if constexpr (deduce_layout::layout_left_value()) { + // layout_left case + using dst_mapping_t = typename layout_left::template mapping; + return submdspan_mapping_result{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (deduce_layout::layout_left_padded_value()) { + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; + } else { + // layout_stride case + using dst_mapping_t = typename layout_stride::mapping; + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result { + dst_mapping_t(mdspan_non_standard, dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA altogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + detail::tuple{ + detail::stride_of(slices)...}).values), +#else + detail::tuple{detail::stride_of(slices)...}).values), +#endif + offset + }; + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} + +template +template +template +MDSPAN_INLINE_FUNCTION constexpr auto +MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + if constexpr (Extents::rank() == 0) { // rank-0 case + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; + return submdspan_mapping_result{*this, 0}; + } else { + const bool out_of_bounds = + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...)); + if constexpr (dst_ext_t::rank() == 0) { // result rank-0 + // The following for some reasons leads to compiler error later, while not using a typedef works: + // Compilers: CUDA 11.2 with GCC 9.1 + // + // using dst_mapping_t = typename layout_left::template mapping; + // return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + // + // Error: submdspan_mapping.hpp:299:23: error: 'dst_mapping_t' does not name a type + // 299 | using dst_mapping_t = typename layout_left::template mapping; + // The same error is given (about dst_mapping_t not naming type) when a different name is used in 299: + // using dst_mapping_t2 = typename layout_left::template mapping; + + return submdspan_mapping_result> + {typename layout_left::template mapping{dst_ext}, offset}; + } else { // general case + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + // figure out sub layout type + using deduce_layout = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_left_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence()), + SliceSpecifiers...>; + + if constexpr (deduce_layout::layout_left_value() && dst_ext_t::rank() == 1) { // getting rank-1 from leftmost + using dst_mapping_t = typename layout_left::template mapping; + return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + } else if constexpr (deduce_layout::layout_left_padded_value()) { // can keep layout_left_padded + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; + } else { // layout_stride + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping; + return submdspan_mapping_result { + dst_mapping_t(mdspan_non_standard, dst_ext, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values), +#else + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values), +#endif + offset + }; + } + } + } + + +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} + +//********************************** +// layout_right submdspan_mapping +//********************************* +namespace detail { + +// Figure out whether to preserve layout_right +template +struct deduce_layout_right_submapping; + +template +struct deduce_layout_right_submapping< + IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { + + static constexpr size_t Rank = sizeof...(Idx); + using count_range = index_sequence_scan_impl< + 0u, (std::is_convertible_v ? 0u : 1u)...>; + //__static_partial_sums...>; + constexpr static int gap_len = + (((Idx < Rank - 1 && count_range::get(Idx) == SubRank - 1 && + std::is_convertible_v) + ? 1 + : 0) + + ... + 0); + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_value() { + // Use layout_right for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_right for rank 1 result if rightmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx < Rank - 1 || is_range_slice_v)&&...); + } else { + // Preserve if rightmost SubRank-1 slices are full_extent_t and + // the slice at idx Rank-Subrank is a range and + // for idx < Rank - SubRank the slice is an index + return ((((Idx >= Rank - SubRank) && std::is_same_v) || + ((Idx == Rank - SubRank) && is_range_slice_v) || + ((Idx < Rank - SubRank) && is_index_slice_v)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_padded_value() { + // Technically could also keep layout_right_padded for SubRank==0 + // and SubRank==1 with rightmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // rightmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. I I R F F F I I I R for obtaining a rank-5 from a rank-10 + return ((((Idx == Rank - 1) && is_range_slice_v) || + ((Idx >= Rank - gap_len - 1 && Idx < Rank - 1) && is_index_slice_v) || + ((Idx > Rank - gap_len - SubRank && Idx < Rank - gap_len - 1) && std::is_same_v) || + ((Idx == Rank - gap_len - SubRank) && is_range_slice_v) || + ((Idx < Rank - gap_len - SubRank) && is_index_slice_v)) && ... ); + } +}; + +// We are reusing the same thing for layout_right and layout_right_padded +// For layout_right as source StaticStride is static_extent(Rank-1) +template +struct compute_s_static_layout_right { + // Neither StaticStride nor any of the provided extents can be zero. + // StaticStride can never be zero, the static_extents we are looking at are associated with + // integral slice specifiers - which wouldn't be valid for zero extent + template + MDSPAN_INLINE_FUNCTION + static constexpr size_t value(std::index_sequence) { + size_t val = ((Idx >= Extents::rank() - 1 - NumGaps && Idx < Extents::rank() - 1 ? (Extents::static_extent(Idx) == dynamic_extent?0:Extents::static_extent(Idx)) : 1) * ... * (StaticStride == dynamic_extent?0:StaticStride)); + return val == 0?dynamic_extent:val; + } +}; + +} // namespace detail + +// Actual submdspan mapping call +template +template +MDSPAN_INLINE_FUNCTION constexpr auto +layout_right::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + // figure out sub layout type + using deduce_layout = detail::deduce_layout_right_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + std::make_index_sequence, + SliceSpecifiers...>; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + if constexpr (deduce_layout::layout_right_value()) { + // layout_right case + using dst_mapping_t = typename layout_right::mapping; + return submdspan_mapping_result{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (deduce_layout::layout_right_padded_value()) { + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, + stride(src_ext_t::rank() - 2 - deduce_layout::gap_len)), + offset}; + } else { + // layout_stride case + using dst_mapping_t = typename layout_stride::mapping; + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result { + dst_mapping_t(mdspan_non_standard, dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA altogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{ + detail::stride_of(slices)...}).values), +#else + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{detail::stride_of(slices)...}).values), +#endif + offset + }; + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} + +template +template +template +MDSPAN_INLINE_FUNCTION constexpr auto +MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + if constexpr (Extents::rank() == 0) { // rank-0 case + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; + return submdspan_mapping_result{*this, 0}; + } else { + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + // figure out sub layout type + const bool out_of_bounds = + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...)); + if constexpr (dst_ext_t::rank() == 0) { // result rank-0 + // Same issue as in layout_left_padded: see comment there + // using dst_mapping_t = typename layout_right::template mapping; + // return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + return submdspan_mapping_result> + {typename layout_right::template mapping{dst_ext}, offset}; + } else { // general case + using deduce_layout = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_right_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence()), + SliceSpecifiers...>; + + if constexpr (deduce_layout::layout_right_value() && dst_ext_t::rank() == 1) { // getting rank-1 from rightmost + using dst_mapping_t = typename layout_right::template mapping; + return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + } else if constexpr (deduce_layout::layout_right_padded_value()) { // can keep layout_right_padded + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_right::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(Extents::rank() - 2 - deduce_layout::gap_len)), offset}; + } else { // layout_stride + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping; + return submdspan_mapping_result { + dst_mapping_t(mdspan_non_standard, dst_ext, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values), +#else + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values), +#endif + offset + }; + } + } + } + + +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} + +//********************************** +// layout_stride submdspan_mapping +//********************************* +template +template +MDSPAN_INLINE_FUNCTION constexpr auto +layout_stride::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + return submdspan_mapping_result { + dst_mapping_t(mdspan_non_standard, dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple( + detail::stride_of(slices)...)).values), +#else + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple(detail::stride_of(slices)...)).values), +#endif + offset + }; +} + +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic pop +#endif -- cgit v1.2.3