diff options
| author | Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> | 2025-02-05 19:09:43 +0200 |
|---|---|---|
| committer | Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> | 2025-03-26 15:44:00 +0200 |
| commit | 6c49fe5b811464f59e3a31b869734071da0ec7c1 (patch) | |
| tree | 3fb287472a670b1efe1866906db1ac67229d6d96 /ext/mdspan/include/experimental/__p2630_bits | |
| parent | 9b2a7728b2b0b26065ba79cfbbd20f783f4a9988 (diff) | |
Add mdspan includes
From https://github.com/kokkos/mdspan
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Diffstat (limited to 'ext/mdspan/include/experimental/__p2630_bits')
4 files changed, 1136 insertions, 0 deletions
diff --git a/ext/mdspan/include/experimental/__p2630_bits/strided_slice.hpp b/ext/mdspan/include/experimental/__p2630_bits/strided_slice.hpp new file mode 100644 index 0000000..89ba820 --- /dev/null +++ b/ext/mdspan/include/experimental/__p2630_bits/strided_slice.hpp @@ -0,0 +1,48 @@ + +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include <type_traits> + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { + +namespace { + template<class T> + struct __mdspan_is_integral_constant: std::false_type {}; + + template<class T, T val> + struct __mdspan_is_integral_constant<std::integral_constant<T,val>>: std::true_type {}; +} + +// Slice Specifier allowing for strides and compile time extent +template <class OffsetType, class ExtentType, class StrideType> +struct strided_slice { + using offset_type = OffsetType; + using extent_type = ExtentType; + using stride_type = StrideType; + + _MDSPAN_NO_UNIQUE_ADDRESS OffsetType offset{}; + _MDSPAN_NO_UNIQUE_ADDRESS ExtentType extent{}; + _MDSPAN_NO_UNIQUE_ADDRESS StrideType stride{}; + + static_assert(std::is_integral_v<OffsetType> || __mdspan_is_integral_constant<OffsetType>::value); + static_assert(std::is_integral_v<ExtentType> || __mdspan_is_integral_constant<ExtentType>::value); + static_assert(std::is_integral_v<StrideType> || __mdspan_is_integral_constant<StrideType>::value); +}; + +} // MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/ext/mdspan/include/experimental/__p2630_bits/submdspan.hpp b/ext/mdspan/include/experimental/__p2630_bits/submdspan.hpp new file mode 100644 index 0000000..abddd0b --- /dev/null +++ b/ext/mdspan/include/experimental/__p2630_bits/submdspan.hpp @@ -0,0 +1,40 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include "submdspan_extents.hpp" +#include "submdspan_mapping.hpp" + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +template <class ElementType, class Extents, class LayoutPolicy, + class AccessorPolicy, class... SliceSpecifiers> +MDSPAN_INLINE_FUNCTION +constexpr auto +submdspan(const mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy> &src, + SliceSpecifiers... slices) { + const auto sub_submdspan_mapping_result = submdspan_mapping(src.mapping(), slices...); + // NVCC has a problem with the deduction so lets figure out the type + using sub_mapping_t = std::remove_cv_t<decltype(sub_submdspan_mapping_result.mapping)>; + using sub_extents_t = typename sub_mapping_t::extents_type; + using sub_layout_t = typename sub_mapping_t::layout_type; + using sub_accessor_t = typename AccessorPolicy::offset_policy; + return mdspan<ElementType, sub_extents_t, sub_layout_t, sub_accessor_t>( + src.accessor().offset(src.data_handle(), sub_submdspan_mapping_result.offset), + sub_submdspan_mapping_result.mapping, + sub_accessor_t(src.accessor())); +} +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/ext/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp b/ext/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp new file mode 100644 index 0000000..4fe5dc6 --- /dev/null +++ b/ext/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -0,0 +1,418 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include <complex> + +#include "strided_slice.hpp" +#include "../__p0009_bits/utility.hpp" + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + +// Mapping from submapping ranks to srcmapping ranks +// InvMapRank is an index_sequence, which we build recursively +// to contain the mapped indices. +// end of recursion specialization containing the final index_sequence +template <size_t Counter, size_t... MapIdxs> +MDSPAN_INLINE_FUNCTION +constexpr auto inv_map_rank(std::integral_constant<size_t, Counter>, std::index_sequence<MapIdxs...>) { + return std::index_sequence<MapIdxs...>(); +} + +// specialization reducing rank by one (i.e., integral slice specifier) +template<size_t Counter, class Slice, class... SliceSpecifiers, size_t... MapIdxs> +MDSPAN_INLINE_FUNCTION +constexpr auto inv_map_rank(std::integral_constant<size_t, Counter>, std::index_sequence<MapIdxs...>, Slice, + SliceSpecifiers... slices) { + using next_idx_seq_t = std::conditional_t<std::is_convertible_v<Slice, size_t>, + std::index_sequence<MapIdxs...>, + std::index_sequence<MapIdxs..., Counter>>; + + return inv_map_rank(std::integral_constant<size_t,Counter + 1>(), next_idx_seq_t(), + slices...); +} + +// Helper for identifying strided_slice +template <class T> struct is_strided_slice : std::false_type {}; + +template <class OffsetType, class ExtentType, class StrideType> +struct is_strided_slice< + strided_slice<OffsetType, ExtentType, StrideType>> : std::true_type {}; + +// Helper for identifying valid pair like things +template <class T, class IndexType> struct index_pair_like : std::false_type {}; + +template <class IdxT1, class IdxT2, class IndexType> +struct index_pair_like<std::pair<IdxT1, IdxT2>, IndexType> { + static constexpr bool value = std::is_convertible_v<IdxT1, IndexType> && + std::is_convertible_v<IdxT2, IndexType>; +}; + +template <class IdxT1, class IdxT2, class IndexType> +struct index_pair_like<std::tuple<IdxT1, IdxT2>, IndexType> { + static constexpr bool value = std::is_convertible_v<IdxT1, IndexType> && + std::is_convertible_v<IdxT2, IndexType>; +}; + +template <class IdxT1, class IdxT2, class IndexType> +struct index_pair_like<tuple<IdxT1, IdxT2>, IndexType> { + static constexpr bool value = std::is_convertible_v<IdxT1, IndexType> && + std::is_convertible_v<IdxT2, IndexType>; +}; + +template <class IdxT, class IndexType> +struct index_pair_like<std::complex<IdxT>, IndexType> { + static constexpr bool value = std::is_convertible_v<IdxT, IndexType>; +}; + +template <class IdxT, class IndexType> +struct index_pair_like<std::array<IdxT, 2>, IndexType> { + static constexpr bool value = std::is_convertible_v<IdxT, IndexType>; +}; + +// first_of(slice): getting begin of slice specifier range +MDSPAN_TEMPLATE_REQUIRES( + class Integral, + /* requires */(std::is_convertible_v<Integral, size_t>) +) +MDSPAN_INLINE_FUNCTION +constexpr Integral first_of(const Integral &i) { + return i; +} + +template<class Integral, Integral v> +MDSPAN_INLINE_FUNCTION +constexpr Integral first_of(const std::integral_constant<Integral, v>&) { + return integral_constant<Integral, v>(); +} + +MDSPAN_INLINE_FUNCTION +constexpr integral_constant<size_t, 0> +first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { + return integral_constant<size_t, 0>(); +} + +MDSPAN_TEMPLATE_REQUIRES( + class Slice, + /* requires */(index_pair_like<Slice, size_t>::value) +) +MDSPAN_INLINE_FUNCTION +constexpr auto first_of(const Slice &i) { + return get<0>(i); +} + +MDSPAN_TEMPLATE_REQUIRES( + class IdxT1, class IdxT2, + /* requires */ (index_pair_like<std::tuple<IdxT1, IdxT2>, size_t>::value) + ) +constexpr auto first_of(const std::tuple<IdxT1, IdxT2>& i) { + return get<0>(i); +} + +MDSPAN_TEMPLATE_REQUIRES( + class IdxT1, class IdxT2, + /* requires */ (index_pair_like<std::pair<IdxT1, IdxT2>, size_t>::value) + ) +MDSPAN_INLINE_FUNCTION +constexpr auto first_of(const std::pair<IdxT1, IdxT2>& i) { + return i.first; +} + +template<class T> +MDSPAN_INLINE_FUNCTION +constexpr auto first_of(const std::complex<T> &i) { + return i.real(); +} + +template <class OffsetType, class ExtentType, class StrideType> +MDSPAN_INLINE_FUNCTION +constexpr OffsetType +first_of(const strided_slice<OffsetType, ExtentType, StrideType> &r) { + return r.offset; +} + +// last_of(slice): getting end of slice specifier range +// We need however not just the slice but also the extents +// of the original view and which rank from the extents. +// This is needed in the case of slice being full_extent_t. +MDSPAN_TEMPLATE_REQUIRES( + size_t k, class Extents, class Integral, + /* requires */(std::is_convertible_v<Integral, size_t>) +) +MDSPAN_INLINE_FUNCTION +constexpr Integral + last_of(std::integral_constant<size_t, k>, const Extents &, const Integral &i) { + return i; +} + +MDSPAN_TEMPLATE_REQUIRES( + size_t k, class Extents, class Slice, + /* requires */(index_pair_like<Slice, size_t>::value) +) +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(std::integral_constant<size_t, k>, const Extents &, + const Slice &i) { + return get<1>(i); +} + +MDSPAN_TEMPLATE_REQUIRES( + size_t k, class Extents, class IdxT1, class IdxT2, + /* requires */ (index_pair_like<std::tuple<IdxT1, IdxT2>, size_t>::value) + ) +constexpr auto last_of(std::integral_constant<size_t, k>, const Extents &, const std::tuple<IdxT1, IdxT2>& i) { + return get<1>(i); +} + +MDSPAN_TEMPLATE_REQUIRES( + size_t k, class Extents, class IdxT1, class IdxT2, + /* requires */ (index_pair_like<std::pair<IdxT1, IdxT2>, size_t>::value) + ) +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(std::integral_constant<size_t, k>, const Extents &, const std::pair<IdxT1, IdxT2>& i) { + return i.second; +} + +template<size_t k, class Extents, class T> +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(std::integral_constant<size_t, k>, const Extents &, const std::complex<T> &i) { + return i.imag(); +} + +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic push + #pragma nv_diag_suppress = implicit_return_from_non_void_function + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic push + #pragma diag_suppress implicit_return_from_non_void_function + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic push + #pragma diag_suppress = implicit_return_from_non_void_function +#endif +template <size_t k, class Extents> +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(std::integral_constant<size_t, k>, const Extents &ext, + ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { + if constexpr (Extents::static_extent(k) == dynamic_extent) { + return ext.extent(k); + } else { + return integral_constant<size_t, Extents::static_extent(k)>(); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + // Even with CUDA_ARCH protection this thing warns about calling host function + __builtin_unreachable(); +#endif +} +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic pop + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic pop + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic pop +#endif + +template <size_t k, class Extents, class OffsetType, class ExtentType, + class StrideType> +MDSPAN_INLINE_FUNCTION +constexpr OffsetType +last_of(std::integral_constant<size_t, k>, const Extents &, + const strided_slice<OffsetType, ExtentType, StrideType> &r) { + return r.extent; +} + +// get stride of slices +template <class T> +MDSPAN_INLINE_FUNCTION +constexpr auto stride_of(const T &) { + return integral_constant<size_t, 1>(); +} + +template <class OffsetType, class ExtentType, class StrideType> +MDSPAN_INLINE_FUNCTION +constexpr auto +stride_of(const strided_slice<OffsetType, ExtentType, StrideType> &r) { + return r.stride; +} + +// divide which can deal with integral constant preservation +template <class IndexT, class T0, class T1> +MDSPAN_INLINE_FUNCTION +constexpr auto divide(const T0 &v0, const T1 &v1) { + return IndexT(v0) / IndexT(v1); +} + +template <class IndexT, class T0, T0 v0, class T1, T1 v1> +MDSPAN_INLINE_FUNCTION +constexpr auto divide(const std::integral_constant<T0, v0> &, + const std::integral_constant<T1, v1> &) { + // cutting short division by zero + // this is used for strided_slice with zero extent/stride + return integral_constant<IndexT, v0 == 0 ? 0 : v0 / v1>(); +} + +// multiply which can deal with integral constant preservation +template <class IndexT, class T0, class T1> +MDSPAN_INLINE_FUNCTION +constexpr auto multiply(const T0 &v0, const T1 &v1) { + return IndexT(v0) * IndexT(v1); +} + +template <class IndexT, class T0, T0 v0, class T1, T1 v1> +MDSPAN_INLINE_FUNCTION +constexpr auto multiply(const std::integral_constant<T0, v0> &, + const std::integral_constant<T1, v1> &) { + return integral_constant<IndexT, v0 * v1>(); +} + +// compute new static extent from range, preserving static knowledge +template <class Arg0, class Arg1> struct StaticExtentFromRange { + constexpr static size_t value = dynamic_extent; +}; + +template <class Integral0, Integral0 val0, class Integral1, Integral1 val1> +struct StaticExtentFromRange<std::integral_constant<Integral0, val0>, + std::integral_constant<Integral1, val1>> { + constexpr static size_t value = val1 - val0; +}; + +template <class Integral0, Integral0 val0, class Integral1, Integral1 val1> +struct StaticExtentFromRange<integral_constant<Integral0, val0>, + integral_constant<Integral1, val1>> { + constexpr static size_t value = val1 - val0; +}; + +// compute new static extent from strided_slice, preserving static +// knowledge +template <class Arg0, class Arg1> struct StaticExtentFromStridedRange { + constexpr static size_t value = dynamic_extent; +}; + +template <class Integral0, Integral0 val0, class Integral1, Integral1 val1> +struct StaticExtentFromStridedRange<std::integral_constant<Integral0, val0>, + std::integral_constant<Integral1, val1>> { + constexpr static size_t value = val0 > 0 ? 1 + (val0 - 1) / val1 : 0; +}; + +template <class Integral0, Integral0 val0, class Integral1, Integral1 val1> +struct StaticExtentFromStridedRange<integral_constant<Integral0, val0>, + integral_constant<Integral1, val1>> { + constexpr static size_t value = val0 > 0 ? 1 + (val0 - 1) / val1 : 0; +}; + +// creates new extents through recursive calls to next_extent member function +// next_extent has different overloads for different types of stride specifiers +template <size_t K, class Extents, size_t... NewExtents> +struct extents_constructor { + MDSPAN_TEMPLATE_REQUIRES( + class Slice, class... SlicesAndExtents, + /* requires */(!std::is_convertible_v<Slice, size_t> && + !is_strided_slice<Slice>::value) + ) + MDSPAN_INLINE_FUNCTION + constexpr static auto next_extent(const Extents &ext, const Slice &sl, + SlicesAndExtents... slices_and_extents) { + constexpr size_t new_static_extent = StaticExtentFromRange< + decltype(first_of(std::declval<Slice>())), + decltype(last_of(std::integral_constant<size_t, Extents::rank() - K>(), + std::declval<Extents>(), + std::declval<Slice>()))>::value; + + using next_t = + extents_constructor<K - 1, Extents, NewExtents..., new_static_extent>; + using index_t = typename Extents::index_type; + return next_t::next_extent( + ext, slices_and_extents..., + index_t(last_of(std::integral_constant<size_t, Extents::rank() - K>(), ext, + sl)) - + index_t(first_of(sl))); + } + + MDSPAN_TEMPLATE_REQUIRES( + class Slice, class... SlicesAndExtents, + /* requires */ (std::is_convertible_v<Slice, size_t>) + ) + MDSPAN_INLINE_FUNCTION + constexpr static auto next_extent(const Extents &ext, const Slice &, + SlicesAndExtents... slices_and_extents) { + using next_t = extents_constructor<K - 1, Extents, NewExtents...>; + return next_t::next_extent(ext, slices_and_extents...); + } + + template <class OffsetType, class ExtentType, class StrideType, + class... SlicesAndExtents> + MDSPAN_INLINE_FUNCTION + constexpr static auto + next_extent(const Extents &ext, + const strided_slice<OffsetType, ExtentType, StrideType> &r, + SlicesAndExtents... slices_and_extents) { + using index_t = typename Extents::index_type; + using new_static_extent_t = + StaticExtentFromStridedRange<ExtentType, StrideType>; + if constexpr (new_static_extent_t::value == dynamic_extent) { + using next_t = + extents_constructor<K - 1, Extents, NewExtents..., dynamic_extent>; + return next_t::next_extent( + ext, slices_and_extents..., + r.extent > 0 ? 1 + divide<index_t>(r.extent - 1, r.stride) : 0); + } else { + constexpr size_t new_static_extent = new_static_extent_t::value; + using next_t = + extents_constructor<K - 1, Extents, NewExtents..., new_static_extent>; + return next_t::next_extent( + ext, slices_and_extents..., index_t(divide<index_t>(ExtentType(), StrideType()))); + } + } +}; + +template <class Extents, size_t... NewStaticExtents> +struct extents_constructor<0, Extents, NewStaticExtents...> { + + template <class... NewExtents> + MDSPAN_INLINE_FUNCTION + constexpr static auto next_extent(const Extents &, NewExtents... new_exts) { + return extents<typename Extents::index_type, NewStaticExtents...>( + new_exts...); + } +}; + +} // namespace detail + +// submdspan_extents creates new extents given src extents and submdspan slice +// specifiers +template <class IndexType, size_t... Extents, class... SliceSpecifiers> +MDSPAN_INLINE_FUNCTION +constexpr auto submdspan_extents(const extents<IndexType, Extents...> &src_exts, + SliceSpecifiers... slices) { + + using ext_t = extents<IndexType, Extents...>; + return detail::extents_constructor<ext_t::rank(), ext_t>::next_extent( + src_exts, slices...); +} +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/ext/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp b/ext/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp new file mode 100644 index 0000000..46ccbaa --- /dev/null +++ b/ext/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -0,0 +1,630 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include <array> +#include <type_traits> +#include <utility> // index_sequence +#include "../__p0009_bits/utility.hpp" + +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress = implicit_return_from_non_void_function +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic push +#pragma diag_suppress implicit_return_from_non_void_function +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic push +#pragma diag_suppress = implicit_return_from_non_void_function +#endif + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +//****************************************** +// Return type of submdspan_mapping overloads +//****************************************** +template <class LayoutMapping> struct submdspan_mapping_result { + _MDSPAN_NO_UNIQUE_ADDRESS LayoutMapping mapping{}; + size_t offset; +}; + +namespace detail { + +// We use const Slice& and not Slice&& because the various +// submdspan_mapping_impl overloads use their slices arguments +// multiple times. This makes perfect forwarding not useful, but we +// still don't want to pass those (possibly of size 64 x 3 bits) +// objects by value. +template <class IndexType, class Slice> +MDSPAN_INLINE_FUNCTION constexpr bool +one_slice_out_of_bounds(const IndexType &ext, const Slice &slice) { + using common_t = + std::common_type_t<decltype(detail::first_of(slice)), IndexType>; + return static_cast<common_t>(detail::first_of(slice)) == + static_cast<common_t>(ext); +} + +template <size_t... RankIndices, class IndexType, size_t... Exts, + class... Slices> +MDSPAN_INLINE_FUNCTION constexpr bool +any_slice_out_of_bounds_helper(std::index_sequence<RankIndices...>, + const extents<IndexType, Exts...> &exts, + const Slices &... slices) { + return _MDSPAN_FOLD_OR( + (one_slice_out_of_bounds(exts.extent(RankIndices), slices))); +} + +template <class IndexType, size_t... Exts, class... Slices> +MDSPAN_INLINE_FUNCTION constexpr bool +any_slice_out_of_bounds(const extents<IndexType, Exts...> &exts, + const Slices &... slices) { + return any_slice_out_of_bounds_helper( + std::make_index_sequence<sizeof...(Slices)>(), exts, slices...); +} + +// constructs sub strides +template<class T, size_t N> +struct sub_strides +{ + T values[N > 0 ? N : 1]; +}; + +template <class SrcMapping, class... slice_strides, size_t... InvMapIdxs> +MDSPAN_INLINE_FUNCTION constexpr auto construct_sub_strides( + const SrcMapping &src_mapping, std::index_sequence<InvMapIdxs...>, + const MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<slice_strides...> &slices_stride_factor) { + using index_type = typename SrcMapping::index_type; + return sub_strides<typename SrcMapping::index_type, sizeof...(InvMapIdxs)>{{ + (static_cast<index_type>(src_mapping.stride(InvMapIdxs)) * + static_cast<index_type>(get<InvMapIdxs>(slices_stride_factor)))...}}; +} + +template<class SliceSpecifier, class IndexType> +struct is_range_slice { + constexpr static bool value = + std::is_same_v<SliceSpecifier, full_extent_t> || + index_pair_like<SliceSpecifier, IndexType>::value; +}; + +template<class SliceSpecifier, class IndexType> +constexpr bool is_range_slice_v = is_range_slice<SliceSpecifier, IndexType>::value; + +template<class SliceSpecifier, class IndexType> +struct is_index_slice { + constexpr static bool value = std::is_convertible_v<SliceSpecifier, IndexType>; +}; + +template<class SliceSpecifier, class IndexType> +constexpr bool is_index_slice_v = is_index_slice<SliceSpecifier, IndexType>::value; + +} // namespace detail + +//********************************** +// layout_left submdspan_mapping +//********************************* +namespace detail { + +// Figure out whether to preserve layout_left +template <class IndexType, size_t SubRank, class IndexSequence, + class... SliceSpecifiers> +struct deduce_layout_left_submapping; + +template <class IndexType, size_t SubRank, size_t... Idx, + class... SliceSpecifiers> +struct deduce_layout_left_submapping< + IndexType, SubRank, std::index_sequence<Idx...>, SliceSpecifiers...> { + + using count_range = index_sequence_scan_impl< + 0u, (is_index_slice_v<SliceSpecifiers, IndexType> ? 0u : 1u)...>; + + constexpr static int gap_len = + (((Idx > 0 && count_range::get(Idx) == 1 && + is_index_slice_v<SliceSpecifiers, IndexType>) + ? 1 + : 0) + + ... + 0); + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_value() { + // Use layout_left for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_left for rank 1 result if leftmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx > 0 || is_range_slice_v<SliceSpecifiers, IndexType>)&&...); + } else { + // Preserve if leftmost SubRank-1 slices are full_extent_t and + // the slice at idx Subrank - 1 is a range and + // for idx > SubRank the slice is an index + return ((((Idx < SubRank - 1) && std::is_same_v<SliceSpecifiers, full_extent_t>) || + ((Idx == SubRank - 1) && is_range_slice_v<SliceSpecifiers, IndexType>) || + ((Idx > SubRank - 1) && is_index_slice_v<SliceSpecifiers, IndexType>)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_padded_value() { + // Technically could also keep layout_left_padded for SubRank==0 + // and SubRank==1 with leftmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // leftmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. R I I I F F F R I I for obtaining a rank-5 from a rank-10 + return ((((Idx == 0) && is_range_slice_v<SliceSpecifiers, IndexType>) || + ((Idx > 0 && Idx <= gap_len) && is_index_slice_v<SliceSpecifiers, IndexType>) || + ((Idx > gap_len && Idx < gap_len + SubRank - 1) && std::is_same_v<SliceSpecifiers, full_extent_t>) || + ((Idx == gap_len + SubRank - 1) && is_range_slice_v<SliceSpecifiers, IndexType>) || + ((Idx > gap_len + SubRank - 1) && is_index_slice_v<SliceSpecifiers, IndexType>)) && ... ); + } +}; + +// We are reusing the same thing for layout_left and layout_left_padded +// For layout_left as source StaticStride is static_extent(0) +template<class Extents, size_t NumGaps, size_t StaticStride> +struct compute_s_static_layout_left { + // Neither StaticStride nor any of the provided extents can be zero. + // StaticStride can never be zero, the static_extents we are looking at are associated with + // integral slice specifiers - which wouldn't be valid for zero extent + template<size_t ... Idx> + MDSPAN_INLINE_FUNCTION + static constexpr size_t value(std::index_sequence<Idx...>) { + size_t val = ((Idx>0 && Idx<=NumGaps ? (Extents::static_extent(Idx) == dynamic_extent?0:Extents::static_extent(Idx)) : 1) * ... * (StaticStride == dynamic_extent?0:StaticStride)); + return val == 0?dynamic_extent:val; + } +}; + +} // namespace detail + +// Actual submdspan mapping call +template <class Extents> +template <class... SliceSpecifiers> +MDSPAN_INLINE_FUNCTION constexpr auto +layout_left::mapping<Extents>::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + // figure out sub layout type + using deduce_layout = detail::deduce_layout_left_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + std::make_index_sequence<src_ext_t::rank()>, + SliceSpecifiers...>; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast<size_t>( + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + if constexpr (deduce_layout::layout_left_value()) { + // layout_left case + using dst_mapping_t = typename layout_left::template mapping<dst_ext_t>; + return submdspan_mapping_result<dst_mapping_t>{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (deduce_layout::layout_left_padded_value()) { + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left<Extents, deduce_layout::gap_len, Extents::static_extent(0)>::value(std::make_index_sequence<Extents::rank()>()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded<S_static>::template mapping<dst_ext_t>; + return submdspan_mapping_result<dst_mapping_t>{ + dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; + } else { + // layout_stride case + using dst_mapping_t = typename layout_stride::mapping<dst_ext_t>; + auto inv_map = detail::inv_map_rank(std::integral_constant<size_t, 0>(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result<dst_mapping_t> { + dst_mapping_t(mdspan_non_standard, dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA altogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + detail::tuple<decltype(detail::stride_of(slices))...>{ + detail::stride_of(slices)...}).values), +#else + detail::tuple{detail::stride_of(slices)...}).values), +#endif + offset + }; + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} + +template <size_t PaddingValue> +template <class Extents> +template <class... SliceSpecifiers> +MDSPAN_INLINE_FUNCTION constexpr auto +MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded<PaddingValue>::mapping<Extents>::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + if constexpr (Extents::rank() == 0) { // rank-0 case + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded<PaddingValue>::template mapping<Extents>; + return submdspan_mapping_result<dst_mapping_t>{*this, 0}; + } else { + const bool out_of_bounds = + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast<size_t>( + out_of_bounds ? this->required_span_size() + : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...)); + if constexpr (dst_ext_t::rank() == 0) { // result rank-0 + // The following for some reasons leads to compiler error later, while not using a typedef works: + // Compilers: CUDA 11.2 with GCC 9.1 + // + // using dst_mapping_t = typename layout_left::template mapping<dst_ext_t>; + // return submdspan_mapping_result<dst_mapping_t>{dst_mapping_t{dst_ext}, offset}; + // + // Error: submdspan_mapping.hpp:299:23: error: 'dst_mapping_t' does not name a type + // 299 | using dst_mapping_t = typename layout_left::template mapping<dst_ext_t>; + // The same error is given (about dst_mapping_t not naming type) when a different name is used in 299: + // using dst_mapping_t2 = typename layout_left::template mapping<dst_ext_t>; + + return submdspan_mapping_result<typename layout_left::template mapping<dst_ext_t>> + {typename layout_left::template mapping<dst_ext_t>{dst_ext}, offset}; + } else { // general case + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + // figure out sub layout type + using deduce_layout = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_left_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence<src_ext_t::rank()>()), + SliceSpecifiers...>; + + if constexpr (deduce_layout::layout_left_value() && dst_ext_t::rank() == 1) { // getting rank-1 from leftmost + using dst_mapping_t = typename layout_left::template mapping<dst_ext_t>; + return submdspan_mapping_result<dst_mapping_t>{dst_mapping_t{dst_ext}, offset}; + } else if constexpr (deduce_layout::layout_left_padded_value()) { // can keep layout_left_padded + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left<Extents, deduce_layout::gap_len, static_padding_stride>::value(std::make_index_sequence<Extents::rank()>()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded<S_static>::template mapping<dst_ext_t>; + return submdspan_mapping_result<dst_mapping_t>{ + dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; + } else { // layout_stride + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant<size_t, 0>(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping<dst_ext_t>; + return submdspan_mapping_result<dst_mapping_t> { + dst_mapping_t(mdspan_non_standard, dst_ext, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices))...>{ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values), +#else + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values), +#endif + offset + }; + } + } + } + + +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} + +//********************************** +// layout_right submdspan_mapping +//********************************* +namespace detail { + +// Figure out whether to preserve layout_right +template <class IndexType, size_t SubRank, class IndexSequence, + class... SliceSpecifiers> +struct deduce_layout_right_submapping; + +template <class IndexType, size_t SubRank, size_t... Idx, + class... SliceSpecifiers> +struct deduce_layout_right_submapping< + IndexType, SubRank, std::index_sequence<Idx...>, SliceSpecifiers...> { + + static constexpr size_t Rank = sizeof...(Idx); + using count_range = index_sequence_scan_impl< + 0u, (std::is_convertible_v<SliceSpecifiers, IndexType> ? 0u : 1u)...>; + //__static_partial_sums<!std::is_convertible_v<SliceSpecifiers, + // IndexType>...>; + constexpr static int gap_len = + (((Idx < Rank - 1 && count_range::get(Idx) == SubRank - 1 && + std::is_convertible_v<SliceSpecifiers, IndexType>) + ? 1 + : 0) + + ... + 0); + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_value() { + // Use layout_right for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_right for rank 1 result if rightmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx < Rank - 1 || is_range_slice_v<SliceSpecifiers, IndexType>)&&...); + } else { + // Preserve if rightmost SubRank-1 slices are full_extent_t and + // the slice at idx Rank-Subrank is a range and + // for idx < Rank - SubRank the slice is an index + return ((((Idx >= Rank - SubRank) && std::is_same_v<SliceSpecifiers, full_extent_t>) || + ((Idx == Rank - SubRank) && is_range_slice_v<SliceSpecifiers, IndexType>) || + ((Idx < Rank - SubRank) && is_index_slice_v<SliceSpecifiers, IndexType>)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_padded_value() { + // Technically could also keep layout_right_padded for SubRank==0 + // and SubRank==1 with rightmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // rightmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. I I R F F F I I I R for obtaining a rank-5 from a rank-10 + return ((((Idx == Rank - 1) && is_range_slice_v<SliceSpecifiers, IndexType>) || + ((Idx >= Rank - gap_len - 1 && Idx < Rank - 1) && is_index_slice_v<SliceSpecifiers, IndexType>) || + ((Idx > Rank - gap_len - SubRank && Idx < Rank - gap_len - 1) && std::is_same_v<SliceSpecifiers, full_extent_t>) || + ((Idx == Rank - gap_len - SubRank) && is_range_slice_v<SliceSpecifiers, IndexType>) || + ((Idx < Rank - gap_len - SubRank) && is_index_slice_v<SliceSpecifiers, IndexType>)) && ... ); + } +}; + +// We are reusing the same thing for layout_right and layout_right_padded +// For layout_right as source StaticStride is static_extent(Rank-1) +template<class Extents, size_t NumGaps, size_t StaticStride> +struct compute_s_static_layout_right { + // Neither StaticStride nor any of the provided extents can be zero. + // StaticStride can never be zero, the static_extents we are looking at are associated with + // integral slice specifiers - which wouldn't be valid for zero extent + template<size_t ... Idx> + MDSPAN_INLINE_FUNCTION + static constexpr size_t value(std::index_sequence<Idx...>) { + size_t val = ((Idx >= Extents::rank() - 1 - NumGaps && Idx < Extents::rank() - 1 ? (Extents::static_extent(Idx) == dynamic_extent?0:Extents::static_extent(Idx)) : 1) * ... * (StaticStride == dynamic_extent?0:StaticStride)); + return val == 0?dynamic_extent:val; + } +}; + +} // namespace detail + +// Actual submdspan mapping call +template <class Extents> +template <class... SliceSpecifiers> +MDSPAN_INLINE_FUNCTION constexpr auto +layout_right::mapping<Extents>::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + // figure out sub layout type + using deduce_layout = detail::deduce_layout_right_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + std::make_index_sequence<src_ext_t::rank()>, + SliceSpecifiers...>; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast<size_t>( + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + if constexpr (deduce_layout::layout_right_value()) { + // layout_right case + using dst_mapping_t = typename layout_right::mapping<dst_ext_t>; + return submdspan_mapping_result<dst_mapping_t>{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (deduce_layout::layout_right_padded_value()) { + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left<Extents, deduce_layout::gap_len, Extents::static_extent(Extents::rank() - 1)>::value(std::make_index_sequence<Extents::rank()>()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded<S_static>::template mapping<dst_ext_t>; + return submdspan_mapping_result<dst_mapping_t>{ + dst_mapping_t(dst_ext, + stride(src_ext_t::rank() - 2 - deduce_layout::gap_len)), + offset}; + } else { + // layout_stride case + using dst_mapping_t = typename layout_stride::mapping<dst_ext_t>; + auto inv_map = detail::inv_map_rank(std::integral_constant<size_t, 0>(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result<dst_mapping_t> { + dst_mapping_t(mdspan_non_standard, dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA altogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(detail::stride_of(slices))...>{ + detail::stride_of(slices)...}).values), +#else + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{detail::stride_of(slices)...}).values), +#endif + offset + }; + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} + +template <size_t PaddingValue> +template <class Extents> +template <class... SliceSpecifiers> +MDSPAN_INLINE_FUNCTION constexpr auto +MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded<PaddingValue>::mapping<Extents>::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + if constexpr (Extents::rank() == 0) { // rank-0 case + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded<PaddingValue>::template mapping<Extents>; + return submdspan_mapping_result<dst_mapping_t>{*this, 0}; + } else { + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + // figure out sub layout type + const bool out_of_bounds = + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast<size_t>( + out_of_bounds ? this->required_span_size() + : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...)); + if constexpr (dst_ext_t::rank() == 0) { // result rank-0 + // Same issue as in layout_left_padded: see comment there + // using dst_mapping_t = typename layout_right::template mapping<dst_ext_t>; + // return submdspan_mapping_result<dst_mapping_t>{dst_mapping_t{dst_ext}, offset}; + return submdspan_mapping_result<typename layout_right::template mapping<dst_ext_t>> + {typename layout_right::template mapping<dst_ext_t>{dst_ext}, offset}; + } else { // general case + using deduce_layout = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_right_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence<src_ext_t::rank()>()), + SliceSpecifiers...>; + + if constexpr (deduce_layout::layout_right_value() && dst_ext_t::rank() == 1) { // getting rank-1 from rightmost + using dst_mapping_t = typename layout_right::template mapping<dst_ext_t>; + return submdspan_mapping_result<dst_mapping_t>{dst_mapping_t{dst_ext}, offset}; + } else if constexpr (deduce_layout::layout_right_padded_value()) { // can keep layout_right_padded + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_right<Extents, deduce_layout::gap_len, static_padding_stride>::value(std::make_index_sequence<Extents::rank()>()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded<S_static>::template mapping<dst_ext_t>; + return submdspan_mapping_result<dst_mapping_t>{ + dst_mapping_t(dst_ext, stride(Extents::rank() - 2 - deduce_layout::gap_len)), offset}; + } else { // layout_stride + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant<size_t, 0>(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping<dst_ext_t>; + return submdspan_mapping_result<dst_mapping_t> { + dst_mapping_t(mdspan_non_standard, dst_ext, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices))...>{ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values), +#else + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values), +#endif + offset + }; + } + } + } + + +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} + +//********************************** +// layout_stride submdspan_mapping +//********************************* +template <class Extents> +template <class... SliceSpecifiers> +MDSPAN_INLINE_FUNCTION constexpr auto +layout_stride::mapping<Extents>::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + auto inv_map = detail::inv_map_rank(std::integral_constant<size_t, 0>(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping<dst_ext_t>; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast<size_t>( + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + return submdspan_mapping_result<dst_mapping_t> { + dst_mapping_t(mdspan_non_standard, dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(detail::stride_of(slices))...>( + detail::stride_of(slices)...)).values), +#else + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple(detail::stride_of(slices)...)).values), +#endif + offset + }; +} + +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic pop +#endif |
