summaryrefslogtreecommitdiff
path: root/ext/mdspan/include/experimental/__p2630_bits
diff options
context:
space:
mode:
authorTomi Valkeinen <tomi.valkeinen@ideasonboard.com>2025-02-05 19:09:43 +0200
committerTomi Valkeinen <tomi.valkeinen@ideasonboard.com>2025-03-26 15:44:00 +0200
commit6c49fe5b811464f59e3a31b869734071da0ec7c1 (patch)
tree3fb287472a670b1efe1866906db1ac67229d6d96 /ext/mdspan/include/experimental/__p2630_bits
parent9b2a7728b2b0b26065ba79cfbbd20f783f4a9988 (diff)
Add mdspan includes
From https://github.com/kokkos/mdspan Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Diffstat (limited to 'ext/mdspan/include/experimental/__p2630_bits')
-rw-r--r--ext/mdspan/include/experimental/__p2630_bits/strided_slice.hpp48
-rw-r--r--ext/mdspan/include/experimental/__p2630_bits/submdspan.hpp40
-rw-r--r--ext/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp418
-rw-r--r--ext/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp630
4 files changed, 1136 insertions, 0 deletions
diff --git a/ext/mdspan/include/experimental/__p2630_bits/strided_slice.hpp b/ext/mdspan/include/experimental/__p2630_bits/strided_slice.hpp
new file mode 100644
index 0000000..89ba820
--- /dev/null
+++ b/ext/mdspan/include/experimental/__p2630_bits/strided_slice.hpp
@@ -0,0 +1,48 @@
+
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 4.0
+// Copyright (2022) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#pragma once
+
+#include <type_traits>
+
+namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
+
+// Trait: true only for std::integral_constant specializations. Defined at
+// namespace scope rather than in an unnamed namespace, so every translation
+// unit including this header refers to the same entity (no ODR hazard).
+template<class T>
+struct __mdspan_is_integral_constant: std::false_type {};
+template<class T, T val>
+struct __mdspan_is_integral_constant<std::integral_constant<T,val>>: std::true_type {};
+
+// Slice specifier made of an offset, an extent and a stride; each may be a runtime integer or a compile-time constant
+template <class OffsetType, class ExtentType, class StrideType>
+struct strided_slice {
+ using offset_type = OffsetType;
+ using extent_type = ExtentType;
+ using stride_type = StrideType;
+// All three members are value-initialized by default; _MDSPAN_NO_UNIQUE_ADDRESS is a project macro defined outside this header
+ _MDSPAN_NO_UNIQUE_ADDRESS OffsetType offset{};
+ _MDSPAN_NO_UNIQUE_ADDRESS ExtentType extent{};
+ _MDSPAN_NO_UNIQUE_ADDRESS StrideType stride{};
+// Each template argument must be a built-in integral type or a std::integral_constant specialization
+ static_assert(std::is_integral_v<OffsetType> || __mdspan_is_integral_constant<OffsetType>::value);
+ static_assert(std::is_integral_v<ExtentType> || __mdspan_is_integral_constant<ExtentType>::value);
+ static_assert(std::is_integral_v<StrideType> || __mdspan_is_integral_constant<StrideType>::value);
+};
+
+} // MDSPAN_IMPL_STANDARD_NAMESPACE
diff --git a/ext/mdspan/include/experimental/__p2630_bits/submdspan.hpp b/ext/mdspan/include/experimental/__p2630_bits/submdspan.hpp
new file mode 100644
index 0000000..abddd0b
--- /dev/null
+++ b/ext/mdspan/include/experimental/__p2630_bits/submdspan.hpp
@@ -0,0 +1,40 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 4.0
+// Copyright (2022) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#pragma once
+
+#include "submdspan_extents.hpp"
+#include "submdspan_mapping.hpp"
+
+namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
+// Creates a view onto the part of `src` picked out by `slices`; the source
+// layout's submdspan_mapping provides the new mapping and the element offset.
+template <class ElementType, class Extents, class LayoutPolicy,
+ class AccessorPolicy, class... SliceSpecifiers>
+MDSPAN_INLINE_FUNCTION constexpr auto
+submdspan(const mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy> &src,
+ SliceSpecifiers... slices) {
+ const auto sliced = submdspan_mapping(src.mapping(), slices...);
+ // Every type is spelled out because NVCC has trouble deducing them.
+ using mapping_t = std::remove_cv_t<decltype(sliced.mapping)>;
+ using accessor_t = typename AccessorPolicy::offset_policy;
+ using result_t = mdspan<ElementType, typename mapping_t::extents_type,
+ typename mapping_t::layout_type, accessor_t>;
+ // The data handle is advanced by the offset reported with the sub-mapping.
+ return result_t(src.accessor().offset(src.data_handle(), sliced.offset),
+ sliced.mapping, accessor_t(src.accessor()));
+}
+} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE
diff --git a/ext/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp b/ext/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp
new file mode 100644
index 0000000..4fe5dc6
--- /dev/null
+++ b/ext/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp
@@ -0,0 +1,418 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 4.0
+// Copyright (2022) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#pragma once
+
+#include <complex>
+
+#include "strided_slice.hpp"
+#include "../__p0009_bits/utility.hpp"
+
+namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
+namespace detail {
+
+// inv_map_rank: computes, as a std::index_sequence, which source ranks survive
+// in the sub-mapping. Counter is the source rank currently being examined and
+// MapIdxs holds the source ranks kept so far.
+// Overload ending the recursion: no slices are left, so return the sequence built so far.
+template <size_t Counter, size_t... MapIdxs>
+MDSPAN_INLINE_FUNCTION
+constexpr auto inv_map_rank(std::integral_constant<size_t, Counter>, std::index_sequence<MapIdxs...>) {
+ return std::index_sequence<MapIdxs...>();
+}
+
+// Recursive overload: looks at the first slice, then recurses on the rest with Counter + 1.
+template<size_t Counter, class Slice, class... SliceSpecifiers, size_t... MapIdxs>
+MDSPAN_INLINE_FUNCTION
+constexpr auto inv_map_rank(std::integral_constant<size_t, Counter>, std::index_sequence<MapIdxs...>, Slice,
+ SliceSpecifiers... slices) {
+ using next_idx_seq_t = std::conditional_t<std::is_convertible_v<Slice, size_t>,
+ std::index_sequence<MapIdxs...>,
+ std::index_sequence<MapIdxs..., Counter>>;
+// A slice convertible to size_t drops its rank (sequence unchanged); any other slice appends Counter.
+ return inv_map_rank(std::integral_constant<size_t,Counter + 1>(), next_idx_seq_t(),
+ slices...);
+}
+
+// Type trait: is_strided_slice<T>::value is true exactly when T is some
+// strided_slice<Offset, Extent, Stride>; every other type yields false.
+template <class T>
+struct is_strided_slice : std::false_type {};
+template <class Offset, class Extent, class Stride>
+struct is_strided_slice<strided_slice<Offset, Extent, Stride>> : std::true_type {};
+
+// index_pair_like<T, IndexType>::value: true when T is one of the two-element types below and its element type(s) convert to IndexType
+template <class T, class IndexType> struct index_pair_like : std::false_type {};
+// std::pair: both element types must be convertible to IndexType
+template <class IdxT1, class IdxT2, class IndexType>
+struct index_pair_like<std::pair<IdxT1, IdxT2>, IndexType> {
+ static constexpr bool value = std::is_convertible_v<IdxT1, IndexType> &&
+ std::is_convertible_v<IdxT2, IndexType>;
+};
+// std::tuple with exactly two elements
+template <class IdxT1, class IdxT2, class IndexType>
+struct index_pair_like<std::tuple<IdxT1, IdxT2>, IndexType> {
+ static constexpr bool value = std::is_convertible_v<IdxT1, IndexType> &&
+ std::is_convertible_v<IdxT2, IndexType>;
+};
+// Unqualified `tuple` with two elements: a type declared elsewhere in the project (presumably detail::tuple)
+template <class IdxT1, class IdxT2, class IndexType>
+struct index_pair_like<tuple<IdxT1, IdxT2>, IndexType> {
+ static constexpr bool value = std::is_convertible_v<IdxT1, IndexType> &&
+ std::is_convertible_v<IdxT2, IndexType>;
+};
+// std::complex: accepted when its value type converts to IndexType (real/imag act as begin/end in first_of/last_of)
+template <class IdxT, class IndexType>
+struct index_pair_like<std::complex<IdxT>, IndexType> {
+ static constexpr bool value = std::is_convertible_v<IdxT, IndexType>;
+};
+// std::array of exactly two elements
+template <class IdxT, class IndexType>
+struct index_pair_like<std::array<IdxT, 2>, IndexType> {
+ static constexpr bool value = std::is_convertible_v<IdxT, IndexType>;
+};
+
+// first_of(slice): returns the lower bound (begin) of a slice specifier; one overload per kind of slice
+MDSPAN_TEMPLATE_REQUIRES(
+ class Integral,
+ /* requires */(std::is_convertible_v<Integral, size_t>)
+)
+MDSPAN_INLINE_FUNCTION
+constexpr Integral first_of(const Integral &i) {
+ return i; // a single index is its own lower bound
+}
+// std::integral_constant index: the declared return type is the underlying Integral, so a plain value is returned
+template<class Integral, Integral v>
+MDSPAN_INLINE_FUNCTION
+constexpr Integral first_of(const std::integral_constant<Integral, v>&) {
+ return integral_constant<Integral, v>();
+}
+// full_extent_t: the range starts at the compile-time constant 0
+MDSPAN_INLINE_FUNCTION
+constexpr integral_constant<size_t, 0>
+first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) {
+ return integral_constant<size_t, 0>();
+}
+// Any type accepted by index_pair_like: the first element, obtained through an unqualified get<0>
+MDSPAN_TEMPLATE_REQUIRES(
+ class Slice,
+ /* requires */(index_pair_like<Slice, size_t>::value)
+)
+MDSPAN_INLINE_FUNCTION
+constexpr auto first_of(const Slice &i) {
+ return get<0>(i);
+}
+// std::tuple overload. NOTE(review): unlike its siblings it is not marked MDSPAN_INLINE_FUNCTION - confirm this is intentional
+MDSPAN_TEMPLATE_REQUIRES(
+ class IdxT1, class IdxT2,
+ /* requires */ (index_pair_like<std::tuple<IdxT1, IdxT2>, size_t>::value)
+ )
+constexpr auto first_of(const std::tuple<IdxT1, IdxT2>& i) {
+ return get<0>(i);
+}
+// std::pair overload: the lower bound is the `first` member
+MDSPAN_TEMPLATE_REQUIRES(
+ class IdxT1, class IdxT2,
+ /* requires */ (index_pair_like<std::pair<IdxT1, IdxT2>, size_t>::value)
+ )
+MDSPAN_INLINE_FUNCTION
+constexpr auto first_of(const std::pair<IdxT1, IdxT2>& i) {
+ return i.first;
+}
+// std::complex overload: the real part is used as the lower bound
+template<class T>
+MDSPAN_INLINE_FUNCTION
+constexpr auto first_of(const std::complex<T> &i) {
+ return i.real();
+}
+// strided_slice overload: the lower bound is the slice's offset member
+template <class OffsetType, class ExtentType, class StrideType>
+MDSPAN_INLINE_FUNCTION
+constexpr OffsetType
+first_of(const strided_slice<OffsetType, ExtentType, StrideType> &r) {
+ return r.offset;
+}
+
+// last_of(rank, extents, slice): returns the upper bound (end) of a slice specifier.
+// Besides the slice, the overloads take the extents of the original view
+// and the rank k (as an integral_constant) the slice applies to.
+// Both are only needed when the slice is full_extent_t; the overloads here ignore them.
+MDSPAN_TEMPLATE_REQUIRES(
+ size_t k, class Extents, class Integral,
+ /* requires */(std::is_convertible_v<Integral, size_t>)
+)
+MDSPAN_INLINE_FUNCTION
+constexpr Integral
+ last_of(std::integral_constant<size_t, k>, const Extents &, const Integral &i) {
+ return i; // a single index is returned unchanged
+}
+// Any type accepted by index_pair_like: the second element, obtained through an unqualified get<1>
+MDSPAN_TEMPLATE_REQUIRES(
+ size_t k, class Extents, class Slice,
+ /* requires */(index_pair_like<Slice, size_t>::value)
+)
+MDSPAN_INLINE_FUNCTION
+constexpr auto last_of(std::integral_constant<size_t, k>, const Extents &,
+ const Slice &i) {
+ return get<1>(i);
+}
+// std::tuple overload. NOTE(review): unlike its siblings it is not marked MDSPAN_INLINE_FUNCTION - confirm this is intentional
+MDSPAN_TEMPLATE_REQUIRES(
+ size_t k, class Extents, class IdxT1, class IdxT2,
+ /* requires */ (index_pair_like<std::tuple<IdxT1, IdxT2>, size_t>::value)
+ )
+constexpr auto last_of(std::integral_constant<size_t, k>, const Extents &, const std::tuple<IdxT1, IdxT2>& i) {
+ return get<1>(i);
+}
+// std::pair overload: the upper bound is the `second` member
+MDSPAN_TEMPLATE_REQUIRES(
+ size_t k, class Extents, class IdxT1, class IdxT2,
+ /* requires */ (index_pair_like<std::pair<IdxT1, IdxT2>, size_t>::value)
+ )
+MDSPAN_INLINE_FUNCTION
+constexpr auto last_of(std::integral_constant<size_t, k>, const Extents &, const std::pair<IdxT1, IdxT2>& i) {
+ return i.second;
+}
+// std::complex overload: the imaginary part is used as the upper bound
+template<size_t k, class Extents, class T>
+MDSPAN_INLINE_FUNCTION
+constexpr auto last_of(std::integral_constant<size_t, k>, const Extents &, const std::complex<T> &i) {
+ return i.imag();
+}
+
+// Suppress spurious warning with NVCC about no return statement.
+// This is a known issue in NVCC and NVC++
+// Depending on the CUDA and GCC version we need both the builtin
+// and the diagnostic push. I tried really hard to find something shorter
+// but no luck ...
+#if defined __NVCC__
+ #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+ #pragma nv_diagnostic push
+ #pragma nv_diag_suppress = implicit_return_from_non_void_function
+ #else
+ #ifdef __CUDA_ARCH__
+ #pragma diagnostic push
+ #pragma diag_suppress implicit_return_from_non_void_function
+ #endif
+ #endif
+#elif defined __NVCOMPILER
+ #pragma diagnostic push
+ #pragma diag_suppress = implicit_return_from_non_void_function
+#endif
+template <size_t k, class Extents> // last_of for full_extent_t: the upper bound is the whole extent of rank k
+MDSPAN_INLINE_FUNCTION
+constexpr auto last_of(std::integral_constant<size_t, k>, const Extents &ext,
+ ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) {
+ if constexpr (Extents::static_extent(k) == dynamic_extent) {
+ return ext.extent(k); // dynamic extent: runtime value
+ } else {
+ return integral_constant<size_t, Extents::static_extent(k)>(); // static extent: kept as a compile-time constant
+ }
+#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__)
+ // Both branches above return; this only silences NVCC's missing-return warning on the host pass
+ __builtin_unreachable();
+#endif
+}
+#if defined __NVCC__
+ #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+ #pragma nv_diagnostic pop
+ #else
+ #ifdef __CUDA_ARCH__
+ #pragma diagnostic pop
+ #endif
+ #endif
+#elif defined __NVCOMPILER
+ #pragma diagnostic pop
+#endif
+
+template <size_t k, class Extents, class OffsetType, class ExtentType, // last_of overload for strided_slice
+ class StrideType>
+MDSPAN_INLINE_FUNCTION
+constexpr OffsetType // NOTE(review): returns r.extent converted to OffsetType, not ExtentType - confirm this is intended
+last_of(std::integral_constant<size_t, k>, const Extents &,
+ const strided_slice<OffsetType, ExtentType, StrideType> &r) {
+ return r.extent; // NOTE(review): this is the slice's extent, not offset + extent; no caller is visible in this chunk
+}
+
+// stride_of(slice): step between consecutive selected indices.
+// Any slice other than a strided_slice has the compile-time stride 1.
+template <class T>
+MDSPAN_INLINE_FUNCTION constexpr auto stride_of(const T &) {
+ using unit_stride_t = integral_constant<size_t, 1>;
+ return unit_stride_t();
+}
+// A strided_slice reports its own stride member (returned by value).
+template <class OffsetType, class ExtentType, class StrideType>
+MDSPAN_INLINE_FUNCTION constexpr auto
+stride_of(const strided_slice<OffsetType, ExtentType, StrideType> &slice) {
+ return slice.stride;
+}
+
+// divide<IndexT>(a, b): quotient computed in IndexT. Runtime operands give a
+// runtime value; two std::integral_constant operands give a constant type.
+template <class IndexT, class T0, class T1>
+MDSPAN_INLINE_FUNCTION constexpr auto divide(const T0 &v0, const T1 &v1) {
+ const IndexT numerator = IndexT(v0);
+ const IndexT denominator = IndexT(v1);
+ return numerator / denominator;
+}
+template <class IndexT, class T0, T0 v0, class T1, T1 v1>
+MDSPAN_INLINE_FUNCTION constexpr auto divide(const std::integral_constant<T0, v0> &,
+ const std::integral_constant<T1, v1> &) {
+ // A zero numerator short-circuits to 0, which avoids dividing by zero
+ // for a strided_slice with zero extent/stride.
+ return integral_constant<IndexT, (v0 == 0 ? 0 : v0 / v1)>();
+}
+
+// multiply<IndexT>(a, b): product computed in IndexT; see the constant overload below.
+template <class IndexT, class T0, class T1>
+MDSPAN_INLINE_FUNCTION constexpr auto multiply(const T0 &v0, const T1 &v1) {
+ const IndexT lhs = IndexT(v0);
+ const IndexT rhs = IndexT(v1);
+ return lhs * rhs;
+}
+template <class IndexT, class T0, T0 v0, class T1, T1 v1>
+MDSPAN_INLINE_FUNCTION constexpr auto multiply(const std::integral_constant<T0, v0> &,
+ const std::integral_constant<T1, v1> &) {
+ // Both operands are compile-time constants, so the product stays one.
+ return integral_constant<IndexT, (v0 * v1)>();
+}
+
+// StaticExtentFromRange<First, Last>::value: static extent of the range [First, Last); dynamic_extent unless both bounds are compile-time constants
+template <class Arg0, class Arg1> struct StaticExtentFromRange {
+ constexpr static size_t value = dynamic_extent;
+};
+// Both bounds are std::integral_constant: the extent is last - first
+template <class Integral0, Integral0 val0, class Integral1, Integral1 val1>
+struct StaticExtentFromRange<std::integral_constant<Integral0, val0>,
+ std::integral_constant<Integral1, val1>> {
+ constexpr static size_t value = val1 - val0;
+};
+// Same rule for the unqualified integral_constant, a template declared elsewhere in the project
+template <class Integral0, Integral0 val0, class Integral1, Integral1 val1>
+struct StaticExtentFromRange<integral_constant<Integral0, val0>,
+ integral_constant<Integral1, val1>> {
+ constexpr static size_t value = val1 - val0;
+};
+
+// StaticExtentFromStridedRange<Extent, Stride>::value: static extent produced by a strided_slice;
+// dynamic_extent unless both the extent and the stride are compile-time constants
+template <class Arg0, class Arg1> struct StaticExtentFromStridedRange {
+ constexpr static size_t value = dynamic_extent;
+};
+// std::integral_constant extent (val0) and stride (val1): 1 + (extent - 1) / stride, or 0 when the extent is not positive
+template <class Integral0, Integral0 val0, class Integral1, Integral1 val1>
+struct StaticExtentFromStridedRange<std::integral_constant<Integral0, val0>,
+ std::integral_constant<Integral1, val1>> {
+ constexpr static size_t value = val0 > 0 ? 1 + (val0 - 1) / val1 : 0;
+};
+// Same rule for the unqualified integral_constant, a template declared elsewhere in the project
+template <class Integral0, Integral0 val0, class Integral1, Integral1 val1>
+struct StaticExtentFromStridedRange<integral_constant<Integral0, val0>,
+ integral_constant<Integral1, val1>> {
+ constexpr static size_t value = val0 > 0 ? 1 + (val0 - 1) / val1 : 0;
+};
+
+// Builds the sub-extents recursively: K source ranks are still pending, NewExtents are the static extents collected so far.
+// Each next_extent overload consumes one slice and appends the resulting runtime extent to the trailing arguments.
+template <size_t K, class Extents, size_t... NewExtents>
+struct extents_constructor {
+ MDSPAN_TEMPLATE_REQUIRES(
+ class Slice, class... SlicesAndExtents,
+ /* requires */(!std::is_convertible_v<Slice, size_t> &&
+ !is_strided_slice<Slice>::value)
+ )
+ MDSPAN_INLINE_FUNCTION
+ constexpr static auto next_extent(const Extents &ext, const Slice &sl,
+ SlicesAndExtents... slices_and_extents) {
+ constexpr size_t new_static_extent = StaticExtentFromRange<
+ decltype(first_of(std::declval<Slice>())),
+ decltype(last_of(std::integral_constant<size_t, Extents::rank() - K>(),
+ std::declval<Extents>(),
+ std::declval<Slice>()))>::value;
+// Range slice (pair-like or full_extent_t): the rank is kept with extent last - first
+ using next_t =
+ extents_constructor<K - 1, Extents, NewExtents..., new_static_extent>;
+ using index_t = typename Extents::index_type;
+ return next_t::next_extent(
+ ext, slices_and_extents...,
+ index_t(last_of(std::integral_constant<size_t, Extents::rank() - K>(), ext,
+ sl)) -
+ index_t(first_of(sl)));
+ }
+// Index slice (convertible to size_t): the rank is dropped, so nothing is appended
+ MDSPAN_TEMPLATE_REQUIRES(
+ class Slice, class... SlicesAndExtents,
+ /* requires */ (std::is_convertible_v<Slice, size_t>)
+ )
+ MDSPAN_INLINE_FUNCTION
+ constexpr static auto next_extent(const Extents &ext, const Slice &,
+ SlicesAndExtents... slices_and_extents) {
+ using next_t = extents_constructor<K - 1, Extents, NewExtents...>;
+ return next_t::next_extent(ext, slices_and_extents...);
+ }
+// strided_slice: the rank is kept with extent 1 + (extent - 1) / stride, or 0 for an empty slice
+ template <class OffsetType, class ExtentType, class StrideType,
+ class... SlicesAndExtents>
+ MDSPAN_INLINE_FUNCTION
+ constexpr static auto
+ next_extent(const Extents &ext,
+ const strided_slice<OffsetType, ExtentType, StrideType> &r,
+ SlicesAndExtents... slices_and_extents) {
+ using index_t = typename Extents::index_type;
+ using new_static_extent_t =
+ StaticExtentFromStridedRange<ExtentType, StrideType>;
+ if constexpr (new_static_extent_t::value == dynamic_extent) {
+ using next_t =
+ extents_constructor<K - 1, Extents, NewExtents..., dynamic_extent>;
+ return next_t::next_extent(
+ ext, slices_and_extents...,
+ r.extent > 0 ? 1 + divide<index_t>(r.extent - 1, r.stride) : 0);
+ } else {
+ constexpr size_t new_static_extent = new_static_extent_t::value; // extent and stride are both compile-time constants
+ using next_t =
+ extents_constructor<K - 1, Extents, NewExtents..., new_static_extent>;
+ return next_t::next_extent( // pass the static extent itself: extent / stride differs from it when stride does not divide extent
+ ext, slices_and_extents..., index_t(new_static_extent));
+ }
+ }
+};
+
+// End of the recursion (K == 0): build the extents from what was collected.
+template <class Extents, size_t... NewStaticExtents>
+struct extents_constructor<0, Extents, NewStaticExtents...> {
+ template <class... NewExtents>
+ MDSPAN_INLINE_FUNCTION constexpr static auto
+ next_extent(const Extents &, NewExtents... new_exts) {
+ using result_t = extents<typename Extents::index_type, NewStaticExtents...>;
+ return result_t(new_exts...);
+ }
+};
+
+} // namespace detail
+
+// Computes the extents of a submdspan from the source extents and one slice
+// specifier per source rank, delegating to detail::extents_constructor.
+template <class IndexType, size_t... Extents, class... SliceSpecifiers>
+MDSPAN_INLINE_FUNCTION constexpr auto
+submdspan_extents(const extents<IndexType, Extents...> &src_exts,
+ SliceSpecifiers... slices) {
+ using source_extents_t = extents<IndexType, Extents...>;
+ using builder_t =
+ detail::extents_constructor<source_extents_t::rank(), source_extents_t>;
+ return builder_t::next_extent(src_exts, slices...);
+}
+} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE
diff --git a/ext/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp b/ext/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp
new file mode 100644
index 0000000..46ccbaa
--- /dev/null
+++ b/ext/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp
@@ -0,0 +1,630 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 4.0
+// Copyright (2022) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#pragma once
+
+#include <array>
+#include <type_traits>
+#include <utility> // index_sequence
+#include "../__p0009_bits/utility.hpp"
+
+// Suppress spurious warning with NVCC about no return statement.
+// This is a known issue in NVCC and NVC++
+// Depending on the CUDA and GCC version we need both the builtin
+// and the diagnostic push. I tried really hard to find something shorter
+// but no luck ...
+#if defined __NVCC__
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diagnostic push
+#pragma nv_diag_suppress = implicit_return_from_non_void_function
+#else
+#ifdef __CUDA_ARCH__
+#pragma diagnostic push
+#pragma diag_suppress implicit_return_from_non_void_function
+#endif
+#endif
+#elif defined __NVCOMPILER
+#pragma diagnostic push
+#pragma diag_suppress = implicit_return_from_non_void_function
+#endif
+
+namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
+//******************************************
+// Return type of submdspan_mapping overloads
+//******************************************
+template <class LayoutMapping> struct submdspan_mapping_result { // aggregate returned by the submdspan_mapping overloads
+ _MDSPAN_NO_UNIQUE_ADDRESS LayoutMapping mapping{}; // layout mapping of the sub-view
+ size_t offset{}; // offset applied to the source data handle; zero-initialized so a default-constructed result is never indeterminate
+};
+
+namespace detail {
+
+// Returns true when the lower bound of `slice` equals the extent `ext`; both
+// values are compared in their common type.
+// Slices are taken by const reference, not Slice&&: submdspan_mapping_impl
+// reads each slice several times, so forwarding gains nothing, yet copying
+// those objects (possibly of size 64 x 3 bits) is still worth avoiding.
+template <class IndexType, class Slice>
+MDSPAN_INLINE_FUNCTION constexpr bool
+one_slice_out_of_bounds(const IndexType &ext, const Slice &slice) {
+ using first_t = decltype(detail::first_of(slice));
+ using common_t = std::common_type_t<first_t, IndexType>;
+ const auto begin_idx = static_cast<common_t>(detail::first_of(slice));
+ return begin_idx == static_cast<common_t>(ext);
+}
+
+// any_slice_out_of_bounds(exts, slices...): true if, for at least one rank,
+// the slice's lower bound equals that rank's extent. The helper receives the
+// rank indices as a pack so that slice i is checked against exts.extent(i).
+template <size_t... RankIndices, class IndexType, size_t... Exts,
+ class... Slices>
+MDSPAN_INLINE_FUNCTION constexpr bool any_slice_out_of_bounds_helper(
+ std::index_sequence<RankIndices...>,
+ const extents<IndexType, Exts...> &exts, const Slices &... slices) {
+ return _MDSPAN_FOLD_OR(
+ (one_slice_out_of_bounds(exts.extent(RankIndices), slices)));
+}
+template <class IndexType, size_t... Exts, class... Slices>
+MDSPAN_INLINE_FUNCTION constexpr bool any_slice_out_of_bounds(
+ const extents<IndexType, Exts...> &exts, const Slices &... slices) {
+ using rank_indices_t = std::make_index_sequence<sizeof...(Slices)>;
+ return any_slice_out_of_bounds_helper(rank_indices_t(), exts, slices...);
+}
+
+// Plain aggregate holding N stride values of type T; it is the return type of construct_sub_strides
+template<class T, size_t N>
+struct sub_strides
+{
+ T values[N > 0 ? N : 1]; // at least one element, so N == 0 does not declare a zero-length array
+};
+
+template <class SrcMapping, class... slice_strides, size_t... InvMapIdxs>
+MDSPAN_INLINE_FUNCTION constexpr auto construct_sub_strides(
+ const SrcMapping &src_mapping, std::index_sequence<InvMapIdxs...>,
+ const MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<slice_strides...> &slices_stride_factor) {
+ using index_type = typename SrcMapping::index_type;
+ using result_t = sub_strides<index_type, sizeof...(InvMapIdxs)>; // one stride per kept source rank
+ return result_t{{(static_cast<index_type>(src_mapping.stride(InvMapIdxs)) *
+ static_cast<index_type>(get<InvMapIdxs>(slices_stride_factor)))...}};
+}
+
+// Range slice: full_extent_t, or a pair-like type accepted by
+// index_pair_like for IndexType.
+template <class SliceSpecifier, class IndexType>
+struct is_range_slice {
+ static constexpr bool value =
+ index_pair_like<SliceSpecifier, IndexType>::value ||
+ std::is_same_v<SliceSpecifier, full_extent_t>;
+};
+template <class SliceSpecifier, class IndexType>
+constexpr bool is_range_slice_v = is_range_slice<SliceSpecifier, IndexType>::value;
+// Index slice: any type convertible to IndexType.
+template <class SliceSpecifier, class IndexType>
+struct is_index_slice {
+ static constexpr bool value = std::is_convertible_v<SliceSpecifier, IndexType>;
+};
+template <class SliceSpecifier, class IndexType>
+constexpr bool is_index_slice_v = is_index_slice<SliceSpecifier, IndexType>::value;
+
+} // namespace detail
+
+//**********************************
+// layout_left submdspan_mapping
+//*********************************
+namespace detail {
+
+// Decides at compile time whether slicing a layout_left(-like) mapping can yield layout_left or layout_left_padded
+template <class IndexType, size_t SubRank, class IndexSequence,
+ class... SliceSpecifiers>
+struct deduce_layout_left_submapping;
+// Specialization that pairs each slice specifier with its source rank index Idx; SubRank is the rank of the result
+template <class IndexType, size_t SubRank, size_t... Idx,
+ class... SliceSpecifiers>
+struct deduce_layout_left_submapping<
+ IndexType, SubRank, std::index_sequence<Idx...>, SliceSpecifiers...> {
+// One entry per slice: 0 for an index slice, 1 otherwise. index_sequence_scan_impl is defined elsewhere (presumably a running sum - confirm)
+ using count_range = index_sequence_scan_impl<
+ 0u, (is_index_slice_v<SliceSpecifiers, IndexType> ? 0u : 1u)...>;
+// Number of index slices at positions Idx > 0 for which count_range::get(Idx) == 1
+ constexpr static int gap_len =
+ (((Idx > 0 && count_range::get(Idx) == 1 &&
+ is_index_slice_v<SliceSpecifiers, IndexType>)
+ ? 1
+ : 0) +
+ ... + 0);
+// True when the result can be a plain layout_left mapping
+ MDSPAN_INLINE_FUNCTION
+ constexpr static bool layout_left_value() {
+ // Use layout_left for rank 0
+ if constexpr (SubRank == 0) {
+ return true;
+ // Use layout_left for rank 1 result if leftmost slice specifier is range like
+ } else if constexpr (SubRank == 1) {
+ return ((Idx > 0 || is_range_slice_v<SliceSpecifiers, IndexType>)&&...);
+ } else {
+ // Preserve if leftmost SubRank-1 slices are full_extent_t and
+ // the slice at idx SubRank - 1 is a range and
+ // for idx >= SubRank the slice is an index
+ return ((((Idx < SubRank - 1) && std::is_same_v<SliceSpecifiers, full_extent_t>) ||
+ ((Idx == SubRank - 1) && is_range_slice_v<SliceSpecifiers, IndexType>) ||
+ ((Idx > SubRank - 1) && is_index_slice_v<SliceSpecifiers, IndexType>)) && ...);
+ }
+#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__)
+ __builtin_unreachable();
+#endif
+ }
+// True when the result can be a layout_left_padded mapping
+ MDSPAN_INLINE_FUNCTION
+ constexpr static bool layout_left_padded_value() {
+ // Technically could also keep layout_left_padded for SubRank==0
+ // and SubRank==1 with leftmost slice specifier being a contiguous range
+ // but we intercept these cases separately
+
+ // In all other cases:
+ // leftmost slice must be range
+ // then there can be a gap with index slices
+ // then SubRank - 2 full_extent slices
+ // then another range slice
+ // then more index slices
+ // e.g. R I I I F F F R I I for obtaining a rank-5 from a rank-10
+ return ((((Idx == 0) && is_range_slice_v<SliceSpecifiers, IndexType>) ||
+ ((Idx > 0 && Idx <= gap_len) && is_index_slice_v<SliceSpecifiers, IndexType>) ||
+ ((Idx > gap_len && Idx < gap_len + SubRank - 1) && std::is_same_v<SliceSpecifiers, full_extent_t>) ||
+ ((Idx == gap_len + SubRank - 1) && is_range_slice_v<SliceSpecifiers, IndexType>) ||
+ ((Idx > gap_len + SubRank - 1) && is_index_slice_v<SliceSpecifiers, IndexType>)) && ... );
+ }
+};
+
+// Computes the static padding stride of the sub-mapping; shared by layout_left and layout_left_padded sources.
+// For a layout_left source, StaticStride is static_extent(0).
+template<class Extents, size_t NumGaps, size_t StaticStride>
+struct compute_s_static_layout_left {
+ // Neither StaticStride nor any of the provided extents can be zero:
+ // the static extents multiplied here belong to ranks sliced with an integral
+ // slice specifier, which would not be valid for a zero extent.
+ template<size_t ... Idx>
+ MDSPAN_INLINE_FUNCTION
+ static constexpr size_t value(std::index_sequence<Idx...>) {
+ size_t val = ((Idx>0 && Idx<=NumGaps ? (Extents::static_extent(Idx) == dynamic_extent?0:Extents::static_extent(Idx)) : 1) * ... * (StaticStride == dynamic_extent?0:StaticStride)); // StaticStride times static_extent(1..NumGaps); any dynamic factor contributes 0
+ return val == 0?dynamic_extent:val; // a zero product means some factor was dynamic, so the result is dynamic_extent
+ }
+};
+
+} // namespace detail
+
+// layout_left::mapping::submdspan_mapping_impl: returns a submdspan_mapping_result holding the sub-mapping and the offset of its first element
+template <class Extents>
+template <class... SliceSpecifiers>
+MDSPAN_INLINE_FUNCTION constexpr auto
+layout_left::mapping<Extents>::submdspan_mapping_impl(
+ SliceSpecifiers... slices) const {
+// The result mapping type is layout_left, layout_left_padded or layout_stride, chosen at compile time below
+ // compute sub extents
+ using src_ext_t = Extents;
+ auto dst_ext = submdspan_extents(extents(), slices...);
+ using dst_ext_t = decltype(dst_ext);
+
+ // figure out sub layout type
+ using deduce_layout = detail::deduce_layout_left_submapping<
+ typename dst_ext_t::index_type, dst_ext_t::rank(),
+ std::make_index_sequence<src_ext_t::rank()>,
+ SliceSpecifiers...>;
+
+ // Figure out if any slice's lower bound equals the corresponding extent.
+ // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060.
+ const bool out_of_bounds =
+ detail::any_slice_out_of_bounds(this->extents(), slices...);
+ auto offset = static_cast<size_t>(
+ out_of_bounds ? this->required_span_size()
+ : this->operator()(detail::first_of(slices)...));
+// When out of bounds the offset is required_span_size(); otherwise it is the mapped index of the slices' lower bounds
+ if constexpr (deduce_layout::layout_left_value()) {
+ // layout_left case
+ using dst_mapping_t = typename layout_left::template mapping<dst_ext_t>;
+ return submdspan_mapping_result<dst_mapping_t>{dst_mapping_t(dst_ext),
+ offset};
+ } else if constexpr (deduce_layout::layout_left_padded_value()) {
+ constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left<Extents, deduce_layout::gap_len, Extents::static_extent(0)>::value(std::make_index_sequence<Extents::rank()>());
+ using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded<S_static>::template mapping<dst_ext_t>;
+ return submdspan_mapping_result<dst_mapping_t>{
+ dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; // second argument: source stride of rank 1 + gap_len
+ } else {
+ // layout_stride case
+ using dst_mapping_t = typename layout_stride::mapping<dst_ext_t>;
+ auto inv_map = detail::inv_map_rank(std::integral_constant<size_t, 0>(),
+ std::index_sequence<>(), slices...);
+ return submdspan_mapping_result<dst_mapping_t> {
+ dst_mapping_t(mdspan_non_standard, dst_ext,
+ detail::construct_sub_strides(
+ *this, inv_map,
+// HIP needs deduction guides to have markups so we need to be explicit
+// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have
+// the issue but Clang-CUDA also doesn't accept the use of deduction guide so
+// disable it for CUDA altogether
+#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA)
+ detail::tuple<decltype(detail::stride_of(slices))...>{
+ detail::stride_of(slices)...}).values),
+#else
+ detail::tuple{detail::stride_of(slices)...}).values),
+#endif
+ offset
+ };
+ }
+#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__)
+ __builtin_unreachable();
+#endif
+}
+
+// Computes the sub-mapping of a layout_left_padded mapping for `slices`.
+//
+// Returns a submdspan_mapping_result pairing the mapping of the slice with the
+// offset of its first element. The layout of the result is selected at
+// compile time:
+//  - rank-0 source: the source mapping itself, with offset 0
+//  - rank-0 result: layout_left
+//  - rank-1 result for which layout_left_value() holds: layout_left
+//  - layout_left_padded_value() holds: layout_left_padded
+//  - anything else: layout_stride
+template <size_t PaddingValue>
+template <class Extents>
+template <class... SliceSpecifiers>
+MDSPAN_INLINE_FUNCTION constexpr auto
+MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded<PaddingValue>::mapping<Extents>::submdspan_mapping_impl(
+    SliceSpecifiers... slices) const {
+
+  // Extents of the slice, and their type
+  using parent_ext_t = Extents;
+  auto sub_ext = submdspan_extents(extents(), slices...);
+  using sub_ext_t = decltype(sub_ext);
+
+  if constexpr (Extents::rank() == 0) { // rank-0 source
+    using sub_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded<PaddingValue>::template mapping<Extents>;
+    return submdspan_mapping_result<sub_mapping_t>{*this, 0};
+  } else {
+    // Figure out if any slice's lower bound equals the corresponding extent.
+    // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060.
+    const bool skip_mapping_eval =
+        MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...);
+    auto first_offset = static_cast<size_t>(
+        skip_mapping_eval ? this->required_span_size()
+                          : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...));
+
+    if constexpr (sub_ext_t::rank() == 0) { // rank-0 result
+      // The mapping type is deliberately spelled out twice instead of being
+      // given a local alias. With CUDA 11.2 and GCC 9.1 an alias such as
+      //   using dst_mapping_t = typename layout_left::template mapping<dst_ext_t>;
+      // led to a later compile error ("'dst_mapping_t' does not name a type"),
+      // and the same error was reported when the alias had a different name.
+      return submdspan_mapping_result<typename layout_left::template mapping<sub_ext_t>>
+          {typename layout_left::template mapping<sub_ext_t>{sub_ext}, first_offset};
+    } else { // general case
+      // Classify the slice pattern to pick the layout of the result
+      using layout_choice = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_left_submapping<
+          typename sub_ext_t::index_type, sub_ext_t::rank(),
+          std::make_index_sequence<parent_ext_t::rank()>,
+          SliceSpecifiers...>;
+
+      if constexpr (layout_choice::layout_left_value() && sub_ext_t::rank() == 1) { // rank-1 taken from the leftmost extent
+        using sub_mapping_t = typename layout_left::template mapping<sub_ext_t>;
+        return submdspan_mapping_result<sub_mapping_t>{sub_mapping_t{sub_ext}, first_offset};
+      } else if constexpr (layout_choice::layout_left_padded_value()) { // layout_left_padded can be kept
+        constexpr size_t sub_padding_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left<Extents, layout_choice::gap_len, static_padding_stride>::value(std::make_index_sequence<Extents::rank()>());
+        using sub_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded<sub_padding_static>::template mapping<sub_ext_t>;
+        return submdspan_mapping_result<sub_mapping_t>{
+            sub_mapping_t(sub_ext, stride(1 + layout_choice::gap_len)), first_offset};
+      } else { // fall back to layout_stride
+        auto rank_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant<size_t, 0>(),
+                                                                             std::index_sequence<>(), slices...);
+        using sub_mapping_t = typename layout_stride::template mapping<sub_ext_t>;
+        return submdspan_mapping_result<sub_mapping_t> {
+          sub_mapping_t(mdspan_non_standard, sub_ext,
+                        MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides(
+                            *this, rank_map,
+// HIP needs deduction guides to have markups so we need to be explicit.
+// NVCC 11.0 has a bug with the deduction guide here (11.2 was tested not to
+// have it), and Clang-CUDA does not accept the deduction guide either, so
+// it is disabled for CUDA altogether.
+#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA)
+                            MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices))...>{
+                                MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values),
+#else
+                            MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values),
+#endif
+          first_offset
+        };
+      }
+    }
+  }
+
+
+#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__)
+  __builtin_unreachable();
+#endif
+}
+
+//**********************************
+// layout_right submdspan_mapping
+//*********************************
+namespace detail {
+
+// Compile-time classification of a set of slice specifiers, used to decide
+// whether a sub-mapping can stay layout_right or layout_right_padded.
+template <class IndexType, size_t SubRank, class IndexSequence,
+          class... SliceSpecifiers>
+struct deduce_layout_right_submapping;
+
+// Idx... enumerates the slice specifiers (one per source dimension);
+// SubRank is the rank of the resulting extents.
+template <class IndexType, size_t SubRank, size_t... Idx,
+          class... SliceSpecifiers>
+struct deduce_layout_right_submapping<
+    IndexType, SubRank, std::index_sequence<Idx...>, SliceSpecifiers...> {
+
+  static constexpr size_t Rank = sizeof...(Idx);
+
+  // Fed with a 0/1 sequence: 0 for a slice convertible to IndexType (an index
+  // slice), 1 for any other slice. index_sequence_scan_impl is defined
+  // elsewhere; presumably count_range::get(i) is a running count of the
+  // non-index slices -- TODO confirm.
+  using count_range = index_sequence_scan_impl<
+      0u, (std::is_convertible_v<SliceSpecifiers, IndexType> ? 0u : 1u)...>;
+
+  // Number of index slices, excluding the last position, for which
+  // count_range::get(Idx) == SubRank - 1. layout_right_padded_value() uses it
+  // as the length of the run of index slices directly left of the rightmost
+  // slice.
+  static constexpr int gap_len =
+      (((Idx < Rank - 1 &&
+         count_range::get(Idx) == SubRank - 1 &&
+         std::is_convertible_v<SliceSpecifiers, IndexType>)
+            ? 1
+            : 0) +
+       ... + 0);
+
+  // True when the sub-mapping can remain a plain layout_right.
+  MDSPAN_INLINE_FUNCTION
+  static constexpr bool layout_right_value() {
+    if constexpr (SubRank == 0) {
+      // A rank-0 result always uses layout_right
+      return true;
+    } else if constexpr (SubRank == 1) {
+      // A rank-1 result uses layout_right if the rightmost slice specifier
+      // is range like
+      return ((Idx < Rank - 1 || is_range_slice_v<SliceSpecifiers, IndexType>) && ...);
+    } else {
+      // Preserved when, for every position:
+      //  - the rightmost SubRank-1 slices are full_extent_t,
+      //  - the slice at position Rank-SubRank is a range,
+      //  - every slice left of Rank-SubRank is an index
+      return ((((Idx >= Rank - SubRank) && std::is_same_v<SliceSpecifiers, full_extent_t>) ||
+               ((Idx == Rank - SubRank) && is_range_slice_v<SliceSpecifiers, IndexType>) ||
+               ((Idx < Rank - SubRank) && is_index_slice_v<SliceSpecifiers, IndexType>)) &&
+              ...);
+    }
+#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__)
+    __builtin_unreachable();
+#endif
+  }
+
+  // True when the sub-mapping can be expressed as layout_right_padded.
+  //
+  // layout_right_padded could technically also be kept for SubRank==0, and
+  // for SubRank==1 with a contiguous range as rightmost slice specifier, but
+  // those cases are intercepted separately.
+  //
+  // Otherwise the slices must be, from right to left:
+  //   a range slice in the rightmost position,
+  //   a (possibly empty) gap of index slices,
+  //   SubRank - 2 full_extent slices,
+  //   another range slice,
+  //   only index slices from there on.
+  // e.g. I I R F F F I I I R for obtaining a rank-5 from a rank-10
+  MDSPAN_INLINE_FUNCTION
+  static constexpr bool layout_right_padded_value() {
+    return ((((Idx == Rank - 1) && is_range_slice_v<SliceSpecifiers, IndexType>) ||
+             ((Idx >= Rank - gap_len - 1 && Idx < Rank - 1) && is_index_slice_v<SliceSpecifiers, IndexType>) ||
+             ((Idx > Rank - gap_len - SubRank && Idx < Rank - gap_len - 1) && std::is_same_v<SliceSpecifiers, full_extent_t>) ||
+             ((Idx == Rank - gap_len - SubRank) && is_range_slice_v<SliceSpecifiers, IndexType>) ||
+             ((Idx < Rank - gap_len - SubRank) && is_index_slice_v<SliceSpecifiers, IndexType>)) &&
+            ...);
+  }
+};
+
+// Static padding stride of a layout_right_padded sub-mapping.
+// The same helper serves layout_right and layout_right_padded sources;
+// for a layout_right source, StaticStride is static_extent(Rank-1).
+template<class Extents, size_t NumGaps, size_t StaticStride>
+struct compute_s_static_layout_right {
+  // Multiplies StaticStride by the static extents of the NumGaps dimensions
+  // directly left of the last one. Returns dynamic_extent as soon as
+  // StaticStride or any of those extents is dynamic.
+  //
+  // Zero serves as the in-band marker for "dynamic" while multiplying. That is
+  // unambiguous because neither StaticStride nor the extents involved can be
+  // zero: the static extents looked at belong to integral slice specifiers,
+  // which would not be valid for a zero extent.
+  template<size_t ... Idx>
+  MDSPAN_INLINE_FUNCTION
+  static constexpr size_t value(std::index_sequence<Idx...>) {
+    const size_t stride_factor = (StaticStride == dynamic_extent) ? 0 : StaticStride;
+    const size_t product =
+        (((Idx >= Extents::rank() - 1 - NumGaps && Idx < Extents::rank() - 1)
+              ? (Extents::static_extent(Idx) == dynamic_extent ? 0 : Extents::static_extent(Idx))
+              : 1) *
+         ... * stride_factor);
+    if (product == 0) {
+      return dynamic_extent;
+    }
+    return product;
+  }
+};
+
+} // namespace detail
+
+// Actual submdspan mapping call
+//
+// Computes the sub-mapping of a layout_right mapping for `slices` and returns
+// it, together with the offset of the slice's first element, as a
+// submdspan_mapping_result. The layout of the result is selected at compile
+// time:
+//  - layout_right        if deduce_layout::layout_right_value() holds
+//  - layout_right_padded if deduce_layout::layout_right_padded_value() holds
+//  - layout_stride       otherwise
+template <class Extents>
+template <class... SliceSpecifiers>
+MDSPAN_INLINE_FUNCTION constexpr auto
+layout_right::mapping<Extents>::submdspan_mapping_impl(
+    SliceSpecifiers... slices) const {
+
+  // compute sub extents
+  using src_ext_t = Extents;
+  auto dst_ext = submdspan_extents(extents(), slices...);
+  using dst_ext_t = decltype(dst_ext);
+
+  // figure out sub layout type
+  using deduce_layout = detail::deduce_layout_right_submapping<
+      typename dst_ext_t::index_type, dst_ext_t::rank(),
+      std::make_index_sequence<src_ext_t::rank()>,
+      SliceSpecifiers...>;
+
+  // Figure out if any slice's lower bound equals the corresponding extent.
+  // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060.
+  const bool out_of_bounds =
+      detail::any_slice_out_of_bounds(this->extents(), slices...);
+  auto offset = static_cast<size_t>(
+      out_of_bounds ? this->required_span_size()
+                    : this->operator()(detail::first_of(slices)...));
+
+  if constexpr (deduce_layout::layout_right_value()) {
+    // layout_right case
+    using dst_mapping_t = typename layout_right::mapping<dst_ext_t>;
+    return submdspan_mapping_result<dst_mapping_t>{dst_mapping_t(dst_ext),
+                                                   offset};
+  } else if constexpr (deduce_layout::layout_right_padded_value()) {
+    // layout_right_padded case
+    // The static padding value has to describe the same quantity as the
+    // runtime stride(rank - 2 - gap_len) handed to the constructor below.
+    // compute_s_static_layout_right multiplies the static extents of the
+    // dimensions [rank - 1 - gap_len, rank - 1) with the static extent of the
+    // last dimension, which is exactly that stride for a layout_right source.
+    constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_right<Extents, deduce_layout::gap_len, Extents::static_extent(Extents::rank() - 1)>::value(std::make_index_sequence<Extents::rank()>());
+    using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded<S_static>::template mapping<dst_ext_t>;
+    return submdspan_mapping_result<dst_mapping_t>{
+        dst_mapping_t(dst_ext,
+                      stride(src_ext_t::rank() - 2 - deduce_layout::gap_len)),
+        offset};
+  } else {
+    // layout_stride case
+    using dst_mapping_t = typename layout_stride::mapping<dst_ext_t>;
+    auto inv_map = detail::inv_map_rank(std::integral_constant<size_t, 0>(),
+                                        std::index_sequence<>(), slices...);
+    return submdspan_mapping_result<dst_mapping_t> {
+      dst_mapping_t(mdspan_non_standard, dst_ext,
+                    detail::construct_sub_strides(
+                        *this, inv_map,
+// HIP needs deduction guides to have markups so we need to be explicit
+// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have
+// the issue but Clang-CUDA also doesn't accept the use of deduction guide so
+// disable it for CUDA altogether
+#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA)
+                        MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(detail::stride_of(slices))...>{
+                            detail::stride_of(slices)...}).values),
+#else
+                        MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{detail::stride_of(slices)...}).values),
+#endif
+      offset
+    };
+  }
+#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__)
+  __builtin_unreachable();
+#endif
+}
+
+// Computes the sub-mapping of a layout_right_padded mapping for `slices`.
+//
+// Returns a submdspan_mapping_result pairing the mapping of the slice with the
+// offset of its first element. The layout of the result is selected at
+// compile time:
+//  - rank-0 source: the source mapping itself, with offset 0
+//  - rank-0 result: layout_right
+//  - rank-1 result for which layout_right_value() holds: layout_right
+//  - layout_right_padded_value() holds: layout_right_padded
+//  - anything else: layout_stride
+template <size_t PaddingValue>
+template <class Extents>
+template <class... SliceSpecifiers>
+MDSPAN_INLINE_FUNCTION constexpr auto
+MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded<PaddingValue>::mapping<Extents>::submdspan_mapping_impl(
+    SliceSpecifiers... slices) const {
+
+  // Extents of the slice, and their type
+  using parent_ext_t = Extents;
+  auto sub_ext = submdspan_extents(extents(), slices...);
+  using sub_ext_t = decltype(sub_ext);
+
+  if constexpr (Extents::rank() == 0) { // rank-0 source
+    using sub_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded<PaddingValue>::template mapping<Extents>;
+    return submdspan_mapping_result<sub_mapping_t>{*this, 0};
+  } else {
+    // Figure out if any slice's lower bound equals the corresponding extent.
+    // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060.
+    const bool skip_mapping_eval =
+        MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...);
+    auto first_offset = static_cast<size_t>(
+        skip_mapping_eval ? this->required_span_size()
+                          : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...));
+
+    if constexpr (sub_ext_t::rank() == 0) { // rank-0 result
+      // The mapping type is spelled out twice instead of being given a local
+      // alias: an alias such as
+      //   using dst_mapping_t = typename layout_right::template mapping<dst_ext_t>;
+      // runs into the same compiler issue as the one noted in the
+      // layout_left_padded implementation.
+      return submdspan_mapping_result<typename layout_right::template mapping<sub_ext_t>>
+          {typename layout_right::template mapping<sub_ext_t>{sub_ext}, first_offset};
+    } else { // general case
+      // Classify the slice pattern to pick the layout of the result
+      using layout_choice = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_right_submapping<
+          typename sub_ext_t::index_type, sub_ext_t::rank(),
+          std::make_index_sequence<parent_ext_t::rank()>,
+          SliceSpecifiers...>;
+
+      if constexpr (layout_choice::layout_right_value() && sub_ext_t::rank() == 1) { // rank-1 taken from the rightmost extent
+        using sub_mapping_t = typename layout_right::template mapping<sub_ext_t>;
+        return submdspan_mapping_result<sub_mapping_t>{sub_mapping_t{sub_ext}, first_offset};
+      } else if constexpr (layout_choice::layout_right_padded_value()) { // layout_right_padded can be kept
+        constexpr size_t sub_padding_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_right<Extents, layout_choice::gap_len, static_padding_stride>::value(std::make_index_sequence<Extents::rank()>());
+        using sub_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded<sub_padding_static>::template mapping<sub_ext_t>;
+        return submdspan_mapping_result<sub_mapping_t>{
+            sub_mapping_t(sub_ext, stride(Extents::rank() - 2 - layout_choice::gap_len)), first_offset};
+      } else { // fall back to layout_stride
+        auto rank_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant<size_t, 0>(),
+                                                                             std::index_sequence<>(), slices...);
+        using sub_mapping_t = typename layout_stride::template mapping<sub_ext_t>;
+        return submdspan_mapping_result<sub_mapping_t> {
+          sub_mapping_t(mdspan_non_standard, sub_ext,
+                        MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides(
+                            *this, rank_map,
+// HIP needs deduction guides to have markups so we need to be explicit.
+// NVCC 11.0 has a bug with the deduction guide here (11.2 was tested not to
+// have it), and Clang-CUDA does not accept the deduction guide either, so
+// it is disabled for CUDA altogether.
+#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA)
+                            MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices))...>{
+                                MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values),
+#else
+                            MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values),
+#endif
+          first_offset
+        };
+      }
+    }
+  }
+
+
+#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__)
+  __builtin_unreachable();
+#endif
+}
+
+//**********************************
+// layout_stride submdspan_mapping
+//*********************************
+// Computes the sub-mapping of a layout_stride mapping for `slices`.
+// The result is always a layout_stride mapping, returned together with the
+// offset of the slice's first element as a submdspan_mapping_result.
+template <class Extents>
+template <class... SliceSpecifiers>
+MDSPAN_INLINE_FUNCTION constexpr auto
+layout_stride::mapping<Extents>::submdspan_mapping_impl(
+    SliceSpecifiers... slices) const {
+  // Extents of the slice and the mapping type built on top of them
+  auto sub_ext = submdspan_extents(extents(), slices...);
+  using sub_ext_t = decltype(sub_ext);
+  using sub_mapping_t = typename layout_stride::template mapping<sub_ext_t>;
+
+  auto rank_map = detail::inv_map_rank(std::integral_constant<size_t, 0>(),
+                                       std::index_sequence<>(), slices...);
+
+  // Figure out if any slice's lower bound equals the corresponding extent.
+  // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060.
+  const bool skip_mapping_eval =
+      detail::any_slice_out_of_bounds(this->extents(), slices...);
+  auto first_offset = static_cast<size_t>(
+      skip_mapping_eval ? this->required_span_size()
+                        : this->operator()(detail::first_of(slices)...));
+
+  return submdspan_mapping_result<sub_mapping_t> {
+    sub_mapping_t(mdspan_non_standard, sub_ext,
+                  detail::construct_sub_strides(
+                      *this, rank_map,
+// HIP needs deduction guides to have markups so we need to be explicit.
+// NVCC 11.0 has a bug with the deduction guide here (11.2 was tested not to
+// have it), and Clang-CUDA does not accept the deduction guide either, so
+// it is disabled for CUDA altogether.
+#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA)
+                      MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(detail::stride_of(slices))...>(
+                          detail::stride_of(slices)...)).values),
+#else
+                      MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple(detail::stride_of(slices)...)).values),
+#endif
+    first_offset
+  };
+}
+
+} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE
+
+#if defined __NVCC__
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diagnostic pop
+#else
+#ifdef __CUDA_ARCH__
+#pragma diagnostic pop
+#endif
+#endif
+#elif defined __NVCOMPILER
+#pragma diagnostic pop
+#endif