#pragma once #include #include #include #include #if defined(MDSPAN_IMPL_HAS_CUDA) && defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10 >= 1260) #include #else #include #endif #include "macros.hpp" namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace detail { // type alias used for rank-based tag dispatch // // this is used to enable alternatives to constexpr if when building for C++14 // template using with_rank = std::integral_constant; template MDSPAN_INLINE_FUNCTION constexpr bool common_integral_compare(I1 x, I2 y) { static_assert(std::is_integral::value && std::is_integral::value, ""); using I = std::common_type_t; return static_cast(x) == static_cast(y); } template MDSPAN_INLINE_FUNCTION constexpr bool rankwise_equal(with_rank<0>, const T1&, const T2&, F) { return true; } template MDSPAN_INLINE_FUNCTION constexpr bool rankwise_equal(with_rank, const T1& x, const T2& y, F func) { bool match = true; for (std::size_t r = 0; r < N; r++) { match = match && common_integral_compare(func(x, r), func(y, r)); } return match; } #if MDSPAN_HAS_CXX_17 inline #endif constexpr struct extent_functor { template MDSPAN_INLINE_FUNCTION constexpr auto operator()(const T& x, I i) const { return x.extent(i); } } extent; #if MDSPAN_HAS_CXX_17 inline #endif constexpr struct stride_functor { template MDSPAN_INLINE_FUNCTION constexpr auto operator()(const T& x, I i) const { return x.stride(i); } } stride; // same as std::integral_constant but with __host__ __device__ annotations on // the implicit conversion function and the call operator template struct integral_constant { using value_type = T; using type = integral_constant; static constexpr T value = v; MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr integral_constant() = default; // These interop functions work, because other than the value_type operator // everything of std::integral_constant works on device (defaulted functions) MDSPAN_FUNCTION constexpr integral_constant(std::integral_constant) {} MDSPAN_FUNCTION constexpr operator std::integral_constant() const noexcept { return std::integral_constant{}; } MDSPAN_FUNCTION constexpr operator value_type() const noexcept { return value; } MDSPAN_FUNCTION constexpr value_type operator()() const noexcept { return value; } }; // The tuple implementation only comes in play when using capabilities // such as submdspan which require C++17 anyway #if MDSPAN_HAS_CXX_17 template struct tuple_member { using type = T; static constexpr size_t idx = Idx; T val; MDSPAN_FUNCTION constexpr T& get() { return val; } MDSPAN_FUNCTION constexpr const T& get() const { return val; } }; // A helper class which will be used via a fold expression to // select the type with the correct Idx in a pack of tuple_member template struct tuple_idx_matcher { using type = tuple_member; template MDSPAN_FUNCTION constexpr auto operator | ([[maybe_unused]] Other v) const { if constexpr (Idx == SearchIdx) { return *this; } else { return v; } } }; template struct tuple_impl; template struct tuple_impl, Elements...>: public tuple_member ... { MDSPAN_FUNCTION constexpr tuple_impl(Elements ... vals):tuple_member{vals}... {} template MDSPAN_FUNCTION constexpr auto& get() { using base_t = decltype((tuple_idx_matcher() | ...) ); return base_t::type::get(); } template MDSPAN_FUNCTION constexpr const auto& get() const { using base_t = decltype((tuple_idx_matcher() | ...) ); return base_t::type::get(); } }; // A simple tuple-like class for representing slices internally and is compatible with device code // This doesn't support type access since we don't need it // This is not meant as an external API template struct tuple: public tuple_impl()), Elements...> { MDSPAN_FUNCTION constexpr tuple(Elements ... vals):tuple_impl()), Elements ...>(vals ...) {} }; template MDSPAN_FUNCTION constexpr auto& get(tuple& vals) { return vals.template get(); } template MDSPAN_FUNCTION constexpr const auto& get(const tuple& vals) { return vals.template get(); } template tuple(Elements ...) -> tuple; #endif #if MDSPAN_HAS_CXX_17 // std::in_range and friends, tagged for device execution // Backport from https://en.cppreference.com/w/cpp/utility/intcmp // and https://en.cppreference.com/w/cpp/utility/in_range template MDSPAN_INLINE_FUNCTION constexpr bool cmp_less(T t, U u) noexcept { if constexpr (std::is_signed_v == std::is_signed_v) return t < u; else if constexpr (std::is_signed_v) return t < 0 || std::make_unsigned_t(t) < u; else return u >= 0 && t < std::make_unsigned_t(u); } template MDSPAN_INLINE_FUNCTION constexpr bool cmp_less_equal(T t, U u) noexcept { return !cmp_less(u, t); } template MDSPAN_INLINE_FUNCTION constexpr bool cmp_greater_equal(T t, U u) noexcept { return !cmp_less(t, u); } template MDSPAN_INLINE_FUNCTION constexpr bool in_range(T t) noexcept { #if defined(MDSPAN_IMPL_HAS_CUDA) && defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10 >= 1260) using cuda::std::numeric_limits; #else using std::numeric_limits; #endif return cmp_greater_equal(t, numeric_limits::min()) && cmp_less_equal(t, numeric_limits::max()); } template MDSPAN_INLINE_FUNCTION constexpr bool check_mul_result_is_nonnegative_and_representable(T a, T b) { // FIXME_SYCL The code below compiles to old_llvm.umul.with.overflow.i64 // which isn't defined in device code #ifdef __SYCL_DEVICE_ONLY__ return true; #else if (b == 0 || a == 0) return true; if constexpr (std::is_signed_v) { if ( a < 0 || b < 0 ) return false; } #if defined(MDSPAN_IMPL_HAS_CUDA) && defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10 >= 1260) using cuda::std::numeric_limits; #else using std::numeric_limits; #endif return a <= numeric_limits::max() / b; #endif } #endif } // namespace detail #if MDSPAN_HAS_CXX_17 inline #endif constexpr struct mdspan_non_standard_tag { } mdspan_non_standard; } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE