CUDA-3.1¶

Use functions only in the execution spaces they target

Required inputs: IR

CUDA 3.1 [share.function] Use functions only in the execution spaces they target

Functions should only be executed in the execution space(s) they target.

__host__ __device__ functions target both the host and device execution space.
__host__ functions and functions with no execution space specifier target the host execution space.
__device__ functions and kernels (__global__ functions) target the device execution space.

Scope: Host, Device.

Audience: CUDA C++, CUDA Libraries.

Category: Required.

Hardware Applicability: All Compute Capabilities.

Rationale

Functions are only compiled for the architectures necessary for their execution space. __device__ only functions may appear to have a definition in __host__ code, however, these definitions are no-ops and are purely an implementation detail. Any ODR-use of a function in an execution space that it does not support results in undefined behavior.

Example 1 (Bad)

# include <memory>
# include <algorithm>
# include <vector>
# include <cassert>

struct square {
  template <typename T>
  __device__ T operator()(T t) {
    return t * t;
  }
};

template <typename ForwardIt, typename Size, typename UnaryOp>

__host__ __device__

void inplace_transform_n(ForwardIt first, Size n, UnaryOp op) {
  for (Size i = 0; i < n; ++first, (void) ++i)
    *first = op(*first);
}

void vectorSquare(std::vector<int32_t> &u) {
  // This instantiates and uses `inplace_transform_n` in `__host__` code with a
  // `__device__` only `UnaryOp`, which is invalid and leads to a compile time
  // warning and a run time failure.
  inplace_transform_n(u.begin(), u.size(), square{});
}

Example 2 (Bad)

# include <memory>
# include <algorithm>
# include <vector>
# include <cassert>

struct square {
  template <typename T>
  __device__ T operator()(T t) {
    return t * t;
  }
};
// This template function can be instantiated and used in `__host__` code with
// a `__host__` only `UnaryOp`, but this will generate a vacuous warning about
// using a `__host__` only function in a `__host__ __device__` function. This
// warning can be disabled with `#pragma nv_exec_check_disable`, but this also
// disables relevant warnings for any invalid instantiations in `__device__`
// code with `__host__` only `UnaryOp`s.
# pragma nv_exec_check_disable

template <typename ForwardIt, typename Size, typename UnaryOp>

__host__ __device__

void inplace_transform_n(ForwardIt first, Size n, UnaryOp op) {
  for (Size i = 0; i < n; ++first, (void) ++i)
    *first = op(*first);
}

void vectorSquare(std::vector<int32_t> &u)) {
  // This instantiates and uses `inplace_transform_n` in `__host__` code with a
  // `__device__` only `UnaryOp`, which is invalid and leads to a compile time
  // warning and a run time failure.
  inplace_transform_n(u.begin(), u.size(), square{});
}

Example 3 (Bad)

# include <memory>
# include <algorithm>
# include <cassert>
# include "testTerminate.h"

struct square {
  template <typename T>
  T operator()(T t) {
    return t * t;
  }
};

template <typename ForwardIt, typename Size, typename UnaryOp>

__host__ __device__

void inplace_transform_n(ForwardIt first, Size n, UnaryOp op) {
  for (Size i = 0; i < n; ++first, (void) ++i)
    *first = op(*first);
}

__global__ void square_each_element(int32_t* u, ptrdiff_t per_thread) {
  inplace_transform_n(u, per_thread, square{});
}

void ptrSquare(std::unique_ptr<int32_t> &up, int32_t n, int32_t m) {
  // This kernel instantiates and uses `inplace_transform_n` in `__device__`
  // code with a `__host__` only `UnaryOp`, which is invalid and leads to a
  // compile time warning and a run time failure.
  square_each_element<<<1, n>>>(up.get(), m);
  cudaError_t const error2 = cudaGetLastError();
  testTerminate(error2);
  cudaError_t const error3 = cudaDeviceSynchronize();
  assert(cudaSuccess == error3);
}

Example 4 (Bad)

# include <memory>
# include <algorithm>
# include <cassert>
# include "testTerminate.h"

struct square {
  template <typename T>
  T operator()(T t) {
    return t * t;
  }
};
// This template function can be instantiated and used in `__host__` code with
// a `__host__` only `UnaryOp`, but this will generate a vacuous warning about
// using a `__host__` only function in a `__host__ __device__` function. This
// warning can be disabled with `#pragma nv_exec_check_disable`, but this also
// disables relevant warnings for any invalid instantiations in `__device__`
// code with `__host__` only `UnaryOp`s.
# pragma nv_exec_check_disable

template <typename ForwardIt, typename Size, typename UnaryOp>

__host__ __device__

void inplace_transform_n(ForwardIt first, Size n, UnaryOp f) {
  for (Size i = 0; i < n; ++first, (void) ++i)
    *first = f(*first);
}

__global__ void square_each_element(int32_t* u, ptrdiff_t per_thread) {
  inplace_transform_n(u, per_thread, square{});
}

void ptrSquare(std::unique_ptr<int32_t> &up, int32_t n, int32_t m) {
  // This kernel instantiates and uses `inplace_transform_n` in `__device__`
  // code with a `__host__` only `UnaryOp`, which is invalid and leads to a run
  // time failure, but no compile time warning is produced.
  square_each_element<<<1, n>>>(up.get(), m);
  cudaError_t const error2 = cudaGetLastError();
  testTerminate(error2);
  cudaError_t const error3 = cudaDeviceSynchronize();
  assert(cudaSuccess == error3);
}

Possible Messages

Key	Text	Severity	Disabled
constexpr_device_func_called_from_const_host	This call is in host code but attempts to call a constexpr device function	None	False
constexpr_device_func_called_from_host	This call is in host code but attempts to call a constexpr device function outside of a manifestly constant evaluated context	None	False
constexpr_host_func_called_from_const_device	This call is in device code but attempts to call a constexpr host function	None	False
constexpr_host_func_called_from_device	This call is in device code but attempts to call a constexpr host function outside of a manifestly constant evaluated context	None	False
device_func_called_from_host	This call is in host code but attempts to call a device function	None	False
host_func_called_from_device	This call is in device code but attempts to call a host function	None	False

Options

This rule shares the following common options: exclude_in_macros, exclude_messages_in_system_headers, excludes, extend_exclude_to_macro_invocations, includes, justification_checker, languages, post_processing, provider, report_at, severity
The following places define options that affect this rule: Stylechecks, Analysis-GlobalOptions
This rule has no individual options.

Axivion Suite 7.12.2-public

Navigation