Files
clang-uml/tests/t20050/t20050.cu

38 lines
711 B
Plaintext

#include "t20050.cuh"

#include <cuda_runtime.h>
namespace clanguml {
namespace t20050 {
// Number of elements in each vector handled by tmain().
constexpr unsigned long N = 1000UL;
// Device-side helper returning the sum of its two arguments.
// Works for any T for which `a + b` is valid and convertible to T.
template <typename T> __device__ T add(T a, T b)
{
    T sum = a + b;
    return sum;
}
// Device-side helper returning `a` multiplied by itself.
__device__ float square(float a)
{
    const float squared = a * a;
    return squared;
}
// Kernel: for every index i in [0, n) stores square(a[i]) + square(b[i])
// into out[i].
//
// The range is partitioned across threads with a stride of
// gridDim.x * blockDim.x, so each element is written by exactly one thread
// for any 1D launch configuration. A <<<1, 1>>> launch still processes all
// n elements sequentially. Only the x dimension of the launch is used for
// partitioning.
//
// out, a and b are dereferenced on the device for indices 0..n-1, so they
// must point to device-accessible storage of at least n floats each.
// A non-positive n results in no work.
__global__ void vector_square_add(float *out, float *a, float *b, int n)
{
    // 64-bit index arithmetic so blockIdx.x * blockDim.x and the running
    // index cannot wrap for large grids.
    const long long first =
        static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;

    for (long long i = first; i < n; i += stride) {
        out[i] = add(square(a[i]), square(b[i]));
    }
}
// Host entry point: allocates three N-element float vectors, fills the two
// inputs with 1.0f and 2.0f, and runs vector_square_add on them with a
// <<<1, 1>>> launch.
//
// The buffers are managed allocations so that the same pointers can be
// written by the host loop below and dereferenced by the kernel; plain
// malloc() memory must not be handed to a kernel.
//
// Returns 0 on success, 1 if an allocation, the launch, or the kernel
// execution reports a CUDA error.
int tmain()
{
    constexpr size_t bytes = sizeof(float) * N;

    float *a = nullptr;
    float *b = nullptr;
    float *out = nullptr;

    // Short-circuits on the first failure; pointers not yet allocated stay
    // null, and cudaFree(nullptr) is a no-op.
    if (cudaMallocManaged(&a, bytes) != cudaSuccess ||
        cudaMallocManaged(&b, bytes) != cudaSuccess ||
        cudaMallocManaged(&out, bytes) != cudaSuccess) {
        cudaFree(a);
        cudaFree(b);
        cudaFree(out);
        return 1;
    }

    // Index type matches N to avoid a signed/unsigned comparison.
    for (unsigned long i = 0; i < N; i++) {
        a[i] = 1.0f;
        b[i] = 2.0f;
    }

    // The kernel takes an int element count; make the narrowing explicit.
    vector_square_add<<<1, 1>>>(out, a, b, static_cast<int>(N));

    // Launch-configuration errors surface via cudaGetLastError(); faults
    // during execution surface at the next synchronizing call.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }

    cudaFree(a);
    cudaFree(b);
    cudaFree(out);

    return status == cudaSuccess ? 0 : 1;
}
}
}