Add an algorithm in CUDA C (no C++) to this kernel code that will find the max value
ID: 3862611 • Letter: A
Question
Add an algorithm in CUDA C (no C++) to this kernel code that finds the maximum value of the vector that the kernel reduces (float *in).
/*
 * reduction - per-block sum reduction of a float vector.
 *
 * Each block consumes 2 * blockDim.x consecutive elements of `in`, starting at
 * element 2 * blockIdx.x * blockDim.x, and stores their sum in
 * out[blockIdx.x]. Positions at or beyond `size` contribute 0.0f, so a
 * partially filled last block is handled.
 *
 * Parameters:
 *   out  - receives one partial sum per block, indexed by blockIdx.x.
 *   in   - input vector; only read by this kernel.
 *   size - number of valid elements in `in`.
 *
 * Launch requirements (derived from how the shared array is sized/indexed):
 *   - blockDim.x must equal BLOCK_SIZE: the shared buffer is sized from
 *     BLOCK_SIZE but indexed with threadIdx.x, and the second element of each
 *     thread is fetched at an offset of BLOCK_SIZE.
 *   - BLOCK_SIZE must be a power of two, otherwise the doubling/halving
 *     strides skip elements (or, in the SIMPLE variant, index past the
 *     shared buffer).
 *   - Only the x dimension of the grid and block is used.
 *
 * Build variants:
 *   - SIMPLE defined: every thread stages two elements in shared memory and
 *     the stride doubles each step.
 *   - otherwise: every thread pre-adds its two elements while loading and
 *     the stride halves each step.
 */
__global__ void reduction(float *out, float *in, unsigned size)
{
    /*
     * The global element index is kept in 64 bits. The built-in index
     * variables are unsigned, and the previous `int` index wrapped negative
     * for inputs past 2^31 elements while still passing the unsigned
     * `idx < size` test, which turned in[idx] into an out-of-bounds read.
     * 64-bit arithmetic also keeps `idx + BLOCK_SIZE` from wrapping.
     */
    const unsigned long long idx =
        2ULL * blockIdx.x * blockDim.x + threadIdx.x;

#ifdef SIMPLE
    __shared__ float in_s[2 * BLOCK_SIZE];

    /* Stage both elements owned by this thread; pad with the additive identity. */
    in_s[threadIdx.x] = (idx < size) ? in[idx] : 0.0f;
    in_s[threadIdx.x + BLOCK_SIZE] =
        (idx + BLOCK_SIZE < size) ? in[idx + BLOCK_SIZE] : 0.0f;

    for (int stride = 1; stride < (BLOCK_SIZE << 1); stride <<= 1) {
        /* Barrier first: the previous step's (or the initial) writes must be visible. */
        __syncthreads();
        if (threadIdx.x % stride == 0)
            in_s[2 * threadIdx.x] += in_s[2 * threadIdx.x + stride];
    }
#else
    __shared__ float in_s[BLOCK_SIZE];

    /* First addition happens during the load, halving the shared footprint. */
    in_s[threadIdx.x] =
        ((idx < size) ? in[idx] : 0.0f) +
        ((idx + BLOCK_SIZE < size) ? in[idx + BLOCK_SIZE] : 0.0f);

    for (int stride = BLOCK_SIZE >> 1; stride > 0; stride >>= 1) {
        /* Barrier first: the previous step's (or the initial) writes must be visible. */
        __syncthreads();
        if (threadIdx.x < stride)
            in_s[threadIdx.x] += in_s[threadIdx.x + stride];
    }
#endif

    /*
     * Thread 0 performed the final accumulation into in_s[0] itself, so it
     * can publish the block's result without another barrier.
     */
    if (threadIdx.x == 0)
        out[blockIdx.x] = in_s[0];
}
Explanation / Answer
Here is the code that I tried for the assignment.
/*
 * add_vectors_kernel - element-wise vector addition: C[i] = A[i] + B[i].
 *
 * Parameters:
 *   A, B - input vectors; only read by this kernel.
 *   C    - output vector; element i is written by the thread whose flat
 *          index is i.
 *   N    - number of elements to process; threads whose flat index is
 *          >= N do nothing.
 *
 * Launch layout: blocks may be arranged in a 2D grid (blockIdx.x and
 * blockIdx.y are flattened row by row); only the x dimension of the thread
 * block is used.
 */
__global__ void add_vectors_kernel(float *A, float *B, float *C, int N) {
    /*
     * Flatten the 2D block coordinates, then derive the flat thread index.
     * The arithmetic is done in 64 bits: with 32-bit math a grid holding
     * more than 2^31 threads made the index wrap to a negative int, which
     * still satisfied `thread_Id < N` and wrote outside C.
     */
    const long long block_Id = blockIdx.x + (long long)gridDim.x * blockIdx.y;
    const long long thread_Id = (long long)blockDim.x * block_Id + threadIdx.x;

    /* Compute a single element of the result vector (if the element is valid). */
    if (thread_Id < N) C[thread_Id] = A[thread_Id] + B[thread_Id];
}
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.