Download User Manual - Support
Transcript
8.5 Available Tools
(c) Bright Computing Taras Shapovalov <[email protected]> */

#include <cuda.h>
// NOTE(review): nothing in this listing uses cutil; the header is kept only
// because the original listing includes it -- confirm it is still needed.
#include <cutil_inline.h>
#include <stdio.h>

// CUDA kernel definition: undo shift13.
// Each thread subtracts 13 from one character of str, using a flat 1D index
// built from blockIdx.x, blockDim.x and threadIdx.x. Threads whose index is
// not below len do nothing, so the launch may contain more threads than
// there are characters.
__global__ void helloWorld(char* str, int len)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < len) {
        str[idx] -= 13;
    }
}

// Report a failed CUDA runtime call on stderr.
// Returns true when err is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char* what)
{
    if (err != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
        return false;
    }
    return true;
}

// Encodes "Hello World!" on the CPU by adding 13 to every character, decodes
// it on the GPU with helloWorld, and prints the string at each stage.
// Returns 0 on success and 1 when any CUDA call fails.
int main(int argc, char** argv)
{
    char s[] = "Hello World!";
    printf("String for encode/decode: %s\n", s);

    // CPU shift13.
    // sizeof(s) counts the trailing '\0'; leave it out so the terminator is
    // never shifted and the string stays printable with %s.
    int len = sizeof(s) - 1;
    for (int i = 0; i < len; i++) {
        s[i] += 13;
    }
    printf("String encoded on CPU as: %s\n", s);

    // Allocate memory on the CUDA device
    char *cuda_s = NULL;
    if (!cudaOk(cudaMalloc((void**)&cuda_s, len), "cudaMalloc")) {
        return 1;
    }

    // Copy the string to the CUDA device
    bool ok = cudaOk(cudaMemcpy(cuda_s, s, len, cudaMemcpyHostToDevice),
                     "cudaMemcpy (host to device)");

    // Set the grid and block sizes (dim3 is a type):
    // blocks of 4 threads, and enough blocks to cover len characters
    // ("Hello World!" is 12 characters, so 3x4).
    dim3 dimBlock(4);
    dim3 dimGrid((len + dimBlock.x - 1) / dimBlock.x);

    if (ok) {
        // Invoke the kernel to undo shift13 in GPU
        helloWorld<<< dimGrid, dimBlock >>>(cuda_s, len);
        // A kernel launch returns nothing; launch errors are read back here.
        ok = cudaOk(cudaGetLastError(), "helloWorld launch");
    }

    if (ok) {
        // Retrieve the results from the CUDA device
        ok = cudaOk(cudaMemcpy(s, cuda_s, len, cudaMemcpyDeviceToHost),
                    "cudaMemcpy (device to host)");
    }

    // Free up the allocated memory on the CUDA device (also on failure paths)
    cudaFree(cuda_s);

    if (!ok) {
        return 1;
    }

    printf("String decoded on GPU as: %s\n", s);
    return 0;
}

The preceding code example may be compiled and run with:
[fred@bright52 ~]$ nvcc hello.cu -o hello
[fred@bright52 ~]$ module add shared openmpi/gcc/64/1.4.4 slurm
© Bright Computing, Inc. 53