Download User Manual - Support

Transcript
8.5 Available Tools
(c) Bright Computing
Taras Shapovalov <[email protected]>
*/
#include <cuda.h>
#include <cutil_inline.h>
#include <stdio.h>
// CUDA kernel: reverses the shift-by-13 encoding, one byte per thread.
// Every launched thread subtracts 13 from the byte at its flat 1D global
// index. There is no bounds check here, so the caller has to launch exactly
// as many threads as there are bytes to decode in str.
__global__ void helloWorld(char* str) {
    const int pos = threadIdx.x + blockDim.x * blockIdx.x;
    str[pos] = str[pos] - 13;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Encodes "Hello World!" on the CPU by adding 13 to every character, then
// decodes it on the GPU with the helloWorld kernel and prints each stage.
// Returns 0 on success and 1 if any CUDA call fails.
int main(int argc, char** argv )
{
    char s[] = "Hello World!";
    printf("String for encode/decode: %s\n", s);

    // CPU shift13.
    // Only the visible characters are shifted: sizeof(s) also counts the
    // terminating NUL, and shifting that byte would leave s unterminated for
    // the printf calls below (the 12 GPU threads would never restore it).
    int len = sizeof(s) - 1;
    for (int i = 0; i < len; i++) {
        s[i] += 13;
    }
    printf("String encoded on CPU as: %s\n", s);

    // Allocate memory on the CUDA device
    char *cuda_s = NULL;
    if (!cudaSucceeded(cudaMalloc((void**)&cuda_s, len), "cudaMalloc")) {
        return 1;
    }

    // Copy the encoded characters to the CUDA device
    bool ok = cudaSucceeded(
        cudaMemcpy(cuda_s, s, len, cudaMemcpyHostToDevice),
        "cudaMemcpy (host to device)");

    if (ok) {
        // Set the grid and block sizes (dim3 is a type).
        // "Hello World!" is 12 characters, so 3 blocks x 4 threads gives
        // exactly one thread per character; the kernel has no bounds check,
        // so the thread count must not exceed len.
        dim3 dimGrid(3);
        dim3 dimBlock(4);

        // Invoke the kernel to undo shift13 in GPU
        helloWorld<<< dimGrid, dimBlock >>>(cuda_s);

        // A launch does not return a status itself; fetch it explicitly.
        ok = cudaSucceeded(cudaGetLastError(), "helloWorld launch");
    }

    if (ok) {
        // Retrieve the results from the CUDA device. The terminating NUL in
        // s was never modified, so the decoded string is still terminated.
        ok = cudaSucceeded(
            cudaMemcpy(s, cuda_s, len, cudaMemcpyDeviceToHost),
            "cudaMemcpy (device to host)");
    }

    // Free up the allocated memory on the CUDA device (on every path)
    cudaFree(cuda_s);

    if (!ok) {
        return 1;
    }

    printf("String decoded on GPU as: %s\n", s);
    return 0;
}
The preceding code example may be compiled and run with:
[fred@bright52 ~]$ nvcc hello.cu -o hello
[fred@bright52 ~]$ module add shared openmpi/gcc/64/1.4.4 slurm
© Bright Computing, Inc.
53