cuda examples

Upload: swati-choudhary

Post on 08-Jan-2016

214 views

Category:

Documents


0 download

DESCRIPTION

Example programs for CUDA C

TRANSCRIPT

  • 1 Examples of CUDA code

    1) The dot product 2) Matrix-vector multiplication 3) Sparse matrix multiplication 4) Global reduction

    Computing y = ax + y with a Serial Loop: void saxpy_serial(int n, float alpha, float *x, float *y) { for (int i = 0; i

  • 2 __global__ void mm_simple(float *C, float *A, float *B, int n) { int row = blockIdx.y * blockDim.y + threadIdx.y; int col = blockIdx.x * blockDim.x + threadIdx.x;

    float sum = 0.0f; for (int k = 0; k

  • 3 void csrmul_serial(int *Ap, int *Aj, float *Av, int num_rows, float *x, float *y)

    { for (int row = 0; row

  • 4 Caching in shared memory

    =

    Block_begin

    Block_end

    A thread / Cache in shared memory

    Expect most of the nonzero elements here (around the diagonal)

    the row executed by a thread

    __global__ void csrmul_cached(int *Ap, int *Aj, float *Av, int num_rows, const float *x, float *y) { __shared__ float cache[blocksize];
    // Cache the rows of x[] corresponding to this block.
    int block_begin = blockIdx.x * blockDim.x; int block_end = block_begin + blockDim.x; int row = block_begin + threadIdx.x;
    // Fetch and cache our window of x[].
    if (row

  • 5 __global__ void plus_reduce(int *input, int N, int *total) { int tid = threadIdx.x; int i = blockIdx.x * blockDim.x + threadIdx.x;
    // Each block loads its elements into shared memory
    __shared__ int x[blocksize]; x[tid]=(i0;s=s/2){if(tid