下面是个 kernel函数,nisight调试出错,显示:error = access violation on load (shared memory)。在调用这个kernel的函数中,用cudaStatus = cudaDeviceSynchronize(); 检查,显示cudaDeviceSynchronize returned error code 30。
琢磨好久了,无果。请问,是什么个错误呢?
__global__ static void ComputeMatrix(BYTE* NewImage, int LocalImageWidth,int LocalImageHeight,float *final_sum1,int *GPUPMH )
{
int xIndex=__mul24(blockDim.x,blockIdx.x)+threadIdx.x;
int yIndex=__mul24(blockDim.y,blockIdx.y)+threadIdx.y;
// int xIndex=blockDim.x*blockIdx.x+threadIdx.x;
// int yIndex=blockDim.y*blockIdx.y+threadIdx.y;
int tid_in_x=threadIdx.x;
int tid_in_y=threadIdx.y;
int tid_in_block=__mul24(threadIdx.y,blockDim.x)+threadIdx.x;
// int tid_in_block=threadIdx.y*blockDim.x+threadIdx.x;
unsigned int index=yIndex*LocalImageWidth+xIndex;
__shared__ int sin[K][K]; //图像
__shared__ int sgrayCoMatrixRD[G][G];//灰度共生矩阵
sin[tid_in_x][tid_in_y]=0;
__syncthreads();//块内线程同步
oMatrixRD[tid_in_x][tid_in_y]=0;
__syncthreads();//块内线程同步
//传值
sin[tid_in_x][tid_in_y]=(unsigned int)NewImage[index]; //听到这不动 sin 不变 tid _in_x 不变 都是0
__syncthreads();//块内线程同步
atomicAdd(&sgrayCoMatrixRD[sin[tid_in_x-D][tid_in_y+D]][sin[tid_in_x][tid_in_y]],1);
atomicAdd(&sgrayCoMatrixRD[sin[tid_in_x][tid_in_y]][sin[tid_in_x-D][tid_in_y+D]],1);
GPUPMH[index]=sgrayCoMatrixRD[tid_in_x][tid_in_y];
__syncthreads();//块内线程同步
}
|