玫瑰幻想 发表于 2013-10-25 22:28 
楼主您好,
您都承认您再复制的时候没有指定偏移量,您怎可能读取出来正确的值?您这样后续的204次复制将 ...
谢谢版主的解答。我看到volumeRender在把数据由指针绑定到纹理时并没有设置偏移量,不过它的指针是直接指向fread读进来的数据的。关于texture reference的设置,我都参考了volumeRender(使用归一化坐标),然后它就可以沿着ray叠加各个(pos+step)位置上的像素值,然后再与颜色纹理相乘得到最终的数据。
还有,我真的不是有意忽视版主认真的解答,实在是由于本人初学,只能参考samples中的东西来做;
那我把数据绑定那块代码也附上吧。
数据读取
/**
 * Load raw 16-bit samples from a binary file into a freshly allocated buffer.
 *
 * At most `size` bytes (rounded down to whole 16-bit samples) are read, so the
 * read can never run past the end of the returned buffer, whatever the size of
 * the file on disk. If the file holds fewer samples than requested, the
 * remainder of the buffer is zero (the buffer is zero-initialised on
 * allocation) and a warning is printed.
 *
 * @param filename  path of the file to read (opened in binary mode)
 * @param size      capacity of the returned buffer, in BYTES
 * @return          buffer of `size` bytes owned by the caller (release with
 *                  free()), or 0 if the arguments are unusable, the file
 *                  cannot be opened, allocation fails, or nothing was read
 */
short *loadRawFile(char *filename, size_t size)
{
    if (!filename)
    {
        fprintf(stderr, "Error: no file name given\n");
        return 0;
    }

    /* Only whole samples are read; a trailing odd byte of capacity is unused. */
    const size_t sampleCount = size / sizeof(short);
    if (0 == sampleCount)
    {
        fprintf(stderr, "Error: buffer size %llu is too small to read '%s'\n",
                (unsigned long long)size, filename);
        return 0;
    }

    FILE *fp = fopen(filename, "rb");
    if (!fp)
    {
        fprintf(stderr, "Error opening file '%s'\n", filename);
        return 0;
    }

    /* calloc (not malloc) so that a short read leaves a defined, zero tail. */
    short *data = (short *)calloc(size, 1);
    if (NULL == data)
    {
        printf("Malloc data failue!\n");
        fclose(fp);
        return 0;
    }

    /* The element count is derived from `size`, never from a fixed constant. */
    const size_t read = fread(data, sizeof(short), sampleCount, fp);
    fclose(fp);

    if (0 == read)
    {
        printf("Read data failue!\n");
        free(data);
        return 0;
    }

    if (read < sampleCount)
    {
        fprintf(stderr, "Warning: '%s' holds only %llu of the %llu requested samples\n",
                filename, (unsigned long long)read, (unsigned long long)sampleCount);
    }

    /* fread returns a sample count; convert to bytes for the message. */
    printf("Read '%s', %llu bytes\n", filename,
           (unsigned long long)(read * sizeof(short)));
    return data;
}
把数据按照204张循环拷贝给cudaArray
// ---------------------------------------------------------------------------
// Host driver fragment: load the raw volume, upload a 204-slice block into a
// 3D cudaArray bound to the texture reference `tex`, run transformKernel on
// it, and copy the result back into out_volume.
//
// Names used here but declared outside this fragment: path, VolumeType,
// d_volumeArray, tex, width, height, transformKernel, and the CUDA-sample
// helpers checkCudaErrors / getLastCudaError / sdk*Timer.
// ---------------------------------------------------------------------------
cudaExtent volumeSize = make_cudaExtent(512, 512, 204);
size_t VolumeSize = volumeSize.width*volumeSize.height* 408 *sizeof(VolumeType);
size_t newVolumeSize = volumeSize.width*volumeSize.height* 204 *sizeof(VolumeType);
size_t subVolumeSize = volumeSize.width*volumeSize.height* 51 *sizeof(VolumeType);
size_t PicSize = volumeSize.width * volumeSize.height *sizeof(VolumeType);

const size_t sliceElems    = 512*512;                        // samples in one 512x512 slice
const size_t inVolumeBytes = sliceElems*204*sizeof(short);   // one uploaded block
const size_t outBlockElems = sliceElems*51;                  // samples copied back per pass
const size_t outBlockBytes = outBlockElems*sizeof(short);

short *h_volume = loadRawFile(path, VolumeSize);
if ( NULL == h_volume)
{
    printf("h_volume is NULL!\n");
    return ;
}
short *out_volume = (short*)malloc(512*512*102*sizeof(short)); //save data after Interpolation
if ( NULL == out_volume)
{
    printf("out_volume is NULL!\n");
    free(h_volume);     // do not leak the buffer allocated above
    return ;
}
short *h_Involume = (short*) malloc(inVolumeBytes); // staging buffer copied to the texture
if ( NULL == h_Involume)
{
    printf("h_Involume is NULL!\n");
    free(out_volume);
    free(h_volume);
    return ;
}
int iTimes = 1;//408/102 ;//cuda calculate times
int iTime = 0;
while(iTime < iTimes)
{
    // NOTE(review): the source offset is a fixed 204 slices and does not depend
    // on iTime, so every pass would upload the same block if iTimes were raised
    // above 1 -- confirm the intended offset before doing so.
    memcpy(h_Involume, h_volume+512*512*204 , inVolumeBytes);

    // Debug dump: write the first four staged slices to myOut1.dat .. myOut4.dat.
    for (int page = 0; page < 4; ++page)
    {
        char dumpName[32];
        sprintf(dumpName, "myOut%d.dat", page + 1);
        ofstream dump(dumpName);
        const size_t first = sliceElems*page;
        for (size_t k = first; k < first + sliceElems; ++k)
        {
            dump<<h_Involume[k]<<" ";
        }
        dump.close();
    }

    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<VolumeType>();
    checkCudaErrors(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize));

    // copy data to 3D array
    cudaMemcpy3DParms copyParams = {0};
    copyParams.srcPtr = make_cudaPitchedPtr(h_Involume, volumeSize.width*sizeof(VolumeType), volumeSize.width, volumeSize.height);
    copyParams.dstArray = d_volumeArray;
    copyParams.extent = volumeSize;
    copyParams.kind = cudaMemcpyHostToDevice;
    checkCudaErrors(cudaMemcpy3D(&copyParams));

    // set texture parameters
    // NOTE(review): linear filtering requires the texture to return float; the
    // declaration of `tex` is not visible here -- confirm its read mode.
    tex.normalized = true; // access with normalized texture coordinates
    tex.filterMode = cudaFilterModeLinear; // linear interpolation
    tex.addressMode[0] = cudaAddressModeClamp; // clamp texture coordinates
    tex.addressMode[1] = cudaAddressModeClamp;

    // bind array to 3D texture; the array must stay allocated until the kernels
    // that sample `tex` have finished, so it is released at the end of the pass.
    checkCudaErrors(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));

    short* dOutData = NULL;
    checkCudaErrors(cudaMalloc((void **)&dOutData, outBlockBytes));
    // Zero the output so elements the kernel does not write come back as 0
    // instead of uninitialised device memory.
    checkCudaErrors(cudaMemset(dOutData, 0, outBlockBytes));

    dim3 dimBlock(32, 32, 1);
    dim3 dimGrid(16, 16, 1);    // 16*32 = 512 threads per axis

    // Warmup
    transformKernel<<<dimGrid, dimBlock>>>(dOutData, width, height);
    getLastCudaError("Kernel execution failed");
    checkCudaErrors(cudaDeviceSynchronize());

    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);

    // Execute the kernel
    transformKernel<<<dimGrid, dimBlock>>>(dOutData, width, height);
    // Check if kernel execution generated an error
    getLastCudaError("Kernel execution failed");
    // Wait for completion so the timer covers the kernel and execution errors surface here.
    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&timer);
    printf("Processing time: %f (ms)\n", sdkGetTimerValue(&timer));
    sdkDeleteTimer(&timer);

    checkCudaErrors(cudaMemcpy(out_volume + outBlockElems*iTime, dOutData, outBlockBytes, cudaMemcpyDeviceToHost));

    // Debug dump: first 512x512 slice of the result, one value per line.
    ofstream out("fisrtPage.dat");
    for (size_t j = 0; j < sliceElems; ++j)
    {
        out<<out_volume[j]<<endl;
    }
    out.close();

    // Release per-pass device resources only after the kernels are done with them.
    checkCudaErrors(cudaUnbindTexture(tex));
    checkCudaErrors(cudaFreeArray(d_volumeArray));
    d_volumeArray = 0;  // avoid leaving a dangling handle behind
    checkCudaErrors(cudaFree(dOutData));
    iTime++;
}
free(h_Involume);
free(h_volume);
// NOTE(review): out_volume is not released here; presumably code following this
// fragment consumes it -- confirm it is freed there.
kernel中函数
/**
 * Sample the 3D texture `tex` along one eye ray per pixel and store the raw
 * samples, one output slice per step.
 *
 * Launch layout: 2D grid of 2D blocks; the thread at global (x, y) handles
 * pixel (x, y). Threads with x >= imageW or y >= imageH exit immediately, so
 * the grid may overshoot the image.
 *
 * @param outputData  device buffer receiving the samples; step i of pixel
 *                    (x, y) is stored at y*imageW + x + i*imageW*imageH, so
 *                    the buffer must hold at least maxSteps (10) * imageW *
 *                    imageH shorts. Pixels whose ray misses the box, and steps
 *                    past the ray's exit point, are NOT written.
 * @param imageW      image width in pixels
 * @param imageH      image height in pixels
 *
 * Uses names declared outside this block: tex, c_invViewMatrix, Ray,
 * intersectBox, mul, normalize.
 * NOTE(review): if `tex` is declared with a normalized-float read mode the
 * samples lie in [-1, 1] and rounding them to short loses nearly everything --
 * confirm against the declaration of `tex`.
 */
__global__ void transformKernel(short *outputData,int imageW, int imageH)
{
    const int maxSteps = 10;
    const float tstep = 0.02f;
    const float3 boxMin = make_float3(-1.0f, -1.0f, -1.0f);
    const float3 boxMax = make_float3(1.0f, 1.0f, 1.0f);

    uint x = blockIdx.x*blockDim.x + threadIdx.x;
    uint y = blockIdx.y*blockDim.y + threadIdx.y;
    if ((x >= imageW) || (y >= imageH)) return;

    // Map the pixel to [-1, 1) on both axes.
    float u = (x / (float) imageW)*2.0f-1.0f;
    float v = (y / (float) imageH)*2.0f-1.0f;

    // calculate eye ray in world space
    // (original author's note: after the next statement eyeRay.o = {0, 0, 4}
    //  for the view matrix they were using)
    Ray eyeRay;
    eyeRay.o = make_float3(mul(c_invViewMatrix, make_float4(0.0f, 0.0f, 0.0f, 1.0f)));
    eyeRay.d = normalize(make_float3(u, v, -2.0f));
    eyeRay.d = mul(c_invViewMatrix, eyeRay.d);

    // find intersection with box
    float tnear, tfar;
    int hit = intersectBox(eyeRay, boxMin, boxMax, &tnear, &tfar);
    if (!hit) return;
    if (tnear < 0.0f) tnear = 0.0f; // clamp to near plane

    float t = tnear;
    float3 pos = eyeRay.o + eyeRay.d*tnear;
    float3 step = eyeRay.d*tstep;

    for (int i=0; i<maxSteps; i++)
    {
        // read from 3D texture
        // remap position from [-1, 1] to [0, 1] texture coordinates
        // (original author's note: the source data has already been converted
        //  to float and the position is float too, so the image value is read
        //  at a float position)
        float sample = tex3D(tex, pos.x*0.5f+0.5f, pos.y*0.5f+0.5f, pos.z*0.5f+0.5f);

        // Store before testing the exit condition; otherwise the sample just
        // fetched is thrown away on the iteration that leaves the box.
        outputData[y*imageW + x + i*imageW*imageH] = (short)roundf(sample);

        t += tstep;
        if (t > tfar) break;
        pos += step;
    }
}
再次真心谢谢版主!麻烦了 |