// Walk the rows in mirrored pairs: row i from the front together with its
// counterpart from the back. For each pair the visibility pass runs on the CPU,
// the affected host rows are uploaded, and the view-factor kernels are launched.
//
// NOTE(review): the CPU pass mirrors with Mb01-1-i while the uploads and kernel
// launches mirror with T-1-i. The two only address the same row when
// Mb01 == T -- confirm against the declarations.
for(int i=0;i<=(T-1)/2;i++)
{
const int hostMirror=Mb01-1-i; // mirrored row index for the CPU visibility pass
const int devMirror=T-1-i;     // mirrored row index for uploads and kernel launches
// When the row count is odd, the last iteration pairs the middle row with
// itself. Handle it once so it is not uploaded and run through the kernels twice.
const bool hostHasMirror=(hostMirror!=i);
const bool devHasMirror=(devMirror!=i);

////////////////// Visibility test, executed on the CPU //////////////////
cout<<"计算 ib="<<i<<" 时角系数,网格总数2626"<<endl;
if(hostHasMirror)
cout<<"计算 ib="<<hostMirror<<" 时角系数,网格总数2626"<<endl;
soild_jxs_panduan(i);
if(hostHasMirror)
soild_jxs_panduan(hostMirror);
virtual_jxs_panduan(i);
if(hostHasMirror)
virtual_jxs_panduan(hostMirror);

////////////////// View-factor computation, executed on the GPU //////////////////
// Upload row i of both host tables (T floats of jx11, G floats of jx12).
// Assumes jx11[0] / jx12[0] point at contiguous row-major storage -- TODO confirm.
checkCudaErrors(cudaMemcpy(nd_jx11+T*i,jx11[0]+T*i , sizeof(float)*T,
cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(nd_jx12+G*i,jx12[0]+G*i , sizeof(float)*G,
cudaMemcpyHostToDevice));
if(devHasMirror)
{
// Upload the mirrored row of both tables.
checkCudaErrors(cudaMemcpy(nd_jx11+T*devMirror,jx11[0]+T*devMirror , sizeof(float)*T,
cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(nd_jx12+G*devMirror,jx12[0]+G*devMirror , sizeof(float)*G,
cudaMemcpyHostToDevice));
}

// All launches use the default stream, so they execute in issue order.
// A launch itself reports nothing; cudaGetLastError() is what surfaces an
// invalid launch configuration.
solid_jxs_jisuan<<<grid1,threads1>>>(i,d_jaj);
checkCudaErrors(cudaGetLastError());
if(devHasMirror)
{
solid_jxs_jisuan<<<grid1,threads1>>>(devMirror,d_jaj);
checkCudaErrors(cudaGetLastError());
}
virtual_jxs_jisuan<<<grid2,threads2>>>(i,d_jaj);
checkCudaErrors(cudaGetLastError());
if(devHasMirror)
{
virtual_jxs_jisuan<<<grid2,threads2>>>(devMirror,d_jaj);
checkCudaErrors(cudaGetLastError());
}
}
//////////////// Copy the results back to the host ////////////////
// Kernel launches return immediately, and a fault during execution (for example
// an illegal address or a display-watchdog timeout on a long-running kernel) is
// only reported by a later synchronizing call. Check explicitly here so such a
// failure is attributed to the kernels and not to the copies below.
checkCudaErrors(cudaGetLastError());
checkCudaErrors(cudaDeviceSynchronize());
// Blocking device-to-host copies of the full tables: T*T floats for jx11 and
// T*G floats for jx12.
// Assumes jx11[0] / jx12[0] point at contiguous buffers of that size -- TODO confirm.
checkCudaErrors(cudaMemcpy(jx11[0],nd_jx11 , sizeof(float)*T*T,
cudaMemcpyDeviceToHost));
checkCudaErrors(cudaMemcpy(jx12[0],nd_jx12 , sizeof(float)*T*G,
cudaMemcpyDeviceToHost));
// For every row i: sum the row across both tables, divide each entry by that
// sum (or zero the entry when the sum is not positive), then append the row of
// jx11 to fp1 and the row of jx12 to fp2, one text line per row.
//
// NOTE(review): the row subscript [i] was missing from every access in this
// loop (e.g. "jx11[j]", "jx11=0"), which would sum pointers and overwrite the
// array pointer itself. It is restored here. jxs_sum and jxs_divide_num are
// assumed to be per-row arrays, as the commented-out line below suggests for
// jxs_divide_num -- confirm against their declarations.
for(int i=0;i<Mb01;i++)
{
Sum=0;
// NOTE(review): no device-to-host copy of d_jaj into jaj is visible in this
// section; confirm that *jaj holds the intended value at this point.
jxs_divide_num[i]=*jaj;
//jxs_divide_num[Mb01-1-i]=*jaj;

// Accumulate the row total over both tables.
for(int j=0;j<Mb01;j++)
{
Sum+=jx11[i][j];
}
for(int j=0;j<Mb02;j++)
{
Sum+=jx12[i][j];
}
jxs_sum[i]=Sum;

// Normalize the row; a non-positive total zeroes the entries instead of dividing.
for(int j=0;j<Mb01;j++)
{
if(jxs_sum[i]>0)
jx11[i][j]=jx11[i][j]/jxs_sum[i];
else
jx11[i][j]=0;
}
for(int j=0;j<Mb02;j++)
{
if(jxs_sum[i]>0)
jx12[i][j]=jx12[i][j]/jxs_sum[i];
else
jx12[i][j]=0;
}

// Write the normalized row to each output file.
for(int j=0;j<Mb01;j++)
{
fprintf(fp1,"%16.9E ",jx11[i][j]);
}
fprintf(fp1,"\n");
for(int j=0;j<Mb02;j++)
{
fprintf(fp2,"%16.9E ",jx12[i][j]);
}
fprintf(fp2,"\n");
}
上面的算例在小数据量时结果正确,但为什么数据量一大就出错了呢?是否因为数据量大时,前面的循环已经结束,而设备端(GPU)尚未完成计算,CPU 却继续向下执行,从而导致出错?
|