我计算传热学里辐射问题的角系数
代码如下:
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include<iostream>
#define h 5
#define r 5
#define R 10
#define Pi 3.1415
// includes CUDA
#include <cuda_runtime.h>
// includes, project
#include <helper_cuda.h>
#include <helper_functions.h> // helper functions for SDK examples
using std::cout;
using std::endl;
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
void runTest(int argc, char **argv);
extern "C"
void computeGold(float *reference, float *idata, const unsigned int len);
////////////////////////////////////////////////////////////////////////////////
//! Simple test kernel for device functionality
//! @param g_idata input data in global memory
//! @param g_odata output data in global memory
////////////////////////////////////////////////////////////////////////////////
__device__ float Find1(int i,int j,int k ,int n)
{
float x1=R/blockDim.x*i*cos(2*Pi/blockDim.x*j);
float y1=R/blockDim.x*i*sin(2*Pi/blockDim.x*j);
float x2=r/blockDim.x*k*cos(2*Pi/blockDim.x*n);
float y2=r/blockDim.x*k*sin(2*Pi/blockDim.x*n);
float S=(x1-x2)*(x1-x2)+(y1-y2)*(y1-y2)+h*h;
return(h*h/(Pi*S*S));
}
__global__ void
testKernel(float *g_A)
{
// shared memory
// the size is determined by the host application
extern __shared__ float sdata1[];
float s1=0,s2=0,s3=0;
// access thread id
const unsigned int tid = threadIdx.x;
// access number of threads in this block
const float num_threads = blockDim.x;
for(int i=1;i<=num_threads;i++)//R圆半径对应的数 i
{
for(int j=1;j<=num_threads;j++)//R圆弧度对应的数 j
{
for(int k=1;k<=num_threads;k++)//r圆半径对应的数 k
{
sdata1[tid]=Find1(i,j,k,tid+1)*(2*Pi/num_threads);
__syncthreads();
for(int n=num_threads;n>=1;n/=2)
{
sdata1[tid]+=sdata1[tid+n];
}
if(tid==0)
{
s1+=sdata1[0]*(r*k/num_threads)*(r/num_threads);
}
}
if(tid==0)
{
s2+=s1*(2*Pi/num_threads);
s1=0;
}
}
if(tid==0)
{
s3+=s2*(R/num_threads*i)*(R/num_threads);
s2=0;
}
}
// read in input data from global memory
__syncthreads();
// perform some computations
if(tid==0)
*g_A=s3/(Pi*R*R);
// write data to global memory
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
runTest(argc, argv);
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
void
runTest(int argc, char **argv)
{
bool bTestResult = true;
printf("%s Starting...\n\n", argv[0]);
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
int devID = findCudaDevice(argc, (const char **)argv);
StopWatchInterface *timer = 0;
sdkCreateTimer(&timer);
sdkStartTimer(&timer);
int num_threads = 512;
int mem_size = sizeof(float) * num_threads;
// allocate host memory
float *h_odata = (float *) malloc(sizeof(float));
// initalize the memory
// allocate device memory
float *d_odata;
checkCudaErrors(cudaMalloc((void **) &d_odata, sizeof(float)));
// copy host memory to device
// allocate device memory for result
// setup execution parameters
dim3 grid(1, 1, 1);
dim3 threads(num_threads, 1, 1);
// execute the kernel
testKernel<<< grid, threads, mem_size >>>(d_odata);
// check if kernel execution generated and error
getLastCudaError("Kernel execution failed");
// allocate mem for the result on host side
// copy result from device to host
checkCudaErrors(cudaMemcpy(h_odata, d_odata, sizeof(float) ,
cudaMemcpyDeviceToHost));
sdkStopTimer(&timer);
printf("Processing time: %f (ms)\n", sdkGetTimerValue(&timer));
sdkDeleteTimer(&timer);
std::cout<<h_odata[0];
// compute reference solution
//float *reference = (float *) malloc(mem_size);
//computeGold(reference, h_idata, num_threads);
// check result
/* if (checkCmdLineFlag(argc, (const char **) argv, "regression"))
{
// write file for regression test
sdkWriteFile("./data/regression.dat", h_odata, num_threads, 0.0f, false);
}
else
{
// custom output handling when no regression test running
// in this case check if the result is equivalent to the expected soluion
bTestResult = compareData(reference, h_odata, num_threads, 0.0f, 0.0f);
}*/
// cleanup memory
free(h_odata);
checkCudaErrors(cudaFree(d_odata));
cudaDeviceReset();
exit(bTestResult ? EXIT_SUCCESS : EXIT_FAILURE);
}
出现了这个编译问题,我真没发现什么错误:
初次接触CUDA,经验不足,还请大神指正
|