用户
 找回密码
 立即注册
发表于 2021-6-25 11:26:42
65190
代码如下
  // Kernel over a 2D launch: each thread derives (i, j) from its block/thread
  // indices, points steer_vector at ARRAY_CHANNEL_NUM entries of steerVector
  // starting at index * ARRAY_CHANNEL_NUM, and accumulates a complex sum into
  // the local soundcomplex using values read from Rx.
  //
  // Parameters:
  //   Rx           - read as Rx[ARRAY_CHANNEL_NUM * k2 + k1]
  //                  (presumably an ARRAY_CHANNEL_NUM x ARRAY_CHANNEL_NUM matrix; TODO confirm)
  //   steerVector  - read through steer_vector, ARRAY_CHANNEL_NUM entries per thread
  //   sound_field  - the only buffer written; receives 0.0f at [j * 640 + i]
  //   sound_central - not referenced in this kernel
  //
  // NOTE(review): there is no bounds check on i / j, so the launch grid is assumed
  // to match the buffers exactly - confirm against the caller.
  // NOTE(review): soundcomplex is never stored anywhere; as written, the only
  // global-memory effect of the kernel is the 0.0f store near the end.
  1. __global__ void BBBBBBBBBBBBBBBBBB(cufftComplex* Rx, cufftComplex* steerVector, float* sound_field ,float* sound_central)

  2. {

  3. unsigned int i = threadIdx.x + blockDim.x * blockIdx.x; // global x index of this thread

  4. unsigned int j = threadIdx.y + blockDim.y * blockIdx.y; // global y index of this thread

  5. int width = blockDim.x * gridDim.x; // total number of threads along x in the launch

  6. int index = j * (width) + i; // flat thread index, row stride = width



  7. float distance = 0.0f; // not used anywhere else in this kernel

  8. cufftComplex* steer_vector = steerVector + index * ARRAY_CHANNEL_NUM; // this thread's slice of steerVector

  9. // cufftComplex steer_vector_conj[ARRAY_CHANNEL_NUM];

  10. cufftComplex temp_vector[ARRAY_CHANNEL_NUM]; // per-thread scratch: one complex value per k1

  11. cufftComplex Rx_vector[ARRAY_CHANNEL_NUM]; // per-thread scratch: refilled from Rx for every k1





  12. cufftComplex soundcomplex{.0f, .0f}; // running complex sum over k1





  13. for (int k1 = 0; k1 < ARRAY_CHANNEL_NUM; k1++)

  14. {

  15. Rx_vector[k1].x = 0.0f; // overwritten by the k2 loop below, which writes every element

  16. Rx_vector[k1].y = 0.0f;



  17. for (int k2 = 0; k2 < ARRAY_CHANNEL_NUM; k2++) // gather Rx[ARRAY_CHANNEL_NUM * k2 + k1] for all k2

  18. {

  19. Rx_vector[k2].x = Rx[ARRAY_CHANNEL_NUM * k2 + k1].x;

  20. Rx_vector[k2].y = Rx[ARRAY_CHANNEL_NUM * k2 + k1].y;

  21. }

  22. temp_vector[k1].x = 0.0f; // reset the accumulator for this k1

  23. temp_vector[k1].y = 0.0f;



  24. for (int k3 = 0; k3 < ARRAY_CHANNEL_NUM; k3++) // accumulate over all channels into temp_vector[k1]

  25. {

  26. // temp_vector[k1].x += steer_vector_conj[k3].x * Rx_vector[k3].x - steer_vector_conj[k3].y * Rx_vector[k3].y;

  27. // temp_vector[k1].y += steer_vector_conj[k3].x * Rx_vector[k3].y + steer_vector_conj[k3].y * Rx_vector[k3].x;

  28. temp_vector[k1].x += steer_vector[k3].x * Rx_vector[k3].x - (-steer_vector[k3].y) * Rx_vector[k3].y; // real part of conj(steer_vector[k3]) * Rx_vector[k3]

  29. temp_vector[k1].y += steer_vector[k3].x * (-Rx_vector[k3].y) + (-steer_vector[k3].y) * Rx_vector[k3].x; // NOTE(review): Rx_vector[k3].y is negated here but not in the commented-out form above - confirm the intended sign

  30. }



  31. soundcomplex.x += temp_vector[k1].x * steer_vector[k1].x - temp_vector[k1].y * steer_vector[k1].y; // real part of temp_vector[k1] * steer_vector[k1]

  32. soundcomplex.y += temp_vector[k1].x * steer_vector[k1].y + temp_vector[k1].y * steer_vector[k1].x; // imaginary part of the same product



  33. }



  34. sound_field[j * 640 + i] = 0.0f; // NOTE(review): row stride is hard-coded to 640 while index above uses width - confirm they agree





  35. int k = 0; // only referenced by the commented-out store below

  36. // With the store below left uncommented the kernel needs 94 registers; with it commented out only 11 are needed.
  // NOTE(review): likely because, with the store commented out, nothing computed above
  // reaches memory and the compiler can discard it - confirm in the generated PTX/SASS.

  37. // sound_field[j * 640 + i] = temp_vector[k].x * steer_vector[k].y + temp_vector[k].y * steer_vector[k].x;



  38. }
复制代码


操作系统:Windows 10
CUDA 版本:10.2
编译参数:
  1. nvcc.exe -D___CUDACC__ --use_fast_math -lcuda -lcudadevrt -lcudart -lcufft -lcublas --machine 64 -arch=compute_61 -code=sm_61 --ptxas-options=-v --compile -cudart static -D_MBCS -Xcompiler /wd4819,/EHsc,/W3,/nologo,/O2,/Zi -Xcompiler /MD -c -o  ...
复制代码





使用道具 举报 回复
发新帖
您需要登录后才可以回帖 登录 | 立即注册