用户
 找回密码
 立即注册
发表于 2021-6-17 22:33:25
56211
#include <stdio.h>
#include <assert.h>
#include <cuda_runtime.h>
#include <cusparse.h>
#include<iostream>
using namespace std;


int main()
{
        //初始化 cuSparse library
        cusparseHandle_t handle = NULL;
        cusparseMatDescr_t descrA, descrB, descrC = NULL;
        cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS;
        cudaError_t cudaStat1 = cudaSuccess;
        cudaError_t cudaStat2 = cudaSuccess;
        cudaError_t cudaStat3 = cudaSuccess;
        cudaError_t cudaStat4 = cudaSuccess;
        status = cusparseCreate(&handle);
        assert(CUSPARSE_STATUS_SUCCESS == status);

        //初始化矩阵描述符
        status = cusparseCreateMatDescr(&descrA);
        assert(CUSPARSE_STATUS_SUCCESS == status);
        status = cusparseCreateMatDescr(&descrB);
        assert(CUSPARSE_STATUS_SUCCESS == status);
        status = cusparseCreateMatDescr(&descrC);
        assert(CUSPARSE_STATUS_SUCCESS == status);
       
        const int n = 3;                    // --- 行数与列数
        const int nnzA = 4;                 // --- matrix A 的非零个数
        const int nnzB = 4;                 // --- matrix B 的非零个数
         //matrix A 的CSR格式
        const int csrRowPtrA[n + 1] = { 1, 2, 4, 5 };
        const int csrColIndA[nnzA] = { 1, 2, 3, 3 };
        const double csrValA[nnzA] = { 1.0, 2.0, 4.0, 5.0 };

        //matrix B 的CSR格式
        const int csrRowPtrB[n + 1] = { 1, 1, 1, 1 };
        const int csrColIndB[nnzB] = { 1, 2, 2, 3 };
        const double csrValB[nnzB] = { 1.0, 3.0, 4.0, 5.0 };

        //在Cuda上分配matrix A与matrix B的内存及数据转移
        int* d_csrRowPtrA;
        cudaStat1 = cudaMalloc(&d_csrRowPtrA, sizeof(int) * (n + 1));
        assert(cudaSuccess == cudaStat1);
        cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int) * (n + 1), cudaMemcpyHostToDevice);
        assert(cudaSuccess == cudaStat1);
        int* d_csrColIndA;
        cudaStat2 = cudaMalloc(&d_csrColIndA, sizeof(int) * nnzA);
        assert(cudaSuccess == cudaStat2);
        cudaStat2 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int) * nnzA, cudaMemcpyHostToDevice);
        assert(cudaSuccess == cudaStat2);
        double* d_csrValA;
        cudaStat3 = cudaMalloc(&d_csrValA, sizeof(double) * nnzA);
        assert(cudaSuccess == cudaStat3);
        cudaStat3 = cudaMemcpy(d_csrValA, csrValA, sizeof(double) * nnzA, cudaMemcpyHostToDevice);
        assert(cudaSuccess == cudaStat3);

        int* d_csrRowPtrB;
        cudaStat1 = cudaMalloc(&d_csrRowPtrB, sizeof(int) * (n + 1));
        assert(cudaSuccess == cudaStat1);
        cudaStat1 = cudaMemcpy(d_csrRowPtrB, csrRowPtrB, sizeof(int) * (n + 1), cudaMemcpyHostToDevice);
        assert(cudaSuccess == cudaStat1);
        int* d_csrColIndB;
        cudaStat2 = cudaMalloc(&d_csrColIndB, sizeof(int) * nnzB);
        assert(cudaSuccess == cudaStat2);
        cudaStat2 = cudaMemcpy(d_csrColIndB, csrColIndB, sizeof(int) * nnzB, cudaMemcpyHostToDevice);
        assert(cudaSuccess == cudaStat2);
        double* d_csrValB;
        cudaStat3 = cudaMalloc(&d_csrValB, sizeof(double) * nnzB);
        assert(cudaSuccess == cudaStat3);
        cudaStat3 = cudaMemcpy(d_csrValB, csrValB, sizeof(double) * nnzB, cudaMemcpyHostToDevice);
        assert(cudaSuccess == cudaStat3);

        //matrix A与matrix B 求和
        int baseC, nnzC;
        double alpha = 1.f, beta = 1.f;
        size_t BufferSizeInBytes;
        char* buffer = NULL;
        int* nnzTotalDevHostPtr = &nnzC;
        cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST);
        int* d_csrRowPtrC = NULL, * d_csrColIndC = NULL; double* d_csrValC = NULL;
        cudaStat1 = cudaMalloc(&d_csrRowPtrC, sizeof(int) * (n + 1));
        assert(cudaSuccess == cudaStat1);
       
        cusparseDcsrgeam2_bufferSizeExt(handle, n, n,
                &alpha,
                descrA, nnzA, d_csrValA, d_csrRowPtrA, d_csrColIndA,
                &beta,
                descrB, nnzB, d_csrValB, d_csrRowPtrB, d_csrColIndB,
                descrC, d_csrValC, d_csrRowPtrC, d_csrColIndC,
                &BufferSizeInBytes);
        cudaMalloc((void**)&buffer, sizeof(char) * BufferSizeInBytes);
        status = cusparseXcsrgeam2Nnz(handle, n, n, descrA, nnzA, d_csrRowPtrA, d_csrColIndA, descrB, nnzB, d_csrRowPtrB, d_csrColIndB, descrC, d_csrRowPtrC,
                nnzTotalDevHostPtr, buffer);
        if (NULL != nnzTotalDevHostPtr)
        {
                nnzC = *nnzTotalDevHostPtr;
        }
        else
        {
                cudaMemcpy(&nnzC, d_csrRowPtrC + n, sizeof(int), cudaMemcpyDeviceToHost);
                cudaMemcpy(&baseC, d_csrRowPtrC, sizeof(int), cudaMemcpyDeviceToHost);
                nnzC -= baseC;
        }
        cudaMalloc(&d_csrColIndC, sizeof(int) * nnzC);
        cudaMalloc(&d_csrValC, sizeof(double) * nnzC);
        status = cusparseDcsrgeam2(
                handle, n, n,
                &alpha,
                descrA, nnzA, d_csrValA, d_csrRowPtrA, d_csrColIndA,
                &beta,
                descrB, nnzB, d_csrValB, d_csrRowPtrB, d_csrColIndB,
                descrC, d_csrValC, d_csrRowPtrC, d_csrColIndC,
                buffer);

        //将矩阵C的所有资料往CPU上回传
        int* csrRowPtrC = NULL;
        int* csrColIndC = NULL;
        float* csrValC = NULL;
        csrRowPtrC = (int*)malloc(sizeof(int) * (n + 1));
        csrColIndC = (int*)malloc(sizeof(int) * nnzC);
        csrValC = (float*)malloc(sizeof(float) * nnzC);
        assert(NULL != csrRowPtrC);
        assert(NULL != csrColIndC);
        assert(NULL != csrValC);
        cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int) * (n + 1),cudaMemcpyDeviceToHost);
        cudaStat2 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int) * nnzC,cudaMemcpyDeviceToHost);
        cudaStat3 = cudaMemcpy(csrValC, d_csrValC, sizeof(float) * nnzC,cudaMemcpyDeviceToHost);
        assert(cudaSuccess == cudaStat1);
        assert(cudaSuccess == cudaStat2);
        assert(cudaSuccess == cudaStat3);
       
        cout << *csrValC << ",";
        cout << endl;
        cout << *csrColIndC << ",";
        cout << endl;
        cout << *csrRowPtrC << ",";
        cout << endl;

        return 0;

}


使用道具 举报 回复
发表于 2021-6-17 22:40:36
请问下这个输出结果很怪,是哪里错了?

本帖子中包含更多资源

您需要 登录 才可以下载或查看,没有帐号?立即注册

x
使用道具 举报 回复 支持 反对
发新帖
您需要登录后才可以回帖 登录 | 立即注册