ожидаемая ошибка выражения на CUDA в IDE QtCreaotr? - PullRequest
0 голосов
/ 13 октября 2019

Я пытаюсь изучить CUDA, и ниже приведены мои файлы кода. Есть ожидаемая ошибка выражения, которую я не могу решить. Что здесь не так? На простом примере Hello World я получаю ту же ошибку. Ошибка в строке ниже. Даже когда я копирую код из github, я получаю ту же ошибку при вызове функций угловой скобки.

cudamult << <blocksPerGrid, threadsPerBlock >> > (n, p, m, A, B, C);

main.cpp

#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <iostream>
#include <cuda_runtime.h>
#include<mycuda.cu>

using namespace std;

//extern void use_saxpy_cuda(int n, float a, float * x, float * y);
extern void use_cudamult(int n, int p, int m, float* A, float* B, float* C);

void matmult(int n, int p, int m, float* A, float* B, float* C)
{
    use_cudamult(n, p, m, A, B, C);
}


int main()
{

    int n = 3;
    int p = 4;
    int m = 5;

    float * A;
    float * B;
    float * C;
    // Allocate memory
    cudaMallocManaged(&A, n*p*sizeof(float));
    cudaMallocManaged(&B, p*m * sizeof(float));
    cudaMallocManaged(&C, n*m * sizeof(float));


    for (int i = 0; i < n*p; i++) {
        A[i] = 1.0f;
    }

    for (int i = 0; i < p*m; i++) {
        B[i] = 1.0f;
    }

    cout << "A: ";
    for (int i = 0; i < n*p; i++) {
        if (i%p == 0)
            cout << endl;
        cout << A[i] << " ";
    }
    cout << endl;

    cout << "B: ";
    for (int i = 0; i < p*m; i++) {
        if (i%m == 0)
            cout << endl;
        cout << B[i] << " ";
    }
    cout << endl;

    matmult(n, p , m, A, B, C);
    cudaDeviceSynchronize();

    cout << "C: ";
    for (int i = 0; i < n*m; i++) {
        if (i%m == 0 )
            cout << endl;
        cout << C[i] << " ";
    }


    cudaFree(A);
    cudaFree(B);
    cudaFree(C);

}

mycuda.cu



#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include<cuda.h>

#include<cuda_runtime.h>
#include <cuda_runtime_api.h>

#include "device_launch_parameters.h"
#ifndef __CUDACC__
#define __CUDACC__
#endif

#include <device_functions.h>

#include <iostream>
#include <chrono>
#include <cuda_runtime.h>

#include <stdio.h>
#include <iostream>
#include <algorithm>

using namespace std;


__global__ void cudamult(int n, int p, int m, float* A, float* B, float* C)
{
    //int index = threadIdx.x;

    int ROW = blockIdx.y*blockDim.y + threadIdx.y;
    int COL = blockIdx.x*blockDim.x + threadIdx.x;

    float tmpSum = 0;

    if (ROW > n || COL > m)
        return;
    // each thread computes one element of the block sub-matrix
    for (int i = 0; i < p; ++i) {
        tmpSum += A[ROW * p + i] * B[i * m + COL];
    }

    printf("ROW:%d COL:%d sum: %d \n", ROW, COL, tmpSum); // DEBUG

    C[ROW * m + COL] = tmpSum;

}

extern "C"
void use_cudamult(int n, int p, int m, float* A, float* B, float* C)
{
    //int threadsPerBlock = 512;
    //int numberOfBlocks = (N + threadsPerBlock - 1) / threadsPerBlock;
    int numThreads = max(max(n, p), m);
    dim3 threadsPerBlock(numThreads, numThreads);
    dim3 blocksPerGrid(1, 1);
    //threadsPerBlock.x = 512;
    //threadsPerBlock.y = 512;
    //blocksPerGrid.x = ceil(double(N) / double(threadsPerBlock.x));
    //blocksPerGrid.y = ceil(double(N) / double(threadsPerBlock.y));


    cudamult << <blocksPerGrid, threadsPerBlock >> > (n, p, m, A, B, C);
    cudaDeviceSynchronize();
}

cuda_t.pro

QT -= gui

CONFIG += c++14 console
CONFIG -= app_bundle

# The following define makes your compiler emit warnings if you use
# any feature of Qt which as been marked deprecated (the exact warnings
# depend on your compiler). Please consult the documentation of the
# deprecated API in order to know how to port your code away from it.
DEFINES += QT_DEPRECATED_WARNINGS

# You can also make your code fail to compile if you use deprecated APIs.
# In order to do so, uncomment the following line.
# You can also select to disable deprecated APIs only up to a certain version of Qt.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
SOURCES += main.cpp


# This makes the .cu files appear in your project
OTHER_FILES +=  ./mycuda.cu


CUDA_DIR = /usr/local/cuda-10.1

CUDA_ARCH = sm_32 # as supported by the Tegra K1


INCLUDEPATH += $$CUDA_DIR/include

#LIBS += -L $$CUDA_DIR/lib64 -lcudart -lcuda

#osx: LIBS += -F/Library/Frameworks -framework CUDA


cuda.commands = $$CUDA_DIR/bin/nvcc -c -arch=$$CUDA_ARCH -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}

cuda.dependency_type = TYPE_C

cuda.depend_command = $$CUDA_DIR/bin/nvcc -M ${QMAKE_FILE_NAME}

cuda.input = CUDA_SOURCES

cuda.output = ${QMAKE_FILE_BASE}_cuda.o

QMAKE_EXTRA_COMPILERS += cuda

DISTFILES +=

HEADERS += \
    timer.h \
    matrixmul.h \
    hip_matrix_mul.h \
    cu_matrix_mul.h

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...