Question

Хорошо, на данный момент я могу создать массив на хост-компьютере (типа float) и скопировать его в gpu, а затем вернуть его на хост как другой массив (чтобы проверить, была ли копия успешной, сравнив к оригиналу).

Затем я создаю массив CUDA из массива на графическом процессоре. Затем я связываю этот массив с текстурой CUDA.

Теперь я хочу прочитать эту текстуру обратно и сравнить с исходным массивом (снова, чтобы проверить, правильно ли он скопирован). Я видел пример кода, который использует функцию readTexel(), показанную ниже. Кажется, это не работает для меня ... (в основном все работает, за исключением раздела в функции bindToTexture(float* deviceArray), начинающегося со строки readTexels(SIZE, testArrayDevice)).

Есть предложения по другому способу сделать это? Или есть какие-то очевидные проблемы, которые я пропустил в своем коде?

Спасибо за помощь, ребята!

#include <stdio.h>
#include <assert.h>
#include <cuda.h>

#define SIZE 20;

//Create a channel description to use.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

//Create a texture to use.
texture<float, 2, cudaReadModeElementType> cudaTexture;
//cudaTexture.filterMode = cudaFilterModeLinear;
//cudaTexture.normalized = false;

__global__ void readTexels(int amount, float *Array)
{
  int index = blockIdx.x * blockDim.x + threadIdx.x;

  if (index < amount)
  {
    float x = tex1D(cudaTexture, float(index));
    Array[index] = x;
  }
}

float* copyToGPU(float* hostArray, int size)
{
  //Create pointers, one for the array to be on the device, and one for bringing it back to the host for testing.
  float* deviceArray;
  float* testArray;

  //Allocate some memory for the two arrays so they don't get overwritten.
  testArray = (float *)malloc(sizeof(float)*size);

  //Allocate some memory for the array to be put onto the GPU device.
  cudaMalloc((void **)&deviceArray, sizeof(float)*size);

  //Actually copy the array from hostArray to deviceArray.
  cudaMemcpy(deviceArray, hostArray, sizeof(float)*size, cudaMemcpyHostToDevice);

  //Copy the deviceArray back to testArray in host memory for testing.
  cudaMemcpy(testArray, deviceArray, sizeof(float)*size, cudaMemcpyDeviceToHost);

  //Make sure contents of testArray match the original contents in hostArray.
  for (int i = 0; i < size; i++)
  {
    if (hostArray[i] != testArray[i])
    {
      printf("Location [%d] does not match in hostArray and testArray.\n", i);
    }
  }

  //Don't forget free these arrays after you're done!
  free(testArray);

  return deviceArray; //TODO: FREE THE DEVICE ARRAY VIA cudaFree(deviceArray);
}

cudaArray* bindToTexture(float* deviceArray)
{
  //Create a CUDA array to translate deviceArray into.
  cudaArray* cuArray;

  //Allocate memory for the CUDA array.
  cudaMallocArray(&cuArray, &cudaTexture.channelDesc, SIZE, 1);

  //Copy the deviceArray into the CUDA array.
  cudaMemcpyToArray(cuArray, 0, 0, deviceArray, sizeof(float)*SIZE, cudaMemcpyHostToDevice);

  //Release the deviceArray
  cudaFree(deviceArray);

  //Bind the CUDA array to the texture.
  cudaBindTextureToArray(cudaTexture, cuArray);

  //Make a test array on the device and on the host to verify that the texture has been saved correctly.
  float* testArrayDevice;
  float* testArrayHost;

  //Allocate memory for the two test arrays.
  cudaMalloc((void **)&testArray, sizeof(float)*SIZE);
  testArrayHost = (float *)malloc(sizeof(float)*SIZE);

  //Read the texels of the texture to the test array in the device.
  readTexels(SIZE, testArrayDevice);

  //Copy the device test array to the host test array.
  cudaMemcpy(testArrayHost, testArrayDevice, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);

  //Print contents of the array out.
  for (int i = 0; i < SIZE; i++)
  {
    printf("%f\n", testArrayHost[i]);
  }

  //Free the memory for the test arrays.
  free(testArrayHost);
  cudaFree(testArrayDevice);

  return cuArray; //TODO: UNBIND THE CUDA TEXTURE VIA cudaUnbindTexture(cudaTexture);
  //TODO: FREE THE CUDA ARRAY VIA cudaFree(cuArray);
}


int main(void)
{
  float* hostArray;

  hostArray = (float *)malloc(sizeof(float)*SIZE);

  for (int i = 0; i < SIZE; i++)
  {
    hostArray[i] = 10.f + i;
  }

  float* deviceAddy = copyToGPU(hostArray, SIZE);

  free(hostArray);

  return 0;
}

cibercitizen1 · Answer 1 · 06 марта 2010

Коротко:

------------- в вашем main.cu ----------------------------- -------------------------------------------------- --------

-1. Определить текстуру как глобальную переменную


       texture refTexture; // global variable !
       // meaning: address the texture with (x,y) (2D) and get an unsinged int

В основной функции:

-2. Используйте массивы в сочетании с текстурой <pre> cudaArray* myArray; // declar. // ask for memory cudaMallocArray ( &myArray, &refTex.channelDesc, /* with this you don't need to fill a channel descriptor */ width, height);

-3. копировать данные из процессора в графический процессор (в массив) <pre> cudaMemcpyToArray ( arrayCudaEntrada, // destination: the array 0, 0, // offsets sourceData, // pointer uint* widthheightsizeof(uint), // total amount of bytes to be copied cudaMemcpyHostToDevice);

-4. связать текстуру и массив <pre> cudaBindTextureToArray( refTex,arrayCudaEntrada)

-5. изменить некоторые параметры в текстуре <pre> refTextura_In.normalized = false; // don't automatically convert fetched data to [0,1[ refTextura_In.addressMode[0] = cudaAddressModeClamp; // if my indexing is out of bounds: automatically use a valid indexing (0 if negative index, last if too great index) refTextura_In.addressMode[1] = cudaAddressModeClamp; 

---------- в ядре ---------------------------------- ----------------------

<pre> // find out indexes (f,c) to process by this thread uint f = (blockIdx.x * blockDim.x) + threadIdx.x; uint c = (blockIdx.y * blockDim.y) + threadIdx.y; <pre><code> // this is curious and necessary: indexes for reading from a texture // are floats !. Even if you are certain to access (4,5) you have // match the "center" this is (4.5, 5.5) uint read = tex2D( refTex, c+0.5f, f+0.5f); // texRef is a global variable

Теперь вы обрабатываете чтение и запись результатов в другую глобальную зону устройства. память, а не сама текстура!

Tom · Answer 2 · 01 марта 2010

readTexels() - это функция ядра (__global__), то есть она работает на графическом процессоре. Поэтому вам нужно использовать правильный синтаксис для запуска ядра.

Просмотрите руководство по программированию CUDA и некоторые примеры SDK, которые доступны на сайте NVIDIA CUDA , чтобы узнать, как запустить ядро.

Подсказка: в итоге получится что-то вроде readTexels<<<grid,block>>>(...)

Как прочитать текстуру CUDA для тестирования?

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 2 ]

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Как прочитать текстуру CUDA для тестирования?

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 2 ]

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Похожие темы