#pragma omp parallel for не ускоряет мою программу - PullRequest
0 голосов
/ 06 февраля 2020

Я попытался добавить #pragma omp parallel for в эту программу трассировки лучей, но измеряемое время обработки с директивой и без неё получается одинаковым или очень близким.

Это функция:

 // Renders the scene and encodes the result as a PNG image.
 //
 // The image is traced into an oversampled ImageBuffer that is
 // antiAliasFactor times larger than the requested size in each
 // dimension.  Pixels whose trace raised AmbiguousIntersectionException
 // are collected and passed to ResolveAmbiguousPixel afterwards.  Each
 // antiAliasFactor x antiAliasFactor patch is then averaged into one
 // RGBA output pixel, scaled by the buffer's maximum color value, and
 // the bytes are handed to lodepng::encode.
 //
 // Parameters:
 //   outPngFileName  - presumably the output path given to
 //                     lodepng::encode (the call's arguments are not
 //                     visible in this listing; TODO confirm).
 //   pixelsWide      - width of the output image in pixels.
 //   pixelsHigh      - height of the output image in pixels.
 //   zoom            - multiplied by antiAliasFactor and the smaller
 //                     output dimension to form the divisor that maps
 //                     pixel offsets to ray direction components.
 //   antiAliasFactor - oversampling factor per dimension.
 //
 // Throws ImagerException when lodepng::encode reports a nonzero error.
 //
 // NOTE(review): in this listing the braces, the `try` that pairs with
 // the `catch` below, and the argument lists of TraceRay(...) and
 // lodepng::encode(...) are not visible; the comments describe only
 // what can be seen here.
 void Scene::SaveImage(
        const char *outPngFileName, 
        int pixelsWide, 
        int pixelsHigh, 
        double zoom, 
        int antiAliasFactor) const
        // Oversample the image using the anti-aliasing factor.

        const int largePixelsWide = antiAliasFactor * pixelsWide;
        const int largePixelsHigh = antiAliasFactor * pixelsHigh;
        const int smallerDim = 
            ((pixelsWide < pixelsHigh) ? pixelsWide : pixelsHigh);

        // Divisor that converts oversampled pixel offsets from the image
        // center into x/y components of the ray direction.
        const double largeZoom  = antiAliasFactor * zoom * smallerDim;
        ImageBuffer buffer(largePixelsWide, largePixelsHigh, backgroundColor);

        // The camera is located at the origin.
        Vector camera(0.0, 0.0, 0.0);

        // The camera faces in the -z direction.
        // This allows the +x direction to be to the right,
        // and the +y direction to be upward.
        Vector direction(0.0, 0.0, -1.0);

        const Color fullIntensity(1.0, 1.0, 1.0);

        // We keep a list of (i,j) screen coordinates for pixels
        // we are not able to trace definitive rays for.
        // Later we will come back and fix these pixels.
        PixelList ambiguousPixelList;

        // NOTE(review): `direction` is declared above, outside the loop,
        // and is written by every iteration; `activeDebugPoint` is
        // assigned inside the loop; and `ambiguousPixelList.push_back`
        // is called inside it.  Under OpenMP's default data-sharing rules,
        // variables declared outside the parallel construct are shared
        // between threads, so these writes look like data races once the
        // loop runs on several threads.  Consider a per-iteration copy of
        // `direction` and a critical section (or per-thread lists) around
        // the push_back.  TODO confirm against the full source.
        #pragma omp parallel for 
        for (int i=0; i < largePixelsWide; ++i)
            // Horizontal offset of column i from the image center.
            direction.x = (i - largePixelsWide/2.0) / largeZoom;
            for (int j = 0; j < largePixelsHigh; ++j)
                // Vertical offset of row j from the image center; row 0
                // maps to the largest y value.
                direction.y = (largePixelsHigh / 2.0 - j) / largeZoom;

                    using namespace std;

                    // Assume no active debug point unless we find one below.
                    activeDebugPoint = NULL;    

                    // Scan the whole debug point list for an entry that
                    // matches the current pixel coordinates.
                    DebugPointList::const_iterator iter = debugPointList.begin();
                    DebugPointList::const_iterator end  = debugPointList.end();
                    for(; iter != end; ++iter)
                        if ((iter->iPixel == i) && (iter->jPixel == j))
                            cout << endl;
                            cout << "Hit breakpoint at (";
                            cout << i << ", " << j <<")" << endl;
                            activeDebugPoint = &(*iter);

                PixelData& pixel = buffer.Pixel(i,j);
                    // Trace a ray from the camera toward the given direction
                    // to figure out what color to assign to this pixel.
                    pixel.color = TraceRay(
                catch (AmbiguousIntersectionException)
                    // Getting here means that somewhere in the recursive
                    // code for tracing rays, there were multiple
                    // intersections that had minimum distance from a
                    // vantage point.  This can be really bad,
                    // for example causing a ray of light to reflect
                    // inward into a solid.

                    // Mark the pixel as ambiguous, so that any other
                    // ambiguous pixels nearby know not to use it.
                    pixel.isAmbiguous = true;

                    // Keep a list of all ambiguous pixel coordinates
                    // so that we can rapidly enumerate through them
                    // in the disambiguation pass.
                    ambiguousPixelList.push_back(PixelCoordinates(i, j));

        // Leave no chance of a dangling pointer into debug points.
        activeDebugPoint = NULL;

        // Go back and "heal" ambiguous pixels as best we can.
        PixelList::const_iterator iter = ambiguousPixelList.begin();
        PixelList::const_iterator end  = ambiguousPixelList.end();
        for (; iter != end; ++iter)
            const PixelCoordinates& p = *iter;
            ResolveAmbiguousPixel(buffer, p.i, p.j);

        // We want to scale the arbitrary range of
        // color component values to the range 0..255
        // allowed by PNG format.  We therefore find
        // the maximum red, green, or blue value anywhere
        // in the image.
        const double max = buffer.MaxColorValue();

        // Downsample the image buffer to an integer array of RGBA
        // values that LodePNG understands.
        const unsigned char OPAQUE_ALPHA_VALUE = 255;
        const unsigned BYTES_PER_PIXEL = 4;

        // The number of bytes in buffer to be passed to LodePNG.
        const unsigned RGBA_BUFFER_SIZE = 
            pixelsWide * pixelsHigh * BYTES_PER_PIXEL;

        std::vector<unsigned char> rgbaBuffer(RGBA_BUFFER_SIZE);
        unsigned rgbaIndex = 0;
        // Number of oversampled pixels averaged into one output pixel.
        const double patchSize = antiAliasFactor * antiAliasFactor;
        // NOTE(review): the disabled pragmas below are not spelled as
        // OpenMP directives (`omp` is missing or misplaced).  The loop
        // also advances the single running index `rgbaIndex` on every
        // output pixel, so iterations depend on each other as written.
        //#pragma parallel for collapse(3)
        for (int j=0; j < pixelsHigh; ++j)
            for (int i=0; i < pixelsWide; ++i)
                // Accumulate the colors of the patch that maps to
                // output pixel (i, j), then average them.
                Color sum(0.0, 0.0, 0.0);
                for (int di=0; di < antiAliasFactor; ++di)
                    //#pragma parallel omp for reduction(+:sum)
                    for (int dj=0; dj < antiAliasFactor; ++dj)
                        sum += buffer.Pixel(
                            antiAliasFactor*i + di, 
                            antiAliasFactor*j + dj).color;
                sum /= patchSize;

                // Convert to integer red, green, blue, alpha values,
                // all of which must be in the range 0..255.
                rgbaBuffer[rgbaIndex++] = ConvertPixelValue(sum.red,   max);
                rgbaBuffer[rgbaIndex++] = ConvertPixelValue(sum.green, max);
                rgbaBuffer[rgbaIndex++] = ConvertPixelValue(sum.blue,  max);
                rgbaBuffer[rgbaIndex++] = OPAQUE_ALPHA_VALUE;

        // Write the PNG file
        const unsigned error = lodepng::encode(

        // If there was an encoding error, throw an exception.
        if (error != 0)
            std::string message = "PNG encoder error: ";
            message += lodepng_error_text(error);
            throw ImagerException(message.c_str());


    // The following function searches through all solid objects
    // for the first solid (if any) that contains the given point.
    // In the case of ties, the solid that was inserted into the
    // scene first wins.  This arbitrary convention allows the
    // composer of a scene to decide which of multiple overlapping
    // objects should control the index of refraction for any
    // overlapping volumes of space.
    const SolidObject* Scene::PrimaryContainer(const Vector& point) const
    {
        // Visit the solids in solidObjectList order and stop at the first
        // one that reports containing the point, so that when several
        // solids overlap the earliest entry in the list is returned.
        SolidObjectList::const_iterator iter = solidObjectList.begin();
        SolidObjectList::const_iterator end  = solidObjectList.end();
        for (; iter != end; ++iter)
        {
            const SolidObject* solid = *iter;
            if (solid->Contains(point))
            {
                return solid;
            }
        }

        // No solid in the list contains the point.
        return NULL;
    }

Это фрагмент, к которому я добавил его:

        // NOTE(review): this fragment assigns to `direction` inside the
        // parallel loop but does not declare it.  If `direction` is
        // declared outside the loop, OpenMP treats it as shared between
        // threads by default, and these assignments would race; a
        // per-iteration copy would avoid that.  TODO confirm.
        #pragma omp parallel for 
        for (int i=0; i < largePixelsWide; ++i)
            direction.x = (i - largePixelsWide/2.0) / largeZoom;
            for (int j = 0; j < largePixelsHigh; ++j)
                direction.y = (largePixelsHigh / 2.0 - j) / largeZoom;

Я убедился, что все мои настройки VS17 верны и что в аргументах команды указано -fopenmp main.cpp. Дополнительная информация: этот код находится в другом файле, scene.cpp; заголовочный файл omp подключён в scene.cpp.

Мои вопросы: как мне заставить это работать и, если есть другие места, куда можно добавить директиву, где и как мне это сделать? Спасибо.
