Question

Я пытаюсь изучить Metal для научных вычислений. Я попытался написать простое ядро морфологической дилатации. Проблема, с которой я сталкиваюсь, заключается в том, что объём используемой памяти увеличивается на пару килобайт при каждом вызове dilate для изображения.

Я проверил утечку памяти, запустив метод dilate в цикле for для 10000 итераций, и наблюдал, как выделенная память в отладочном навигаторе Xcode увеличилась с 16 МБ до 17 МБ.

Видите ли вы в моём коде что-нибудь, что могло бы приводить к утечке памяти? Я также выложил проект на GitHub на случай, если это поможет.

/// Metal-backed implementation of Morphology for 8-bit, single-channel images.
///
/// All Metal objects (device, queue, pipeline, textures, kernel-size buffer) are
/// created once in the constructor for a fixed image size and reused by every
/// call to dilate().
class MorphologyIOS : public Morphology
{
public:
    /// Builds the compute pipeline and allocates the input/output textures.
    ///
    /// @param kernel Kernel size; copied into a Metal buffer bound at buffer index 0.
    /// @param width  Width in pixels of the images that will be processed.
    /// @param height Height in pixels of the images that will be processed.
    MorphologyIOS(
        const uint kernel,
        const uint width,
        const uint height
    ) {
        device_ = MTLCreateSystemDefaultDevice();
        kernelSize_ = kernel;
        // Size the buffer from the member itself and use the MTLResourceOptions
        // constant that this parameter expects.
        buffer_ = [device_ newBufferWithBytes:&kernelSize_
                                       length:sizeof(kernelSize_)
                                      options:MTLResourceStorageModeShared];
        library_ = [device_ newDefaultLibrary];
        commandQueue_ = [device_ newCommandQueue];
        // NOTE(review): the pipeline is built from the "identity" function, not
        // from a function named "dilation" — confirm this is intended.
        identityFunction_ = [library_ newFunctionWithName:@"identity"];

        MTLTextureDescriptor* readDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormat::MTLPixelFormatR8Uint
            width:width height:height mipmapped:false];

        MTLTextureDescriptor* writeDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormat::MTLPixelFormatR8Uint
            width:width height:height mipmapped:false];

        [writeDesc setUsage:MTLTextureUsageShaderWrite];

        inTexture_ = [device_ newTextureWithDescriptor:readDesc];
        outTexture_ = [device_ newTextureWithDescriptor:writeDesc];

        entireImage_ = MTLRegionMake2D(0, 0, width, height);

        // Capture the error instead of discarding it so a failed pipeline build
        // is visible; dilate() checks pipelineState_ before using it.
        NSError* pipelineError = nil;
        pipelineState_ = [device_ newComputePipelineStateWithFunction:identityFunction_ error:&pipelineError];
        if (pipelineState_ == nil) {
            NSLog(@"MorphologyIOS: failed to create compute pipeline state: %@", pipelineError);
        }
    }

    virtual ~MorphologyIOS() override {}

    /// Runs the compute pipeline on one image and returns the result.
    ///
    /// @param inImage Tightly packed image of width * height bytes (one byte per
    ///                pixel, bytesPerRow == width) matching the size given to the
    ///                constructor.
    /// @return A newly allocated buffer of width * height bytes holding the
    ///         contents of the output texture, or an empty pointer if inImage is
    ///         null, the pipeline could not be created, or allocation failed.
    virtual std::shared_ptr<unsigned char> dilate(
        const std::shared_ptr<unsigned char>& inImage
    ) override {
        if (!inImage || pipelineState_ == nil) {
            return std::shared_ptr<unsigned char>();
        }

        const size_t bytesPerRow = outTexture_.width;
        const size_t byteCount = bytesPerRow * outTexture_.height;

        // The buffer comes from malloc, so it must be released with free();
        // shared_ptr's default deleter would call delete, which is undefined
        // behaviour for malloc-ed memory.
        std::shared_ptr<unsigned char> outImage(
            static_cast<unsigned char*>(malloc(byteCount)), free);
        if (!outImage) {
            return outImage;
        }

        @autoreleasepool
        {
            // Per-call objects are kept as locals so nothing keeps them alive
            // after this scope ends.
            id<MTLCommandBuffer> commandBuffer = [commandQueue_ commandBuffer];
            id<MTLComputeCommandEncoder> commandEncoder = [commandBuffer computeCommandEncoder];
            [commandEncoder setComputePipelineState:pipelineState_];

            [inTexture_ replaceRegion:entireImage_ mipmapLevel:0 withBytes:inImage.get() bytesPerRow:bytesPerRow];

            [commandEncoder setTexture:inTexture_ atIndex:0];
            [commandEncoder setTexture:outTexture_ atIndex:1];
            [commandEncoder setBuffer:buffer_ offset:0 atIndex:0];

            // Integer division: when the texture size is not a multiple of the
            // threadgroup size, the trailing columns/rows are not dispatched.
            // NOTE(review): rounding up is only safe if the kernel bounds-checks
            // its thread position; verify against the kernel before changing.
            MTLSize threadGroupCount = MTLSizeMake(10, 10, 1);
            MTLSize threadGroups = MTLSizeMake(inTexture_.width / threadGroupCount.width,
                inTexture_.height / threadGroupCount.height, 1);

            [commandEncoder dispatchThreadgroups:threadGroups threadsPerThreadgroup:threadGroupCount];
            [commandEncoder endEncoding];
            [commandBuffer commit];
            // Block until the GPU has finished so the output texture can be read.
            [commandBuffer waitUntilCompleted];

            [outTexture_ getBytes:outImage.get() bytesPerRow:bytesPerRow fromRegion:entireImage_ mipmapLevel:0];
        }

        return outImage;
    }
private:
    id<MTLDevice> device_;
    uint kernelSize_;                            // kernel size passed to the constructor
    id<MTLBuffer> buffer_;                       // holds a copy of kernelSize_ for the shader
    id<MTLLibrary> library_;
    id<MTLComputePipelineState> pipelineState_;  // nil if pipeline creation failed
    id<MTLCommandQueue> commandQueue_;
    id<MTLFunction> identityFunction_;
    id<MTLTexture> inTexture_;                   // bound at texture index 0
    id<MTLTexture> outTexture_;                  // bound at texture index 1
    MTLRegion entireImage_;                      // region covering the full width x height image
};

И мое ядро выглядит так:

// Morphological dilation: each thread writes to outTexture, at its own grid
// position, the maximum first-channel value found in the input window centred
// on that position. The window extends kernelSize[0] / 2 texels in every
// direction and is clipped to the texture bounds.
//
// inTexture   texture(0), read-only source image
// outTexture  texture(1), write-only destination image
// kernelSize  buffer(0), kernelSize[0] is the full kernel size
// gid         position of this thread in the dispatch grid
kernel void dilation(
    texture2d<uint, access::read> inTexture [[texture(0)]],
    texture2d<uint, access::write> outTexture [[texture(1)]],
    device uint *kernelSize [[buffer(0)]],
    uint2 gid [[thread_position_in_grid]]
) {
    const uint width = inTexture.get_width();
    const uint height = inTexture.get_height();

    // Threads that fall outside either texture have nothing to read or write
    // (this happens when the grid is larger than the image).
    if (gid.x >= width || gid.y >= height ||
        gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height())
    {
        return;
    }

    const uint halfKernel = kernelSize[0] / 2;
    const uint minX = gid.x >= halfKernel ? gid.x - halfKernel : 0;
    const uint minY = gid.y >= halfKernel ? gid.y - halfKernel : 0;
    // The loops below are inclusive, so the upper bound must be the last valid
    // index (size - 1), not the size itself. Comparing against the remaining
    // distance to the edge also avoids overflow in gid + halfKernel.
    const uint maxX = halfKernel < width - gid.x ? gid.x + halfKernel : width - 1;
    const uint maxY = halfKernel < height - gid.y ? gid.y + halfKernel : height - 1;

    uint maxValue = 0;
    for (uint i = minX; i <= maxX; i++)
    {
        for (uint j = minY; j <= maxY; j++)
        {
            uint4 value = inTexture.read(uint2(i, j));
            if (maxValue < value[0])
                maxValue = value[0];
        }
    }
    outTexture.write(maxValue, gid);
}

warrenm · Answer 1 · 30 октября 2018

Это не столько ошибка, сколько артефакт слоя захвата/валидации (capture/validation layer), который ведёт некоторый служебный учёт от вашего имени. Поскольку при реальном использовании этого не происходит, беспокоиться, вероятно, не о чем.

Утечка памяти в простой программе на Metal

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

Утечка памяти в простой программе на Metal

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

Похожие темы