How to synchronize loading of textures to the GPU while the main rendering loop is running

I wish to periodically upload images to the GPU (depending on user input) while my main rendering loop is running.

My current implementation works fine if the image upload is done synchronously, but that makes the application lag while the images are being uploaded.

When the data transfer is done asynchronously, the implementation will sometimes crash on vkQueueSubmit(), likely because of a lack of synchronization between the thread uploading the images to the GPU, and the main rendering loop.

What is the correct way to apply synchronization to this?

Probable issue:

  • I record command buffers in the main loop and also in the image uploading process. What kind of synchronization is needed to allow this?

Main Rendering Loop

// Renders and presents a single frame:
//   1. waits on the in-flight fence of the current frame slot,
//   2. acquires the next swap chain image,
//   3. updates the uniform buffer and re-records the command buffer for that image,
//   4. submits the command buffer to the graphics queue,
//   5. queues the image for presentation on the present queue.
//
// Throws std::runtime_error when acquiring, submitting or presenting fails. An
// out-of-date swap chain (and, after presenting, a suboptimal one or a resized
// window) triggers recreateSwapChain() instead of an exception.
//
// NOTE(review): Vulkan requires the queue passed to vkQueueSubmit to be externally
// synchronized. If another thread submits to the same graphics queue (for example
// single-time upload commands), both call sites presumably need to share a lock.
void App::drawFrame()
{
    // Block until the previous submission that used this frame slot has signalled its fence.
    inFlightFences_[currentFrame_].wait();

    uint32_t imageIndex;
    // No timeout (max uint64); imageAvailableSemaphores_[currentFrame_] is signalled once the image can be used.
    VkResult result = vkAcquireNextImageKHR(vulkanLogicalDevice_->handle(), vulkanSwapChain_.handle(), std::numeric_limits<uint64_t>::max(), imageAvailableSemaphores_[currentFrame_].handle(), VK_NULL_HANDLE, &imageIndex);

    if (result == VK_ERROR_OUT_OF_DATE_KHR) {
        // Nothing was submitted, so the fence is deliberately left un-reset before returning.
        recreateSwapChain();
        return;
    }
    else if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR) {
        throw std::runtime_error("failed to acquire swap chain image!");
    }

    updateUniformBuffer(imageIndex);
    // The command buffer belonging to the acquired image is recorded again every frame.
    vulkanCommandBuffers_.recordCommandBuffer(imageIndex, swapChainFrameBuffers_, vulkanRenderPass_, vulkanSwapChain_, vulkanGraphicsPipeline_, vulkanVertexBuffer_, vulkanIndexBuffer_, vulkanDescriptorSets_);    

    VkSubmitInfo submitInfo = {};
    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;

    // Only the color-attachment-output stage has to wait for the acquired image.
    VkSemaphore waitSemaphores[] = { imageAvailableSemaphores_[currentFrame_].handle() };
    VkPipelineStageFlags waitStages[] = { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
    submitInfo.waitSemaphoreCount = 1;
    submitInfo.pWaitSemaphores = waitSemaphores;
    submitInfo.pWaitDstStageMask = waitStages;
    submitInfo.commandBufferCount = 1;
    submitInfo.pCommandBuffers = &vulkanCommandBuffers_[imageIndex];

    // Signalled when rendering is done; presentation waits on it below.
    VkSemaphore signalSemaphores[] = { renderFinishedSemaphores_[currentFrame_].handle() };
    submitInfo.signalSemaphoreCount = 1;
    submitInfo.pSignalSemaphores = signalSemaphores;

    // Reset only now that a submission is certain to follow.
    inFlightFences_[currentFrame_].reset(); // vkResetFences(vulkanLogicalDevice_->handle(), 1, &inFlightFences[currentFrame]);

    // The fence of this frame slot is signalled when the submitted work completes.
    if (vkQueueSubmit(vulkanLogicalDevice_->getGraphicsQueue(), 1, &submitInfo, inFlightFences_[currentFrame_].handle()) != VK_SUCCESS) {
        throw std::runtime_error("failed to submit draw command buffer!");
    }

    VkPresentInfoKHR presentInfo = {};
    presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
    presentInfo.waitSemaphoreCount = 1;
    presentInfo.pWaitSemaphores = signalSemaphores;

    VkSwapchainKHR swapChains[] = { vulkanSwapChain_.handle() };
    presentInfo.swapchainCount = 1;
    presentInfo.pSwapchains = swapChains;

    presentInfo.pImageIndices = &imageIndex;

    result = vkQueuePresentKHR(vulkanLogicalDevice_->getPresentQueue(), &presentInfo);

    // Recreate the swap chain when it no longer matches the surface or the window reported a resize.
    if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR || window_.frameBufferResized()) {
        window_.setFrameBufferResized(false);
        recreateSwapChain();
    }
    else if (result != VK_SUCCESS) {
        throw std::runtime_error("failed to present swap chain image!");
    }

    // Advance round-robin to the next frame slot.
    currentFrame_ = (currentFrame_ + 1) % MAX_FRAMES_IN_FLIGHT;
}

On user input, upload images:

// Background job: for each selected file, load it into CPU memory (Texture), create
// the full-size VulkanTexture from it, derive a 400x400 GuiTexture thumbnail from the
// full-size image and hand that thumbnail over to the parent's GUI image list.
ThreadPool::get().run([files, this]() {
    for (const auto &path : files) {
        Texture cpuImage(path);
        VulkanTexture fullSize(parent_->logicalDevice(), cpuImage, *(parent_->physicalDevice()), *(parent_->commandPool()));
        GuiTexture thumbnail(parent_->logicalDevice(), parent_->physicalDevice(), parent_->commandPool(), path, fullSize.vulkanImage(), 400, 400);

        // The list takes ownership; push_back locks its own mutex.
        parent_->guiImage().push_back(std::move(thumbnail));
    }
});

The Texture class just loads an image from a file into CPU RAM.

// Creates a width x height R8G8B8A8 texture that can be sampled in shaders and fills
// it by blitting `sourceImage` into it (nearest filtering).
//
// The image is created with optimal tiling and memory requested as DEVICE_LOCAL.
// The three steps below (transition -> blit -> transition) each go through the
// command pool's single-time command helpers, i.e. each one is submitted separately.
VulkanTexture::VulkanTexture(const std::shared_ptr<VulkanLogicalDevice>& logicalDevice, const VulkanPhysicalDevice &physicalDevice, const VulkanCommandPool &commandPool, const VulkanImage & sourceImage, uint32_t width, uint32_t height)
    : logicalDevice_(logicalDevice)
{
    const VkFormat texelFormat = VK_FORMAT_R8G8B8A8_UNORM;
    // Blit destination now, shader-sampled afterwards.
    const VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;

    vulkanImage_ = VulkanImage(logicalDevice, width, height, texelFormat, VK_IMAGE_TILING_OPTIMAL, usageFlags);
    deviceMemory_ = VulkanDeviceMemory(logicalDevice, vulkanImage_, physicalDevice, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

    // UNDEFINED -> TRANSFER_DST_OPTIMAL so the image can receive the blit.
    vulkanImage_.transitionImageLayout(commandPool, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

    // Scale the source image into this image.
    vulkanImage_.copyImageToImage(sourceImage, commandPool, VK_FILTER_NEAREST);

    // TRANSFER_DST_OPTIMAL -> SHADER_READ_ONLY_OPTIMAL so shaders can sample it.
    vulkanImage_.transitionImageLayout(commandPool, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
}

// Builds a GUI-drawable texture of width x height from `sourceImage`.
//
// Creates the scaled VulkanTexture, a sampler and an image view for it, remembers the
// originating file path, and registers sampler + view with the ImGui Vulkan backend.
// The view uses VK_FORMAT_R8G8B8A8_UNORM, the same format the VulkanTexture
// constructor creates its image with.
GuiTexture::GuiTexture(
    const std::shared_ptr<VulkanLogicalDevice>& logicalDevice,
    const std::shared_ptr<VulkanPhysicalDevice>& physicalDevice,
    const std::shared_ptr<VulkanCommandPool>& commandPool,
    const std::string &filePath,
    const VulkanImage &sourceImage,
    uint32_t width,
    uint32_t height)
    : texture_(logicalDevice, *physicalDevice, *commandPool, sourceImage, width, height)
    , sampler_(logicalDevice)
    , imgView_(logicalDevice, texture_.handle(), VK_FORMAT_R8G8B8A8_UNORM)
    , imGuiTexture_(nullptr)
    , filePath_(filePath)
{
    // Needs the fully constructed sampler and view, hence done in the body.
    imGuiTexture_ = ImGui_ImplGlfwVulkan_AddTexture(sampler_.handle(), imgView_.handle());
}


// Creates a single-sample 2D image (one mip level, one array layer, exclusive sharing)
// with the given size, format, tiling and usage. The image starts in
// VK_IMAGE_LAYOUT_UNDEFINED, which is also what the tracked layout is set to.
//
// Throws std::runtime_error if vkCreateImage does not return VK_SUCCESS.
VulkanImage::VulkanImage(const std::shared_ptr<VulkanLogicalDevice>& logicalDevice, uint32_t width, uint32_t height, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage)
    : logicalDevice_(logicalDevice)
    , image_(VK_NULL_HANDLE)
    , imageInfo_({})
    , vkImageLayout_(VK_IMAGE_LAYOUT_UNDEFINED)
{
    imageInfo_.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;

    // Geometry: a flat 2D image without mip chain or layers.
    imageInfo_.imageType = VK_IMAGE_TYPE_2D;
    imageInfo_.extent = VkExtent3D{ width, height, 1 };
    imageInfo_.mipLevels = 1;
    imageInfo_.arrayLayers = 1;
    imageInfo_.samples = VK_SAMPLE_COUNT_1_BIT;

    // Caller-selected properties.
    imageInfo_.format = format;
    imageInfo_.tiling = tiling;
    imageInfo_.usage = usage;

    // Keep the create info and the tracked layout in agreement.
    vkImageLayout_ = VK_IMAGE_LAYOUT_UNDEFINED;
    imageInfo_.initialLayout = vkImageLayout_;
    imageInfo_.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

    const VkResult created = vkCreateImage(logicalDevice->handle(), &imageInfo_, nullptr, &image_);
    if (created != VK_SUCCESS) {
        throw std::runtime_error("failed to create image!");
    }
}

void VulkanDeviceMemory::copyToGpu(void * cpuMemory, VkDeviceSize numBytes)
{
    void* gpuMemory;
    vkMapMemory(logicalDevice_->handle(), deviceMemory_, 0, numBytes, 0, &gpuMemory); // buffer.getBufferInfo().size
    memcpy(gpuMemory, cpuMemory, numBytes); // (size_t)vertexBuffer_.getBufferInfo().size
    vkUnmapMemory(logicalDevice_->handle(), deviceMemory_);
}


// Transitions the whole color image (mip level 0, array layer 0) from its currently
// tracked layout to `newLayout` using a pipeline barrier recorded into a single-time
// command buffer, and updates the tracked layout afterwards.
//
// Supported transitions:
//   UNDEFINED            -> TRANSFER_DST_OPTIMAL
//   TRANSFER_DST_OPTIMAL -> SHADER_READ_ONLY_OPTIMAL
// Any other combination throws std::invalid_argument.
//
// The transition is validated BEFORE a command buffer is requested from the pool, so
// an unsupported request no longer leaves an allocated, begun command buffer behind.
void VulkanImage::transitionImageLayout(const VulkanCommandPool & commandPool, VkImageLayout newLayout)
{
    VkImageMemoryBarrier barrier = {};
    barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    barrier.oldLayout = vkImageLayout_;
    barrier.newLayout = newLayout;
    // No queue family ownership transfer.
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.image = image_;
    barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    barrier.subresourceRange.baseMipLevel = 0;
    barrier.subresourceRange.levelCount = 1;
    barrier.subresourceRange.baseArrayLayer = 0;
    barrier.subresourceRange.layerCount = 1;

    VkPipelineStageFlags sourceStage = 0;
    VkPipelineStageFlags destinationStage = 0;

    if (vkImageLayout_ == VK_IMAGE_LAYOUT_UNDEFINED && newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
        // Nothing to wait for; transfer writes must wait for the transition.
        barrier.srcAccessMask = 0;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;

        sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
        destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
    }
    else if (vkImageLayout_ == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
        // Fragment shader reads must wait for the transfer writes.
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

        sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
        destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
    }
    else {
        throw std::invalid_argument("unsupported layout transition!");
    }

    VkCommandBuffer commandBuffer = commandPool.beginSingleTimeCommands();

    vkCmdPipelineBarrier(
        commandBuffer,
        sourceStage, destinationStage,
        0,
        0, nullptr,
        0, nullptr,
        1, &barrier
    );

    commandPool.endSingleTimeCommands(commandBuffer);

    // Only record the new layout once the barrier has been submitted.
    vkImageLayout_ = newLayout;
}


// Blits the full extent of `sourceImage` (mip 0, layer 0, color aspect) onto the full
// extent of this image, scaling with `filter` when the sizes differ. The blit is
// recorded into a single-time command buffer obtained from `commandPool`.
//
// Both images are used in their currently tracked layouts. Vulkan requires the source
// to be in TRANSFER_SRC_OPTIMAL (or GENERAL) and the destination in
// TRANSFER_DST_OPTIMAL (or GENERAL) at that point, so callers must have transitioned
// them beforehand.
void VulkanImage::copyImageToImage(const VulkanImage & sourceImage, const VulkanCommandPool &commandPool, VkFilter filter)
{
    VkImageBlit vkImgBlit = {};

    vkImgBlit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    vkImgBlit.srcSubresource.baseArrayLayer = 0;
    vkImgBlit.srcSubresource.layerCount = 1; // number of layers to copy, must be > 0
    vkImgBlit.srcSubresource.mipLevel = 0;
    // offsets[0] stays {0, 0, 0}; offsets[1] is the opposite corner of the region.
    vkImgBlit.srcOffsets[1].x = static_cast<int32_t>(sourceImage.imageInfo_.extent.width);
    vkImgBlit.srcOffsets[1].y = static_cast<int32_t>(sourceImage.imageInfo_.extent.height);
    // For 1D/2D images Vulkan requires offsets[0].z == 0 and offsets[1].z == 1;
    // leaving z at 0 describes an invalid, zero-depth region.
    vkImgBlit.srcOffsets[1].z = 1;

    vkImgBlit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    vkImgBlit.dstSubresource.baseArrayLayer = 0;
    vkImgBlit.dstSubresource.layerCount = 1;
    vkImgBlit.dstSubresource.mipLevel = 0;
    vkImgBlit.dstOffsets[1].x = static_cast<int32_t>(imageInfo_.extent.width);
    vkImgBlit.dstOffsets[1].y = static_cast<int32_t>(imageInfo_.extent.height);
    vkImgBlit.dstOffsets[1].z = 1;

    auto commandBuffer = commandPool.beginSingleTimeCommands();
    vkCmdBlitImage(commandBuffer, sourceImage.handle(), sourceImage.getImageLayout(), image_, vkImageLayout_, 1, &vkImgBlit, filter);
    commandPool.endSingleTimeCommands(commandBuffer);
}


// Allocates one primary command buffer from this pool and begins recording it with
// the ONE_TIME_SUBMIT usage flag. The caller records its commands and then hands the
// buffer to endSingleTimeCommands(), which submits and frees it.
//
// Throws std::runtime_error if the buffer cannot be allocated or recording cannot be
// started; in the latter case the buffer is freed again before throwing.
//
// NOTE(review): Vulkan requires a VkCommandPool to be externally synchronized, so a
// pool must not be used from several threads at once — use one pool per thread.
VkCommandBuffer VulkanCommandPool::beginSingleTimeCommands() const
{
    VkCommandBufferAllocateInfo allocInfo = {};
    allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    allocInfo.commandPool = commandPool_;
    allocInfo.commandBufferCount = 1;

    VkCommandBuffer commandBuffer = VK_NULL_HANDLE;
    if (vkAllocateCommandBuffers(logicalDevice_->handle(), &allocInfo, &commandBuffer) != VK_SUCCESS) {
        throw std::runtime_error("failed to allocate single-time command buffer!");
    }

    VkCommandBufferBeginInfo beginInfo = {};
    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;

    if (vkBeginCommandBuffer(commandBuffer, &beginInfo) != VK_SUCCESS) {
        // Do not leak the buffer when recording cannot start.
        vkFreeCommandBuffers(logicalDevice_->handle(), commandPool_, 1, &commandBuffer);
        throw std::runtime_error("failed to begin single-time command buffer!");
    }

    return commandBuffer;
}

// Finishes recording `commandBuffer`, submits it to the graphics queue, blocks until
// that queue is idle, and frees the command buffer back to this pool.
//
// The command buffer is freed in every case. Throws std::runtime_error if ending the
// recording, the submission, or the wait fails (these results were previously ignored).
//
// NOTE(review): Vulkan requires the queue passed to vkQueueSubmit / vkQueueWaitIdle to
// be externally synchronized. If this runs on a different thread than the render loop
// while both use the same graphics queue, the submissions need a shared lock.
void VulkanCommandPool::endSingleTimeCommands(VkCommandBuffer commandBuffer) const
{
    VkResult result = vkEndCommandBuffer(commandBuffer);

    if (result == VK_SUCCESS) {
        VkSubmitInfo submitInfo = {};
        submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
        submitInfo.commandBufferCount = 1;
        submitInfo.pCommandBuffers = &commandBuffer;

        result = vkQueueSubmit(logicalDevice_->getGraphicsQueue(), 1, &submitInfo, VK_NULL_HANDLE);
    }

    if (result == VK_SUCCESS) {
        // Wait for completion so the buffer can be freed right away.
        result = vkQueueWaitIdle(logicalDevice_->getGraphicsQueue());
    }

    vkFreeCommandBuffers(logicalDevice_->handle(), commandPool_, 1, &commandBuffer);

    if (result != VK_SUCCESS) {
        throw std::runtime_error("failed to execute single-time command buffer!");
    }
}

Store image for drawing later

// Appends `texture` to the stored textures by moving it in.
// texturesMutex_ is held for the duration of the insertion.
void GuiImages::push_back(GuiTexture &&texture)
{
    const std::lock_guard<std::mutex> guard(texturesMutex_);
    textures_.emplace_back(std::move(texture));
}

Code snippet: draws the textures with ImGui later

std::vector<ImTextureID> imTextureIDs;
for (auto & it : textures_)
    imTextureIDs.push_back(it.getImGuiTexture());


for (int i = 0; i < textures_.size(); ++i)
    {
        float last_button_x2 = ImGui::GetItemRectMax().x;
        float window_visible_x2 = ImGui::GetWindowPos().x + ImGui::GetWindowContentRegionMax().x;

        if (i != 0 && (last_button_x2 + tbnailSize) < window_visible_x2) 
            ImGui::SameLine();

        ImGui::PushID(i);

        if (ImGui::ImageButton(imTextureIDs[i], ImVec2(tbnailSize, tbnailSize), ImVec2(0, 0), ImVec2(1, 1), 0, ImColor(0, 0, 0)))
        {
            parent_->guiCanvas().setTexture(GuiTexture(parent_->logicalDevice(), parent_->physicalDevice(), parent_->commandPool(), textures_[i].getFilePath()));
        }

        //std::cout << ImGui::IsItemVisible();

        ImGui::PopID();
    }

1 answer

  • answered 2018-11-08 10:50 ratchet freak

    I record command buffers in the main loop and also in the image uploading process. What kind of synchronization is needed to allow this?

    Make sure each thread uses its own command pool, and that only one thread at a time submits to any given queue. In your case this means putting a mutex around vkQueueSubmit and vkQueueWaitIdle:

    {
        // Hold the graphics queue's mutex across both the submit and the wait so that
        // no other thread can touch the queue in between.
        const std::lock_guard<std::mutex> queueGuard(logicalDevice_->getGraphicsQueueMutex());
        vkQueueSubmit(logicalDevice_->getGraphicsQueue(), 1, &submitInfo, VK_NULL_HANDLE);
        vkQueueWaitIdle(logicalDevice_->getGraphicsQueue());
    }
    

    though it's better to use a fence to wait on it if you really need to wait right now:

    {
        // Only the submission itself needs the queue lock.
        std::unique_lock<std::mutex> lock(logicalDevice_->getGraphicsQueueMutex());
        vkQueueSubmit(logicalDevice_->getGraphicsQueue(), 1, &submitInfo, fence_);
    }
    // Wait outside the lock so other threads can keep submitting in the meantime.
    // waitAll is a VkBool32, hence VK_TRUE (TRUE is not a Vulkan constant); no timeout.
    vkWaitForFences(logicalDevice_->handle(), 1, &fence_, VK_TRUE, ~uint64_t(0));
    vkResetFences(logicalDevice_->handle(), 1, &fence_); //reset for next use
    

    or make the calling code pass the fence if it needs to wait.

    You also need to synchronize access to textures_ on the main thread.


    To comment on your code in general:

    Batch your commands!

    There is no reason to split the transition->copy->transition sequence into 3 separate command buffers, each with its own submit followed by a waitIdle.

    Instead put it in a single command buffer and use a Transfer capable queue for that. Then add a semaphore to sync it against usage in the main queue.

    auto cmdBuffer = commandPool.beginSingleTimeCommands();
    vulkanImage_.transitionImageLayout(cmdBuffer , VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); // transitionImageLayout(textureImage, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
    vulkanImage_.copyImageToImage(sourceImage, cmdBuffer, VK_FILTER_NEAREST);
    vulkanImage_.transitionImageLayout(cmdBuffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); //transitionImageLayout(textureImage, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
    commandPool.endSingleTimeCommands(commandBuffer, &semaphore); 
    

    The first time the graphics thread waits on the semaphore, set the semaphore member to null afterwards and recycle the semaphore; a null member then signals that the sync has already happened.

    Or you use a fence to wait on it before using it in another command buffer.