Merge pull request 'fix(vulkan): Async Presentation & Shader Logic' (#135) from fix/async-shader into main

Reviewed-on: https://git.citron-emu.org/Citron/Emulator/pulls/135
This commit is contained in:
Collecting
2026-02-12 00:09:59 +01:00
7 changed files with 61 additions and 49 deletions

View File

@@ -25,14 +25,14 @@ using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair; using Tegra::Texture::TexturePair;
ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_, ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_,
DescriptorPool& descriptor_pool, std::mutex& pipeline_cache_mutex_, DescriptorPool& descriptor_pool,
GuestDescriptorQueue& guest_descriptor_queue_, GuestDescriptorQueue& guest_descriptor_queue_,
Common::ThreadWorker* thread_worker, Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics, PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
vk::ShaderModule spv_module_) vk::ShaderModule spv_module_)
: device{device_}, : device{device_}, pipeline_cache(pipeline_cache_), pipeline_cache_mutex(pipeline_cache_mutex_),
pipeline_cache(pipeline_cache_), guest_descriptor_queue{guest_descriptor_queue_}, info{info_}, guest_descriptor_queue{guest_descriptor_queue_}, info{info_},
spv_module(std::move(spv_module_)) { spv_module(std::move(spv_module_)) {
if (shader_notify) { if (shader_notify) {
shader_notify->MarkShaderBuilding(); shader_notify->MarkShaderBuilding();
@@ -58,6 +58,7 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
if (device.IsKhrPipelineExecutablePropertiesEnabled()) { if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
} }
std::scoped_lock cache_lock{pipeline_cache_mutex};
pipeline = device.GetLogical().CreateComputePipeline( pipeline = device.GetLogical().CreateComputePipeline(
{ {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,

View File

@@ -3,10 +3,12 @@
#pragma once #pragma once
#include <array>
#include <atomic> #include <atomic>
#include <condition_variable> #include <condition_variable>
#include <mutex> #include <mutex>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/thread_worker.h" #include "common/thread_worker.h"
#include "shader_recompiler/shader_info.h" #include "shader_recompiler/shader_info.h"
@@ -29,7 +31,7 @@ class Scheduler;
class ComputePipeline { class ComputePipeline {
public: public:
explicit ComputePipeline(const Device& device, vk::PipelineCache& pipeline_cache, explicit ComputePipeline(const Device& device, vk::PipelineCache& pipeline_cache,
DescriptorPool& descriptor_pool, std::mutex& pipeline_cache_mutex, DescriptorPool& descriptor_pool,
GuestDescriptorQueue& guest_descriptor_queue, GuestDescriptorQueue& guest_descriptor_queue,
Common::ThreadWorker* thread_worker, Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics, PipelineStatistics* pipeline_statistics,
@@ -48,6 +50,7 @@ public:
private: private:
const Device& device; const Device& device;
vk::PipelineCache& pipeline_cache; vk::PipelineCache& pipeline_cache;
std::mutex& pipeline_cache_mutex;
GuestDescriptorQueue& guest_descriptor_queue; GuestDescriptorQueue& guest_descriptor_queue;
Shader::Info info; Shader::Info info;

View File

@@ -88,7 +88,8 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
bool IsLine(VkPrimitiveTopology topology) { bool IsLine(VkPrimitiveTopology topology) {
static constexpr std::array line_topologies{ static constexpr std::array line_topologies{
VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
VK_PRIMITIVE_TOPOLOGY_LINE_STRIP,
// VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT, // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT,
}; };
return std::ranges::find(line_topologies, topology) == line_topologies.end(); return std::ranges::find(line_topologies, topology) == line_topologies.end();
@@ -237,15 +238,16 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m
GraphicsPipeline::GraphicsPipeline( GraphicsPipeline::GraphicsPipeline(
Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
vk::PipelineCache& pipeline_cache_, VideoCore::ShaderNotify* shader_notify, vk::PipelineCache& pipeline_cache_, std::mutex& pipeline_cache_mutex_,
const Device& device_, DescriptorPool& descriptor_pool, VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool,
GuestDescriptorQueue& guest_descriptor_queue_, Common::ThreadWorker* worker_thread, GuestDescriptorQueue& guest_descriptor_queue_, Common::ThreadWorker* worker_thread,
PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages, const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos) const std::array<const Shader::Info*, NUM_STAGES>& infos)
: key{key_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, : key{key_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
pipeline_cache(pipeline_cache_), scheduler{scheduler_}, pipeline_cache(pipeline_cache_), pipeline_cache_mutex(pipeline_cache_mutex_),
guest_descriptor_queue{guest_descriptor_queue_}, spv_modules{std::move(stages)} { scheduler{scheduler_}, guest_descriptor_queue{guest_descriptor_queue_},
spv_modules{std::move(stages)} {
if (shader_notify) { if (shader_notify) {
shader_notify->MarkShaderBuilding(); shader_notify->MarkShaderBuilding();
} }
@@ -925,6 +927,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
if (device.IsKhrPipelineExecutablePropertiesEnabled()) { if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
} }
std::scoped_lock lock{pipeline_cache_mutex};
pipeline = device.GetLogical().CreateGraphicsPipeline( pipeline = device.GetLogical().CreateGraphicsPipeline(
{ {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,

View File

@@ -71,11 +71,12 @@ class GraphicsPipeline {
public: public:
explicit GraphicsPipeline( explicit GraphicsPipeline(
Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
vk::PipelineCache& pipeline_cache, VideoCore::ShaderNotify* shader_notify, vk::PipelineCache& pipeline_cache, std::mutex& pipeline_cache_mutex,
const Device& device, DescriptorPool& descriptor_pool, VideoCore::ShaderNotify* shader_notify, const Device& device,
GuestDescriptorQueue& guest_descriptor_queue, Common::ThreadWorker* worker_thread, DescriptorPool& descriptor_pool, GuestDescriptorQueue& guest_descriptor_queue,
PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, Common::ThreadWorker* worker_thread, PipelineStatistics* pipeline_statistics,
const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages, RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key,
std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos); const std::array<const Shader::Info*, NUM_STAGES>& infos);
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
@@ -131,6 +132,7 @@ private:
TextureCache& texture_cache; TextureCache& texture_cache;
BufferCache& buffer_cache; BufferCache& buffer_cache;
vk::PipelineCache& pipeline_cache; vk::PipelineCache& pipeline_cache;
std::mutex& pipeline_cache_mutex;
Scheduler& scheduler; Scheduler& scheduler;
GuestDescriptorQueue& guest_descriptor_queue; GuestDescriptorQueue& guest_descriptor_queue;

View File

@@ -677,11 +677,6 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
if (pipeline->IsBuilt()) { if (pipeline->IsBuilt()) {
return pipeline; return pipeline;
} }
if (!use_asynchronous_shaders) {
return pipeline;
}
// When asynchronous shaders are enabled, avoid blocking the main thread completely.
// Skip the draw until the pipeline is ready to prevent stutter.
return nullptr; return nullptr;
} }
@@ -769,10 +764,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
previous_stage = &program; previous_stage = &program;
} }
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
return std::make_unique<GraphicsPipeline>( auto pipeline{std::make_unique<GraphicsPipeline>(
scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device, scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, pipeline_cache_mutex,
descriptor_pool, guest_descriptor_queue, thread_worker, statistics, render_pass_cache, key, &shader_notify, device, descriptor_pool, guest_descriptor_queue, thread_worker, statistics,
std::move(modules), infos); render_pass_cache, key, std::move(modules), infos)};
return pipeline;
} catch (const vk::Exception& exception) { } catch (const vk::Exception& exception) {
if (exception.GetResult() == VK_ERROR_OUT_OF_DEVICE_MEMORY) { if (exception.GetResult() == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
@@ -804,6 +800,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
GraphicsEnvironments environments; GraphicsEnvironments environments;
GetGraphicsEnvironments(environments, graphics_key.unique_hashes); GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
std::scoped_lock lock{pools_mutex};
main_pools.ReleaseContents(); main_pools.ReleaseContents();
auto pipeline{ auto pipeline{
CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), nullptr, true)}; CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), nullptr, true)};
@@ -830,6 +827,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start}; ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
env.SetCachedSize(shader->size_bytes); env.SetCachedSize(shader->size_bytes);
std::scoped_lock lock{pools_mutex};
main_pools.ReleaseContents(); main_pools.ReleaseContents();
auto pipeline{CreateComputePipeline(main_pools, key, env, nullptr, true)}; auto pipeline{CreateComputePipeline(main_pools, key, env, nullptr, true)};
if (!pipeline || pipeline_cache_filename.empty()) { if (!pipeline || pipeline_cache_filename.empty()) {
@@ -874,9 +872,10 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
spv_module.SetObjectNameEXT(name.c_str()); spv_module.SetObjectNameEXT(name.c_str());
} }
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
return std::make_unique<ComputePipeline>(device, vulkan_pipeline_cache, descriptor_pool, return std::make_unique<ComputePipeline>(device, vulkan_pipeline_cache, pipeline_cache_mutex,
guest_descriptor_queue, thread_worker, statistics, descriptor_pool, guest_descriptor_queue, thread_worker,
&shader_notify, program.info, std::move(spv_module)); statistics, &shader_notify, program.info,
std::move(spv_module));
} catch (const vk::Exception& exception) { } catch (const vk::Exception& exception) {
if (exception.GetResult() == VK_ERROR_OUT_OF_DEVICE_MEMORY) { if (exception.GetResult() == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
@@ -904,6 +903,7 @@ void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& fi
file.write(VULKAN_CACHE_MAGIC_NUMBER.data(), VULKAN_CACHE_MAGIC_NUMBER.size()) file.write(VULKAN_CACHE_MAGIC_NUMBER.data(), VULKAN_CACHE_MAGIC_NUMBER.size())
.write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version)); .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version));
std::scoped_lock lock{pipeline_cache_mutex};
size_t cache_size = 0; size_t cache_size = 0;
std::vector<char> cache_data; std::vector<char> cache_data;
if (pipeline_cache) { if (pipeline_cache) {

View File

@@ -8,6 +8,7 @@
#include <cstddef> #include <cstddef>
#include <filesystem> #include <filesystem>
#include <memory> #include <memory>
#include <mutex>
#include <type_traits> #include <type_traits>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
@@ -176,6 +177,8 @@ public:
static constexpr u64 MEMORY_PRESSURE_COOLDOWN = 300; static constexpr u64 MEMORY_PRESSURE_COOLDOWN = 300;
ShaderPools main_pools; ShaderPools main_pools;
std::mutex pools_mutex;
std::mutex pipeline_cache_mutex;
Shader::Profile profile; Shader::Profile profile;
Shader::HostTranslateInfo host_info; Shader::HostTranslateInfo host_info;

View File

@@ -174,7 +174,7 @@ bool Swapchain::AcquireNextImage() {
break; break;
} }
scheduler.Wait(resource_ticks[image_index]); scheduler.GetMasterSemaphore().Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick(); resource_ticks[image_index] = scheduler.CurrentTick();
return is_suboptimal || is_outdated; return is_suboptimal || is_outdated;