mirror of
https://git.eden-emu.dev/archive/citron
synced 2026-04-20 03:30:45 -04:00
Merge branch 'vulkan-descriptor-queue-optimization' into 'master'
vulkan: Optimize descriptor update queue performance See merge request citron/rewrite!55
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <variant>
|
#include <variant>
|
||||||
@@ -14,31 +15,109 @@ namespace Vulkan {
|
|||||||
|
|
||||||
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
|
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
|
||||||
: device{device_}, scheduler{scheduler_} {
|
: device{device_}, scheduler{scheduler_} {
|
||||||
payload_start = payload.data();
|
|
||||||
payload_cursor = payload.data();
|
payload = std::make_unique<DescriptorUpdateEntry[]>(PAYLOAD_SIZE);
|
||||||
|
payload_start = payload.get();
|
||||||
|
payload_cursor = payload_start;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
|
UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
|
||||||
|
|
||||||
void UpdateDescriptorQueue::TickFrame() {
|
void UpdateDescriptorQueue::TickFrame() {
|
||||||
|
|
||||||
|
total_entries_processed += GetCurrentSize();
|
||||||
|
|
||||||
if (++frame_index >= FRAMES_IN_FLIGHT) {
|
if (++frame_index >= FRAMES_IN_FLIGHT) {
|
||||||
frame_index = 0;
|
frame_index = 0;
|
||||||
}
|
}
|
||||||
payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE;
|
payload_start = payload.get() + frame_index * FRAME_PAYLOAD_SIZE;
|
||||||
payload_cursor = payload_start;
|
payload_cursor = payload_start;
|
||||||
|
|
||||||
|
if (frame_index == 0 && overflow_events > 0) {
|
||||||
|
LOG_DEBUG(Render_Vulkan, "Descriptor queue stats: {} entries processed, {} overflow events",
|
||||||
|
total_entries_processed, overflow_events);
|
||||||
|
total_entries_processed = 0;
|
||||||
|
overflow_events = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdateDescriptorQueue::Acquire() {
|
void UpdateDescriptorQueue::Acquire() {
|
||||||
// Minimum number of entries required.
|
|
||||||
// This is the maximum number of entries a single draw call might use.
|
static constexpr size_t MIN_ENTRIES = 0x800;
|
||||||
static constexpr size_t MIN_ENTRIES = 0x400;
|
|
||||||
|
|
||||||
if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
|
if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
|
||||||
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
|
HandleOverflow();
|
||||||
scheduler.WaitWorker();
|
|
||||||
payload_cursor = payload_start;
|
|
||||||
}
|
}
|
||||||
upload_start = payload_cursor;
|
upload_start = payload_cursor;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
void UpdateDescriptorQueue::EnsureCapacity(size_t required_entries) {
|
||||||
|
if (std::distance(payload_start, payload_cursor) + required_entries >= FRAME_PAYLOAD_SIZE) {
|
||||||
|
HandleOverflow();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Records an overflow, waits for the scheduler worker and rewinds the write cursor to the
/// start of the current frame slice.
///
/// Two counters are bumped: `overflow_events` and the atomic `overflow_count`, whose value
/// is reported in the warning message.
void UpdateDescriptorQueue::HandleOverflow() {
    ++overflow_events;
    overflow_count.fetch_add(1, std::memory_order_relaxed);

    const size_t overflows_so_far = overflow_count.load(std::memory_order_relaxed);
    LOG_WARNING(Render_Vulkan, "Descriptor payload overflow ({}), waiting for worker thread",
                overflows_so_far);

    scheduler.WaitWorker();

    // NOTE(review): only the write cursor is rewound; upload_start keeps its previous value.
    // When this runs from EnsureCapacity() in the middle of a batch, entries written before
    // and after the rewind are no longer contiguous - confirm that consumers tolerate this.
    payload_cursor = payload_start;
}
|
||||||
|
|
||||||
|
void GuestDescriptorQueue::PreAllocateForFrame(size_t estimated_entries) {
|
||||||
|
|
||||||
|
if (estimated_entries > 0 && estimated_entries <= FRAME_PAYLOAD_SIZE / 2) {
|
||||||
|
|
||||||
|
payload_cursor += estimated_entries;
|
||||||
|
|
||||||
|
LOG_DEBUG(Render_Vulkan, "Pre-allocated {} entries for guest frame", estimated_entries);
|
||||||
|
} else if (estimated_entries > FRAME_PAYLOAD_SIZE / 2) {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Estimated entries ({}) too large for pre-allocation", estimated_entries);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GuestDescriptorQueue::OptimizeForGuestMemory() {
|
||||||
|
|
||||||
|
if (payload_cursor != payload_start) {
|
||||||
|
payload_cursor = payload_start;
|
||||||
|
LOG_DEBUG(Render_Vulkan, "Optimized guest memory layout - reset cursor to frame start");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (overflow_events > 10) {
|
||||||
|
LOG_INFO(Render_Vulkan, "High overflow events ({}), consider increasing frame payload size", overflow_events);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ComputePassDescriptorQueue::PreAllocateForComputePass(size_t estimated_entries) {
|
||||||
|
|
||||||
|
if (estimated_entries > 0 && estimated_entries <= FRAME_PAYLOAD_SIZE / 4) {
|
||||||
|
payload_cursor += estimated_entries;
|
||||||
|
|
||||||
|
LOG_DEBUG(Render_Vulkan, "Pre-allocated {} entries for compute pass", estimated_entries);
|
||||||
|
} else if (estimated_entries > FRAME_PAYLOAD_SIZE / 4) {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Estimated compute entries ({}) too large for pre-allocation", estimated_entries);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ComputePassDescriptorQueue::OptimizeForComputeWorkload() {
|
||||||
|
|
||||||
|
const size_t current_usage = GetCurrentSize();
|
||||||
|
const size_t usage_threshold = FRAME_PAYLOAD_SIZE / 4;
|
||||||
|
|
||||||
|
if (current_usage < usage_threshold && current_usage > 0) {
|
||||||
|
payload_cursor = payload_start;
|
||||||
|
LOG_DEBUG(Render_Vulkan, "Optimized compute workload - reset for better memory efficiency (usage: {}/{})",
|
||||||
|
current_usage, FRAME_PAYLOAD_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (overflow_events > 5) {
|
||||||
|
LOG_INFO(Render_Vulkan, "Compute pass overflow events: {}, consider batch optimization", overflow_events);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Vulkan
|
||||||
@@ -1,9 +1,13 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <atomic>
|
||||||
|
#include <memory>
|
||||||
|
#include <span>
|
||||||
|
|
||||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||||
|
|
||||||
@@ -28,16 +32,10 @@ struct DescriptorUpdateEntry {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
class UpdateDescriptorQueue final {
|
class UpdateDescriptorQueue {
|
||||||
// This should be plenty for the vast majority of cases. Most desktop platforms only
|
|
||||||
// provide up to 3 swapchain images.
|
|
||||||
static constexpr size_t FRAMES_IN_FLIGHT = 8;
|
|
||||||
static constexpr size_t FRAME_PAYLOAD_SIZE = 0x20000;
|
|
||||||
static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
|
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
|
||||||
~UpdateDescriptorQueue();
|
virtual ~UpdateDescriptorQueue();
|
||||||
|
|
||||||
void TickFrame();
|
void TickFrame();
|
||||||
|
|
||||||
@@ -48,6 +46,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
|
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
|
||||||
|
EnsureCapacity(1);
|
||||||
*(payload_cursor++) = VkDescriptorImageInfo{
|
*(payload_cursor++) = VkDescriptorImageInfo{
|
||||||
.sampler = sampler,
|
.sampler = sampler,
|
||||||
.imageView = image_view,
|
.imageView = image_view,
|
||||||
@@ -56,6 +55,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AddImage(VkImageView image_view) {
|
void AddImage(VkImageView image_view) {
|
||||||
|
EnsureCapacity(1);
|
||||||
*(payload_cursor++) = VkDescriptorImageInfo{
|
*(payload_cursor++) = VkDescriptorImageInfo{
|
||||||
.sampler = VK_NULL_HANDLE,
|
.sampler = VK_NULL_HANDLE,
|
||||||
.imageView = image_view,
|
.imageView = image_view,
|
||||||
@@ -64,6 +64,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
|
void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
|
||||||
|
EnsureCapacity(1);
|
||||||
*(payload_cursor++) = VkDescriptorBufferInfo{
|
*(payload_cursor++) = VkDescriptorBufferInfo{
|
||||||
.buffer = buffer,
|
.buffer = buffer,
|
||||||
.offset = offset,
|
.offset = offset,
|
||||||
@@ -72,10 +73,56 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AddTexelBuffer(VkBufferView texel_buffer) {
|
void AddTexelBuffer(VkBufferView texel_buffer) {
|
||||||
|
EnsureCapacity(1);
|
||||||
*(payload_cursor++) = texel_buffer;
|
*(payload_cursor++) = texel_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
/// Queues one image descriptor per entry of `image_views`, all using `sampler` and the
/// VK_IMAGE_LAYOUT_GENERAL layout.
///
/// Capacity for the whole span is requested once via EnsureCapacity() before writing.
void AddSampledImages(std::span<const VkImageView> image_views, VkSampler sampler) {
    EnsureCapacity(image_views.size());
    for (const VkImageView view : image_views) {
        *payload_cursor = VkDescriptorImageInfo{
            .sampler = sampler,
            .imageView = view,
            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
        };
        ++payload_cursor;
    }
}
|
||||||
|
|
||||||
|
/// Queues one buffer descriptor per entry of `buffers`; every descriptor uses the same
/// `offset` and the same `size` (stored as the range).
///
/// Capacity for the whole span is requested once via EnsureCapacity() before writing.
void AddBuffers(std::span<const VkBuffer> buffers, VkDeviceSize offset, VkDeviceSize size) {
    EnsureCapacity(buffers.size());
    for (const VkBuffer handle : buffers) {
        *payload_cursor = VkDescriptorBufferInfo{
            .buffer = handle,
            .offset = offset,
            .range = size,
        };
        ++payload_cursor;
    }
}
|
||||||
|
|
||||||
|
/// Points both the write cursor and the upload start back at the start of the current
/// frame slice.
void Reset() noexcept {
    upload_start = payload_cursor = payload_start;
}
|
||||||
|
|
||||||
|
size_t GetCurrentSize() const noexcept {
|
||||||
|
return std::distance(payload_start, payload_cursor);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CanAdd(size_t count) const noexcept {
|
||||||
|
return std::distance(payload_start, payload_cursor) + count < FRAME_PAYLOAD_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
static constexpr size_t FRAMES_IN_FLIGHT = 12;
|
||||||
|
static constexpr size_t FRAME_PAYLOAD_SIZE = 0x40000;
|
||||||
|
static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
|
||||||
|
|
||||||
|
void EnsureCapacity(size_t required_entries);
|
||||||
|
void HandleOverflow();
|
||||||
|
|
||||||
const Device& device;
|
const Device& device;
|
||||||
Scheduler& scheduler;
|
Scheduler& scheduler;
|
||||||
|
|
||||||
@@ -83,11 +130,29 @@ private:
|
|||||||
DescriptorUpdateEntry* payload_cursor = nullptr;
|
DescriptorUpdateEntry* payload_cursor = nullptr;
|
||||||
DescriptorUpdateEntry* payload_start = nullptr;
|
DescriptorUpdateEntry* payload_start = nullptr;
|
||||||
const DescriptorUpdateEntry* upload_start = nullptr;
|
const DescriptorUpdateEntry* upload_start = nullptr;
|
||||||
std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload;
|
|
||||||
|
std::unique_ptr<DescriptorUpdateEntry[]> payload;
|
||||||
|
|
||||||
|
std::atomic<size_t> overflow_count{0};
|
||||||
|
|
||||||
|
size_t total_entries_processed{0};
|
||||||
|
size_t overflow_events{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO: should these be separate classes instead?
|
class GuestDescriptorQueue final : public UpdateDescriptorQueue {
|
||||||
using GuestDescriptorQueue = UpdateDescriptorQueue;
|
public:
|
||||||
using ComputePassDescriptorQueue = UpdateDescriptorQueue;
|
using UpdateDescriptorQueue::UpdateDescriptorQueue;
|
||||||
|
|
||||||
} // namespace Vulkan
|
void PreAllocateForFrame(size_t estimated_entries);
|
||||||
|
void OptimizeForGuestMemory();
|
||||||
|
};
|
||||||
|
|
||||||
|
class ComputePassDescriptorQueue final : public UpdateDescriptorQueue {
|
||||||
|
public:
|
||||||
|
using UpdateDescriptorQueue::UpdateDescriptorQueue;
|
||||||
|
|
||||||
|
void PreAllocateForComputePass(size_t estimated_entries);
|
||||||
|
void OptimizeForComputeWorkload();
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Vulkan
|
||||||
Reference in New Issue
Block a user