feat(externals): Add SPIR-V Tools to help Optimize Performance

2026-03-22 17:46:08 -04:00 · 2026-02-14 16:26:43 -05:00
parent 4af75b7544
commit 57f7999b76
12 changed files with 110 additions and 14 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -68,3 +68,6 @@
 [submodule "Vulkan-Utility-Libraries"]
 	path = externals/Vulkan-Utility-Libraries
 	url = https://github.com/KhronosGroup/Vulkan-Utility-Libraries.git
+[submodule "externals/spirv-tools"]
+	path = externals/spirv-tools
+	url = https://github.com/KhronosGroup/SPIRV-Tools.git
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -48,6 +48,12 @@ if (NOT MSVC)
        -Wno-string-concatenation)
 endif()

+# SPIRV-Tools
+set(SPIRV-Headers_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/spirv-tools/external/spirv-headers")
+set(SPIRV_SKIP_TESTS ON)
+set(SPIRV_SKIP_EXECUTABLES ON)
+add_subdirectory(spirv-tools)
+
 # MicroProfile
 add_library(microprofile INTERFACE)
 target_include_directories(microprofile INTERFACE ./microprofile)
--- a/externals/spirv-tools
+++ b/externals/spirv-tools
--- a/src/citron/configuration/shared_translation.cpp
+++ b/src/citron/configuration/shared_translation.cpp
@@ -250,6 +250,8 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent) {
        Settings, renderer_force_max_clock, tr("Force maximum clocks (Vulkan only)"),
        tr("Runs work in the background while waiting for graphics commands to keep the GPU from "
           "lowering its clock speed."));
+    INSERT(Settings, optimize_spirv_output, tr("SPIR-V Shader Optimization"),
+           tr("Optimizes SPIR-V shaders for potentially better performance."));
    INSERT(Settings, max_anisotropy, tr("Anisotropic Filtering:"),
           tr("Controls the quality of texture rendering at oblique angles.\nIt's a light setting "
              "and safe to set at 16x on most GPUs."));
@@ -655,6 +657,13 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QWidget* parent) {
             PAIR(ConfirmStop, Ask_Never, tr("Never ask")),
         }});

+    translations->insert(
+        {Settings::EnumMetadata<Settings::Values::SpirvShaderOptimization>::Index(),
+         {
+             PAIR(Values::SpirvShaderOptimization, Auto, tr("Auto")),
+             PAIR(Values::SpirvShaderOptimization, Off, tr("Off")),
+         }});
+
 #undef PAIR
 #undef CTX_PAIR

--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -601,6 +601,14 @@ struct Values {
                                          Category::RendererDebug};
    Setting<bool> disable_buffer_reorder{linkage, false, "disable_buffer_reorder",
                                         Category::RendererDebug};
+    enum class SpirvShaderOptimization : u32 {
+        Off,
+        Auto,
+    };
+
+    SwitchableSetting<SpirvShaderOptimization> optimize_spirv_output{
+        linkage, SpirvShaderOptimization::Auto, "optimize_spirv_output",
+        Category::RendererAdvanced};

    // System
    SwitchableSetting<Language, true> language_index{linkage,
@@ -840,4 +848,18 @@ void RestoreGlobalState(bool is_powered_on);
 bool IsConfiguringGlobal();
 void SetConfiguringGlobal(bool is_global);

+template <>
+struct EnumMetadata<Values::SpirvShaderOptimization> {
+    static constexpr u32 Index() {
+        return 45;
+    }
+
+    static constexpr std::array<std::pair<const char*, Values::SpirvShaderOptimization>, 2>
+    Canonicalizations() {
+        return {{
+            {"off", Values::SpirvShaderOptimization::Off},
+            {"auto", Values::SpirvShaderOptimization::Auto},
+        }};
+    }
+};
 } // namespace Settings
--- a/src/core/hle/service/nvnflinger/buffer_queue_core.h
+++ b/src/core/hle/service/nvnflinger/buffer_queue_core.h
@@ -26,6 +26,8 @@ class IProducerListener;

 class BufferQueueCore final {
    friend class BufferQueueProducer;
+    friend class BufferQueueProducer; // NOTE(review): duplicates the friend declaration on the
+                                      // previous line; redundant and can be dropped.
    friend class BufferQueueConsumer;

 public:
@@ -69,6 +71,7 @@ private:
    const s32 max_acquired_buffer_count{}; // This is always zero on HOS
    bool buffer_has_been_queued{};
    u64 frame_counter{};
+
    u32 transform_hint{};
    bool is_allocating{};
    mutable std::condition_variable_any is_allocating_condition;
--- a/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp
+++ b/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp
@@ -4,12 +4,14 @@
 // Parts of this implementation were based on:
 // https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/gui/BufferQueueProducer.cpp

+#include <chrono>
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/hle/kernel/k_event.h"
 #include "core/hle/kernel/k_readable_event.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/service/kernel_helpers.h"
+
 #include "core/hle/service/nvnflinger/buffer_queue_core.h"
 #include "core/hle/service/nvnflinger/buffer_queue_producer.h"
 #include "core/hle/service/nvnflinger/consumer_listener.h"
--- a/src/core/hle/service/nvnflinger/buffer_slot.h
+++ b/src/core/hle/service/nvnflinger/buffer_slot.h
@@ -34,6 +34,9 @@ struct BufferSlot final {
    bool needs_cleanup_on_release{};
    bool attached_by_consumer{};
    bool is_preallocated{};
+
+    s64 queue_time{};
+    s64 presentation_time{};
 };

 } // namespace Service::android
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -242,7 +242,7 @@ add_library(shader_recompiler STATIC
    varying_state.h
 )

-target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit)
+target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit SPIRV-Tools-opt)

 if (MSVC)
    target_compile_options(shader_recompiler PRIVATE
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -8,6 +8,10 @@
 #include <utility>
 #include <vector>

+#include <spirv-tools/optimizer.hpp>
+#include "common/settings.h"
+
+#include "common/logging/log.h"
 #include "common/settings.h"
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
@@ -17,6 +21,17 @@

 namespace Shader::Backend::SPIRV {
 namespace {
+
+thread_local std::unique_ptr<spvtools::Optimizer> thread_optimizer;
+
+spvtools::Optimizer& GetThreadOptimizer() {
+    if (!thread_optimizer) {
+        thread_optimizer = std::make_unique<spvtools::Optimizer>(SPV_ENV_VULKAN_1_3);
+        thread_optimizer->RegisterPerformancePasses();
+    }
+    return *thread_optimizer;
+}
+
 template <class Func>
 struct FuncTraits {};

@@ -483,7 +498,7 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
 } // Anonymous namespace

 std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
-                           IR::Program& program, Bindings& bindings) {
+                           IR::Program& program, Bindings& bindings, bool optimize) {
    EmitContext ctx{profile, runtime_info, program, bindings};
    const Id main{DefineMain(ctx, program)};
    DefineEntryPoint(program, ctx, main);
@@ -495,9 +510,29 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
    SetupCapabilities(profile, program.info, ctx);
    SetupTransformFeedbackCapabilities(ctx, main);
    PatchPhiNodes(program, ctx);
+
+    if (!optimize) {
        return ctx.Assemble();
    }

+    std::vector<u32> spirv = ctx.Assemble();
+
+    // Reuse the calling thread's cached optimizer instead of constructing one per shader
+    auto& spv_opt = GetThreadOptimizer();
+    spv_opt.SetMessageConsumer([](spv_message_level_t, const char*, const spv_position_t&,
+                                  const char* m) { LOG_ERROR(HW_GPU, "spirv-opt: {}", m); });
+
+    spvtools::OptimizerOptions opt_options;
+    opt_options.set_run_validator(false);
+
+    std::vector<u32> result;
+    if (!spv_opt.Run(spirv.data(), spirv.size(), &result, opt_options)) {
+        LOG_ERROR(HW_GPU, "Failed to optimize SPIRV output, continuing without optimization");
+        return spirv;
+    }
+    return result;
+}
+
 Id EmitPhi(EmitContext& ctx, IR::Inst* inst) {
    const size_t num_args{inst->NumArgs()};
    boost::container::small_vector<Id, 32> blocks;
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -31,11 +31,13 @@ constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, do
 constexpr u32 RENDERAREA_LAYOUT_OFFSET = offsetof(RenderAreaLayout, render_area);

 [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
-                                         IR::Program& program, Bindings& bindings);
+                                         IR::Program& program, Bindings& bindings,
+                                         bool optimize = false);

-[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) {
+[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program,
+                                                bool optimize = false) {
    Bindings binding;
-    return EmitSPIRV(profile, {}, program, binding);
+    return EmitSPIRV(profile, {}, program, binding, optimize);
 }

 } // namespace Shader::Backend::SPIRV
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -411,9 +411,12 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
    dynamic_features = DynamicFeatures{
        .has_extended_dynamic_state = allow_eds1 && device.IsExtExtendedDynamicStateSupported(),
        .has_extended_dynamic_state_2 = allow_eds2 && device.IsExtExtendedDynamicState2Supported(),
-        .has_extended_dynamic_state_2_extra = allow_eds2 && device.IsExtExtendedDynamicState2ExtrasSupported(),
-        .has_extended_dynamic_state_3_blend = allow_eds3 && device.IsExtExtendedDynamicState3BlendingSupported(),
-        .has_extended_dynamic_state_3_enables = allow_eds3 && device.IsExtExtendedDynamicState3EnablesSupported(),
+        .has_extended_dynamic_state_2_extra =
+            allow_eds2 && device.IsExtExtendedDynamicState2ExtrasSupported(),
+        .has_extended_dynamic_state_3_blend =
+            allow_eds3 && device.IsExtExtendedDynamicState3BlendingSupported(),
+        .has_extended_dynamic_state_3_enables =
+            allow_eds3 && device.IsExtExtendedDynamicState3EnablesSupported(),
        .has_dynamic_vertex_input = allow_eds3 && device.IsExtVertexInputDynamicStateSupported(),
    };
 }
@@ -435,7 +438,8 @@ void PipelineCache::EvictOldPipelines() {
    }
    last_memory_pressure_frame = current_frame;

-    const u64 evict_before_frame = current_frame > FRAMES_TO_KEEP ? current_frame - FRAMES_TO_KEEP : 0;
+    const u64 evict_before_frame =
+        current_frame > FRAMES_TO_KEEP ? current_frame - FRAMES_TO_KEEP : 0;

    size_t evicted_graphics = 0;
    size_t evicted_compute = 0;
@@ -747,7 +751,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(

        const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
        ConvertLegacyToGeneric(program, runtime_info);
-        std::vector<u32> code = EmitSPIRV(profile, runtime_info, program, binding);
+        bool optimize = Settings::values.optimize_spirv_output.GetValue() ==
+                        Settings::Values::SpirvShaderOptimization::Auto;
+        std::vector<u32> code = EmitSPIRV(profile, runtime_info, program, binding, optimize);
        // Reserve space to reduce allocations during shader compilation
        code.reserve(std::max<size_t>(code.size(), 16 * 1024 / sizeof(u32)));
        device.SaveShader(code);
@@ -766,7 +772,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(

 } catch (const vk::Exception& exception) {
    if (exception.GetResult() == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
-        LOG_ERROR(Render_Vulkan, "Out of device memory during graphics pipeline creation, attempting recovery");
+        LOG_ERROR(Render_Vulkan,
+                  "Out of device memory during graphics pipeline creation, attempting recovery");
        EvictOldPipelines();
        return nullptr;
    }
@@ -850,7 +857,9 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
    }

    auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
-    std::vector<u32> code = EmitSPIRV(profile, program);
+    bool optimize = Settings::values.optimize_spirv_output.GetValue() ==
+                    Settings::Values::SpirvShaderOptimization::Auto;
+    std::vector<u32> code = EmitSPIRV(profile, program, optimize);
    // Reserve space to reduce allocations during shader compilation
    code.reserve(std::max<size_t>(code.size(), 16 * 1024 / sizeof(u32)));
    device.SaveShader(code);
@@ -866,7 +875,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(

 } catch (const vk::Exception& exception) {
    if (exception.GetResult() == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
-        LOG_ERROR(Render_Vulkan, "Out of device memory during compute pipeline creation, attempting recovery");
+        LOG_ERROR(Render_Vulkan,
+                  "Out of device memory during compute pipeline creation, attempting recovery");
        EvictOldPipelines();
        return nullptr;
    }