android: Add shader building overlay with performance graph

- Add new settings for shader building overlay and performance graph
- Create ShaderBuildingOverlayView with animated shader building indicator
- Implement JNI bridge to get shader building count from core
- Add performance metrics display (FPS, frametime, emulation speed)
- Include real-time frametime graph with min/avg/max statistics
- Add menu options to toggle overlay and graph independently
- Integrate with existing overlay system in EmulationFragment
- Optimize Vulkan pipeline cache loading with pre-reservation
- Improve async shader building to reduce main thread blocking

Signed-off-by: Zephyron <zephyron@citron-emu.org>
2026-03-31 00:18:30 -04:00 · 2025-08-17 15:48:06 +10:00
parent 85324599a6
commit 06f13f3cb1
11 changed files with 429 additions and 29 deletions
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -281,6 +281,8 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
        size_t total{};
        size_t built{};
        bool has_loaded{};
+        size_t total_compute{};
+        size_t total_graphics{};
    } state;

    const auto queue_work{[&](Common::UniqueFunction<void, Context*>&& work) {
@@ -306,6 +308,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
            }
        });
        ++state.total;
+        ++state.total_compute;
    }};
    const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
        GraphicsPipelineKey key;
@@ -327,11 +330,22 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
            }
        });
        ++state.total;
+        ++state.total_graphics;
    }};
    LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics);

    LOG_INFO(Render_OpenGL, "Total Pipeline Count: {}", state.total);

+    // Pre-reserve cache maps to reduce rehashing during load/build
+    {
+        std::scoped_lock lock{state.mutex};
+        if (state.total_compute > 0) {
+            compute_cache.reserve(state.total_compute);
+        }
+        if (state.total_graphics > 0) {
+            graphics_cache.reserve(state.total_graphics);
+        }
+    }
    std::unique_lock lock{state.mutex};
    callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
    state.has_loaded = true;
@@ -391,18 +405,8 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
    if (!use_asynchronous_shaders) {
        return pipeline;
    }
-    // If something is using depth, we can assume that games are not rendering anything which
-    // will be used one time.
-    if (maxwell3d->regs.zeta_enable) {
-        return nullptr;
-    }
-    // If games are using a small index count, we can assume these are full screen quads.
-    // Usually these shaders are only used once for building textures so we can assume they
-    // can't be built async
-    const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
-    if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
-        return pipeline;
-    }
+    // When asynchronous shaders are enabled, avoid blocking the main thread completely.
+    // Skip the draw until the pipeline is ready to prevent stutter.
    return nullptr;
 }

@@ -587,7 +591,9 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
    info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();

    std::string code{};
+    code.reserve(8 * 1024); // reduce reallocs for typical small-to-medium shaders
    std::vector<u32> code_spirv;
+    code_spirv.reserve(16 * 1024 / sizeof(u32));
    switch (device.GetShaderBackend()) {
    case Settings::ShaderBackend::Glsl:
        code = EmitGLSL(profile, program);
@@ -608,7 +614,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
 }

 std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
-    return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
+    // Use all available logical threads to maximize build throughput.
+    return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U),
                                          "GlShaderBuilder",
                                          [this] { return Context{emu_window}; });
 }
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -265,7 +265,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program

 size_t GetTotalPipelineWorkers() {
    const size_t max_core_threads =
-        std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
+        std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL);
 #ifdef ANDROID
    // Leave at least a few cores free in android
    constexpr size_t free_cores = 3ULL;
@@ -484,6 +484,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
        size_t built{};
        bool has_loaded{};
        std::unique_ptr<PipelineStatistics> statistics;
+        size_t total_compute{};
+        size_t total_graphics{};
    } state;

    if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
@@ -506,6 +508,7 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
            }
        });
        ++state.total;
+        ++state.total_compute;
    }};
    const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
        GraphicsPipelineCacheKey key;
@@ -543,12 +546,23 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
            }
        });
        ++state.total;
+        ++state.total_graphics;
    }};
    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute,
                               load_graphics);

    LOG_INFO(Render_Vulkan, "Total Pipeline Count: {}", state.total);

+    // Pre-reserve space in caches to reduce rehashing during async builds
+    {
+        std::scoped_lock lock{state.mutex};
+        if (state.total_compute > 0) {
+            compute_cache.reserve(state.total_compute);
+        }
+        if (state.total_graphics > 0) {
+            graphics_cache.reserve(state.total_graphics);
+        }
+    }
    std::unique_lock lock{state.mutex};
    callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
    state.has_loaded = true;
@@ -589,18 +603,8 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
    if (!use_asynchronous_shaders) {
        return pipeline;
    }
-    // If something is using depth, we can assume that games are not rendering anything which
-    // will be used one time.
-    if (maxwell3d->regs.zeta_enable) {
-        return nullptr;
-    }
-    // If games are using a small index count, we can assume these are full screen quads.
-    // Usually these shaders are only used once for building textures so we can assume they
-    // can't be built async
-    const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
-    if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
-        return pipeline;
-    }
+    // When asynchronous shaders are enabled, avoid blocking the main thread completely.
+    // Skip the draw until the pipeline is ready to prevent stutter.
    return nullptr;
 }

@@ -673,7 +677,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(

        const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
        ConvertLegacyToGeneric(program, runtime_info);
-        const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
+        std::vector<u32> code = EmitSPIRV(profile, runtime_info, program, binding);
+        code.reserve(std::max<size_t>(code.size(), 16 * 1024 / sizeof(u32)));
        device.SaveShader(code);
        modules[stage_index] = BuildShader(device, code);
        if (device.HasDebuggingToolAttached()) {
@@ -767,7 +772,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
    }

    auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
-    const std::vector<u32> code{EmitSPIRV(profile, program)};
+    std::vector<u32> code = EmitSPIRV(profile, program);
+    code.reserve(std::max<size_t>(code.size(), 16 * 1024 / sizeof(u32)));
    device.SaveShader(code);
    vk::ShaderModule spv_module{BuildShader(device, code)};
    if (device.HasDebuggingToolAttached()) {