Release guest memory and staging buffers on shutdown (reviewed patch)

What this patch does
  * HostMemory gains a read-only BackingSize() accessor; the destructor stays defaulted.
  * System::Impl keeps its DeviceMemory across re-initialization and only replaces it when the
    memory layout setting changes.
  * ShutdownMainProcess() asks the OS to drop the guest memory pages and logs the process
    memory before/after.
  * RasterizerVulkan's destructor finishes outstanding work and empties the staging buffer pool.
  * MemoryAllocator gains NukeAllAllocations() (not called by this patch, see below).

Changes made during review
  * ~RasterizerVulkan no longer calls memory_allocator.NukeAllAllocations(): class members are
    destroyed after the destructor body, so they could outlive the memory they refer to.
  * ~RasterizerVulkan calls the StagingBufferPool::Nuke() added here instead of
    TriggerCacheRelease(), which this patch does not define.
  * The room_member->SendGameInfo() block in ShutdownMainProcess() is kept; its removal looked
    unintended.
  * The 20 ms sleep before the second memory reading is gone (and with it the thread include).
  * madvise()/VirtualAlloc()/GetProcessMemoryInfo() results are checked.
  * backing_size stays private; unrelated comment removals and re-wrapping were dropped.

Recovery notes (please verify before applying)
  * The text this patch was recovered from had lost its line breaks and everything inside angle
    brackets. Header names and template arguments were restored from usage; blank lines and
    indentation were reconstructed from the hunk line counts. Hunks may apply with an offset.
  * The "index" lines are carried over unchanged; their post-image hashes no longer match.

diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index e0b5a6a67..f632399dd 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -672,7 +672,9 @@ HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_)
     }
 }
 
+// Deliberately defaulted: `impl` is a unique_ptr and performs the cleanup when it is destroyed.
+// Unmapping manually here as well would release the same memory twice.
 HostMemory::~HostMemory() = default;
 
 HostMemory::HostMemory(HostMemory&&) noexcept = default;
 
diff --git a/src/common/host_memory.h b/src/common/host_memory.h
index 72fbb05af..a73134360 100644
--- a/src/common/host_memory.h
+++ b/src/common/host_memory.h
@@ -69,6 +69,11 @@ public:
         return address >= virtual_base && address < virtual_base + virtual_size;
     }
 
+    /// Size in bytes that was requested for the backing memory.
+    [[nodiscard]] size_t BackingSize() const noexcept {
+        return backing_size;
+    }
+
 private:
     size_t backing_size{};
     size_t virtual_size{};
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 5f14647b0..0e45db065 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -4,9 +4,34 @@
 
 #include <array>
 #include <atomic>
+#include <cstdlib>
+#include <fstream>
 #include <memory>
+#include <string>
 #include <utility>
 
+#ifdef __linux__
+#include <sys/mman.h>
+#ifndef __ANDROID__
+#include <malloc.h>
+#endif
+#endif
+
+#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#include <psapi.h>
+
+#undef GetCurrentTime
+#undef ERROR
+#undef GetMessage
+#endif
+
 #include "audio_core/audio_core.h"
 #include "common/fs/fs.h"
 #include "common/logging/log.h"
@@ -65,6 +90,36 @@
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"
 
+/// Returns this process's resident memory in MiB, or 0 if it cannot be determined.
+/// Windows reports the working set; Linux reports the "RssAnon" field of /proc/self/status.
+static u64 GetCurrentRSS() {
+#ifdef _WIN32
+    PROCESS_MEMORY_COUNTERS counters{};
+    counters.cb = sizeof(counters);
+    if (!GetProcessMemoryInfo(GetCurrentProcess(), &counters, sizeof(counters))) {
+        return 0;
+    }
+    return static_cast<u64>(counters.WorkingSetSize) / 1024 / 1024;
+#elif defined(__linux__)
+    std::ifstream status_file("/proc/self/status");
+    std::string line;
+    while (std::getline(status_file, line)) {
+        if (line.compare(0, 8, "RssAnon:") != 0) {
+            continue;
+        }
+        // The field is reported in kB (hence the division); skip the label to reach the digits.
+        const size_t first_digit = line.find_first_of("0123456789");
+        if (first_digit == std::string::npos) {
+            return 0;
+        }
+        return std::strtoull(line.c_str() + first_digit, nullptr, 10) / 1024;
+    }
+    return 0;
+#else
+    return 0; // Not implemented for macOS and other platforms.
+#endif
+}
+
 MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64));
 MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64));
 MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64));
@@ -117,7 +172,10 @@ struct System::Impl {
           reporter{system}, applet_manager{system}, frontend_applets{system}, profile_manager{} {}
 
     void Initialize(System& system) {
-        device_memory = std::make_unique<Core::DeviceMemory>();
+        // Reuse the existing buffer if there is one; it is only allocated when missing.
+        if (!device_memory) {
+            device_memory = std::make_unique<Core::DeviceMemory>();
+        }
 
         is_multicore = Settings::values.use_multi_core.GetValue();
         extended_memory_layout =
@@ -145,20 +203,26 @@ struct System::Impl {
     }
 
     void ReinitializeIfNecessary(System& system) {
-        const bool must_reinitialize =
-            is_multicore != Settings::values.use_multi_core.GetValue() ||
-            extended_memory_layout != (Settings::values.memory_layout_mode.GetValue() !=
-                                       Settings::MemoryLayout::Memory_4Gb);
+        const bool wants_multicore = Settings::values.use_multi_core.GetValue();
+        const bool wants_extended_layout =
+            Settings::values.memory_layout_mode.GetValue() != Settings::MemoryLayout::Memory_4Gb;
+        const bool layout_changed = extended_memory_layout != wants_extended_layout;
+        const bool must_reinitialize =
+            !device_memory || is_multicore != wants_multicore || layout_changed;
 
         if (!must_reinitialize) {
             return;
         }
 
+        // Drop the old buffer when the layout changed so that Initialize() allocates a new one.
+        if (layout_changed) {
+            device_memory.reset();
+        }
+
         LOG_DEBUG(Kernel, "Re-initializing");
 
-        is_multicore = Settings::values.use_multi_core.GetValue();
-        extended_memory_layout =
-            Settings::values.memory_layout_mode.GetValue() != Settings::MemoryLayout::Memory_4Gb;
+        is_multicore = wants_multicore;
+        extended_memory_layout = wants_extended_layout;
 
         Initialize(system);
     }
@@ -400,6 +464,7 @@ struct System::Impl {
     }
 
     void ShutdownMainProcess() {
+        const u64 mem_before = GetCurrentRSS();
         SetShuttingDown(true);
 
         // Log last frame performance stats if game was loaded
@@ -460,5 +525,33 @@ struct System::Impl {
         // Reset all glue registrations
         arp_manager.ResetAll();
 
+        // Hand the guest memory pages back to the OS; the buffer itself is kept for the next boot.
+        if (device_memory) {
+            [[maybe_unused]] void* const backing_base = device_memory->buffer.BackingBasePointer();
+            [[maybe_unused]] const size_t backing_size = device_memory->buffer.BackingSize();
+#ifdef __linux__
+            // NOTE(review): for shared mappings MADV_DONTNEED does not discard the pages (see
+            // madvise(2)). If the backing buffer is a shared/memfd mapping, MADV_REMOVE is
+            // probably what is wanted here - confirm against the HostMemory implementation.
+            if (madvise(backing_base, backing_size, MADV_DONTNEED) != 0) {
+                LOG_WARNING(Core, "madvise(MADV_DONTNEED) failed for the device memory");
+            }
+#ifndef __ANDROID__
+            // Only call malloc_trim on non-Android Linux (glibc).
+            malloc_trim(0);
+#endif
+#elif defined(_WIN32)
+            if (VirtualAlloc(backing_base, backing_size, MEM_RESET, PAGE_READWRITE) == nullptr) {
+                LOG_WARNING(Core, "VirtualAlloc(MEM_RESET) failed for the device memory");
+            }
+#endif
+        }
+
+        const u64 mem_after = GetCurrentRSS();
+        const u64 shaved = mem_before > mem_after ? mem_before - mem_after : 0;
+        LOG_INFO(Core,
+                 "Shutdown Memory Audit: [Before: {}MB] -> [After: {}MB] | Total Shaved: {}MB",
+                 mem_before, mem_after, shaved);
+
         LOG_DEBUG(Core, "Shutdown OK");
     }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 33fea028e..e9e4bc52a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -212,7 +212,17 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
     });
 }
 
-RasterizerVulkan::~RasterizerVulkan() = default;
+RasterizerVulkan::~RasterizerVulkan() {
+    // Finish outstanding work on the scheduler and both cache runtimes before releasing anything.
+    scheduler.Finish();
+    buffer_cache_runtime.Finish();
+    texture_cache_runtime.Finish();
+
+    // Drop the pooled staging buffers now. The allocator's slabs are deliberately left alone:
+    // the members of this class are destroyed only after this body has run and may still refer
+    // to memory inside them.
+    staging_pool.Nuke();
+}
 
 template <typename Func>
 void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 7e6d8d17d..a6b331e05 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -322,4 +322,17 @@ void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) {
     }
 }
 
+void StagingBufferPool::Nuke() {
+    const auto drop_entries = [](StagingBuffersCache& cache) {
+        for (auto& level : cache) {
+            level.entries.clear();
+            level.entries.shrink_to_fit();
+        }
+    };
+    drop_entries(device_local_cache);
+    drop_entries(upload_cache);
+    drop_entries(download_cache);
+    stream_buffer.reset();
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 149e10e1c..ca2c10e0a 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -43,6 +43,11 @@ public:
 
     void TickFrame();
 
+    /// Clears every cached staging buffer entry and resets the stream buffer.
+    /// NOTE(review): meant for teardown - presumably nothing may still be using these buffers
+    /// and the pool should not be asked for buffers afterwards; confirm.
+    void Nuke();
+
     u64 GetMemoryUsage() const;
 
     void SetProgramId(u64 program_id_) {
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index eab9c8f73..b8efdbbf4 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -383,4 +383,10 @@ std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 ty
     return std::nullopt;
 }
 
+void MemoryAllocator::NukeAllAllocations() {
+    // Destroying the MemoryAllocation objects is what returns their device memory to the driver.
+    allocations.clear();
+    allocations.shrink_to_fit();
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index a52fe436a..751f289d9 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -124,6 +124,11 @@ public:
     /// Commits memory required by the buffer and binds it.
     MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage);
 
+    /// Destroys every allocation owned by this allocator.
+    /// NOTE(review): a MemoryCommit that is still alive presumably points into one of these
+    /// allocations, so this may only be called once all commits are gone - confirm.
+    void NukeAllAllocations();
+
 private:
     /// Tries to allocate a chunk of memory.
     bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);