feat(video_core): implement comprehensive VRAM management system

Add automatic VRAM leak prevention with configurable garbage collection
to prevent device loss crashes during extended play sessions.

New Settings:
- vram_limit_mb: Configurable VRAM limit (0 = auto-detect 80%)
- gc_aggressiveness: Off/Light/Moderate/Heavy/Extreme levels
- texture_eviction_frames: Frames before texture eviction (default 2)
- buffer_eviction_frames: Frames before buffer eviction (default 5)
- sparse_texture_priority_eviction: Prioritize large sparse textures
- log_vram_usage: Enable VRAM statistics logging

Core Changes:
- Enhanced texture cache with LRU eviction and sparse texture priority
- Enhanced buffer cache with configurable eviction thresholds
- Added VRAM pressure monitoring using VK_EXT_memory_budget
- Emergency GC triggers at 90%/95% VRAM usage thresholds

Platform Support:
- Desktop: Settings in Graphics > Advanced tab
- Android: Settings in Zep Zone category

Fixes VRAM climbing steadily during gameplay until device loss.
Target: Stable VRAM usage below configured limit for 2+ hours.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
Zephyron
2026-01-25 15:21:02 +10:00
parent 44f9cb6347
commit 3e2137a470
16 changed files with 938 additions and 48 deletions

View File

@@ -26,24 +26,60 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R
gpu_modified_ranges.Clear();
inline_buffer_id = NULL_BUFFER_ID;
// FIXED: VRAM leak prevention - Initialize buffer VRAM management from settings
const u32 configured_limit_mb = Settings::values.vram_limit_mb.GetValue();
if (!runtime.CanReportMemoryUsage()) {
minimum_memory = DEFAULT_EXPECTED_MEMORY;
critical_memory = DEFAULT_CRITICAL_MEMORY;
vram_limit_bytes = configured_limit_mb > 0 ? static_cast<u64>(configured_limit_mb) * 1_MiB
: 6_GiB;
return;
}
const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
const s64 min_spacing_expected = device_local_memory - 1_GiB;
const s64 min_spacing_critical = device_local_memory - 512_MiB;
const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD);
const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
const s64 min_vacancy_critical = (2 * mem_threshold) / 10;
minimum_memory = static_cast<u64>(
std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
DEFAULT_EXPECTED_MEMORY));
critical_memory = static_cast<u64>(
std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
DEFAULT_CRITICAL_MEMORY));
// FIXED: VRAM leak prevention - Use configured limit or auto-detect
if (configured_limit_mb > 0) {
vram_limit_bytes = static_cast<u64>(configured_limit_mb) * 1_MiB;
} else {
vram_limit_bytes = static_cast<u64>(device_local_memory * 0.80);
}
// Adjust thresholds based on GC aggressiveness setting
const auto gc_level = Settings::values.gc_aggressiveness.GetValue();
f32 expected_ratio = 0.5f;
f32 critical_ratio = 0.7f;
switch (gc_level) {
case Settings::GCAggressiveness::Off:
expected_ratio = 0.90f;
critical_ratio = 0.95f;
break;
case Settings::GCAggressiveness::Light:
expected_ratio = 0.70f;
critical_ratio = 0.85f;
break;
case Settings::GCAggressiveness::Moderate:
expected_ratio = 0.50f;
critical_ratio = 0.70f;
break;
case Settings::GCAggressiveness::Heavy:
expected_ratio = 0.40f;
critical_ratio = 0.60f;
break;
case Settings::GCAggressiveness::Extreme:
expected_ratio = 0.30f;
critical_ratio = 0.50f;
break;
}
minimum_memory = static_cast<u64>(vram_limit_bytes * expected_ratio);
critical_memory = static_cast<u64>(vram_limit_bytes * critical_ratio);
LOG_INFO(Render_Vulkan,
"Buffer cache VRAM initialized: limit={}MB, minimum={}MB, critical={}MB",
vram_limit_bytes / 1_MiB, minimum_memory / 1_MiB, critical_memory / 1_MiB);
}
template <class P>
@@ -51,20 +87,90 @@ BufferCache<P>::~BufferCache() = default;
template <class P>
void BufferCache<P>::RunGarbageCollector() {
// FIXED: VRAM leak prevention - Enhanced buffer GC with settings integration
const auto gc_level = Settings::values.gc_aggressiveness.GetValue();
if (gc_level == Settings::GCAggressiveness::Off) {
return; // GC disabled by user
}
const bool aggressive_gc = total_used_memory >= critical_memory;
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
int num_iterations = aggressive_gc ? 64 : 32;
const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
const bool emergency_gc = total_used_memory >= static_cast<u64>(vram_limit_bytes * BUFFER_VRAM_CRITICAL_THRESHOLD);
// FIXED: VRAM leak prevention - Get eviction frames from settings
const u64 eviction_frames = Settings::values.buffer_eviction_frames.GetValue();
// Adjust based on GC level
u64 base_ticks = eviction_frames;
int base_iterations = 32;
switch (gc_level) {
case Settings::GCAggressiveness::Light:
base_ticks = eviction_frames * 2;
base_iterations = 16;
break;
case Settings::GCAggressiveness::Moderate:
base_ticks = eviction_frames;
base_iterations = 32;
break;
case Settings::GCAggressiveness::Heavy:
base_ticks = std::max(1ULL, eviction_frames / 2);
base_iterations = 64;
break;
case Settings::GCAggressiveness::Extreme:
base_ticks = 1;
base_iterations = 128;
break;
default:
break;
}
u64 ticks_to_destroy;
int num_iterations;
if (emergency_gc) {
ticks_to_destroy = 1;
num_iterations = base_iterations * 4;
LOG_WARNING(Render_Vulkan, "Buffer cache emergency GC: usage={}MB, limit={}MB",
total_used_memory / 1_MiB, vram_limit_bytes / 1_MiB);
} else if (aggressive_gc) {
ticks_to_destroy = std::max(1ULL, base_ticks / 2);
num_iterations = base_iterations * 2;
} else {
ticks_to_destroy = base_ticks;
num_iterations = base_iterations;
}
u64 bytes_freed = 0;
const auto clean_up = [this, &num_iterations, &bytes_freed](BufferId buffer_id) {
if (num_iterations == 0) {
return true;
}
--num_iterations;
auto& buffer = slot_buffers[buffer_id];
const u64 buffer_size = buffer.SizeBytes();
DownloadBufferMemory(buffer);
DeleteBuffer(buffer_id);
bytes_freed += buffer_size;
--buffer_count;
if (buffer_size >= LARGE_BUFFER_THRESHOLD) {
large_buffer_memory -= buffer_size;
--large_buffer_count;
}
return false;
};
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
evicted_buffer_bytes += bytes_freed;
// FIXED: VRAM leak prevention - Log buffer eviction if enabled
if (Settings::values.log_vram_usage.GetValue() && bytes_freed > 0) {
LOG_INFO(Render_Vulkan, "Buffer GC: evicted {}MB, total={}MB, usage={}MB/{}MB",
bytes_freed / 1_MiB, evicted_buffer_bytes / 1_MiB, total_used_memory / 1_MiB,
vram_limit_bytes / 1_MiB);
}
}
template <class P>
@@ -96,9 +202,22 @@ void BufferCache<P>::TickFrame() {
if (runtime.CanReportMemoryUsage()) {
total_used_memory = runtime.GetDeviceMemoryUsage();
}
if (total_used_memory >= minimum_memory) {
// FIXED: VRAM leak prevention - Enhanced buffer GC triggering
const auto gc_level = Settings::values.gc_aggressiveness.GetValue();
const bool should_gc = gc_level != Settings::GCAggressiveness::Off &&
(total_used_memory >= minimum_memory ||
total_used_memory >= static_cast<u64>(vram_limit_bytes * BUFFER_VRAM_WARNING_THRESHOLD));
if (should_gc) {
RunGarbageCollector();
}
// FIXED: VRAM leak prevention - Force additional GC if still above critical
if (total_used_memory >= critical_memory && gc_level != Settings::GCAggressiveness::Off) {
RunGarbageCollector();
}
++frame_tick;
delayed_destruction_ring.Tick();
@@ -1420,12 +1539,31 @@ template <bool insert>
void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
Buffer& buffer = slot_buffers[buffer_id];
const auto size = buffer.SizeBytes();
const u64 aligned_size = Common::AlignUp(size, 1024);
const bool is_large = aligned_size >= LARGE_BUFFER_THRESHOLD;
if (insert) {
total_used_memory += Common::AlignUp(size, 1024);
total_used_memory += aligned_size;
buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick));
// FIXED: VRAM leak prevention - Track buffer statistics
++buffer_count;
if (is_large) {
large_buffer_memory += aligned_size;
++large_buffer_count;
}
} else {
total_used_memory -= Common::AlignUp(size, 1024);
total_used_memory -= aligned_size;
lru_cache.Free(buffer.getLRUID());
// FIXED: VRAM leak prevention - Update buffer statistics on removal
if (buffer_count > 0) {
--buffer_count;
}
if (is_large && large_buffer_count > 0) {
large_buffer_memory -= aligned_size;
--large_buffer_count;
}
}
const DAddr device_addr_begin = buffer.CpuAddr();
const DAddr device_addr_end = device_addr_begin + size;

View File

@@ -175,6 +175,12 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
// FIXED: VRAM leak prevention - Enhanced buffer eviction constants
static constexpr u64 DEFAULT_BUFFER_EVICTION_FRAMES = 5;
static constexpr size_t LARGE_BUFFER_THRESHOLD = 8_MiB;
static constexpr f32 BUFFER_VRAM_WARNING_THRESHOLD = 0.70f;
static constexpr f32 BUFFER_VRAM_CRITICAL_THRESHOLD = 0.85f;
// Debug Flags.
static constexpr bool DISABLE_DOWNLOADS = true;
@@ -350,6 +356,31 @@ public:
RunGarbageCollector();
}
// FIXED: VRAM leak prevention - Enhanced public interface for buffer VRAM management
/// Get buffer VRAM usage statistics
struct BufferVRAMStats {
u64 total_used_bytes;
u64 large_buffer_bytes;
u64 evicted_total;
u32 buffer_count;
u32 large_buffer_count;
};
[[nodiscard]] BufferVRAMStats GetBufferVRAMStats() const noexcept {
return BufferVRAMStats{
.total_used_bytes = total_used_memory,
.large_buffer_bytes = large_buffer_memory,
.evicted_total = evicted_buffer_bytes,
.buffer_count = buffer_count,
.large_buffer_count = large_buffer_count,
};
}
/// Check if buffer VRAM pressure is high
[[nodiscard]] bool IsBufferVRAMPressureHigh() const noexcept {
return total_used_memory >= minimum_memory;
}
void BindHostIndexBuffer();
void BindHostVertexBuffers();
@@ -488,6 +519,13 @@ public:
u64 critical_memory = 0;
BufferId inline_buffer_id;
// FIXED: VRAM leak prevention - Enhanced buffer memory tracking
u64 vram_limit_bytes = 0; // Configured VRAM limit for buffers
u64 large_buffer_memory = 0; // Memory used by large buffers (>8MB)
u64 evicted_buffer_bytes = 0; // Total bytes evicted since start
u32 buffer_count = 0; // Total buffer count
u32 large_buffer_count = 0; // Large buffer count
std::array<BufferId, ((1ULL << 34) >> CACHING_PAGEBITS)> page_table;
Common::ScratchBuffer<u8> tmp_buffer;
};

View File

@@ -159,6 +159,25 @@ void RendererVulkan::Composite(std::span<const Tegra::FramebufferConfig> framebu
render_window.OnFrameDisplayed();
};
// FIXED: VRAM leak prevention - Check VRAM pressure before rendering
if (device.CanReportMemoryUsage()) {
const u64 current_usage = device.GetDeviceMemoryUsage();
const u64 total_vram = device.GetDeviceLocalMemory();
const u32 configured_limit = Settings::values.vram_limit_mb.GetValue();
const u64 vram_limit = configured_limit > 0
? static_cast<u64>(configured_limit) * 1024ULL * 1024ULL
: static_cast<u64>(total_vram * 0.80);
// If VRAM usage is above 90% of limit, trigger emergency GC on texture/buffer caches
if (current_usage >= static_cast<u64>(vram_limit * 0.90)) {
LOG_WARNING(Render_Vulkan,
"VRAM pressure critical: {}MB/{}MB ({:.1f}%), triggering emergency GC",
current_usage / (1024ULL * 1024ULL), vram_limit / (1024ULL * 1024ULL),
(static_cast<f32>(current_usage) / vram_limit) * 100.0f);
rasterizer.TriggerMemoryGC();
}
}
RenderAppletCaptureLayer(framebuffers);
if (!render_window.IsShown()) {
@@ -201,6 +220,30 @@ void RendererVulkan::Report() const {
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
LOG_INFO(Render_Vulkan, "Available VRAM: {:.2f} GiB", available_vram);
// FIXED: VRAM leak prevention - Report VRAM management settings
const u32 vram_limit_mb = Settings::values.vram_limit_mb.GetValue();
const auto gc_level = Settings::values.gc_aggressiveness.GetValue();
const u32 texture_eviction = Settings::values.texture_eviction_frames.GetValue();
const u32 buffer_eviction = Settings::values.buffer_eviction_frames.GetValue();
if (vram_limit_mb > 0) {
LOG_INFO(Render_Vulkan, "VRAM Limit: {} MB (configured)", vram_limit_mb);
} else {
LOG_INFO(Render_Vulkan, "VRAM Limit: Auto ({:.0f} MB, 80% of available)",
available_vram * 0.8 * 1024.0);
}
LOG_INFO(Render_Vulkan, "GC Aggressiveness: {}, Texture eviction: {} frames, Buffer eviction: {} frames",
static_cast<u32>(gc_level), texture_eviction, buffer_eviction);
// FIXED: VRAM leak prevention - Report VK_EXT_memory_budget support
if (device.CanReportMemoryUsage()) {
const auto current_usage = device.GetDeviceMemoryUsage();
LOG_INFO(Render_Vulkan, "VK_EXT_memory_budget: Supported, Current usage: {:.2f} GiB",
static_cast<f64>(current_usage) / f64{1_GiB});
} else {
LOG_INFO(Render_Vulkan, "VK_EXT_memory_budget: Not supported (using estimates)");
}
static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
telemetry_session.AddField(field, "GPU_Model", model_name);

View File

@@ -861,6 +861,17 @@ u64 RasterizerVulkan::GetStagingMemoryUsage() const {
}
}
// FIXED: VRAM leak prevention - Trigger garbage collection on texture/buffer caches
void RasterizerVulkan::TriggerMemoryGC() {
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
// Trigger GC on both caches
texture_cache.TriggerGarbageCollection();
buffer_cache.TriggerGarbageCollection();
LOG_DEBUG(Render_Vulkan, "Manual memory GC triggered");
}
bool RasterizerVulkan::AccelerateConditionalRendering() {
gpu_memory->FlushCaching();
return query_cache.AccelerateHostConditionalRendering();

View File

@@ -125,6 +125,10 @@ public:
u64 GetBufferMemoryUsage() const;
u64 GetTextureMemoryUsage() const;
u64 GetStagingMemoryUsage() const;
// FIXED: VRAM leak prevention - Trigger garbage collection on texture/buffer caches
void TriggerMemoryGC();
bool AccelerateConditionalRendering() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,

View File

@@ -50,21 +50,59 @@ TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag
void(slot_image_views.insert(runtime, NullImageViewParams{}));
void(slot_samplers.insert(runtime, sampler_descriptor));
// FIXED: VRAM leak prevention - Initialize VRAM limit from settings
const u32 configured_limit_mb = Settings::values.vram_limit_mb.GetValue();
if constexpr (HAS_DEVICE_MEMORY_INFO) {
const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
const s64 min_spacing_expected = device_local_memory - 1_GiB;
const s64 min_spacing_critical = device_local_memory - 512_MiB;
const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD);
const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
const s64 min_vacancy_critical = (2 * mem_threshold) / 10;
expected_memory = static_cast<u64>(
std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
DEFAULT_EXPECTED_MEMORY));
critical_memory = static_cast<u64>(
std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
DEFAULT_CRITICAL_MEMORY));
minimum_memory = static_cast<u64>((device_local_memory - mem_threshold) / 2);
// FIXED: VRAM leak prevention - Use configured limit or auto-detect (80% of VRAM)
if (configured_limit_mb > 0) {
vram_limit_bytes = static_cast<u64>(configured_limit_mb) * 1_MiB;
} else {
// Auto-detect: use 80% of available VRAM as limit
vram_limit_bytes = static_cast<u64>(device_local_memory * 0.80);
}
// Adjust thresholds based on VRAM limit and GC aggressiveness setting
const auto gc_level = Settings::values.gc_aggressiveness.GetValue();
f32 expected_ratio = 0.6f;
f32 critical_ratio = 0.8f;
switch (gc_level) {
case Settings::GCAggressiveness::Off:
expected_ratio = 0.95f;
critical_ratio = 0.99f;
break;
case Settings::GCAggressiveness::Light:
expected_ratio = 0.75f;
critical_ratio = 0.90f;
break;
case Settings::GCAggressiveness::Moderate:
expected_ratio = 0.60f;
critical_ratio = 0.80f;
break;
case Settings::GCAggressiveness::Heavy:
expected_ratio = 0.50f;
critical_ratio = 0.70f;
break;
case Settings::GCAggressiveness::Extreme:
expected_ratio = 0.40f;
critical_ratio = 0.60f;
break;
}
expected_memory = static_cast<u64>(vram_limit_bytes * expected_ratio);
critical_memory = static_cast<u64>(vram_limit_bytes * critical_ratio);
minimum_memory = static_cast<u64>(vram_limit_bytes * 0.25f);
LOG_INFO(Render_Vulkan,
"VRAM Management initialized: limit={}MB, expected={}MB, critical={}MB, gc_level={}",
vram_limit_bytes / 1_MiB, expected_memory / 1_MiB, critical_memory / 1_MiB,
static_cast<u32>(gc_level));
} else {
vram_limit_bytes = configured_limit_mb > 0 ? static_cast<u64>(configured_limit_mb) * 1_MiB
: 6_GiB; // Default 6GB if no info
expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
minimum_memory = 0;
@@ -73,37 +111,111 @@ TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag
template <class P>
void TextureCache<P>::RunGarbageCollector() {
// FIXED: VRAM leak prevention - Enhanced garbage collector with settings integration
const auto gc_level = Settings::values.gc_aggressiveness.GetValue();
if (gc_level == Settings::GCAggressiveness::Off) {
return; // GC disabled by user
}
// Reset per-frame stats
if (last_gc_frame != frame_tick) {
evicted_this_frame = 0;
gc_runs_this_frame = 0;
last_gc_frame = frame_tick;
}
++gc_runs_this_frame;
bool high_priority_mode = false;
bool aggressive_mode = false;
bool emergency_mode = false;
u64 ticks_to_destroy = 0;
size_t num_iterations = 0;
u64 bytes_freed = 0;
const auto Configure = [&](bool allow_aggressive) {
// FIXED: VRAM leak prevention - Get eviction frames from settings
const u64 eviction_frames = Settings::values.texture_eviction_frames.GetValue();
const bool sparse_priority = Settings::values.sparse_texture_priority_eviction.GetValue();
const auto Configure = [&](bool allow_aggressive, bool allow_emergency) {
high_priority_mode = total_used_memory >= expected_memory;
aggressive_mode = allow_aggressive && total_used_memory >= critical_memory;
ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
emergency_mode = allow_emergency && total_used_memory >= static_cast<u64>(vram_limit_bytes * VRAM_USAGE_EMERGENCY_THRESHOLD);
// FIXED: VRAM leak prevention - Adjust iterations based on GC level
u64 base_ticks = eviction_frames;
size_t base_iterations = 10;
switch (gc_level) {
case Settings::GCAggressiveness::Light:
base_ticks = eviction_frames * 2;
base_iterations = 5;
break;
case Settings::GCAggressiveness::Moderate:
base_ticks = eviction_frames;
base_iterations = 10;
break;
case Settings::GCAggressiveness::Heavy:
base_ticks = std::max(1ULL, eviction_frames / 2);
base_iterations = 20;
break;
case Settings::GCAggressiveness::Extreme:
base_ticks = 1;
base_iterations = 40;
break;
default:
break;
}
if (emergency_mode) {
ticks_to_destroy = 1;
num_iterations = base_iterations * 4;
} else if (aggressive_mode) {
ticks_to_destroy = std::max(1ULL, base_ticks / 2);
num_iterations = base_iterations * 2;
} else if (high_priority_mode) {
ticks_to_destroy = base_ticks;
num_iterations = static_cast<size_t>(base_iterations * 1.5);
} else {
ticks_to_destroy = base_ticks * 2;
num_iterations = base_iterations;
}
};
const auto Cleanup = [this, &num_iterations, &high_priority_mode,
&aggressive_mode](ImageId image_id) {
const auto Cleanup = [this, &num_iterations, &high_priority_mode, &aggressive_mode,
&emergency_mode, &bytes_freed, sparse_priority](ImageId image_id) {
if (num_iterations == 0) {
return true;
}
--num_iterations;
auto& image = slot_images[image_id];
// Skip images being decoded
if (True(image.flags & ImageFlagBits::IsDecoding)) {
// This image is still being decoded, deleting it will invalidate the slot
// used by the async decoder thread.
return false;
}
if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
return false;
// FIXED: VRAM leak prevention - Prioritize sparse textures if enabled
const bool is_sparse = True(image.flags & ImageFlagBits::Sparse);
const u64 image_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
const bool is_large = image_size >= LARGE_TEXTURE_THRESHOLD;
// Skip costly loads unless aggressive/emergency mode, unless it's a large sparse texture
if (!aggressive_mode && !emergency_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
if (!(sparse_priority && is_sparse && image_size >= SPARSE_EVICTION_PRIORITY_THRESHOLD)) {
return false;
}
}
const bool must_download =
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
if (!high_priority_mode && must_download) {
// Skip downloads unless high priority or emergency
if (!high_priority_mode && !emergency_mode && must_download) {
return false;
}
// Perform download if needed
if (must_download) {
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FullDownloadCopies(image.info);
@@ -112,16 +224,29 @@ void TextureCache<P>::RunGarbageCollector() {
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
swizzle_data_buffer);
}
// Track eviction statistics
bytes_freed += Common::AlignUp(image_size, 1024);
if (is_sparse) {
sparse_texture_memory -= Common::AlignUp(image_size, 1024);
--sparse_texture_count;
}
if (is_large) {
large_texture_memory -= Common::AlignUp(image_size, 1024);
}
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
// Adjust mode based on remaining memory pressure
if (total_used_memory < critical_memory) {
if (aggressive_mode) {
// Sink the aggresiveness.
if (aggressive_mode || emergency_mode) {
num_iterations >>= 2;
aggressive_mode = false;
emergency_mode = false;
return false;
}
if (high_priority_mode && total_used_memory < expected_memory) {
@@ -132,26 +257,80 @@ void TextureCache<P>::RunGarbageCollector() {
return false;
};
// Try to remove anything old enough and not high priority.
Configure(false);
// FIXED: VRAM leak prevention - First pass: evict sparse textures if priority enabled
if (sparse_priority && sparse_texture_memory > 0 && total_used_memory >= expected_memory) {
Configure(false, false);
// Target sparse textures specifically
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, [this, &Cleanup](ImageId image_id) {
auto& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::Sparse)) {
return Cleanup(image_id);
}
return false;
});
}
// Normal pass: remove anything old enough
Configure(false, false);
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
// If pressure is still too high, prune aggressively.
// Aggressive pass if still above critical
if (total_used_memory >= critical_memory) {
Configure(true);
Configure(true, false);
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
}
// FIXED: VRAM leak prevention - Emergency pass if still above emergency threshold
if (total_used_memory >= static_cast<u64>(vram_limit_bytes * VRAM_USAGE_EMERGENCY_THRESHOLD)) {
Configure(true, true);
emergency_gc_triggered = true;
LOG_WARNING(Render_Vulkan, "VRAM Emergency GC triggered: usage={}MB, limit={}MB",
total_used_memory / 1_MiB, vram_limit_bytes / 1_MiB);
lru_cache.ForEachItemBelow(frame_tick, Cleanup); // Evict everything below current frame
}
// Update statistics
evicted_this_frame += bytes_freed;
evicted_total += bytes_freed;
// FIXED: VRAM leak prevention - Log VRAM usage if enabled
if (Settings::values.log_vram_usage.GetValue() && bytes_freed > 0) {
LOG_INFO(Render_Vulkan,
"VRAM GC: evicted {}MB this frame, total={}MB, usage={}MB/{}MB ({:.1f}%)",
bytes_freed / 1_MiB, evicted_total / 1_MiB, total_used_memory / 1_MiB,
vram_limit_bytes / 1_MiB,
(static_cast<f32>(total_used_memory) / vram_limit_bytes) * 100.0f);
}
}
template <class P>
void TextureCache<P>::TickFrame() {
// FIXED: VRAM leak prevention - Enhanced frame tick with VRAM monitoring
// Reset emergency flag at start of frame
emergency_gc_triggered = false;
// If we can obtain the memory info, use it instead of the estimate.
if (runtime.CanReportMemoryUsage()) {
total_used_memory = runtime.GetDeviceMemoryUsage();
}
if (total_used_memory > minimum_memory) {
// FIXED: VRAM leak prevention - Check if GC should run based on settings
const auto gc_level = Settings::values.gc_aggressiveness.GetValue();
const bool should_gc = gc_level != Settings::GCAggressiveness::Off &&
(total_used_memory > minimum_memory ||
total_used_memory >= static_cast<u64>(vram_limit_bytes * VRAM_USAGE_WARNING_THRESHOLD));
if (should_gc) {
RunGarbageCollector();
}
// FIXED: VRAM leak prevention - Force additional GC if still above critical after normal GC
if (total_used_memory >= critical_memory && gc_level != Settings::GCAggressiveness::Off) {
// Run GC again if we're still above critical
RunGarbageCollector();
}
sentenced_images.Tick();
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
@@ -166,6 +345,183 @@ void TextureCache<P>::TickFrame() {
}
async_buffers_death_ring.clear();
}
// FIXED: VRAM leak prevention - Periodic VRAM usage logging
if (Settings::values.log_vram_usage.GetValue() && (frame_tick % 300 == 0)) {
const f32 usage_ratio = vram_limit_bytes > 0
? static_cast<f32>(total_used_memory) / vram_limit_bytes
: 0.0f;
LOG_INFO(Render_Vulkan,
"VRAM Status: {}MB/{}MB ({:.1f}%), textures={}, sparse={}, evicted_total={}MB",
total_used_memory / 1_MiB, vram_limit_bytes / 1_MiB, usage_ratio * 100.0f,
texture_count, sparse_texture_count, evicted_total / 1_MiB);
}
}
// FIXED: VRAM leak prevention - Implementation of new VRAM management methods
template <class P>
void TextureCache<P>::ForceEmergencyGC() {
LOG_WARNING(Render_Vulkan, "Force emergency GC triggered: usage={}MB, limit={}MB",
total_used_memory / 1_MiB, vram_limit_bytes / 1_MiB);
emergency_gc_triggered = true;
u64 bytes_freed = 0;
// Evict 10% of textures immediately, prioritizing sparse and large textures
const u64 target_bytes = total_used_memory / 10;
bytes_freed += EvictSparseTexturesPriority(target_bytes / 2);
bytes_freed += EvictToFreeMemory(target_bytes - bytes_freed);
evicted_this_frame += bytes_freed;
evicted_total += bytes_freed;
LOG_INFO(Render_Vulkan, "Emergency GC freed {}MB", bytes_freed / 1_MiB);
}
template <class P>
typename TextureCache<P>::VRAMStats TextureCache<P>::GetVRAMStats() const noexcept {
const f32 usage_ratio = vram_limit_bytes > 0
? static_cast<f32>(total_used_memory) / vram_limit_bytes
: 0.0f;
return VRAMStats{
.total_used_bytes = total_used_memory,
.texture_bytes = total_used_memory - sparse_texture_memory,
.sparse_texture_bytes = sparse_texture_memory,
.evicted_this_frame = evicted_this_frame,
.evicted_total = evicted_total,
.texture_count = texture_count,
.sparse_texture_count = sparse_texture_count,
.usage_ratio = usage_ratio,
};
}
template <class P>
void TextureCache<P>::SetVRAMLimit(u64 limit_bytes) {
vram_limit_bytes = limit_bytes;
// Recalculate thresholds
const auto gc_level = Settings::values.gc_aggressiveness.GetValue();
f32 expected_ratio = 0.6f;
f32 critical_ratio = 0.8f;
switch (gc_level) {
case Settings::GCAggressiveness::Off:
expected_ratio = 0.95f;
critical_ratio = 0.99f;
break;
case Settings::GCAggressiveness::Light:
expected_ratio = 0.75f;
critical_ratio = 0.90f;
break;
case Settings::GCAggressiveness::Moderate:
expected_ratio = 0.60f;
critical_ratio = 0.80f;
break;
case Settings::GCAggressiveness::Heavy:
expected_ratio = 0.50f;
critical_ratio = 0.70f;
break;
case Settings::GCAggressiveness::Extreme:
expected_ratio = 0.40f;
critical_ratio = 0.60f;
break;
}
expected_memory = static_cast<u64>(vram_limit_bytes * expected_ratio);
critical_memory = static_cast<u64>(vram_limit_bytes * critical_ratio);
minimum_memory = static_cast<u64>(vram_limit_bytes * 0.25f);
LOG_INFO(Render_Vulkan, "VRAM limit updated: {}MB, expected={}MB, critical={}MB",
vram_limit_bytes / 1_MiB, expected_memory / 1_MiB, critical_memory / 1_MiB);
}
template <class P>
bool TextureCache<P>::IsVRAMPressureHigh() const noexcept {
return total_used_memory >= expected_memory;
}
template <class P>
bool TextureCache<P>::IsVRAMPressureCritical() const noexcept {
return total_used_memory >= static_cast<u64>(vram_limit_bytes * VRAM_USAGE_EMERGENCY_THRESHOLD);
}
template <class P>
u64 TextureCache<P>::EvictToFreeMemory(u64 target_bytes) {
u64 bytes_freed = 0;
const u64 start_memory = total_used_memory;
lru_cache.ForEachItemBelow(frame_tick, [this, &bytes_freed, target_bytes](ImageId image_id) {
if (bytes_freed >= target_bytes) {
return true;
}
auto& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::IsDecoding)) {
return false;
}
const u64 image_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id, false);
bytes_freed += Common::AlignUp(image_size, 1024);
return false;
});
return start_memory - total_used_memory;
}
template <class P>
u64 TextureCache<P>::EvictSparseTexturesPriority(u64 target_bytes) {
if (!Settings::values.sparse_texture_priority_eviction.GetValue()) {
return 0;
}
u64 bytes_freed = 0;
// Collect sparse textures and sort by size (largest first)
std::vector<std::pair<ImageId, u64>> sparse_textures;
lru_cache.ForEachItemBelow(frame_tick, [this, &sparse_textures](ImageId image_id) {
auto& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::Sparse) &&
False(image.flags & ImageFlagBits::IsDecoding)) {
const u64 size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
sparse_textures.emplace_back(image_id, size);
}
return false;
});
// Sort by size descending (largest first for priority eviction)
std::sort(sparse_textures.begin(), sparse_textures.end(),
[](const auto& a, const auto& b) { return a.second > b.second; });
for (const auto& [image_id, size] : sparse_textures) {
if (bytes_freed >= target_bytes) {
break;
}
auto& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id, false);
bytes_freed += Common::AlignUp(size, 1024);
--sparse_texture_count;
sparse_texture_memory -= Common::AlignUp(size, 1024);
}
if (bytes_freed > 0) {
LOG_DEBUG(Render_Vulkan, "Sparse texture priority eviction freed {}MB", bytes_freed / 1_MiB);
}
return bytes_freed;
}
template <class P>
@@ -2018,7 +2374,22 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
True(image.flags & ImageFlagBits::Converted)) {
tentative_size = TranscodedAstcSize(tentative_size, image.info.format);
}
total_used_memory += Common::AlignUp(tentative_size, 1024);
const u64 aligned_size = Common::AlignUp(tentative_size, 1024);
total_used_memory += aligned_size;
// FIXED: VRAM leak prevention - Track texture statistics
++texture_count;
const bool is_sparse = True(image.flags & ImageFlagBits::Sparse);
const bool is_large = aligned_size >= LARGE_TEXTURE_THRESHOLD;
if (is_sparse) {
sparse_texture_memory += aligned_size;
++sparse_texture_count;
}
if (is_large) {
large_texture_memory += aligned_size;
}
image.lru_index = lru_cache.Insert(image_id, frame_tick);
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {

View File

@@ -113,6 +113,14 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
static constexpr size_t GC_EMERGENCY_COUNTS = 2;
// FIXED: VRAM leak prevention - Enhanced eviction constants
static constexpr size_t SPARSE_EVICTION_PRIORITY_THRESHOLD = 4_MiB; // Prioritize sparse textures > 4MB
static constexpr size_t LARGE_TEXTURE_THRESHOLD = 16_MiB; // Large texture threshold
static constexpr u64 DEFAULT_EVICTION_FRAMES = 2; // Default frames before eviction
static constexpr f32 VRAM_USAGE_WARNING_THRESHOLD = 0.75f; // 75% - start warning
static constexpr f32 VRAM_USAGE_CRITICAL_THRESHOLD = 0.85f; // 85% - aggressive GC
static constexpr f32 VRAM_USAGE_EMERGENCY_THRESHOLD = 0.95f; // 95% - emergency eviction
using Runtime = typename P::Runtime;
using Image = typename P::Image;
using ImageAlloc = typename P::ImageAlloc;
@@ -296,6 +304,42 @@ public:
RunGarbageCollector();
}
// FIXED: VRAM leak prevention - Enhanced public interface for VRAM management
/// Force emergency garbage collection when VRAM pressure is critical
void ForceEmergencyGC();
/// Get current VRAM usage statistics
struct VRAMStats {
u64 total_used_bytes;
u64 texture_bytes;
u64 sparse_texture_bytes;
u64 evicted_this_frame;
u64 evicted_total;
u32 texture_count;
u32 sparse_texture_count;
f32 usage_ratio; // Current usage / limit
};
[[nodiscard]] VRAMStats GetVRAMStats() const noexcept;
/// Get configured VRAM limit in bytes
[[nodiscard]] u64 GetVRAMLimit() const noexcept { return vram_limit_bytes; }
/// Set VRAM limit (0 = auto-detect)
void SetVRAMLimit(u64 limit_bytes);
/// Check if VRAM pressure is high
[[nodiscard]] bool IsVRAMPressureHigh() const noexcept;
/// Check if VRAM pressure is critical (emergency)
[[nodiscard]] bool IsVRAMPressureCritical() const noexcept;
/// Evict oldest textures to free target_bytes of VRAM
u64 EvictToFreeMemory(u64 target_bytes);
/// Evict sparse textures with priority (large unmapped pages first)
u64 EvictSparseTexturesPriority(u64 target_bytes);
/// Fills image_view_ids in the image views in indices
template <bool has_blacklists>
void FillImageViews(DescriptorTable<TICEntry>& table,
@@ -450,6 +494,18 @@ public:
u64 expected_memory;
u64 critical_memory;
// FIXED: VRAM leak prevention - Enhanced memory tracking
u64 vram_limit_bytes = 0; // Configured VRAM limit (0 = auto)
u64 sparse_texture_memory = 0; // Memory used by sparse textures
u64 large_texture_memory = 0; // Memory used by large textures (>16MB)
u64 evicted_this_frame = 0; // Bytes evicted in current frame
u64 evicted_total = 0; // Total bytes evicted since start
u32 gc_runs_this_frame = 0; // Number of GC runs this frame
u32 texture_count = 0; // Total texture count
u32 sparse_texture_count = 0; // Sparse texture count
u64 last_gc_frame = 0; // Last frame GC was run
bool emergency_gc_triggered = false; // Emergency GC flag
struct BufferDownload {
GPUVAddr address;
size_t size;